Untitled
unknown
plain_text
a year ago
8.4 kB
5
Indexable
spark.read.table("WHS.MasterData_StoreChecking") .withColumn("CheckInAt", col("CheckInAt") + expr("INTERVAL 7 HOURS")) .filter(col("StoreTypeId").isin(17, 18, 23)) .createOrReplaceTempView("storechecking") spark.read.table("WHS.SaleChannel_Organization").createOrReplaceTempView("organization") val appuser = spark.read.table("whs.masterdata_appuser") appuser.createOrReplaceTempView("appuser") val td = spark.read.format("csv").option("multiLine", "true").option("header", "true").load("/raw_zone/excel_form/upload/mapping_to_doi/to_doi_saleout.csv") td.createOrReplaceTempView("td") val store = spark.read.table("WHS.MasterData_DMSStore") .filter(col("StoreTypeId").isin(17, 18, 23) && col("Code").rlike("^(TV|TN|TG|NT|CT|DN|BH|HCM)") && col("StatusId")=== 1 && col("DeletedAt").isNull ) .withColumn("CreatedAt", col("CreatedAt") + expr("INTERVAL 7 HOURS")) .withColumn("CreateDate", date_format(col("CreatedAt"), "yyyy-MM-dd")) .withColumn("StartDate", col("StartDate") + expr("INTERVAL 7 HOURS")) .withColumn("EndDate", col("EndDate") + expr("INTERVAL 7 HOURS")) .as("s") .select( col("s.Id"), col("s.CreatedAt"), col("s.CreateDate"), col("s.Code"), col("s.StoreTypeId"), col("s.StartDate"), col("s.EndDate"), col("s.CreatorId"), col("DeletedAt") ) // store.createOrReplaceTempView("store") val min_date = store.select( date_format(min(col("CreatedAt")), "yyyy-MM-dd") ).distinct() .first() .getString(0) // val max_date = AppConfig.date.substring(0, 4) + "-" + AppConfig.date.substring(4, 6) + "-" + AppConfig.date.substring(6) val max_date = "2024-07-01" val date = spark.read.format("avro").load("/raw_zone/full_load/date") .filter(col("date").between(min_date, max_date)) .withColumn("Month", month(col("date")).as("Month")) .withColumn("Quarter", quarter(col("date")).as("Month")) .withColumn("Year", year(col("date")).as("Month")) .withColumn("dt", date_format(col("date"), "yyyy-MM-dd")) .select( last_day(col("dt")).as("dt"), col("Month"), col("Quarter"), col("Year") ).distinct val date_quarter = date.filter(col("Month").isin(3, 6, 9, 12)) val date_year = date.filter(col("Month") === 12) val thang = date.as("d").join( store.as("s"), col("d.dt") >= col("s.CreateDate") && to_date(col("d.dt")) >= to_date(store("StartDate")) && to_date(col("d.dt")) < to_date(store("EndDate")) && col("s.DeletedAt").isNull,"left" ) .select( col("d.Month"), col("d.Quarter"), col("d.Year"), when(col("s.Code").like("TV%"), "BH1") .when(col("s.Code").rlike("^(TN|TG|NT|CT|DN|BH|HCM)"), "BH2") .as("SaleDepartment"), split(col("s.Code"), "\\.")(0).as("Organization"), col("s.StoreTypeId"), col("s.Id").as("StoreId"), col("s.CreatedAt"), col("s.StartDate"), col("s.EndDate"), col("s.CreatorId").as("SaleEmployeeId") ) val quy = date_quarter.as("d").join( store.as("s"), col("d.dt") >= col("s.CreateDate") && to_date(col("d.dt")) >= to_date(store("StartDate")) && to_date(col("d.dt")) < to_date(store("EndDate")), "left" ).select( lit(null).as("Month"), col("d.Quarter"), col("d.Year"), when(col("s.Code").like("TV%"), "BH1") .when(col("s.Code").rlike("^(TN|TG|NT|CT|DN|BH|HCM)"), "BH2") .as("SaleDepartment"), split(col("s.Code"), "\\.")(0).as("Organization"), col("s.StoreTypeId"), col("s.Id").as("StoreId"), col("s.CreatedAt"), col("s.StartDate"), col("s.EndDate"), col("s.CreatorId").as("SaleEmployeeId") ) val nam = date_year.as("d").join( store.as("s"), col("d.dt") >= col("s.CreateDate") && to_date(col("d.dt")) >= to_date(store("StartDate")) && to_date(col("d.dt")) < to_date(store("EndDate")), "left" ) .select( lit(null).as("Month"), lit(null).as("Quarter"), col("d.Year"), when(col("s.Code").like("TV%"), "BH1") .when(col("s.Code").rlike("^(TN|TG|NT|CT|DN|BH|HCM)"), "BH2") .as("SaleDepartment"), split(col("s.Code"), "\\.")(0).as("Organization"), col("s.StoreTypeId"), col("s.Id").as("StoreId"), col("s.CreatedAt"), col("s.StartDate"), col("s.EndDate"), col("s.CreatorId").as("SaleEmployeeId") ) val temp = thang.unionByName(quy).unionByName(nam) val all_store = temp.join( appuser, temp("SaleEmployeeId") === appuser("Id"), "left" ).join( td,td("Username") === appuser("Username"), "left" ).select( temp("Month"), temp("Quarter"), temp("Year"), temp("SaleDepartment"), temp("Organization"), temp("StoreTypeId"), temp("StoreId"), temp("CreatedAt"), temp("StartDate"), temp("EndDate"), td("Team_Name") ) val ghe_tham = spark.sql( """ select date_format(p.CheckInAt, "yyyy-MM") CheckInAt, year(p.CheckInAt) Year, quarter(p.CheckInAt) Quarter, month(p.CheckInAt) Month, p.StoreId, case when o.Code in ("TV01", "TV02", "TV03") then 'BH1' when o.Code rlike '^(TN|TG|NT|CT|DN|BH|HCM)' then 'BH2' end SaleDepartment, o.Code Organization, StoreTypeId, doi.Team_Name from storechecking p left join organization o on p.OrganizationId = o.Id LEFT JOIN appuser au on p.SaleEmployeeId = au.Id LEFT JOIN td doi on au.Username = doi.Username where o.Code in ('BH', 'TG', 'DN', 'TN', 'NT', 'CT', 'HCM') or o.Code like 'TV0%' """) val result = all_store.as("s") .join( ghe_tham.as("gt"), col("s.StoreId") === col("gt.StoreId") and col("s.Month") === col("gt.Month") and col("s.Quarter") === col("gt.Quarter") and col("s.Year") === col("gt.Year"), "full" ).select( coalesce(col("s.Month"), col("gt.Month")).as("Month"), coalesce(col("s.Quarter"), col("gt.Quarter")).as("Quarter"), coalesce(col("s.Year"), col("gt.Year")).as("Year"), coalesce(col("s.SaleDepartment"), col("gt.SaleDepartment")).as("SaleDepartment"), coalesce(col("s.Organization"), col("gt.Organization")).as("Organization"), coalesce(col("s.StoreTypeId"), col("gt.StoreTypeId")).as("StoreTypeId"), coalesce(col("s.StoreId"), col("gt.StoreId")).as("StoreId"), coalesce(col("gt.Team_Name"), col("s.Team_Name")).as("Team_Name"), when(col("gt.CheckInAt").isNull, 0).otherwise(1).as("is_ghe_tham") ) .groupBy( "Month", "Quarter", "Year", "SaleDepartment", "Organization", "StoreId", "StoreTypeId", "Team_Name" ).agg( sum(col("is_ghe_tham")).as("num_visit") ) result.createOrReplaceTempView("dai_ly_ghe_tham") val res = spark.sql( """ select -- view_loai_ghe_tham.Team_Name, view_loai_ghe_tham.Loai_ghe_tham, count(view_loai_ghe_tham.StoreId) from (select (case when group_ghe_tham.tong_luot_ghe_tham = 0 then 'Chưa được ghé thăm' when group_ghe_tham.tong_luot_ghe_tham = 1 then 'Ghé thăm 1 lần' when group_ghe_tham.tong_luot_ghe_tham = 2 then 'Ghé thăm 2 lần' when group_ghe_tham.tong_luot_ghe_tham = 3 then 'Ghé thăm 3 lần' when group_ghe_tham.tong_luot_ghe_tham > 3 then 'Ghé thăm > 3 lần' end) as Loai_ghe_tham, group_ghe_tham.StoreId, -- group_ghe_tham.Team_Name, group_ghe_tham.tong_luot_ghe_tham from (select dlgt.StoreId as StoreId, -- dlgt.Team_Name, sum(dlgt.num_visit) as tong_luot_ghe_tham from dai_ly_ghe_tham dlgt where dlgt.`Year` = 2024 and dlgt.`Month` = 1 and dlgt.SaleDepartment = 'BH2' and dlgt.StoreTypeId in (17, 18, 23) group by dlgt.StoreId -- , dlgt.Team_Name ) as group_ghe_tham ) as view_loai_ghe_tham group by view_loai_ghe_tham.Loai_ghe_tham """)
Editor is loading...
Leave a Comment