Untitled

 avatar
unknown
plain_text
a year ago
15 kB
24
Indexable
WITH
dwh_delta AS (
	-- Шаг 2
SELECT 	
			dc.craftsman_id,-- напишите код здесь AS craftsman_id,
			dc.craftsman_name,-- напишите код здесь AS craftsman_name,
			dc.craftsman_address,-- напишите код здесь AS craftsman_address,
			dc.craftsman_birthday,-- напишите код здесь AS craftsman_birthday,
			dc.craftsman_email,-- напишите код здесь AS craftsman_email,
			fo.order_id,-- напишите код здесь AS order_id,
			dp.product_id,-- напишите код здесь AS product_id,
			dp.product_price,-- напишите код здесь AS product_price,
			dp.product_type,-- напишите код здесь AS product_type,
			date_part('year', age(customer_birthday)) as customer_age,-- напишите код здесь AS customer_age,
			(fo.order_completion_date - fo.order_created_date) as diff_order_date,-- напишите код здесь AS diff_order_date, 
			fo.order_status,-- напишите код здесь AS order_status,
			to_char(fo.order_created_date, 'yyyy-mm') as report_period,-- напишите код здесь AS report_period,
			(case when crd.craftsman_id is null then null else 1 end) as exist_craftsman_id,-- напишите код здесь AS exist_craftsman_id,
			dc.load_dttm as craftsman_load_dttm,-- напишите код здесь AS craftsman_load_dttm,
			dcs.load_dttm as customers_load_dttm,-- напишите код здесь AS customers_load_dttm,
			dp.load_dttm as products_load_dttm-- напишите код здесь AS products_load_dttm
			FROM dwh.f_order fo 
				INNER JOIN dwh.d_craftsman dc on dc.craftsman_id = fo.craftsman_id -- напишите код здесь 
				INNER JOIN dwh.d_customer dcs ON dcs.customer_id = fo.customer_id-- напишите код здесь 
				INNER JOIN dwh.d_product dp ON dp.product_id = fo.product_id-- напишите код здесь 
				LEFT JOIN dwh.craftsman_report_datamart crd on crd.craftsman_id = fo.craftsman_id and fo.load_dttm > (SELECT COALESCE(MAX(load_dttm),'1900-01-01') FROM dwh.load_dates_craftsman_report_datamart) -- напишите код здесь 
					WHERE 
						  dc.load_dttm > (SELECT COALESCE(MAX(load_dttm),'1900-01-01') FROM dwh.load_dates_craftsman_report_datamart) or-- напишите код здесь  OR
						  dcs.load_dttm > (SELECT COALESCE(MAX(load_dttm),'1900-01-01') FROM dwh.load_dates_craftsman_report_datamart) or-- напишите код здесь  OR
						  dp.load_dttm > (SELECT COALESCE(MAX(load_dttm),'1900-01-01') FROM dwh.load_dates_craftsman_report_datamart)-- напишите код здесь;
),
dwh_update_delta AS (
	-- Шаг 3
   SELECT 	craftsman_id 
   FROM dwh_delta dd 
   where exist_craftsman_id is null
),
dwh_delta_insert_result AS (
	-- Шаг 4
     SELECT  
			T4.craftsman_id AS craftsman_id,
			T4.craftsman_name AS craftsman_name,
			T4.craftsman_address AS craftsman_address,
			T4.craftsman_birthday AS craftsman_birthday,
			T4.craftsman_email AS craftsman_email,
			T4.craftsman_money AS craftsman_money,
			T4.platform_money AS platform_money,
			T4.count_order AS count_order,
			T4.avg_price_order AS avg_price_order,
			T4.avg_age_customer AS avg_age_customer,
			T4.product_type AS top_product_category,
			T4.median_time_order_completed AS median_time_order_completed,
			T4.count_order_created AS count_order_created,
			T4.count_order_in_progress AS count_order_in_progress,
			T4.count_order_delivery AS count_order_delivery,
			T4.count_order_done AS count_order_done,
			T4.count_order_not_done AS count_order_not_done,
			T4.report_period AS report_period 
			FROM (
				SELECT 	-- в этой выборке объединяем две внутренние выборки по расчёту столбцов витрины и применяем оконную функцию, чтобы определить самую популярную категорию товаров
						*,
						RANK() OVER(PARTITION BY T2.craftsman_id ORDER BY count_product DESC) AS rank_count_product 
						FROM ( 
							SELECT -- в этой выборке делаем расчёт по большинству столбцов, так как все они требуют одной и той же группировки, кроме столбца с самой популярной категорией товаров у мастера. Для этого столбца сделаем отдельную выборку с другой группировкой и выполним join
								T1.craftsman_id,-- напишите код здесь,
								T1.craftsman_name,-- напишите код здесь,
								T1.craftsman_address,-- напишите код здесь,
								T1.craftsman_birthday,-- напишите код здесь,
								T1.craftsman_email,-- напишите код здесь,
								sum(product_price) * 0.9 as craftsman_money,-- напишите код здесь,
								sum(product_price) * 0.1 as platform_money,-- напишите код здесь,
								count(order_id) as count_order,-- напишите код здесь,
								avg(product_price) as avg_price_order,-- напишите код здесь,
								avg(customer_age) as avg_age_customer,-- напишите код здесь,
								percentile_cont(0.5) WITHIN GROUP(ORDER BY diff_order_date) as median_time_order_completed,-- напишите код здесь,
								sum(CASE WHEN order_status = 'created' THEN 1 ELSE 0 END) as count_order_created,-- напишите код здесь,
								sum(CASE WHEN order_status = 'in progress' THEN 1 ELSE 0 END) as count_order_in_progress,-- напишите код здесь,
								sum(CASE WHEN order_status = 'delivery' THEN 1 ELSE 0 END) as count_order_delivery,-- напишите код здесь,
								sum(CASE WHEN order_status = 'done' THEN 1 ELSE 0 END) as count_order_done,-- напишите код здесь,
								sum(CASE WHEN order_status != 'done' THEN 1 ELSE 0 END) as count_order_not_done,-- напишите код здесь,
								T1.report_period-- напишите код здесь
								FROM dwh_delta AS T1
									WHERE T1.exist_craftsman_id is null
										GROUP BY T1.craftsman_id, T1.craftsman_name, T1.craftsman_address, T1.craftsman_birthday, T1.craftsman_email, T1.report_period
							) AS T2 
								INNER JOIN (
									SELECT 	-- эта выборка поможет определить самый популярный товар у мастера. Это выборка не делается в предыдущем запросе, так как нужна другая группировка. Для данных этой выборки можно применить оконную функцию, которая и покажет самую популярную категорию товаров у мастера
											count(product_id) as count_product,-- напишите код здесь,
											dd.craftsman_id as craftsman_id_for_product_type,-- напишите код здесь,
											dd.product_type-- напишите код здесь
											FROM dwh_delta AS dd
												GROUP BY dd.craftsman_id, dd.product_type
													ORDER BY count_product DESC) AS T3 ON T2.craftsman_id = T3.craftsman_id_for_product_type
				) AS T4 WHERE T4.rank_count_product = 1 ORDER BY report_period -- условие помогает оставить в выборке первую по популярности категорию товаров
),
dwh_delta_update_result AS (
	-- Шаг 5
    SELECT 
			T4.craftsman_id AS craftsman_id,
			T4.craftsman_name AS craftsman_name,
			T4.craftsman_address AS craftsman_address,
			T4.craftsman_birthday AS craftsman_birthday,
			T4.craftsman_email AS craftsman_email,
			T4.craftsman_money AS craftsman_money,
			T4.platform_money AS platform_money,
			T4.count_order AS count_order,
			T4.avg_price_order AS avg_price_order,
			T4.avg_age_customer AS avg_age_customer,
			T4.product_type AS top_product_category,
			T4.median_time_order_completed AS median_time_order_completed,
			T4.count_order_created AS count_order_created,
			T4.count_order_in_progress AS count_order_in_progress,
			T4.count_order_delivery AS count_order_delivery, 
			T4.count_order_done AS count_order_done, 
			T4.count_order_not_done AS count_order_not_done,
			T4.report_period AS report_period 
			FROM (
				SELECT 	-- в этой выборке объединяем две внутренние выборки по расчёту столбцов витрины и применяем оконную функцию, чтобы определить самую популярную категорию товаров
						*,
						RANK() OVER(PARTITION BY T2.craftsman_id ORDER BY count_product DESC) AS rank_count_product 
						FROM (
							SELECT -- в этой выборке делаем расчёт по большинству столбцов, так как все они требуют одной и той же группировки, кроме столбца с самой популярной категорией товаров у мастера. Для этого столбца сделаем отдельную выборку с другой группировкой и выполним join
								T1.craftsman_id AS craftsman_id,
								T1.craftsman_name AS craftsman_name,
								T1.craftsman_address AS craftsman_address,
								T1.craftsman_birthday AS craftsman_birthday,
								T1.craftsman_email AS craftsman_email,
								SUM(T1.product_price) - (SUM(T1.product_price) * 0.1) AS craftsman_money,
								SUM(T1.product_price) * 0.1 AS platform_money,
								COUNT(order_id) AS count_order,
								AVG(T1.product_price) AS avg_price_order,
								AVG(T1.customer_age) AS avg_age_customer,
								PERCENTILE_CONT(0.5) WITHIN GROUP(ORDER BY diff_order_date) AS median_time_order_completed,
								SUM(CASE WHEN T1.order_status = 'created' THEN 1 ELSE 0 END) AS count_order_created, 
								SUM(CASE WHEN T1.order_status = 'in progress' THEN 1 ELSE 0 END) AS count_order_in_progress, 
								SUM(CASE WHEN T1.order_status = 'delivery' THEN 1 ELSE 0 END) AS count_order_delivery, 
								SUM(CASE WHEN T1.order_status = 'done' THEN 1 ELSE 0 END) AS count_order_done, 
								SUM(CASE WHEN T1.order_status != 'done' THEN 1 ELSE 0 END) AS count_order_not_done,
								T1.report_period AS report_period
								FROM (
									SELECT 	-- в этой выборке достаём из DWH обновлённые или новые данные по мастерам, которые уже присутствуют в витрине
											dc.craftsman_id AS craftsman_id,
											dc.craftsman_name AS craftsman_name,
											dc.craftsman_address AS craftsman_address,
											dc.craftsman_birthday AS craftsman_birthday,
											dc.craftsman_email AS craftsman_email,
											fo.order_id AS order_id,
											dp.product_id AS product_id,
											dp.product_price AS product_price,
											dp.product_type AS product_type,
											date_part('year', age(customer_birthday)) as customer_age,-- напишите код здесь AS customer_age,
											(order_completion_date - order_created_date) AS diff_order_date,-- напишите код здесь AS diff_order_date,
											fo.order_status AS order_status, 
											to_char(fo.order_created_date, 'yyyy-mm')  AS report_period-- напишите код здесь AS report_period
											FROM dwh.f_order fo 
												INNER JOIN dwh.d_craftsman dc ON fo.craftsman_id = dc.craftsman_id 
												INNER JOIN dwh.d_customer dcs ON fo.customer_id = dcs.customer_id 
												INNER JOIN dwh.d_product dp ON fo.product_id = dp.product_id
												INNER JOIN dwh_update_delta ud ON fo.craftsman_id = ud.craftsman_id
								) AS T1
									GROUP BY 1,2,3,4,5,17-- напишите код здесь
							) AS T2 
								INNER JOIN (
									SELECT 	-- эта выборка поможет определить самый популярный товар у мастера. Выборка не делается в предыдущем запросе, так как нужна другая группировка. Для данных этой выборки можно применить оконную функцию, которая и покажет самую популярную категорию товаров у мастера
											dd.craftsman_id as craftsman_id_for_product_type,-- напишите код здесь AS craftsman_id_for_product_type, 
											dd.product_type,-- напишите код здесь, 
											count(product_id) as count_product-- напишите код здесь AS count_product
											FROM dwh_delta AS dd
												GROUP BY dd.craftsman_id, dd.product_type
													ORDER BY count_product DESC) AS T3 ON T2.craftsman_id = T3.craftsman_id_for_product_type
				) AS T4 WHERE T4.rank_count_product = 1 ORDER BY report_period
),
insert_delta AS (
	-- Шаг 6

 select craftsman_id,craftsman_name,craftsman_address,craftsman_birthday,craftsman_email,craftsman_money,platform_money,count_order,avg_price_order,avg_age_customer,median_time_order_completed,top_product_category,count_order_created,count_order_in_progress,count_order_delivery,count_order_done,count_order_not_done,report_period
from dwh_delta_insert_result 

),
update_delta AS (
	-- Шаг 7
    select craftsman_id,
			craftsman_name,
			craftsman_address,
			craftsman_birthday,
			craftsman_email,
			craftsman_money,
			platform_money,
			count_order,
			avg_price_order,
			avg_age_customer,
			median_time_order_completed,
			top_product_category,
			count_order_created,
			count_order_in_progress,
			count_order_delivery,
			count_order_done,
			count_order_not_done,
			report_period 
    from (SELECT 
			craftsman_id,
			craftsman_name,
			craftsman_address,
			craftsman_birthday,
			craftsman_email,
			craftsman_money,
			platform_money,
			count_order,
			avg_price_order,
			avg_age_customer,
			median_time_order_completed,
			top_product_category,
			count_order_created,
			count_order_in_progress,
			count_order_delivery,
			count_order_done,
			count_order_not_done,
			report_period 
		FROM dwh_delta_update_result) updates
),
insert_load_date AS (
	-- Шаг 8
    select GREATEST(COALESCE(MAX(craftsman_load_dttm),max(customers_load_dttm),max(products_load_dttm),NOW()))
    FROM dwh_delta
)
SELECT 'increment datamart';
Editor is loading...
Leave a Comment