Untitled
unknown
plain_text
2 years ago
15 kB
27
Indexable
WITH
dwh_delta AS (
-- Шаг 2
SELECT
dc.craftsman_id,-- напишите код здесь AS craftsman_id,
dc.craftsman_name,-- напишите код здесь AS craftsman_name,
dc.craftsman_address,-- напишите код здесь AS craftsman_address,
dc.craftsman_birthday,-- напишите код здесь AS craftsman_birthday,
dc.craftsman_email,-- напишите код здесь AS craftsman_email,
fo.order_id,-- напишите код здесь AS order_id,
dp.product_id,-- напишите код здесь AS product_id,
dp.product_price,-- напишите код здесь AS product_price,
dp.product_type,-- напишите код здесь AS product_type,
date_part('year', age(customer_birthday)) as customer_age,-- напишите код здесь AS customer_age,
(fo.order_completion_date - fo.order_created_date) as diff_order_date,-- напишите код здесь AS diff_order_date,
fo.order_status,-- напишите код здесь AS order_status,
to_char(fo.order_created_date, 'yyyy-mm') as report_period,-- напишите код здесь AS report_period,
(case when crd.craftsman_id is null then null else 1 end) as exist_craftsman_id,-- напишите код здесь AS exist_craftsman_id,
dc.load_dttm as craftsman_load_dttm,-- напишите код здесь AS craftsman_load_dttm,
dcs.load_dttm as customers_load_dttm,-- напишите код здесь AS customers_load_dttm,
dp.load_dttm as products_load_dttm-- напишите код здесь AS products_load_dttm
FROM dwh.f_order fo
INNER JOIN dwh.d_craftsman dc on dc.craftsman_id = fo.craftsman_id -- напишите код здесь
INNER JOIN dwh.d_customer dcs ON dcs.customer_id = fo.customer_id-- напишите код здесь
INNER JOIN dwh.d_product dp ON dp.product_id = fo.product_id-- напишите код здесь
LEFT JOIN dwh.craftsman_report_datamart crd on crd.craftsman_id = fo.craftsman_id and fo.load_dttm > (SELECT COALESCE(MAX(load_dttm),'1900-01-01') FROM dwh.load_dates_craftsman_report_datamart) -- напишите код здесь
WHERE
dc.load_dttm > (SELECT COALESCE(MAX(load_dttm),'1900-01-01') FROM dwh.load_dates_craftsman_report_datamart) or-- напишите код здесь OR
dcs.load_dttm > (SELECT COALESCE(MAX(load_dttm),'1900-01-01') FROM dwh.load_dates_craftsman_report_datamart) or-- напишите код здесь OR
dp.load_dttm > (SELECT COALESCE(MAX(load_dttm),'1900-01-01') FROM dwh.load_dates_craftsman_report_datamart)-- напишите код здесь;
),
dwh_update_delta AS (
-- Шаг 3
SELECT craftsman_id
FROM dwh_delta dd
where exist_craftsman_id is null
),
dwh_delta_insert_result AS (
-- Шаг 4
SELECT
T4.craftsman_id AS craftsman_id,
T4.craftsman_name AS craftsman_name,
T4.craftsman_address AS craftsman_address,
T4.craftsman_birthday AS craftsman_birthday,
T4.craftsman_email AS craftsman_email,
T4.craftsman_money AS craftsman_money,
T4.platform_money AS platform_money,
T4.count_order AS count_order,
T4.avg_price_order AS avg_price_order,
T4.avg_age_customer AS avg_age_customer,
T4.product_type AS top_product_category,
T4.median_time_order_completed AS median_time_order_completed,
T4.count_order_created AS count_order_created,
T4.count_order_in_progress AS count_order_in_progress,
T4.count_order_delivery AS count_order_delivery,
T4.count_order_done AS count_order_done,
T4.count_order_not_done AS count_order_not_done,
T4.report_period AS report_period
FROM (
SELECT -- в этой выборке объединяем две внутренние выборки по расчёту столбцов витрины и применяем оконную функцию, чтобы определить самую популярную категорию товаров
*,
RANK() OVER(PARTITION BY T2.craftsman_id ORDER BY count_product DESC) AS rank_count_product
FROM (
SELECT -- в этой выборке делаем расчёт по большинству столбцов, так как все они требуют одной и той же группировки, кроме столбца с самой популярной категорией товаров у мастера. Для этого столбца сделаем отдельную выборку с другой группировкой и выполним join
T1.craftsman_id,-- напишите код здесь,
T1.craftsman_name,-- напишите код здесь,
T1.craftsman_address,-- напишите код здесь,
T1.craftsman_birthday,-- напишите код здесь,
T1.craftsman_email,-- напишите код здесь,
sum(product_price) * 0.9 as craftsman_money,-- напишите код здесь,
sum(product_price) * 0.1 as platform_money,-- напишите код здесь,
count(order_id) as count_order,-- напишите код здесь,
avg(product_price) as avg_price_order,-- напишите код здесь,
avg(customer_age) as avg_age_customer,-- напишите код здесь,
percentile_cont(0.5) WITHIN GROUP(ORDER BY diff_order_date) as median_time_order_completed,-- напишите код здесь,
sum(CASE WHEN order_status = 'created' THEN 1 ELSE 0 END) as count_order_created,-- напишите код здесь,
sum(CASE WHEN order_status = 'in progress' THEN 1 ELSE 0 END) as count_order_in_progress,-- напишите код здесь,
sum(CASE WHEN order_status = 'delivery' THEN 1 ELSE 0 END) as count_order_delivery,-- напишите код здесь,
sum(CASE WHEN order_status = 'done' THEN 1 ELSE 0 END) as count_order_done,-- напишите код здесь,
sum(CASE WHEN order_status != 'done' THEN 1 ELSE 0 END) as count_order_not_done,-- напишите код здесь,
T1.report_period-- напишите код здесь
FROM dwh_delta AS T1
WHERE T1.exist_craftsman_id is null
GROUP BY T1.craftsman_id, T1.craftsman_name, T1.craftsman_address, T1.craftsman_birthday, T1.craftsman_email, T1.report_period
) AS T2
INNER JOIN (
SELECT -- эта выборка поможет определить самый популярный товар у мастера. Это выборка не делается в предыдущем запросе, так как нужна другая группировка. Для данных этой выборки можно применить оконную функцию, которая и покажет самую популярную категорию товаров у мастера
count(product_id) as count_product,-- напишите код здесь,
dd.craftsman_id as craftsman_id_for_product_type,-- напишите код здесь,
dd.product_type-- напишите код здесь
FROM dwh_delta AS dd
GROUP BY dd.craftsman_id, dd.product_type
ORDER BY count_product DESC) AS T3 ON T2.craftsman_id = T3.craftsman_id_for_product_type
) AS T4 WHERE T4.rank_count_product = 1 ORDER BY report_period -- условие помогает оставить в выборке первую по популярности категорию товаров
),
dwh_delta_update_result AS (
-- Шаг 5
SELECT
T4.craftsman_id AS craftsman_id,
T4.craftsman_name AS craftsman_name,
T4.craftsman_address AS craftsman_address,
T4.craftsman_birthday AS craftsman_birthday,
T4.craftsman_email AS craftsman_email,
T4.craftsman_money AS craftsman_money,
T4.platform_money AS platform_money,
T4.count_order AS count_order,
T4.avg_price_order AS avg_price_order,
T4.avg_age_customer AS avg_age_customer,
T4.product_type AS top_product_category,
T4.median_time_order_completed AS median_time_order_completed,
T4.count_order_created AS count_order_created,
T4.count_order_in_progress AS count_order_in_progress,
T4.count_order_delivery AS count_order_delivery,
T4.count_order_done AS count_order_done,
T4.count_order_not_done AS count_order_not_done,
T4.report_period AS report_period
FROM (
SELECT -- в этой выборке объединяем две внутренние выборки по расчёту столбцов витрины и применяем оконную функцию, чтобы определить самую популярную категорию товаров
*,
RANK() OVER(PARTITION BY T2.craftsman_id ORDER BY count_product DESC) AS rank_count_product
FROM (
SELECT -- в этой выборке делаем расчёт по большинству столбцов, так как все они требуют одной и той же группировки, кроме столбца с самой популярной категорией товаров у мастера. Для этого столбца сделаем отдельную выборку с другой группировкой и выполним join
T1.craftsman_id AS craftsman_id,
T1.craftsman_name AS craftsman_name,
T1.craftsman_address AS craftsman_address,
T1.craftsman_birthday AS craftsman_birthday,
T1.craftsman_email AS craftsman_email,
SUM(T1.product_price) - (SUM(T1.product_price) * 0.1) AS craftsman_money,
SUM(T1.product_price) * 0.1 AS platform_money,
COUNT(order_id) AS count_order,
AVG(T1.product_price) AS avg_price_order,
AVG(T1.customer_age) AS avg_age_customer,
PERCENTILE_CONT(0.5) WITHIN GROUP(ORDER BY diff_order_date) AS median_time_order_completed,
SUM(CASE WHEN T1.order_status = 'created' THEN 1 ELSE 0 END) AS count_order_created,
SUM(CASE WHEN T1.order_status = 'in progress' THEN 1 ELSE 0 END) AS count_order_in_progress,
SUM(CASE WHEN T1.order_status = 'delivery' THEN 1 ELSE 0 END) AS count_order_delivery,
SUM(CASE WHEN T1.order_status = 'done' THEN 1 ELSE 0 END) AS count_order_done,
SUM(CASE WHEN T1.order_status != 'done' THEN 1 ELSE 0 END) AS count_order_not_done,
T1.report_period AS report_period
FROM (
SELECT -- в этой выборке достаём из DWH обновлённые или новые данные по мастерам, которые уже присутствуют в витрине
dc.craftsman_id AS craftsman_id,
dc.craftsman_name AS craftsman_name,
dc.craftsman_address AS craftsman_address,
dc.craftsman_birthday AS craftsman_birthday,
dc.craftsman_email AS craftsman_email,
fo.order_id AS order_id,
dp.product_id AS product_id,
dp.product_price AS product_price,
dp.product_type AS product_type,
date_part('year', age(customer_birthday)) as customer_age,-- напишите код здесь AS customer_age,
(order_completion_date - order_created_date) AS diff_order_date,-- напишите код здесь AS diff_order_date,
fo.order_status AS order_status,
to_char(fo.order_created_date, 'yyyy-mm') AS report_period-- напишите код здесь AS report_period
FROM dwh.f_order fo
INNER JOIN dwh.d_craftsman dc ON fo.craftsman_id = dc.craftsman_id
INNER JOIN dwh.d_customer dcs ON fo.customer_id = dcs.customer_id
INNER JOIN dwh.d_product dp ON fo.product_id = dp.product_id
INNER JOIN dwh_update_delta ud ON fo.craftsman_id = ud.craftsman_id
) AS T1
GROUP BY 1,2,3,4,5,17-- напишите код здесь
) AS T2
INNER JOIN (
SELECT -- эта выборка поможет определить самый популярный товар у мастера. Выборка не делается в предыдущем запросе, так как нужна другая группировка. Для данных этой выборки можно применить оконную функцию, которая и покажет самую популярную категорию товаров у мастера
dd.craftsman_id as craftsman_id_for_product_type,-- напишите код здесь AS craftsman_id_for_product_type,
dd.product_type,-- напишите код здесь,
count(product_id) as count_product-- напишите код здесь AS count_product
FROM dwh_delta AS dd
GROUP BY dd.craftsman_id, dd.product_type
ORDER BY count_product DESC) AS T3 ON T2.craftsman_id = T3.craftsman_id_for_product_type
) AS T4 WHERE T4.rank_count_product = 1 ORDER BY report_period
),
insert_delta AS (
-- Шаг 6
select craftsman_id,craftsman_name,craftsman_address,craftsman_birthday,craftsman_email,craftsman_money,platform_money,count_order,avg_price_order,avg_age_customer,median_time_order_completed,top_product_category,count_order_created,count_order_in_progress,count_order_delivery,count_order_done,count_order_not_done,report_period
from dwh_delta_insert_result
),
update_delta AS (
-- Шаг 7
select craftsman_id,
craftsman_name,
craftsman_address,
craftsman_birthday,
craftsman_email,
craftsman_money,
platform_money,
count_order,
avg_price_order,
avg_age_customer,
median_time_order_completed,
top_product_category,
count_order_created,
count_order_in_progress,
count_order_delivery,
count_order_done,
count_order_not_done,
report_period
from (SELECT
craftsman_id,
craftsman_name,
craftsman_address,
craftsman_birthday,
craftsman_email,
craftsman_money,
platform_money,
count_order,
avg_price_order,
avg_age_customer,
median_time_order_completed,
top_product_category,
count_order_created,
count_order_in_progress,
count_order_delivery,
count_order_done,
count_order_not_done,
report_period
FROM dwh_delta_update_result) updates
),
insert_load_date AS (
-- Шаг 8
select GREATEST(COALESCE(MAX(craftsman_load_dttm),max(customers_load_dttm),max(products_load_dttm),NOW()))
FROM dwh_delta
)
SELECT 'increment datamart';Editor is loading...
Leave a Comment