Untitled

mail@pastecode.io avatar
unknown
plain_text
2 years ago
10 kB
2
Indexable
Never
===================================== RENDER ADS  =============================================
--render_ad_cost_20230111_p1
set vogon.output.format=parquet;
set vogon.output.save.option.compression=gzip;
set vogon.output.save.mode=overwrite;
set vogon.output.location=/tmp/mydata/render_ad_cost_20230111_p11;
SET vogon.output.save.token=424ecebc-ea36-4a62-9053-5663b39763bb;
Select case when instr(requrl, '?') > 0 then SUBSTRING(requrl, 1, instr(requrl, '?')-1)
when instr(requrl, '#') > 0 then SUBSTRING(requrl, 1, instr(requrl, '#')-1)
else requrl end cleaned_url,pvid,count(1) RA_Log,sum(cbdp/1000) Cost
from cm.prebid_rendered_ad A inner join  cm.customer_master B on A.prvapiid=B.customer_id
where ts Between '2022120100'  and '2022120723' and 
B.partner_id = '8PRVCXX19' and pvid in (4,294,313,351)
group by 1,2



-- render_ad_cost_20221220_p2
set vogon.output.format=parquet;
set vogon.output.save.option.compression=gzip;
set vogon.output.save.mode=overwrite;
set vogon.output.location=/tmp/mydata/render_ad_cost_20221220_p22;
SET vogon.output.save.token=424ecebc-ea36-4a62-9053-5663b39763bb;
Select case when instr(requrl, '?') > 0 then SUBSTRING(requrl, 1, instr(requrl, '?')-1)
when instr(requrl, '#') > 0 then SUBSTRING(requrl, 1, instr(requrl, '#')-1)
else requrl end cleaned_url,pvid,count(1) RA_Log,sum(cbdp/1000) Cost
from cm.prebid_rendered_ad A inner join  cm.customer_master B on A.prvapiid=B.customer_id
where ts Between '2022120800'  and '2022121423' and B.partner_id = '8PRVCXX19' 
and pvid in (4,294,313,351)
group by 1,2


-- to run
-- render_ad_cost_20221220_union_v1
set vogon.output.format=parquet;
set vogon.output.save.option.compression=gzip;
set vogon.output.save.mode=overwrite;
set vogon.output.location=/tmp/mydata/render_ad_cost_20221220_union_v1;
SET vogon.output.save.token=424ecebc-ea36-4a62-9053-5663b39763bb;
SELECT cleaned_url, pvid,RA_Log, Cost
FROM `parquet`.`/tmp/mydata/render_ad_cost_20230111_p11`
UNION ALL
SELECT cleaned_url, pvid,RA_Log, Cost
FROM `parquet`.`/tmp/mydata/render_ad_cost_20221220_p22`



set vogon.output.format=parquet;
set vogon.output.save.option.compression=gzip;
set vogon.output.save.mode=overwrite;
set vogon.output.location=/tmp/mydata/render_ad_cost_20221220_union_v1_group;
SET vogon.output.save.token=424ecebc-ea36-4a62-9053-5663b39763bb;
SELECT cleaned_url, pvid,sum(RA_Log) RA_Log, sum(Cost) Cost
FROM `parquet`.`/tmp/mydata/render_ad_cost_20221220_union_v1`
group by 1,2

=====================================  KWD IMP =============================================


-- kwd imp

-- kwd dump part 1
-- kwd_dump_14days_urls_20221220
set vogon.output.format=parquet;
set vogon.output.save.option.compression=gzip;
set vogon.output.save.mode=overwrite;
set vogon.output.location=/tmp/mydata/kwd_dump_14days_urls_20221220_p1;
SET vogon.output.save.token=424ecebc-ea36-4a62-9053-5663b39763bb;
SELECT case when instr(publisher_url, '?') > 0 then SUBSTRING(publisher_url, 1, instr(publisher_url, '?')-1)
when instr(publisher_url, '#') > 0 then SUBSTRING(publisher_url, 1, instr(publisher_url, '#')-1)
else publisher_url end cleaned_url,bidder_id, kwt, COUNT(1) as kwd_imp
FROM keyword_dump
where ts Between '2022120100'  and '2022120723' and partner_id = '8PRVCXX19' 
and bidder_id in (4,294,313,351)
GROUP BY 1,2,3




-- kwd dump part 2
set vogon.output.format=parquet;
set vogon.output.save.option.compression=gzip;
set vogon.output.save.mode=overwrite;
set vogon.output.location=/tmp/mydata/kwd_dump_14days_urls_20221220_p2;
SET vogon.output.save.token=424ecebc-ea36-4a62-9053-5663b39763bb;
SELECT case when instr(publisher_url, '?') > 0 then SUBSTRING(publisher_url, 1, instr(publisher_url, '?')-1)
when instr(publisher_url, '#') > 0 then SUBSTRING(publisher_url, 1, instr(publisher_url, '#')-1)
else publisher_url end cleaned_url,bidder_id, kwt, COUNT(1) as kwd_imp
FROM keyword_dump
where ts Between  '2022120800'  and '2022121423' and partner_id = '8PRVCXX19' 
and bidder_id in (4,294,313,351)
GROUP BY 1,2,3


-- kwd_dump_7days_urls_union
set vogon.output.format=parquet;
set vogon.output.save.option.compression=gzip;
set vogon.output.save.mode=overwrite;
set vogon.output.location=/tmp/mydata/kwd_dump_7days_urls_union;
SET vogon.output.save.token=424ecebc-ea36-4a62-9053-5663b39763bb;
SELECT cleaned_url, bidder_id, kwd_imp
FROM `parquet`.`/tmp/mydata/kwd_dump_14days_urls_20221220_p1`
UNION ALL
SELECT cleaned_url, bidder_id, kwd_imp
FROM `parquet`.`/tmp/mydata/kwd_dump_14days_urls_20221220_p2`



-- kwd_dump_14days_urls_final_v1
set vogon.output.format=parquet;
set vogon.output.save.option.compression=gzip;
set vogon.output.save.mode=overwrite;
set vogon.output.location=/tmp/mydata/kwd_dump_14days_urls_final_v1;
SET vogon.output.save.token=424ecebc-ea36-4a62-9053-5663b39763bb;
SELECT cleaned_url, bidder_id, SUM(kwd_imp) as kwd_imp
FROM `parquet`.`/tmp/mydata/kwd_dump_7days_urls_union`
GROUP BY 1,2

-- ===================================== KWD CLICK =============================================

===================================== ADCLICK  =============================================
-- adclick_14days_urls_20221220_v2
set vogon.output.format=parquet;
set vogon.output.save.option.compression=gzip;
set vogon.output.save.mode=overwrite;
set vogon.output.location=/tmp/mydata/adclick_14days_urls_20221220_v2;
SET vogon.output.save.token=424ecebc-ea36-4a62-9053-5663b39763bb;
SELECT case when instr(publisher_url, '?') > 0 then SUBSTRING(publisher_url, 1, instr(publisher_url, '?')-1)
when instr(publisher_url, '#') > 0 then SUBSTRING(publisher_url, 1, instr(publisher_url, '#')-1)
else publisher_url end cleaned_url,bidder_id,  COUNT(*) as ad_click, SUM(net_total_revenue) as revenue
FROM adclick
where ts Between '2022120100'  and '2022121423' and audit_spam_flag=false and 
partner_id = '8PRVCXX19' 
and bidder_id in (4,294,313,351)
GROUP BY 1,2

===================================== METRICS JOIN ===================

-- url_metrics_cost_union_20221221_v1
set vogon.output.format=parquet;
set vogon.output.save.option.compression=gzip;
set vogon.output.save.mode=overwrite;
set vogon.output.location=/tmp/mydata/url_metrics_cost_union_20221221_v1;
SET vogon.output.save.token=424ecebc-ea36-4a62-9053-5663b39763bb;
SELECT cleaned_url, 0  kwd_imp,0 kwd_click,0 ad_click,0 revenue, ra_log, cost
FROM `PARQUET`.`/tmp/mydata/render_ad_cost_20221220_union_v1_group`
UNION ALL
SELECT cleaned_url, kwd_imp,0 kwd_click,0 ad_click,0 revenue, 0 ra_log, 0 cost
FROM `PARQUET`.`/tmp/mydata/kwd_dump_14days_urls_final_v1`
UNION ALL
SELECT cleaned_url, 0 kwd_imp,0 kwd_click, ad_click, revenue, 0 ra_log, 0 cost
FROM `PARQUET`.`/tmp/mydata/adclick_14days_urls_20221220_v2`




-- final metrics
-- url_metrics_cost_20221221_final_v1
set vogon.output.format=parquet;
set vogon.output.save.option.compression=gzip;
set vogon.output.save.mode=overwrite;
set vogon.output.location=/tmp/mydata/url_metrics_cost_20221221_final_v1;
SET vogon.output.save.token=424ecebc-ea36-4a62-9053-5663b39763bb;
SELECT cleaned_url, SUM(kwd_imp) as kwd_imp,SUM(kwd_click) as kwd_click,SUM(ad_click) as ad_click,SUM(revenue) as revenue,
SUM(ra_log) as ra_log,SUM(cost) as cost
FROM `parquet`.`/tmp/mydata/url_metrics_cost_union_20221221_v1`
GROUP BY 1



set vogon.output.format=parquet;
set vogon.output.save.option.compression=gzip;
set vogon.output.save.mode=overwrite;
set vogon.output.location=/tmp/mydata/url_metrics_cost_20221221_finalize;
SET vogon.output.save.token=424ecebc-ea36-4a62-9053-5663b39763bb;
select a.cleaned_url, SUM(a.kwd_imp) as kwd_imp,SUM(a.ad_click) as ad_click,SUM(a.revenue) as revenue,
SUM(a.ra_log) as ra_log,SUM(a.cost) as cost
from `parquet`.`/tmp/mydata/url_metrics_cost_20221221_final_v1` a 
INNER JOIN `parquet`.`/tmp/mydata/kwd_dump_14days_urls_20221220_p1` as b
ON a.cleaned_url = b.cleaned_url
group by 1


===================================== IS_CRAWLED =============================================
-- crawl_filter_dump_cleaned_urls_20221222_v1
set vogon.output.format=parquet;
set vogon.output.save.option.compression=gzip;
set vogon.output.save.mode=overwrite;
set vogon.output.location=/tmp/mydata/crawl_filter_dump_cleaned_urls_20221222_v1;
SET vogon.output.save.token=424ecebc-ea36-4a62-9053-5663b39763bb;
select DISTINCT case when instr(url, '?') > 0 then SUBSTRING(url, 1, instr(url, '?')-1)
when instr(url, '#') > 0 then SUBSTRING(url, 1, instr(url, '#')-1)
else url end cleaned_url from cm.crawl_url_filter_dump
where ts >= '2022110100'


-- url_kwt_metrics_20221221_crawled_status
set vogon.output.format=parquet;
set vogon.output.save.option.compression=gzip;
set vogon.output.save.mode=overwrite;
set vogon.output.location=/tmp/mydata/url_kwt_metrics_20221221_crawled_status;
SET vogon.output.save.token=424ecebc-ea36-4a62-9053-5663b39763bb;
SELECT a.*, case when a.cleaned_url IS null then 0 else 1 end as is_crawled
FROM `parquet`.`/tmp/mydata/url_metrics_cost_20221221_finalize` as a
LEFT JOIN `parquet`.`/tmp/mydata/crawl_filter_dump_cleaned_urls_20221222_v1` as b
ON a.cleaned_url = b.cleaned_url



-- url_metrics_Cost_20221221_crawled_status_domain
set vogon.output.format=parquet;
set vogon.output.save.option.compression=gzip;
set vogon.output.save.mode=overwrite;
set vogon.output.location=/tmp/mydata/url_metrics_Cost_20221221_crawled_status_domain;
SET vogon.output.save.token=424ecebc-ea36-4a62-9053-5663b39763bb;
SELECT *, case when INSTR(cleaned_url,'https://')>0 then REPLACE(Split(cleaned_url,'/')[2],'www.','')
when INSTR(cleaned_url,'http://')>0 then REPLACE(Split(cleaned_url,'/')[2],'www.','')
else REPLACE(Split(cleaned_url,'/')[0],'www.','') end domain
FROM `parquet`.`/tmp/mydata/url_kwt_metrics_20221221_crawled_status`

===================================== BRANDSAFETY =============================================