Assignment Query

mail@pastecode.io avatar
unknown
pgsql
2 years ago
2.5 kB
2
Indexable
Never
--Query goal
/*Your goal is to write a PostgreSQL query 
that calculates weekly average backlog 
and returns result looking like this (not these numbers, just the structure):
week, backlog 2016-02-01, 5.5 2016-02-08, 9.2

Example: if a customer reached stage 1 on 2017-01-01 and stage 2 on 2017-01-08
, he appears in daily backlog on 
2017-01-02,2017-01-03,2017-01-04,2017-01-05,2017-01-06 and 2017-01-07

Important Assumption: It's mentioned in the assignment that for some 
*/

--Step 1
-- We need to generate a list of consecutive dates to calculate 
-- the backlog per day
with date_range as
(
select
	generate_series(min(state_1)
				, max(state_2)
                , interval '1 day')::date as backlog_date
from
	data),
--Calculate ids which should be included in the backlog and appropriate flags

prep_backlog as 
(
select
	data.*,
	--  If customer reaches stage #2 on the same day or the next day he never appears in the backlog
	--  All other cases should be in the backlog set at least once
case when  
 state_2::date-state_1::date <= 1 then 0 else 1 end as is_backlog,
	--  If customer doesn't reach stage #2 at all he's in the backlog until day 14 (after reaching stage 1), then we forget about this customer
	--  If customer reaches stage #2 later than 14 days after stage #1 he's appearing in backlog only until day 14
	--  Because in the final query we want day 14 to be inclusive I've added 15 day interval  
case when state_2 is null or 
	(state_2 is not null and state_2::date-state_1::date>14) 
	then 
	(state_1 + interval '15' day )::date
	else state_2::date
	end
	as state_2_date,
	state_1::date as state_1_date
from
	data
),

backlog as 
(
select
	backlog_date,
	prep_backlog.*
from
	prep_backlog
cross join date_range
where
	-- filter out only the customers to be included in backlog
	is_backlog = 1
	-- As per requirements when calculating the backlog we don't include 
	-- the dates where the transitions happened.
	-- For the specific circumstances mentioned state_2_date is calculated to be 
	-- 15 days after state_1 so that the filter captures up to the 14th date
	and backlog_date>state_1_date
	and backlog_date<state_2_date)
		

--Calculate the weekly average after calcuating the daily backlog counts
select
	date_trunc('week', backlog_date)::date as backlog_week_number,
	round(avg(customers), 1) as weekly_avg_backlog
from
	(
	select
		backlog_date,
		count( id) as customers
	from
		backlog
	group by
		1
) days
group by
	1
order by
	1