скрипт

 avatar
unknown
sql
a year ago
25 kB
12
Indexable
%%sql

with events as (
    select
        user_id,
        app_id,
        nullif(get_json_object(mobile_context, '$.androidIdfa'), '') as gaid,
        nullif(get_json_object(mobile_context, '$.appleIdfv'), '') as idfv,
        nullif(
            nullif(get_json_object(event_context, '$.guestia_id'), ''), 'null'
        ) as guestia_id,
        nginx_auid as auid,
        lower(coalesce(
            nullif(get_json_object(event_context, '$.context.market'), ''),
            nullif(get_json_object(event_context, '$.market'), ''),
            nullif(get_json_object(user_context, '$.market'), '')
        )) as market,
        device_created_ts as dts,
        created_at as ts,
        upper(
            if(region_iso_code in ('UA-40', 'UA-43'), 'RU', country_code)
        ) as ip_country_iata,
        upper(region_iso_code) as ip_region_iata,
        upper(city_iata) as ip_city_iata,
        city_name as ip_city_name,
        user_timezone,
        coalesce(
            nullif(get_json_object(event_context, '$.context.lang'), ''),
            nullif(get_json_object(user_context, '$.Language'), ''),
            user_lang
        ) as lang,
        lower(coalesce(
            nullif(get_json_object(event_context, '$.context.currency'), ''),
            nullif(get_json_object(event_context, '$.currency'), '')
        )) as currency,
        screen_resolution as screen_res,
        coalesce(
            device_brand,
            nullif(get_json_object(mobile_context, '$.deviceManufacturer'), '')
        ) as device_brand,
        nullif(
            get_json_object(mobile_context, '$.deviceModel'), ''
        ) as device_model,
        nullif(
            get_json_object(mobile_context, '$.osVersion'), ''
        ) as os_version,
        nullif(
            get_json_object(mobile_context, '$.networkType'), ''
        ) as network_type,
        nullif(
            nullif(
                nullif(get_json_object(mobile_context, '$.carrier'), ''), '--'
            ),
            '--, --'
        ) as carrier,
        isp,
        decode(
            nullif(get_json_object(event_context, '$.geo_info.VPN'), ''),
            'true', true,
            'false', false,
            null
        ) as vpn,
        nullif(
            get_json_object(mobile_version_context, '$.version'), ''
        ) as app_version,
        nullif(
            get_json_object(event_context, '$.geo_info.CountryFromAppstore'), ''
        ) as appstore_country,
        nullif(
            nullif(
                get_json_object(event_context, '$.geo_info.CountryFromMainSim'),
                ''
            ),
            '--'
        ) as mainsim_country,
        nullif(
            nullif(
                get_json_object(
                    event_context, '$.geo_info.CountryFromSecondSim'
                ),
                ''
            ),
            '--'
        ) as secondsim_country,
        nullif(marker, '\\\\N') as marker,
        nullif(
            if(
                event_type = 'general--deeplink--opened',
                get_json_object(event_context, '$.url'),
                null
            ),
            ''
        ) as deeplink,
        'sp' as event_type,
        'AS' as brand,
        case when event_type = 'general--app--launched' and get_json_object(event_context, '$.mode') = 'background' then 1 else 0 end as flag_back,
        pdate
    from sp.events
    where
        pdate = date('2024-07-16')
        and app_id in (
            'ios.ru.aviasales.app',
            'android.ru.aviasales',
            'ios.com.jetradar.app',
            'android.com.jetradar',
            'ios.com.jetradar.arabic'
        )
        and event_type not in (
            'flags--fetch',
            'flagr--fetch--failed',
            'flagr--flags--updated',
            'flagr--fetch--succeeded',
            'direction--feed_block--showed',
            'direction--trap_content--showed',
            'direction--explore_services--showed',
            'hotels--offer--showed',
            'mobile_app--Hotel Impression',
            'ticket_offers--block--showed',
            'search--first_tickets--arrived',
            'search--search--id_assigned',
            'schedule_listing--ticket--showed',
            'ticket--drawer_proposal--showed',
            'ticket--blet--responded',
            'subscription--entry_point--showed'
        )
        and nullif(user_id, '') is not null

    union all

    select
        user_id,
        app_id,
        nullif(get_json_object(mobile_context, '$.androidIdfa'), '') as gaid,
        nullif(get_json_object(mobile_context, '$.appleIdfv'), '') as idfv,
        nullif(
            nullif(get_json_object(event_context, '$.guestia_id'), ''), 'null'
        ) as guestia_id,
        nginx_auid as auid,
        lower(coalesce(
            nullif(get_json_object(event_context, '$.context.market'), ''),
            nullif(get_json_object(event_context, '$.market'), ''),
            nullif(get_json_object(user_context, '$.market'), '')
        )) as market,
        device_created_ts as dts,
        created_at as ts,
        upper(
            if(region_iso_code in ('UA-40', 'UA-43'), 'RU', country_code)
        ) as ip_country_iata,
        upper(region_iso_code) as ip_region_iata,
        upper(city_iata) as ip_city_iata,
        city_name as ip_city_name,
        user_timezone,
        coalesce(
            nullif(get_json_object(event_context, '$.context.lang'), ''),
            nullif(get_json_object(user_context, '$.Language'), ''),
            user_lang
        ) as lang,
        lower(coalesce(
            nullif(get_json_object(event_context, '$.context.currency'), ''),
            nullif(get_json_object(event_context, '$.currency'), '')
        )) as currency,
        screen_resolution as screen_res,
        coalesce(
            device_brand,
            nullif(get_json_object(mobile_context, '$.deviceManufacturer'), '')
        ) as device_brand,
        nullif(
            get_json_object(mobile_context, '$.deviceModel'), ''
        ) as device_model,
        nullif(
            get_json_object(mobile_context, '$.osVersion'), ''
        ) as os_version,
        nullif(
            get_json_object(mobile_context, '$.networkType'), ''
        ) as network_type,
        nullif(
            nullif(
                nullif(get_json_object(mobile_context, '$.carrier'), ''), '--'
            ),
            '--, --'
        ) as carrier,
        isp,
        decode(
            nullif(get_json_object(event_context, '$.geo_info.VPN'), ''),
            'true', true,
            'false', false,
            null
        ) as vpn,
        nullif(
            get_json_object(mobile_version_context, '$.version'), ''
        ) as app_version,
        nullif(
            get_json_object(event_context, '$.geo_info.CountryFromAppstore'), ''
        ) as appstore_country,
        nullif(
            nullif(
                get_json_object(event_context, '$.geo_info.CountryFromMainSim'),
                ''
            ),
            '--'
        ) as mainsim_country,
        nullif(
            nullif(
                get_json_object(
                    event_context, '$.geo_info.CountryFromSecondSim'
                ),
                ''
            ),
            '--'
        ) as secondsim_country,
        nullif(marker, '\\\\N') as marker,
        nullif(
            if(
                event_type = 'general--deeplink--opened',
                get_json_object(event_context, '$.url'),
                null
            ),
            ''
        ) as deeplink,
        'sp' as event_type,
        'WA' as brand,
       case when event_type = 'general--app--launched' and get_json_object(event_context, '$.mode') = 'background' then 1 else 0 end as flag_back,
       pdate
    from sp.events_wa
    where
        pdate = date('2024-07-16')
        and app_id in (
            'ios.com.jetradar.app',
            'android.com.jetradar'
        )
        and event_type not in (
            'flags--fetch',
            'flagr--fetch--failed',
            'flagr--flags--updated',
            'flagr--fetch--succeeded',
            'direction--feed_block--showed',
            'direction--trap_content--showed',
            'direction--explore_services--showed',
            'hotels--offer--showed',
            'mobile_app--Hotel Impression',
            'ticket_offers--block--showed',
            'search--first_tickets--arrived',
            'search--search--id_assigned',
            'schedule_listing--ticket--showed',
            'ticket--drawer_proposal--showed',
            'ticket--blet--responded',
            'subscription--entry_point--showed'
        )
        and nullif(user_id, '') is not null

    union all

    select
        mobile_token as user_id,
        case
            when
                referrer_host regexp '(iphone|ipad)'
                and referrer_host like '%aviasales%'
                then 'ios.ru.aviasales.app'
            when
                referrer_host like '%android%'
                and referrer_host like '%aviasales%'
                then 'android.ru.aviasales'
            when
                referrer_host regexp '(iphone|ipad)'
                and referrer_host like '%arabic%'
                then 'ios.com.jetradar.arabic'
            when
                referrer_host regexp '(iphone|ipad)'
                and referrer_host regexp '(jetradar|wayaway)'
                then 'ios.com.jetradar.app'
            when
                referrer_host like '%android%'
                and referrer_host regexp '(jetradar|wayaway)'
                then 'android.com.jetradar'
        end as app_id,
        cast(null as string) as gaid,
        cast(null as string) as idfv,
        user_id as guestia_id,
        auid,
        market,
        '3000-01-01' as dts,
        created_at as ts,
        upper(
            if(region_iso_code in ('UA-40', 'UA-43'), 'RU', country_code)
        ) as ip_country_iata,
        upper(region_iso_code) as ip_region_iata,
        upper(city_iata) as ip_city_iata,
        city_name as ip_city_name,
        cast(null as string) as user_timezone,
        locale as lang,
        lower(currency) as currency,
        cast(null as string) as screen_res,
        device_brand,
        nullif(
            get_json_object(mobile_meta, '$.device_model'), ''
        ) as device_model,
        nullif(get_json_object(mobile_meta, '$.os_version'), '') as os_version,
        nullif(get_json_object(mobile_meta, '$.network'), '') as network_type,
        nullif(
            nullif(
                nullif(get_json_object(mobile_meta, '$.carrier_name'), ''), '--'
            ),
            '--, --'
        ) as carrier,
        isp,
        cast(null as boolean) as vpn,
        nullif(get_json_object(mobile_meta, '$.version'), '') as app_version,
        cast(null as string) as appstore_country,
        cast(null as string) as mainsim_country,
        cast(null as string) as secondsim_country,
        nullif(marker, '\\\\N') as marker,
        cast(null as string) as deeplink,
        'search' as event_type,
        coalesce(brand, 'AS') as brand,
        0 as flag_back,
        pdate
    from avia.searches
    where
        pdate = date('2024-07-16')
        and not marked_as_bot
        and if(
            pdate >= '2023-06-01',
            user_platform in ('ios', 'android'),
            referrer_host regexp '(android|iphone|ipad)'
            and referrer_host regexp '(aviasales|jetradar|wayaway)'
        )
        and nullif(mobile_token, '') is not null

    union all

    select
        mobile_token as user_id,
        case
            when
                coalesce(
                    search_referrer_host, referrer_host
                ) regexp '(iphone|ipad)'
                and coalesce(
                    search_referrer_host, referrer_host
                ) like '%aviasales%'
                then 'ios.ru.aviasales.app'
            when
                coalesce(search_referrer_host, referrer_host) like '%android%'
                and coalesce(
                    search_referrer_host, referrer_host
                ) like '%aviasales%'
                then 'android.ru.aviasales'
            when
                coalesce(
                    search_referrer_host, referrer_host
                ) regexp '(iphone|ipad)'
                and coalesce(
                    search_referrer_host, referrer_host
                ) like '%arabic%'
                then 'ios.com.jetradar.arabic'
            when
                coalesce(
                    search_referrer_host, referrer_host
                ) regexp '(iphone|ipad)'
                and coalesce(
                    search_referrer_host, referrer_host
                ) regexp '(jetradar|wayaway)'
                then 'ios.com.jetradar.app'
            when
                coalesce(search_referrer_host, referrer_host) like '%android%'
                and coalesce(
                    search_referrer_host, referrer_host
                ) regexp '(jetradar|wayaway)'
                then 'android.com.jetradar'
        end as app_id,
        cast(null as string) as gaid,
        cast(null as string) as idfv,
        search_user_id as guestia_id,
        coalesce(search_auid, auid) as auid,
        market,
        '3000-01-01' as dts,
        created_at as ts,
        upper(
            if(region_iso_code in ('UA-40', 'UA-43'), 'RU', country_code)
        ) as ip_country_iata,
        upper(region_iso_code) as ip_region_iata,
        upper(city_iata) as ip_city_iata,
        city_name as ip_city_name,
        cast(null as string) as user_timezone,
        locale as lang,
        lower(currency) as currency,
        cast(null as string) as screen_res,
        device_brand,
        nullif(
            get_json_object(mobile_meta, '$.device_model'), ''
        ) as device_model,
        nullif(get_json_object(mobile_meta, '$.os_version'), '') as os_version,
        nullif(get_json_object(mobile_meta, '$.network'), '') as network_type,
        nullif(
            nullif(
                nullif(get_json_object(mobile_meta, '$.carrier_name'), ''), '--'
            ),
            '--, --'
        ) as carrier,
        cast(null as string) as isp,
        cast(null as boolean) as vpn,
        nullif(get_json_object(mobile_meta, '$.version'), '') as app_version,
        cast(null as string) as appstore_country,
        cast(null as string) as mainsim_country,
        cast(null as string) as secondsim_country,
        nullif(marker, '\\\\N') as marker,
        cast(null as string) as deeplink,
        'click' as event_type,
        coalesce(brand, 'AS') as brand,
        0 as flag_back,
        pdate
    from avia.clicks
    where
        pdate = date('2024-07-16')
        and if(
            pdate >= '2023-06-01',
            user_platform in ('ios', 'android'),
            coalesce(
                search_referrer_host, referrer_host
            ) regexp '(android|iphone|ipad)'
            and coalesce(
                search_referrer_host, referrer_host
            ) regexp '(aviasales|jetradar|wayaway)'
        )
        and nullif(mobile_token, '') is not null
),

sessions_raw as (
    select
        *,
        min(ts) over (
            partition by brand, user_id, session_n
            order by ts
            rows between unbounded preceding and unbounded following
        ) as start_ts,
        max(ts) over (
            partition by brand, user_id, session_n
            order by ts
            rows between unbounded preceding and unbounded following
        ) as end_ts,
        min(if(dts = '3000-01-01', null, dts)) over (
            partition by brand, user_id, session_n
            order by dts
            rows between unbounded preceding and unbounded following
        ) as start_dts,
        max(if(dts = '3000-01-01', null, dts)) over (
            partition by brand, user_id, session_n
            order by dts
            rows between unbounded preceding and unbounded following
        ) as end_dts
    from
        (
            select
                *,
                sum(
                    if(
                        unix_timestamp(ts)
                        - coalesce(unix_timestamp(prev_ts), 0)
                        > 30 * 60,
                        1,
                        0
                    )
                )
                    over (partition by brand, user_id order by ts)
                as session_n
            from
                (
                    select
                        *,
                        lag(ts)
                            over (partition by brand, user_id order by ts)
                        as prev_ts
                    from events
                )
        )
),

sessions as (
    select
        user_id,
        app_id,
        start_ts,
        end_ts,
        start_dts,
        end_dts,
        brand,
        pdate,
        concat_ws('_', user_id, cast(start_ts as string)) as session_id,
        first_value(market, true) over (
            partition by brand, user_id, session_n
            order by dts, ts
            rows between unbounded preceding and unbounded following
        ) as market,
        unix_timestamp(end_ts) - unix_timestamp(start_ts) as ses_length_sec,
        count(if(event_type = 'search', 1, null)) over (
            partition by brand, user_id, session_n
        ) as searches,
        count(if(event_type = 'click', 1, null)) over (
            partition by brand, user_id, session_n
        ) as clicks,
        first_value(gaid, true) over (
            partition by brand, user_id, session_n
            order by dts, ts
            rows between unbounded preceding and unbounded following
        ) as gaid,
        first_value(idfv, true) over (
            partition by brand, user_id, session_n
            order by dts, ts
            rows between unbounded preceding and unbounded following
        ) as idfv,
        first_value(guestia_id, true) over (
            partition by brand, user_id, session_n
            order by dts, ts
            rows between unbounded preceding and unbounded following
        ) as guestia_id,
        first_value(auid, true) over (
            partition by brand, user_id, session_n
            order by dts, ts
            rows between unbounded preceding and unbounded following
        ) as auid,
        first_value(ip_country_iata, true) over (
            partition by brand, user_id, session_n
            order by dts, ts
            rows between unbounded preceding and unbounded following
        ) as ip_country_iata,
        first_value(ip_region_iata, true) over (
            partition by brand, user_id, session_n
            order by dts, ts
            rows between unbounded preceding and unbounded following
        ) as ip_region_iata,
        first_value(ip_city_iata, true) over (
            partition by brand, user_id, session_n
            order by dts, ts
            rows between unbounded preceding and unbounded following
        ) as ip_city_iata,
        first_value(ip_city_name, true) over (
            partition by brand, user_id, session_n
            order by dts, ts
            rows between unbounded preceding and unbounded following
        ) as ip_city_name,
        first_value(user_timezone, true) over (
            partition by brand, user_id, session_n
            order by dts, ts
            rows between unbounded preceding and unbounded following
        ) as user_timezone,
        first_value(lang, true) over (
            partition by brand, user_id, session_n
            order by dts, ts
            rows between unbounded preceding and unbounded following
        ) as lang,
        first_value(currency, true) over (
            partition by brand, user_id, session_n
            order by dts, ts
            rows between unbounded preceding and unbounded following
        ) as currency,
        first_value(screen_res, true) over (
            partition by brand, user_id, session_n
            order by dts, ts
            rows between unbounded preceding and unbounded following
        ) as screen_res,
        first_value(device_brand, true) over (
            partition by brand, user_id, session_n
            order by dts, ts
            rows between unbounded preceding and unbounded following
        ) as device_brand,
        first_value(device_model, true) over (
            partition by brand, user_id, session_n
            order by dts, ts
            rows between unbounded preceding and unbounded following
        ) as device_model,
        first_value(os_version, true) over (
            partition by brand, user_id, session_n
            order by dts, ts
            rows between unbounded preceding and unbounded following
        ) as os_version,
        first_value(network_type, true) over (
            partition by brand, user_id, session_n
            order by dts, ts
            rows between unbounded preceding and unbounded following
        ) as network_type,
        first_value(carrier, true) over (
            partition by brand, user_id, session_n
            order by dts, ts
            rows between unbounded preceding and unbounded following
        ) as carrier,
        first_value(isp, true) over (
            partition by brand, user_id, session_n
            order by dts, ts
            rows between unbounded preceding and unbounded following
        ) as isp,
        first_value(vpn, true) over (
            partition by brand, user_id, session_n
            order by dts, ts
            rows between unbounded preceding and unbounded following
        ) as vpn,
        first_value(app_version, true) over (
            partition by brand, user_id, session_n
            order by dts, ts
            rows between unbounded preceding and unbounded following
        ) as app_version,
        first_value(appstore_country, true) over (
            partition by brand, user_id, session_n
            order by dts, ts
            rows between unbounded preceding and unbounded following
        ) as appstore_country,
        first_value(mainsim_country, true) over (
            partition by brand, user_id, session_n
            order by dts, ts
            rows between unbounded preceding and unbounded following
        ) as mainsim_country,
        first_value(secondsim_country, true) over (
            partition by brand, user_id, session_n
            order by dts, ts
            rows between unbounded preceding and unbounded following
        ) as secondsim_country,
        first_value(marker, true) over (
            partition by brand, user_id, session_n
            order by dts, ts
            rows between unbounded preceding and unbounded following
        ) as marker,
        first_value(deeplink, true) over (
            partition by brand, user_id, session_n
            order by dts, ts
            rows between unbounded preceding and unbounded following
        ) as deeplink,
        max(event_type) over (
            partition by brand, user_id, session_n
            order by dts, ts
            rows between unbounded preceding and unbounded following
        ) as max_event_type,
        row_number() over (
            partition by
                brand,
                user_id,
                session_n
            order by
                dts,
                ts
        ) = 1 as latest,
        max(flag_back) over (partition by brand, user_id, session_n order by dts, ts rows between unbounded preceding and unbounded following) as flag_back
    from sessions_raw
),

final as (
    select
        session_id,
        user_id,
        app_id,
        market,
        start_ts,
        end_ts,
        start_dts,
        end_dts,
        ses_length_sec,
        searches,
        clicks,
        gaid,
        idfv,
        auid,
        guestia_id,
        ip_country_iata,
        ip_region_iata,
        ip_city_iata,
        ip_city_name,
        user_timezone,
        currency,
        screen_res,
        device_brand,
        device_model,
        os_version,
        carrier,
        isp,
        vpn,
        app_version,
        appstore_country,
        mainsim_country,
        secondsim_country,
        marker,
        deeplink,
        brand,
        decode(
            lang,
            'русский',
            'ru',
            'English',
            'en',
            'o‘zbek',
            'uz',
            'azərbaycan',
            'az',
            lang
        ) as lang,
        case
            when network_type in ('wifi', 'offline') then network_type
            when network_type = 'disconnected' then 'offline'
            when network_type is null then null
            else 'mobile'
        end as network_type,
        if(max_event_type = 'sp', 'sp', 'avia') as source,
        now() as _updated_at,
        pdate
    from sessions
    where latest and flag_back = 0
)

select pdate, count(*), count(distinct user_id) 
from final
group by 1
Editor is loading...
Leave a Comment