Untitled

 avatar
unknown
sql
a month ago
2.3 kB
2
Indexable
WITH base_repos AS (
    SELECT repo_name 
    FROM `repo_data.repo_list`
),

-- Extract PR metadata from PullRequestEvent
pr_base AS (
    SELECT
        repo.name AS repo_name,
        CAST(JSON_EXTRACT_SCALAR(payload, '$.pull_request.number') AS INT64) AS pr_number,
        JSON_EXTRACT_SCALAR(payload, '$.pull_request.title') AS pr_title,
        JSON_EXTRACT_SCALAR(payload, '$.pull_request.state') AS pr_state,
        CAST(JSON_EXTRACT_SCALAR(payload, '$.pull_request.merged') AS BOOL) AS is_merged,
        JSON_EXTRACT_SCALAR(payload, '$.pull_request.body') AS description,
        JSON_EXTRACT_SCALAR(payload, '$.pull_request.created_at') AS pr_created_at,
        actor.login AS author
    FROM `githubarchive.month.*`
    WHERE 
        _TABLE_SUFFIX BETWEEN '201801' AND '202312'
        AND type = 'PullRequestEvent'
        AND JSON_EXTRACT_SCALAR(payload, '$.action') = 'opened'  -- Get only PR creation events
        AND repo.name IN (SELECT repo_name FROM base_repos)
),

-- Extract PR discussion comments from IssueCommentEvent
pr_comments AS (
    SELECT
        repo.name AS repo_name,
        CAST(JSON_EXTRACT_SCALAR(payload, '$.issue.number') AS INT64) AS pr_number,
        JSON_EXTRACT_SCALAR(payload, '$.comment.body') AS text,
        actor.login AS author,
        JSON_EXTRACT_SCALAR(payload, '$.comment.created_at') AS created_at
    FROM `githubarchive.month.*`
    WHERE 
        _TABLE_SUFFIX BETWEEN '201801' AND '202312'
        AND type = 'IssueCommentEvent'
        AND JSON_EXTRACT_SCALAR(payload, '$.issue.pull_request') IS NOT NULL  -- Ensure it's a PR comment
        AND repo.name IN (SELECT repo_name FROM base_repos)
)

-- Aggregate everything by PR
SELECT 
    pr.repo_name,
    pr.pr_number,
    pr.pr_title,
    pr.pr_state,
    pr.is_merged,
    pr.description,
    pr.pr_created_at,
    pr.author AS pr_author,
    ARRAY_AGG(
        STRUCT(
            pc.text AS text,
            pc.author AS author,
            pc.created_at AS created_at
        )
        ORDER BY pc.created_at ASC
    ) AS comments
FROM pr_base pr
LEFT JOIN pr_comments pc
    ON pr.repo_name = pc.repo_name AND pr.pr_number = pc.pr_number
GROUP BY 
    pr.repo_name, pr.pr_number, pr.pr_title, pr.pr_state, 
    pr.is_merged, pr.description, pr.pr_created_at, pr.author
ORDER BY pr.pr_created_at DESC
LIMIT 100;
Editor is loading...
Leave a Comment