RamaharTime distribution
    Updated 2023-01-25
    -- Only consider those developers who have done multiple pull requests. Taking the average frequency of each developers to measure for the distribution.

    With first_t as (select
    id,
    updatedat,
    author,
    row_number () over (partition by author order by updatedat asc) as row_number
    from near.beta.github_activity
    where updatedat::date >= '2019-01-01'),

    second_t as (select
    id,
    updatedat,
    author,
    row_number () over (partition by author order by updatedat asc) as row_number
    from near.beta.github_activity
    where updatedat::date >= '2019-01-01'),

    time_diff as (select
    a.author,
    timediff (day,a.updatedat,b.updatedat) as day_difference
    from first_t a join second_t b on a.author = b.author and b.row_number = a.row_number + 1
    ),

    filtered_freq as (select
    author,
    avg(day_difference) as avg_freq,
    median(day_difference) as median_freq
    from time_diff
    group by 1)

    select
    CASE
    when avg_freq = '0' then 'Within a day'
    when avg_freq between 1 and 7 then 'Within A week'
    when avg_freq between 7 and 14 then 'Within 2 weeks'
    Run a query to Download Data