Skip to content

Commit

Permalink
feat(dbt): add remaining PLN github-based metrics (#2484)
Browse files Browse the repository at this point in the history
* feat(dbt): add comments and releases dbt models

* feat(dbt): create event model for numbered prs and issues

* feat(dbt): time to pr merge code metric

* fix: rename metric

* fix(dbt): add comment threads to event table

* feat(dbt): add time to first response

* chore(dbt): add new metrics to mart models

* fix(dbt): linting error

* fix: missing var

* feat(sql-mesh): add comment counts

* feat(sql-mesh): add releases

* chore(sql-mesh): add metrics to factory

* feat(sql-mesh): add model for unioning parent-child github events

* fix: remove child events sqlmesh model
  • Loading branch information
ccerv1 authored Nov 21, 2024
1 parent 87480be commit 2d9c4f2
Show file tree
Hide file tree
Showing 10 changed files with 349 additions and 6 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
-- Gathers GitHub events that carry a pull request / issue `number` from four
-- staging models into one table, normalising each source to the same column
-- set. Rows whose actor login ends in "[bot]" are removed at the end.
with opened_prs as (
  -- Pull requests: only the opening event is taken from this source.
  select
    prs.`number`,
    prs.`type`,
    prs.created_at,
    prs.actor_id,
    LOWER(prs.actor_login) as actor_login,
    LOWER(prs.repository_name) as repository_name,
    CAST(prs.repository_id as STRING) as to_artifact_source_id
  from {{ ref('stg_github__pull_requests') }} as prs
  where prs.`type` = 'PULL_REQUEST_OPENED'
),

merged_prs as (
  -- Pull request merge events, taken as-is (no type filter).
  select
    merges.`number`,
    merges.`type`,
    merges.created_at,
    merges.actor_id,
    LOWER(merges.actor_login) as actor_login,
    LOWER(merges.repository_name) as repository_name,
    CAST(merges.repository_id as STRING) as to_artifact_source_id
  from {{ ref('stg_github__pull_request_merge_events') }} as merges
),

issue_activity as (
  -- Issue events, taken as-is (no type filter).
  select
    issues.`number`,
    issues.`type`,
    issues.created_at,
    issues.actor_id,
    LOWER(issues.actor_login) as actor_login,
    LOWER(issues.repository_name) as repository_name,
    CAST(issues.repository_id as STRING) as to_artifact_source_id
  from {{ ref('stg_github__issues') }} as issues
),

comment_activity as (
  -- Comment events, taken as-is (no type filter).
  select
    comments.`number`,
    comments.`type`,
    comments.created_at,
    comments.actor_id,
    LOWER(comments.actor_login) as actor_login,
    LOWER(comments.repository_name) as repository_name,
    CAST(comments.repository_id as STRING) as to_artifact_source_id
  from {{ ref('stg_github__comments') }} as comments
),

unioned as (
  -- Stack the four sources; duplicates are kept (union all).
  select
    `number`,
    `type`,
    created_at,
    actor_id,
    actor_login,
    repository_name,
    to_artifact_source_id
  from opened_prs
  union all
  select
    `number`,
    `type`,
    created_at,
    actor_id,
    actor_login,
    repository_name,
    to_artifact_source_id
  from merged_prs
  union all
  select
    `number`,
    `type`,
    created_at,
    actor_id,
    actor_login,
    repository_name,
    to_artifact_source_id
  from issue_activity
  union all
  select
    `number`,
    `type`,
    created_at,
    actor_id,
    actor_login,
    repository_name,
    to_artifact_source_id
  from comment_activity
),

tagged as (
  -- Attach the constant event source once, as a real column, so the id macro
  -- in the final select can reference it by name.
  select
    unioned.`number`,
    unioned.`type`,
    unioned.created_at,
    unioned.actor_id,
    unioned.actor_login,
    unioned.repository_name,
    unioned.to_artifact_source_id,
    'GITHUB' as event_source
  from unioned
)

select
  tagged.event_source,
  tagged.created_at as `time`,
  tagged.`number`,
  tagged.`type`,
  tagged.actor_login,
  tagged.repository_name,
  tagged.actor_id,
  tagged.to_artifact_source_id,
  {{ oso_id("event_source", "to_artifact_source_id") }} as to_artifact_id
from tagged
-- Logins were lower-cased above; this drops those ending in the "[bot]" suffix.
where tagged.actor_login not like '%[bot]'
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,11 @@ where
'COMMIT_CODE',
'PULL_REQUEST_OPENED',
'PULL_REQUEST_MERGED',
'PULL_REQUEST_REVIEW_COMMENT',
'ISSUE_OPENED',
'ISSUE_CLOSED'
'ISSUE_CLOSED',
'ISSUE_COMMENT',
'RELEASE_PUBLISHED'
)
group by
events.project_id,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
-- Average time, in days, between a pull request / issue being opened and the
-- earliest response on the same thread from an actor other than the opener,
-- reported per project, event source and time interval.
with thread_starts as (
  select
    threads.`number`,
    threads.to_artifact_id,
    threads.actor_id as creator_id,
    threads.`time` as created_at,
    -- Reduce the opening event type to the kind of thread it starts.
    case threads.`type`
      when 'ISSUE_OPENED' then 'ISSUE'
      when 'PULL_REQUEST_OPENED' then 'PULL_REQUEST'
    end as thread_kind
  from {{ ref('int_github_pr_issue_threads') }} as threads
  where threads.`type` in ('PULL_REQUEST_OPENED', 'ISSUE_OPENED')
),

thread_responses as (
  select
    threads.`number`,
    threads.to_artifact_id,
    threads.actor_id as responder_id,
    threads.`time` as responded_at,
    -- Issue responses answer issues; PR responses answer pull requests.
    case
      when threads.`type` in ('ISSUE_COMMENT', 'ISSUE_CLOSED')
        then 'ISSUE'
      when threads.`type` in ('PULL_REQUEST_REVIEW_COMMENT', 'PULL_REQUEST_MERGED')
        then 'PULL_REQUEST'
    end as thread_kind
  from {{ ref('int_github_pr_issue_threads') }} as threads
  where threads.`type` in (
    'PULL_REQUEST_MERGED',
    'PULL_REQUEST_REVIEW_COMMENT',
    'ISSUE_CLOSED',
    'ISSUE_COMMENT'
  )
),

first_responses as (
  -- Earliest qualifying response per opened thread. Threads without any
  -- response from another actor drop out because of the inner join.
  select
    starts.`number`,
    starts.to_artifact_id,
    starts.created_at,
    MIN(responses.responded_at) as responded_at
  from thread_starts as starts
  inner join thread_responses as responses
    on
      starts.`number` = responses.`number`
      and starts.to_artifact_id = responses.to_artifact_id
      and starts.thread_kind = responses.thread_kind
      -- Self-responses by the opener do not count.
      and starts.creator_id != responses.responder_id
  group by
    starts.`number`,
    starts.to_artifact_id,
    starts.created_at
),

response_times as (
  -- One row per thread, timestamped at the first response.
  select
    first_responses.responded_at as `time`,
    first_responses.to_artifact_id,
    'GITHUB' as event_source,
    -- Whole minutes converted to fractional days.
    CAST(
      TIMESTAMP_DIFF(
        first_responses.responded_at, first_responses.created_at, minute
      ) as FLOAT64
    ) / 60.0 / 24.0 as amount
  from first_responses
)

select
  artifacts_by_project.project_id,
  response_times.event_source,
  time_intervals.time_interval,
  'time_to_first_response_days_average' as metric,
  AVG(response_times.amount) as amount
from response_times
left join {{ ref('artifacts_by_project_v1') }} as artifacts_by_project
  on response_times.to_artifact_id = artifacts_by_project.artifact_id
-- A response counts towards every interval whose start_date it falls on or after.
inner join {{ ref('int_time_intervals') }} as time_intervals
  on response_times.`time` >= time_intervals.start_date
group by
  artifacts_by_project.project_id,
  response_times.event_source,
  time_intervals.time_interval
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
-- Average time, in days, between a pull request being opened and being
-- merged, reported per project, event source and time interval.
with opened as (
  select
    threads.`number`,
    threads.to_artifact_id,
    threads.`time` as created_at
  from {{ ref('int_github_pr_issue_threads') }} as threads
  where threads.`type` = 'PULL_REQUEST_OPENED'
),

merged as (
  select
    threads.`number`,
    threads.to_artifact_id,
    threads.`time` as merged_at
  from {{ ref('int_github_pr_issue_threads') }} as threads
  where threads.`type` = 'PULL_REQUEST_MERGED'
),

merge_times as (
  -- Pair each opened PR with its merge event(s) on the same artifact and
  -- number; PRs with no merge event drop out because of the inner join.
  -- The resulting row is timestamped at the merge.
  select
    merged.merged_at as `time`,
    opened.to_artifact_id,
    'GITHUB' as event_source,
    -- Whole minutes converted to fractional days.
    CAST(
      TIMESTAMP_DIFF(merged.merged_at, opened.created_at, minute)
      as FLOAT64
    ) / 60.0 / 24.0 as amount
  from opened
  inner join merged
    on
      opened.`number` = merged.`number`
      and opened.to_artifact_id = merged.to_artifact_id
)

select
  artifacts_by_project.project_id,
  merge_times.event_source,
  time_intervals.time_interval,
  'time_to_merge_days_average' as metric,
  AVG(merge_times.amount) as amount
from merge_times
left join {{ ref('artifacts_by_project_v1') }} as artifacts_by_project
  on merge_times.to_artifact_id = artifacts_by_project.artifact_id
-- A merge counts towards every interval whose start_date it falls on or after.
inner join {{ ref('int_time_intervals') }} as time_intervals
  on merge_times.`time` >= time_intervals.start_date
group by
  artifacts_by_project.project_id,
  merge_times.event_source,
  time_intervals.time_interval
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@ with metrics as (
from {{ ref('int_code_metric__fulltime_developers_average') }}
union all
select * from {{ ref('int_code_metric__new_contributors') }}
union all
select * from {{ ref('int_code_metric__time_to_first_response_days_average') }}
union all
select * from {{ ref('int_code_metric__time_to_merge_days_average') }}
),

aggs as (
Expand Down Expand Up @@ -66,6 +70,24 @@ aggs as (
else 0
end
) as closed_issue_count_6_months,
SUM(
case
when
metric in ('pull_request_review_comment_count', 'issue_comment_count')
and time_interval = '6 MONTHS'
then amount
else 0
end
) as comment_count_6_months,
SUM(
case
when
metric = 'release_published_count'
and time_interval = '6 MONTHS'
then amount
else 0
end
) as release_count_6_months,
SUM(
case
when
Expand Down Expand Up @@ -119,7 +141,25 @@ aggs as (
then amount
else 0
end
) as fulltime_developer_average_6_months
) as fulltime_developer_average_6_months,
SUM(
case
when
metric = 'time_to_first_response_days_average'
and time_interval = '6 MONTHS'
then amount
else 0
end
) as time_to_first_response_days_average_6_months,
SUM(
case
when
metric = 'time_to_merge_days_average'
and time_interval = '6 MONTHS'
then amount
else 0
end
) as time_to_merge_days_average_6_months
from metrics
group by
project_id,
Expand All @@ -138,7 +178,6 @@ repos as (
SUM(star_count) as star_count,
SUM(fork_count) as fork_count
from {{ ref('int_repo_metrics_by_project') }}
--WHERE r.is_fork = false
group by
project_id,
artifact_source
Expand Down Expand Up @@ -191,7 +230,11 @@ select
code_metrics.opened_pull_request_count_6_months,
code_metrics.merged_pull_request_count_6_months,
code_metrics.opened_issue_count_6_months,
code_metrics.closed_issue_count_6_months
code_metrics.closed_issue_count_6_months,
code_metrics.comment_count_6_months,
code_metrics.release_count_6_months,
code_metrics.time_to_first_response_days_average_6_months,
code_metrics.time_to_merge_days_average_6_months
from project_metadata
left join code_metrics
on
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,5 +28,9 @@ select
opened_pull_request_count_6_months,
merged_pull_request_count_6_months,
opened_issue_count_6_months,
closed_issue_count_6_months
closed_issue_count_6_months,
comment_count_6_months,
release_count_6_months,
time_to_first_response_days_average_6_months,
time_to_merge_days_average_6_months
from {{ ref('int_code_metrics_by_project') }}
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@ select distinct
) as review_comments,
JSON_VALUE(
pre.payload, "$.pull_request.author_association"
) as author_association
) as author_association,
JSON_VALUE(pre.payload, "$.number") as `number`
from pull_request_events as pre
where
JSON_VALUE(pre.payload, "$.pull_request.merged_at") is not null
Expand Down
8 changes: 8 additions & 0 deletions warehouse/metrics_mesh/models/metrics_factories.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,14 @@
ref="commits.sql",
time_aggregations=["daily", "weekly", "monthly"],
),
"comments": MetricQueryDef(
ref="comments.sql",
time_aggregations=["daily", "weekly", "monthly"],
),
"releases": MetricQueryDef(
ref="releases.sql",
time_aggregations=["daily", "weekly", "monthly"],
),
"forks": MetricQueryDef(
ref="forks.sql",
time_aggregations=["daily", "weekly", "monthly"],
Expand Down
Loading

0 comments on commit 2d9c4f2

Please sign in to comment.