-
Notifications
You must be signed in to change notification settings - Fork 15
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Scrape reviewer count and publish on website (#101)
to showcase the people putting in the work to review and give feedback on PRs.
- Loading branch information
1 parent
335e750
commit d554906
Showing
7 changed files
with
383 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
name: Periodically scrape reviews

on:
  workflow_dispatch: # Manual run
  schedule:
    - cron: '0 0 * * 0' # Minute 0, hour 0, every Sunday

# The "Create Pull Request" step pushes a branch and opens a pull request
# with the workflow token, so the token needs write access to both scopes.
permissions:
  contents: write
  pull-requests: write

jobs:
  scrape:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repo
        uses: actions/checkout@v4

      - name: Install dependencies
        run: pip install requests

      - name: Run Python script for scraping
        # Pass the token through the environment instead of expanding the
        # secret expression directly inside the shell script text.
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        # The script writes to paths relative to the scripts directory,
        # so it has to be started from there.
        run: |
          cd scripts
          python scrape-for-reviews.py "$GITHUB_TOKEN"

      - name: Check for changes
        id: git_diff
        # Stage everything and compare against HEAD; the result is exposed
        # as the `changes` step output consumed by the next step.
        run: |
          git config --global user.name 'github-actions[bot]'
          git config --global user.email 'github-actions[bot]@users.noreply.github.com'
          git add .
          if git diff-index --quiet HEAD; then
            echo "changes=false" >> $GITHUB_OUTPUT
          else
            echo "changes=true" >> $GITHUB_OUTPUT
          fi

      - name: Create Pull Request
        if: ${{ steps.git_diff.outputs.changes == 'true' }}
        uses: peter-evans/create-pull-request@v6
        with:
          commit-message: "Update review counts"
          branch: automated/update-reviewer-counts
          title: "Update review counts"
          body: "Updates the lists of review counts"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,224 @@ | ||
import requests | ||
from collections import defaultdict | ||
import datetime | ||
import sys | ||
|
||
# Optional GitHub API token; without one the requests are sent unauthenticated.
github_token = None

# Supply a GitHub token as the first command-line argument to avoid the rate
# limit. An empty argument is treated the same as no argument at all.
if len(sys.argv) > 1:
    github_token = sys.argv[1] or None

# Replace with the GitHub organization name
github_org = "R2Northstar"

# Base URL for GitHub API
base_url = f"https://api.github.com/orgs/{github_org}"

# Headers for authentication. The Authorization header is only sent when a
# token is available: formatting a missing token would produce the literal
# value "token None", which is not a valid credential.
headers = {"Authorization": f"token {github_token}"} if github_token else {}
|
||
|
||
def get_repos():
    """Fetch all repositories for the organization, handling pagination.

    Pages of up to 100 repositories are requested until an empty page is
    returned.

    Returns:
        list: The decoded JSON repository objects of all pages.

    Raises:
        requests.HTTPError: If a page request returns an error status.
        requests.exceptions.Timeout: If the server does not respond in time.
    """
    repos = []
    page = 1

    while True:
        url = f"{base_url}/repos?per_page=100&page={page}"
        # Without a timeout a stalled connection would block the run forever
        response = requests.get(url, headers=headers, timeout=30)
        response.raise_for_status()
        page_repos = response.json()

        if not page_repos:  # If the list is empty, we've reached the last page
            break

        repos.extend(page_repos)
        page += 1

    return repos
|
||
|
||
def get_pull_requests(repo_name):
    """Fetch all pull requests in the repository, handling pagination.

    Pull requests in every state are requested (`state=all`), 100 per page,
    until an empty page is returned. Progress is printed to stdout.

    Args:
        repo_name: Name of the repository inside the configured organization.

    Returns:
        list: The decoded JSON pull request objects of all pages.

    Raises:
        requests.HTTPError: If a page request returns an error status.
        requests.exceptions.Timeout: If the server does not respond in time.
    """
    print(f"{repo_name=}")
    prs = []
    page = 1

    while True:
        print(f"{page=}")
        url = f"https://api.github.com/repos/{github_org}/{repo_name}/pulls?state=all&per_page=100&page={page}"
        # Without a timeout a stalled connection would block the run forever
        response = requests.get(url, headers=headers, timeout=30)
        response.raise_for_status()
        page_prs = response.json()

        if not page_prs:  # If the list is empty, we've reached the last page
            break

        prs.extend(page_prs)
        page += 1

    return prs
|
||
|
||
def get_reviews_for_pr(repo_name, pr_number):
    """Fetch all reviews for a given pull request in a specific repository.

    The endpoint is paginated, so a single request only returns the first
    page. Pages of 100 reviews are requested until a page comes back that is
    not completely filled, which marks the last page.

    Args:
        repo_name: Name of the repository inside the configured organization.
        pr_number: Number of the pull request within that repository.

    Returns:
        list: The decoded JSON review objects of all pages.

    Raises:
        requests.HTTPError: If a page request returns an error status.
        requests.exceptions.Timeout: If the server does not respond in time.
    """
    per_page = 100
    reviews = []
    page = 1

    while True:
        url = (
            f"https://api.github.com/repos/{github_org}/{repo_name}"
            f"/pulls/{pr_number}/reviews?per_page={per_page}&page={page}"
        )
        # Without a timeout a stalled connection would block the run forever
        response = requests.get(url, headers=headers, timeout=30)
        response.raise_for_status()
        page_reviews = response.json()
        reviews.extend(page_reviews)

        # A page that is not full is the last one; stopping here avoids an
        # extra request per pull request just to receive an empty page.
        if len(page_reviews) < per_page:
            return reviews

        page += 1
|
||
|
||
# Fetch all repositories in the organization | ||
repos = get_repos() | ||
|
||
# Dictionary mapping each reviewer's login to the timestamps of their counted reviews | ||
review_dict = defaultdict(list) | ||
|
||
|
||
def is_trivial_review(review_text: "str | None", min_review_length: int = 30):
    """Decide whether a review should be discarded for not being extensive enough.

    A review counts as trivial when it has no text at all, when its text
    contains "lgtm" (case-insensitive, anywhere in the text), or when the
    text is shorter than `min_review_length` characters.

    Args:
        review_text: Body of the review; `None` is treated like an empty body.
        min_review_length: Minimum number of characters a review needs in
            order to be counted.

    Returns:
        bool: True if the review is trivial, False otherwise.
    """
    # A missing body carries no content; without this guard `.lower()` raises
    if review_text is None:
        return True

    if "lgtm" in review_text.lower():
        return True

    return len(review_text) < min_review_length
|
||
|
||
# Walk every pull request of every repository and record the submission time
# of each counted review under the login of its author.
for repo in repos:
    repo_name = repo["name"]
    prs = get_pull_requests(repo_name)

    for pr in prs:
        pr_number = pr["number"]
        reviews = get_reviews_for_pr(repo_name, pr_number)

        for review in reviews:
            if not review["user"] or not review["user"]["login"]:
                continue
            if not review["body"]:
                # Current object is comment on a review not an actual review, skip.
                # The truthiness check also covers a missing (None) body, which
                # would otherwise crash the triviality check below.
                continue
            if is_trivial_review(review["body"]):
                continue

            submitted_at = review.get("submitted_at")
            if not submitted_at:
                # No submission timestamp to record for this review
                continue

            user = review["user"]["login"]
            review_dict[user].append(
                # fromisoformat() does not accept the "Z" suffix on every
                # Python version, so it is rewritten as an explicit UTC offset
                datetime.datetime.fromisoformat(submitted_at.replace("Z", "+00:00"))
            )
|
||
|
||
def filter_by_timeframe(reviews_dict, weeks=1):
    """Keep only the reviews submitted within the last `weeks` weeks.

    Reviewers that have no review left after filtering are dropped.

    Args:
        reviews_dict: Mapping of reviewer name to a list of timezone-aware
            datetimes, one per review.
        weeks: Length of the time window, counted back from the current
            UTC time.

    Returns:
        defaultdict: Mapping of reviewer name to the list of review times
        that fall inside the window; the input mapping is not modified.
    """
    # Compute the cutoff once instead of rebuilding the timedelta per review.
    # `review_time > cutoff` is the same test as `now - review_time < window`.
    cutoff = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(
        weeks=weeks
    )

    filtered_review_counts = defaultdict(list)
    for reviewer, reviews in reviews_dict.items():
        recent_reviews = [
            review_time for review_time in reviews if review_time > cutoff
        ]
        # Only reviewers with at least one remaining review are kept
        if recent_reviews:
            filtered_review_counts[reviewer] = recent_reviews

    return filtered_review_counts
|
||
|
||
def sum_up_reviews(reviews_dict):
    """Map every reviewer to the number of reviews recorded for them."""
    totals = {}
    for reviewer, review_times in reviews_dict.items():
        totals[reviewer] = len(review_times)
    return totals
|
||
|
||
def sort_alphabetically(reviews_dict):
    """Return the (reviewer, value) pairs ordered case-insensitively by reviewer name."""

    def lowercase_name(entry):
        return entry[0].lower()

    reviewer_entries = list(reviews_dict.items())
    reviewer_entries.sort(key=lowercase_name)
    return reviewer_entries
|
||
|
||
# Generate TypeScript code | ||
def generate_typescript_code(sorted_review_counts, timeframe="total"):
    """Render review counts as the source text of a TypeScript module.

    The module declares the `ReviewCount` interface and exports a constant
    named `review_counts_<timeframe>` holding one object per reviewer.

    Args:
        sorted_review_counts: Iterable of `(user, count)` pairs, emitted in
            the order given.
        timeframe: Suffix used in the name of the exported constant.

    Returns:
        str: The complete TypeScript source text.
    """
    # Local import: only needed here, to escape names as string literals
    import json

    file_header_string = "// Auto-generated from Python script\n"
    definition_string = (
        "\n"
        "export interface ReviewCount {\n"
        "url?: string;\n"
        "name: string;\n"
        "count: number;\n"
        "}\n"
    )
    list_start_string = f"export const review_counts_{timeframe}: ReviewCount[] = ["
    list_end_string = "\n]\n"

    # json.dumps() emits a double-quoted literal with quotes and backslashes
    # escaped, so an unusual name cannot break out of the TypeScript string.
    # For plain names the result is identical to simple interpolation.
    reviewer_entries = [
        "\n{\n"
        f"url: {json.dumps('https://github.com/' + user)},\n"
        f"name: {json.dumps(user)},\n"
        f"count: {count},\n"
        "},"
        for user, count in sorted_review_counts
    ]

    return (
        file_header_string
        + definition_string
        + list_start_string
        + "".join(reviewer_entries)
        + list_end_string
    )
|
||
|
||
# One generated TypeScript module per reporting window. A window of None
# means "all time": the collected reviews are used without filtering.
output_targets = [
    ("../src/data/reviewer-count.ts", "total", None),  # Total stats
    ("../src/data/reviewer-count-monthly.ts", "monthly", 4),  # Monthly stats
    ("../src/data/reviewer-count-weekly.ts", "weekly", 1),  # Weekly stats
]

for output_path, timeframe_label, window_weeks in output_targets:
    with open(output_path, "w") as f:
        if window_weeks is None:
            selected_reviews = review_dict
        else:
            selected_reviews = filter_by_timeframe(review_dict, weeks=window_weeks)

        ordered_counts = sort_alphabetically(sum_up_reviews(selected_reviews))
        f.write(generate_typescript_code(ordered_counts, timeframe_label))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,82 @@ | ||
---
import { review_counts_total } from "../data/reviewer-count";
import { review_counts_monthly } from "../data/reviewer-count-monthly";
import { review_counts_weekly } from "../data/reviewer-count-weekly";

/**
 * Returns a copy of `entries` ordered by review count, highest first.
 * Sorting a copy keeps the arrays exported by the data modules unmodified;
 * calling `.sort()` on them directly would reorder them in place.
 */
function byCountDescending<T extends { count: number }>(
  entries: readonly T[],
): T[] {
  return [...entries].sort((a, b) => b.count - a.count);
}

const weeklyReviewers = byCountDescending(review_counts_weekly);
const monthlyReviewers = byCountDescending(review_counts_monthly);
// Require at least 2 reviews to reduce table size
const totalReviewers = byCountDescending(review_counts_total).filter(
  (c) => c.count > 1,
);
---

<div class="reviewer-stats-flex">
  <div>
    <h2>Weekly review count</h2>
    <table>
      <tr>
        <th>Reviewer</th>
        <th># of reviews</th>
      </tr>
      {
        weeklyReviewers.map((reviewer) => (
          <tr>
            <td>{reviewer.name}</td>
            <td>{reviewer.count}</td>
          </tr>
        ))
      }
    </table>
  </div>
  <div>
    <h2>Monthly review count</h2>
    <table>
      <tr>
        <th>Reviewer</th>
        <th># of reviews</th>
      </tr>
      {
        monthlyReviewers.map((reviewer) => (
          <tr>
            <td>{reviewer.name}</td>
            <td>{reviewer.count}</td>
          </tr>
        ))
      }
    </table>
  </div>
</div>
<div class="total-reviewers">
  <div>
    <h2>Total review count</h2>
    <table>
      <tr>
        <th>Reviewer</th>
        <th># of reviews</th>
      </tr>
      {
        totalReviewers.map((reviewer) => (
          <tr>
            <td>{reviewer.name}</td>
            <td>{reviewer.count}</td>
          </tr>
        ))
      }
    </table>
  </div>
</div>

<style>
  .reviewer-stats-flex {
    display: flex;
    justify-content: center;
    flex-wrap: wrap;
    gap: 4em;
  }
  .total-reviewers {
    margin: 4em;
    display: flex;
    justify-content: center;
  }
</style>
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
// Auto-generated from Python script | ||
|
||
export interface ReviewCount { | ||
url?: string; | ||
name: string; | ||
count: number; | ||
} | ||
export const review_counts_monthly: ReviewCount[] = [ | ||
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
// Auto-generated from Python script | ||
|
||
export interface ReviewCount { | ||
url?: string; | ||
name: string; | ||
count: number; | ||
} | ||
export const review_counts_weekly: ReviewCount[] = [ | ||
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
// Auto-generated from Python script | ||
|
||
export interface ReviewCount { | ||
url?: string; | ||
name: string; | ||
count: number; | ||
} | ||
export const review_counts_total: ReviewCount[] = [ | ||
] |
Oops, something went wrong.