Skip to content

Commit

Permalink
Scrape reviewer count and publish on website (#101)
Browse files Browse the repository at this point in the history
to showcase the people putting in the work to review and give feedback on PRs.
  • Loading branch information
GeckoEidechse authored Oct 18, 2024
1 parent 335e750 commit d554906
Show file tree
Hide file tree
Showing 7 changed files with 383 additions and 0 deletions.
42 changes: 42 additions & 0 deletions .github/workflows/periodic-scrape-reviews.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
name: Periodically scrape reviews

on:
  workflow_dispatch: # Manual run
  schedule:
    - cron: '0 0 * * 0' # Every Sunday at 00:00

jobs:
  scrape:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repo
        uses: actions/checkout@v4

      - name: Install dependencies
        run: pip install requests

      - name: Run Python script for scraping
        # Hand the token over through the environment instead of expanding the
        # secret directly into the shell script text.
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          cd scripts
          python scrape-for-reviews.py "$GITHUB_TOKEN"

      - name: Check for changes
        id: git_diff
        run: |
          git config --global user.name 'github-actions[bot]'
          git config --global user.email 'github-actions[bot]@users.noreply.github.com'
          git add .
          if git diff-index --quiet HEAD; then
            echo "changes=false" >> "$GITHUB_OUTPUT"
          else
            echo "changes=true" >> "$GITHUB_OUTPUT"
          fi

      # Only open (or update) the PR when the scrape actually changed a file.
      - name: Create Pull Request
        if: ${{ steps.git_diff.outputs.changes == 'true' }}
        uses: peter-evans/create-pull-request@v6
        with:
          commit-message: "Update review counts"
          branch: automated/update-reviewer-counts
          title: "Update review counts"
          body: "Updates the lists of review counts"
224 changes: 224 additions & 0 deletions scripts/scrape-for-reviews.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,224 @@
import requests
from collections import defaultdict
import datetime
import sys

# Optional GitHub API token, taken from the first command-line argument.
# Supplying one avoids the rate limit; running without one is supported too.
github_token = sys.argv[1] if len(sys.argv) > 1 else None

# Replace with the GitHub organization name
github_org = "R2Northstar"

# Base URL for GitHub API
base_url = f"https://api.github.com/orgs/{github_org}"

# Headers for authentication. Only send an Authorization header when a token
# was actually supplied; otherwise the header would carry the literal text
# "token None", which is not a valid credential.
headers = {"Authorization": f"token {github_token}"} if github_token else {}


def get_repos():
    """Fetch all repositories for the organization, handling pagination.

    Returns:
        A list with one decoded JSON object per repository, in the order the
        API returned them.

    Raises:
        requests.HTTPError: If a page request comes back with an error status.
        requests.Timeout: If a page request does not complete in time.
    """
    repos = []
    page = 1

    while True:
        url = f"{base_url}/repos?per_page=100&page={page}"
        # A timeout keeps a stalled connection from hanging the run forever.
        response = requests.get(url, headers=headers, timeout=30)
        response.raise_for_status()
        page_repos = response.json()

        if not page_repos:  # If the list is empty, we've reached the last page
            break

        repos.extend(page_repos)
        page += 1

    return repos


def get_pull_requests(repo_name):
    """Fetch all pull requests in the repository, handling pagination.

    Pull requests in every state are requested (``state=all``). Progress is
    printed per repository and per page.

    Args:
        repo_name: Name of a repository inside the configured organization.

    Returns:
        A list with one decoded JSON object per pull request.

    Raises:
        requests.HTTPError: If a page request comes back with an error status.
        requests.Timeout: If a page request does not complete in time.
    """
    print(f"{repo_name=}")
    prs = []
    page = 1

    while True:
        print(f"{page=}")
        url = f"https://api.github.com/repos/{github_org}/{repo_name}/pulls?state=all&per_page=100&page={page}"
        # A timeout keeps a stalled connection from hanging the run forever.
        response = requests.get(url, headers=headers, timeout=30)
        response.raise_for_status()
        page_prs = response.json()

        if not page_prs:  # If the list is empty, we've reached the last page
            break

        prs.extend(page_prs)
        page += 1

    return prs


def get_reviews_for_pr(repo_name, pr_number):
    """Fetch all reviews for a given pull request in a specific repository.

    The reviews endpoint is paginated like the other list endpoints used by
    this script, so every page is requested; reading only the first response
    would silently drop reviews on heavily reviewed pull requests.

    Args:
        repo_name: Name of a repository inside the configured organization.
        pr_number: Number of the pull request within that repository.

    Returns:
        A list with one decoded JSON object per review.

    Raises:
        requests.HTTPError: If a page request comes back with an error status.
        requests.Timeout: If a page request does not complete in time.
    """
    per_page = 100
    reviews = []
    page = 1

    while True:
        url = (
            f"https://api.github.com/repos/{github_org}/{repo_name}"
            f"/pulls/{pr_number}/reviews?per_page={per_page}&page={page}"
        )
        # A timeout keeps a stalled connection from hanging the run forever.
        response = requests.get(url, headers=headers, timeout=30)
        response.raise_for_status()
        page_reviews = response.json()
        reviews.extend(page_reviews)

        # A short page is the last one. Stopping here (instead of waiting for
        # an empty page) keeps the common case at one request per pull request.
        if len(page_reviews) < per_page:
            break

        page += 1

    return reviews


# Fetch all repositories in the organization
repos = get_repos()

# Maps each reviewer's login to the list of submission timestamps of their
# counted reviews; per-user counts are derived later from the list lengths.
review_dict = defaultdict(list)


def is_trivial_review(review_text: str, min_review_length: int = 30):
    """Decide whether a review is not extensive enough to be counted.

    A review is trivial when it has no text, when its text contains "lgtm"
    (in any letter case), or when the text, ignoring surrounding whitespace,
    is shorter than ``min_review_length`` characters.

    Args:
        review_text: Body of the review. ``None`` is tolerated and treated
            like an empty body.
        min_review_length: Minimum number of characters a review needs in
            order to count.

    Returns:
        True if the review should be discarded, False otherwise.
    """
    if not review_text:
        return True

    # Padding must not help a short review over the length threshold.
    stripped_text = review_text.strip()

    if "lgtm" in stripped_text.lower():
        return True

    return len(stripped_text) < min_review_length


# Walk every pull request of every repository and record the submission time
# of each review that is worth counting, grouped by the reviewer's login.
for repo in repos:
    repo_name = repo["name"]
    prs = get_pull_requests(repo_name)

    for pr in prs:
        pr_number = pr["number"]
        reviews = get_reviews_for_pr(repo_name, pr_number)

        for review in reviews:
            if not review["user"] or not review["user"]["login"]:
                continue
            if not review["body"]:
                # Current object is comment on a review not an actual review, skip
                # (a missing body is handled the same way as an empty one)
                continue
            if is_trivial_review(review["body"]):
                continue

            submitted_at = review.get("submitted_at")
            if not submitted_at:
                # Without a timestamp the review cannot be placed in any
                # timeframe; skip it rather than abort the whole scrape.
                continue

            user = review["user"]["login"]
            review_dict[user].append(
                # Spell the "Z" suffix as an explicit UTC offset before parsing
                datetime.datetime.fromisoformat(submitted_at.replace("Z", "+00:00"))
            )


def filter_by_timeframe(reviews_dict, weeks=1):
    """
    Keep only the reviews that are less than `weeks` weeks old.

    Reviewers with no review left after filtering are omitted from the
    result. The input mapping is not modified; a new ``defaultdict(list)``
    is returned.
    """
    now = datetime.datetime.now(datetime.timezone.utc)
    recent_reviews = defaultdict(list)

    for reviewer, review_times in reviews_dict.items():
        still_recent = [
            stamp
            for stamp in review_times
            if now - stamp < datetime.timedelta(weeks=weeks)
        ]
        # Only reviewers with at least one remaining review get an entry
        if still_recent:
            recent_reviews[reviewer] = still_recent

    return recent_reviews


def sum_up_reviews(reviews_dict):
    """Return a plain dict mapping each reviewer to their number of reviews."""
    totals = {}
    for reviewer, review_times in reviews_dict.items():
        totals[reviewer] = len(review_times)
    return totals


def sort_alphabetically(reviews_dict):
    """Return the dict's (reviewer, value) pairs sorted by reviewer name, ignoring case."""

    def lowercase_name(entry):
        return entry[0].lower()

    pairs = list(reviews_dict.items())
    pairs.sort(key=lowercase_name)
    return pairs


# Generate TypeScript code
def generate_typescript_code(sorted_review_counts, timeframe="total"):
    """Build the source text of a TypeScript module exporting review counts.

    Args:
        sorted_review_counts: Iterable of ``(user, count)`` pairs; entries are
            emitted in the order given.
        timeframe: Suffix of the exported constant, which is named
            ``review_counts_<timeframe>``.

    Returns:
        A string made of a header comment, the ``ReviewCount`` interface
        definition and the exported array literal.
    """
    file_header_string = "// Auto-generated from Python script\n"

    definition_string = """
export interface ReviewCount {
url?: string;
name: string;
count: number;
}
"""
    list_start_string = f"""export const review_counts_{timeframe}: ReviewCount[] = ["""
    list_end_string = """
]
"""

    # One object literal per reviewer. Doubled braces are literal "{" / "}" in
    # an f-string; `user` is inserted verbatim, without any escaping.
    reviewer_list_string = ""
    for user, count in sorted_review_counts:
        reviewer_list_string += f"""
{{
url: "https://github.com/{user}",
name: "{user}",
count: {count},
}},"""

    return (
        file_header_string
        + definition_string
        + list_start_string
        + reviewer_list_string
        + list_end_string
    )


# Write one TypeScript data file per timeframe. Each entry is
# (output path, suffix of the exported constant, window in weeks); a window
# of None means "all time", i.e. no filtering.
for output_path, timeframe, weeks in (
    ("../src/data/reviewer-count.ts", "total", None),  # Total stats
    ("../src/data/reviewer-count-monthly.ts", "monthly", 4),  # Monthly stats
    ("../src/data/reviewer-count-weekly.ts", "weekly", 1),  # Weekly stats
):
    with open(output_path, "w") as f:
        if weeks is None:
            selected_reviews = review_dict
        else:
            selected_reviews = filter_by_timeframe(review_dict, weeks=weeks)

        f.write(
            generate_typescript_code(
                sort_alphabetically(sum_up_reviews(selected_reviews)), timeframe
            )
        )
82 changes: 82 additions & 0 deletions src/components/Reviewers.astro
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
---
// Renders three tables of review counts (weekly, monthly, total), each ordered
// by descending count.
import { review_counts_total } from "../data/reviewer-count";
import { review_counts_monthly } from "../data/reviewer-count-monthly";
import { review_counts_weekly } from "../data/reviewer-count-weekly";
---

<div class="reviewer-stats-flex">
  <div>
    <h2>Weekly review count</h2>
    <table>
      <tr>
        <th>Reviewer</th>
        <th># of reviews</th>
      </tr>
      {
        // Sort a copy: sort() works in place and would otherwise reorder the
        // array exported by the data module.
        [...review_counts_weekly]
          .sort((a, b) => b.count - a.count)
          .map((reviewer) => (
            <tr>
              <td>{reviewer.name}</td>
              <td>{reviewer.count}</td>
            </tr>
          ))
      }
    </table>
  </div>
  <div>
    <h2>Monthly review count</h2>
    <table>
      <tr>
        <th>Reviewer</th>
        <th># of reviews</th>
      </tr>
      {
        // Sort a copy so the imported array stays untouched.
        [...review_counts_monthly]
          .sort((a, b) => b.count - a.count)
          .map((reviewer) => (
            <tr>
              <td>{reviewer.name}</td>
              <td>{reviewer.count}</td>
            </tr>
          ))
      }
    </table>
  </div>
</div>
<div class="total-reviewers">
  <div>
    <h2>Total review count</h2>
    <table>
      <tr>
        <th>Reviewer</th>
        <th># of reviews</th>
      </tr>
      {
        // filter() already returns a new array, so sorting its result leaves
        // the imported array untouched.
        review_counts_total
          .filter((c) => c.count > 1) // Require at least 2 reviews to reduce table size
          .sort((a, b) => b.count - a.count)
          .map((reviewer) => (
            <tr>
              <td>{reviewer.name}</td>
              <td>{reviewer.count}</td>
            </tr>
          ))
      }
    </table>
  </div>
</div>

<style>
  .reviewer-stats-flex {
    display: flex;
    justify-content: center;
    flex-wrap: wrap;
    gap: 4em;
  }
  .total-reviewers {
    margin: 4em;
    display: flex;
    justify-content: center;
  }
</style>
9 changes: 9 additions & 0 deletions src/data/reviewer-count-monthly.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
// Auto-generated from Python script

// One reviewer's entry: display name, number of reviews and an optional URL.
// NOTE: this file is written by scripts/scrape-for-reviews.py, which
// overwrites it on every run, so manual edits here do not persist.
export interface ReviewCount {
url?: string;
name: string;
count: number;
}
// Review counts for the 4-week window used by the scrape script.
export const review_counts_monthly: ReviewCount[] = [
]
9 changes: 9 additions & 0 deletions src/data/reviewer-count-weekly.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
// Auto-generated from Python script

// One reviewer's entry: display name, number of reviews and an optional URL.
// NOTE: this file is written by scripts/scrape-for-reviews.py, which
// overwrites it on every run, so manual edits here do not persist.
export interface ReviewCount {
url?: string;
name: string;
count: number;
}
// Review counts for the 1-week window used by the scrape script.
export const review_counts_weekly: ReviewCount[] = [
]
9 changes: 9 additions & 0 deletions src/data/reviewer-count.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
// Auto-generated from Python script

// One reviewer's entry: display name, number of reviews and an optional URL.
// NOTE: this file is written by scripts/scrape-for-reviews.py, which
// overwrites it on every run, so manual edits here do not persist.
export interface ReviewCount {
url?: string;
name: string;
count: number;
}
// All-time review counts (no timeframe filter applied by the scrape script).
export const review_counts_total: ReviewCount[] = [
]
Loading

0 comments on commit d554906

Please sign in to comment.