Skip to content

Commit

Permalink
Merge pull request #66 from TogetherCrew/feature/link_pr_to_issue
Browse files Browse the repository at this point in the history
Link PR to Issue
  • Loading branch information
scientiststwin authored Mar 13, 2024
2 parents 88fd59c + 4b8080e commit 971bc2d
Show file tree
Hide file tree
Showing 7 changed files with 99 additions and 7 deletions.
36 changes: 29 additions & 7 deletions dags/github.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from airflow import DAG
from airflow.decorators import task
from github.github_api_helpers import (
extract_linked_issues_from_pr,
fetch_commit_files,
fetch_org_details,
get_all_commits,
Expand Down Expand Up @@ -181,6 +182,25 @@ def extract_pull_requests(data):
new_data = {"prs": prs, **data}
return new_data

@task
def extract_pull_request_linked_issues(data):
    """
    Enrich every PR dict in the pipeline payload with its linked issues.

    Reads ``repo`` and ``prs`` from the upstream task's payload, scrapes the
    linked issues for each PR via ``extract_linked_issues_from_pr``, and
    returns the payload with each PR carrying a ``linked_issues`` list.
    """
    logging.info(f"All data from last stage: {data}")
    repository = data["repo"]
    repo_owner = repository["owner"]["login"]
    repository_name = repository["name"]

    enriched_prs = [
        {
            **pull_request,
            "linked_issues": extract_linked_issues_from_pr(
                owner=repo_owner,
                repo=repository_name,
                pull_number=pull_request["number"],
            ),
        }
        for pull_request in data["prs"]
    ]

    return {**data, "prs": enriched_prs}

@task
def transform_pull_requests(data):
logging.info(f"All data from last stage: {data}")
Expand Down Expand Up @@ -515,11 +535,19 @@ def load_commits_files_changes(data):
transform_label = transform_labels.expand(data=labels)
load_label = load_labels.expand(data=transform_label)

issues = extract_issues.expand(data=repos)
transform_issue = transform_issues.expand(data=issues)
load_issue = load_issues.expand(data=transform_issue)
load_contributors >> load_issue
load_label >> load_issue

prs = extract_pull_requests.expand(data=repos)
transform_prs = transform_pull_requests.expand(data=prs)
prs_linked_issues = extract_pull_request_linked_issues.expand(data=prs)
transform_prs = transform_pull_requests.expand(data=prs_linked_issues)
load_prs = load_pull_requests.expand(data=transform_prs)
load_contributors >> load_prs
load_label >> load_prs
load_issue >> load_prs

pr_files_changes = extract_pull_request_files_changes.expand(data=prs)
transform_pr_files_changes = transform_pull_request_files_changes.expand(
Expand All @@ -529,12 +557,6 @@ def load_commits_files_changes(data):
data=transform_pr_files_changes
)

issues = extract_issues.expand(data=repos)
transform_issue = transform_issues.expand(data=issues)
load_issue = load_issues.expand(data=transform_issue)
load_contributors >> load_issue
load_label >> load_issue

pr_reviews = extract_pr_review.expand(data=prs)
transform_pr_review = transform_pr_review.expand(data=pr_reviews)
load_pr_review = load_pr_review.expand(data=transform_pr_review)
Expand Down
1 change: 1 addition & 0 deletions dags/github/github_api_helpers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from .labels import get_all_repo_labels
from .orgs import fetch_org_details, get_all_org_members
from .pull_requests import (
extract_linked_issues_from_pr,
get_all_comments_of_pull_request,
get_all_commits_of_pull_request,
get_all_pull_request_files,
Expand Down
19 changes: 19 additions & 0 deletions dags/github/github_api_helpers/issues.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,25 @@
from .smart_proxy import get


def fetch_issue(owner: str, repo: str, issue_number: int):
    """
    Fetches a specific issue from a GitHub repository.
    :param owner: The owner of the repository.
    :param repo: The name of the repository.
    :param issue_number: The number of the issue.
    :return: The issue data.
    """
    issue_data = get(
        f"https://api.github.com/repos/{owner}/{repo}/issues/{issue_number}"
    ).json()

    logging.info(
        f"Fetched issue {issue_number} for {owner}/{repo}. Issue: {issue_data}"
    )
    return issue_data


# Issues
def fetch_issues(owner: str, repo: str, page: int, per_page: int = 100):
"""
Expand Down
35 changes: 35 additions & 0 deletions dags/github/github_api_helpers/pull_requests.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
import logging

from bs4 import BeautifulSoup

from .issues import fetch_issue
from .smart_proxy import get


Expand Down Expand Up @@ -58,6 +61,38 @@ def get_all_pull_requests(owner: str, repo: str):
return all_pull_requests


def extract_issue_info_from_url(url):
    """
    Parse a GitHub issue URL into its owner / repo / issue-number parts.

    Works for both absolute URLs ("https://github.com/org/repo/issues/42")
    and repo-relative hrefs ("/org/repo/issues/42") — both end with the
    ".../{owner}/{repo}/issues/{issue_number}" path segments this relies on.

    :param url: The issue URL or href.
    :return: dict with "owner" (str), "repo" (str) and "issue_number" (int).
    """
    parts = url.split("/")
    return {
        "owner": parts[-4],
        "repo": parts[-3],
        # Cast to int so the value matches fetch_issue's declared
        # `issue_number: int` parameter type.
        "issue_number": int(parts[-1]),
    }


def extract_linked_issues_from_pr(owner: str, repo: str, pull_number: int):
    """
    Scrape the issues linked to a pull request from its GitHub HTML page,
    then fetch each linked issue's data through the API.

    NOTE(review): this keys off a GitHub UI CSS class
    ("Truncate truncate-with-responsive-width my-1"), so it will silently
    return [] if GitHub changes its markup — presumably used because the
    REST API does not expose the linked-issues relationship; confirm.

    :param owner: The owner of the repository.
    :param repo: The name of the repository.
    :param pull_number: The number of the pull request.
    :return: A list of issue data dicts, one per linked issue.
    """
    html_pr_url = f"https://github.com/{owner}/{repo}/pull/{pull_number}"
    response = get(html_pr_url)
    linked_issues = []

    soup = BeautifulSoup(response.text, "html.parser")
    html_linked_issues = soup.find_all(
        "span",
        class_="Truncate truncate-with-responsive-width my-1",
        attrs={"data-view-component": "true"},
    )
    for html_linked_issue in html_linked_issues:
        anchor = html_linked_issue.find("a")
        if anchor is None:
            # A matching span without an <a> would otherwise raise
            # AttributeError on .get(); skip it rather than fail the task.
            continue
        issue_data = extract_issue_info_from_url(anchor.get("href"))
        issue_info = fetch_issue(
            issue_data["owner"], issue_data["repo"], issue_data["issue_number"]
        )
        linked_issues.append(issue_info)

    return linked_issues


def fetch_pull_requests_commits(
owner: str, repo: str, pull_number: int, page: int, per_page: int = 100
):
Expand Down
1 change: 1 addition & 0 deletions dags/github/neo4j_storage/neo4j_enums.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,4 @@ class Relationship(Enum):
AUTHORED_BY = "AUTHORED_BY"
IS_ON = "IS_ON"
CHANGED = "CHANGED"
LINKED = "LINKED"
13 changes: 13 additions & 0 deletions dags/github/neo4j_storage/pull_requests.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ def save_pull_request_to_neo4j(pr: dict, repository_id: str):
assignees = pr.pop("assignees", None)
requested_reviewers = pr.pop("requested_reviewers", None)
labels = pr.pop("labels", None)
linked_issues = pr.pop("linked_issues", None)
cleaned_pr = remove_nested_collections(pr)

if assignee:
Expand Down Expand Up @@ -56,6 +57,16 @@ def save_pull_request_to_neo4j(pr: dict, repository_id: str):
SET haslb.latestSavedAt = datetime()
"""

linked_issues_query = f"""
WITH pr
UNWIND $linked_issues as linked_issue
MERGE (i:{Node.Issue.value} {{id: linked_issue.id}})
SET i += linked_issue, i.latestSavedAt = datetime()
WITH pr, i
MERGE (pr)-[islinked:{Relationship.LINKED.value}]->(i)
SET islinked.latestSavedAt = datetime()
"""

with driver.session() as session:
session.execute_write(
lambda tx: tx.run(
Expand All @@ -74,6 +85,7 @@ def save_pull_request_to_neo4j(pr: dict, repository_id: str):
{ assignees_query }
{ requested_reviewers_query }
{ labels_query }
{ linked_issues_query }
""",
pr=cleaned_pr,
Expand All @@ -83,6 +95,7 @@ def save_pull_request_to_neo4j(pr: dict, repository_id: str):
assignees=assignees,
labels=labels,
requested_reviewers=requested_reviewers,
linked_issues=linked_issues,
)
)
driver.close()
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,4 @@ python-dotenv>=1.0.0, <2.0.0
urlextract==1.8.0
tc-hivemind-backend==1.1.3
traceloop-sdk==0.9.4
beautifulsoup4==4.12.3

0 comments on commit 971bc2d

Please sign in to comment.