diff --git a/.github/workflows/start.staging.yml b/.github/workflows/start.staging.yml new file mode 100644 index 00000000..842e3bda --- /dev/null +++ b/.github/workflows/start.staging.yml @@ -0,0 +1,9 @@ +name: Staging CI/CD Pipeline + +on: pull_request + +jobs: + ci: + uses: TogetherCrew/operations/.github/workflows/ci.yml@main + secrets: + CC_TEST_REPORTER_ID: ${{ secrets.CC_TEST_REPORTER_ID }} \ No newline at end of file diff --git a/.gitignore b/.gitignore index c73c1acb..63a32c88 100644 --- a/.gitignore +++ b/.gitignore @@ -55,6 +55,7 @@ coverage.xml .hypothesis/ .pytest_cache/ cover/ +coverage # Translations *.mo diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 00000000..56eba672 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,12 @@ +# It's recommended that we use `bullseye` for Python (alpine isn't suitable as it conflcts with numpy) +FROM python:3.10-bullseye AS base +WORKDIR /project +COPY . . +RUN pip3 install -r requirements.txt + +FROM base AS test +RUN chmod +x docker-entrypoint.sh +CMD ["./docker-entrypoint.sh"] + +FROM base AS prod +CMD ["echo", "aiflow dags should be running on airlfow container"] \ No newline at end of file diff --git a/Readme.md b/Readme.md index c51ca256..a357ed2d 100644 --- a/Readme.md +++ b/Readme.md @@ -2,17 +2,16 @@ ## Running the app - You can quickly launch the application using `Docker Compose`: ```bash docker-compose --profile flower up ``` -## Lint the code +## Lint the code The code can be linted by using the below command ```bash python -m black . -``` +``` \ No newline at end of file diff --git a/dags/businees-requremensts.txt b/dags/businees-requremensts.txt deleted file mode 100644 index 70fc4d76..00000000 --- a/dags/businees-requremensts.txt +++ /dev/null @@ -1,40 +0,0 @@ -1. PRs - 1. Title - OK - "title" field - 2. Description - OK - "title" field - 3. Author - OK - "user" field - 4. Date - OK - "created_at" field - 5. Commits (see below) ######## - OK - "_links.commits" - 6. 
Linked issues (see below) ######## - - 7. Comments (with author, date, reactions, and mentions) - -> 2ways -> 1. get all of the comments repository "_links.comments", 2. get all of the comments's PR - - "user" field - - "created_at" field - - "reactions" info - - "reactions.url" user reactions - 8. review_comments - - "user" field - - "created_at" field - - "reactions" info - - "reactions.url" user reactions - 9. status (open, in review, closed) - OK - "state" field - -2. Commits - 2. Description - "commit.message" field - 3. Author - OK - "author" field - 4. Date - OK - "commit.committer.date" field - 5. Scripts that were changed - OK - /repos/:owner/:repo/commits/:commit_sha - -3. Issues - 1. Title - OK - "title" field - 2. Description - OK - "body" field - 3. Author - OK - "user" field - 4. Date - OK - "created_at" field - 5. Assignees - OK - "assignees" field - 6. Labels - OK - "labels" field - 7. Projects - 8. Comments (with author, date, reactions and mentions) - -> 2ways -> 1. get all of the comments repository, 2. get all of the comments's PR - - "user" field - - "created_at" field - - "reactions" info - - "reactions.url" user reactions diff --git a/dags/github.py b/dags/github.py index 1667c186..fc848726 100644 --- a/dags/github.py +++ b/dags/github.py @@ -17,42 +17,41 @@ # under the License. 
"""Example DAG demonstrating the usage of dynamic task mapping.""" from __future__ import annotations + from datetime import datetime, timedelta from airflow import DAG from airflow.decorators import task - from github_api_helpers import ( - get_all_repo_issues_and_prs_comments, - get_all_reviews_of_pull_request, - get_all_repo_review_comments, - get_all_repo_contributors, - get_all_pull_request_files, - get_all_pull_requests, - get_all_org_members, - get_all_repo_labels, - get_all_org_repos, - get_all_org_repos, - fetch_org_details, fetch_commit_files, + fetch_org_details, get_all_commits, get_all_issues, + get_all_org_members, + get_all_org_repos, + get_all_pull_request_files, + get_all_pull_requests, + get_all_repo_contributors, + get_all_repo_issues_and_prs_comments, + get_all_repo_labels, + get_all_repo_review_comments, + get_all_reviews_of_pull_request, ) from neo4j_storage import ( - save_commit_files_changes_to_neo4j, - save_repo_contributors_to_neo4j, - save_pr_files_changes_to_neo4j, - save_review_comment_to_neo4j, get_orgs_profile_from_neo4j, - save_pull_request_to_neo4j, - save_org_member_to_neo4j, save_comment_to_neo4j, + save_commit_files_changes_to_neo4j, save_commit_to_neo4j, - save_review_to_neo4j, save_issue_to_neo4j, save_label_to_neo4j, + save_org_member_to_neo4j, save_orgs_to_neo4j, + save_pr_files_changes_to_neo4j, + save_pull_request_to_neo4j, + save_repo_contributors_to_neo4j, save_repo_to_neo4j, + save_review_comment_to_neo4j, + save_review_to_neo4j, ) with DAG( @@ -67,18 +66,20 @@ def get_all_organization(): orgs = get_orgs_profile_from_neo4j() return orgs - #! 
for testing + # !for testing # toghether_crew_org = { # "id": 1, # "name": "TogetherCrew", - # "description": "TogetherCrew is a community of developers, designers, and creators who are passionate about building and learning together.", + # "description": """TogetherCrew is a community of developers, designers, and creators + # who are passionate about building and learning together.""", # "url": "", # "key": "" # } # rndao_org = { # "id": 2, # "name": "RnDAO", - # "description": "RnDAO is a community of developers, designers, and creators who are passionate about building and learning together.", + # "description": """RnDAO is a community of developers, designers, and creators + # who are passionate about building and learning together.""", # "url": "", # "key": "" # } diff --git a/dags/github_api_helpers/__init__.py b/dags/github_api_helpers/__init__.py index 586482aa..f709f6e5 100644 --- a/dags/github_api_helpers/__init__.py +++ b/dags/github_api_helpers/__init__.py @@ -1,16 +1,17 @@ -from .repos import get_all_org_repos, get_all_repo_contributors -from .commits import get_all_commits, fetch_commit_details, fetch_commit_files -from .issues import get_all_issues, get_all_comments_of_issue +# flake8: noqa +from .comments import get_all_repo_issues_and_prs_comments, get_all_repo_review_comments +from .commits import fetch_commit_details, fetch_commit_files, get_all_commits +from .issues import get_all_comments_of_issue, get_all_issues +from .labels import get_all_repo_labels +from .orgs import fetch_org_details, get_all_org_members from .pull_requests import ( - get_all_pull_requests, - get_all_commits_of_pull_request, get_all_comments_of_pull_request, - get_all_review_comments_of_pull_request, - get_all_reactions_of_review_comment, + get_all_commits_of_pull_request, + get_all_pull_request_files, + get_all_pull_requests, get_all_reactions_of_comment, + get_all_reactions_of_review_comment, + get_all_review_comments_of_pull_request, get_all_reviews_of_pull_request, - 
get_all_pull_request_files, ) -from .orgs import fetch_org_details, get_all_org_members -from .labels import get_all_repo_labels -from .comments import get_all_repo_review_comments, get_all_repo_issues_and_prs_comments +from .repos import get_all_org_repos, get_all_repo_contributors diff --git a/dags/github_api_helpers/comments.py b/dags/github_api_helpers/comments.py index dcb23358..8a07182f 100644 --- a/dags/github_api_helpers/comments.py +++ b/dags/github_api_helpers/comments.py @@ -1,4 +1,5 @@ import logging + from .smart_proxy import get @@ -45,7 +46,11 @@ def fetch_repo_review_comments_page( ) ) - logging.info(f"Found {len(updated_response_data)} comments for {owner}/{repo} on page {page}. comments: {updated_response_data}") + msg = f"Found {len(updated_response_data)}" + msg += f" review comments for {owner}/{repo} on page {page}." + msg += f" comments: {updated_response_data}" + logging.info(msg) + return updated_response_data @@ -117,7 +122,11 @@ def fetch_repo_issues_and_prs_comments_page( map(lambda x: {**x, **extract_type_from_comment_response(x)}, response_data) ) - logging.info(f"Found {len(updated_response_data)} comments for {owner}/{repo} on page {page}. comments: {updated_response_data}") + msg = f"Found {len(updated_response_data)}" + msg += f" comments for {owner}/{repo} on page {page}." + msg += f" comments: {updated_response_data}" + logging.info(msg) + return updated_response_data diff --git a/dags/github_api_helpers/commits.py b/dags/github_api_helpers/commits.py index c3133c22..d1dc487a 100644 --- a/dags/github_api_helpers/commits.py +++ b/dags/github_api_helpers/commits.py @@ -1,4 +1,5 @@ import logging + from .smart_proxy import get @@ -18,7 +19,9 @@ def fetch_commits(owner: str, repo: str, page: int, per_page: int = 100): response = get(endpoint, params=params) response_data = response.json() - logging.info(f"Found {len(response_data)} commits for {owner}/{repo} on page {page}. 
Commits: {response_data}") + logging.info( + f"Found {len(response_data)} commits for {owner}/{repo} on page {page}. Commits: {response_data}" + ) return response_data @@ -61,7 +64,9 @@ def fetch_commit_details(owner: str, repo: str, commit_sha: str): response = get(endpoint) response_data = response.json() - logging.info(f"Found details for commit {commit_sha} of {owner}/{repo}: {response_data}") + logging.info( + f"Found details for commit {commit_sha} of {owner}/{repo}: {response_data}" + ) return response_data @@ -77,7 +82,9 @@ def fetch_commit_files(owner: str, repo: str, sha: str): logging.info(f"Fetching files changed in commit {sha} of {owner}/{repo}...") commit_details = fetch_commit_details(owner, repo, sha) if "files" in commit_details: - logging.info(f"Found {len(commit_details['files'])} files changed in commit {sha} of {owner}/{repo}.") + logging.info( + f"Found {len(commit_details['files'])} files changed in commit {sha} of {owner}/{repo}." + ) return commit_details["files"] else: logging.info(f"No files changed in commit {sha} of {owner}/{repo}.") diff --git a/dags/github_api_helpers/issues.py b/dags/github_api_helpers/issues.py index 65c74c42..44d6f4c6 100644 --- a/dags/github_api_helpers/issues.py +++ b/dags/github_api_helpers/issues.py @@ -1,4 +1,5 @@ import logging + from .smart_proxy import get @@ -27,7 +28,9 @@ def fetch_issues(owner: str, repo: str, page: int, per_page: int = 100): issues = [issue for issue in response_data if "pull_request" not in issue] is_more_issues = len(response_data) == per_page - logging.info(f"Found {len(issues)} issues for {owner}/{repo} on page {page}. Issues: {issues}") + logging.info( + f"Found {len(issues)} issues for {owner}/{repo} on page {page}. Issues: {issues}" + ) return issues, is_more_issues @@ -78,7 +81,9 @@ def fetch_issue_comments( response = get(endpoint, params=params) response_data = response.json() - logging.info(f"Found {len(response_data)} comments for issue {issue_number} on page {page}. 
Comments: {response_data}") + logging.info( + f"Found {len(response_data)} comments for issue {issue_number} on page {page}. Comments: {response_data}" + ) return response_data @@ -96,11 +101,13 @@ def get_all_comments_of_issue(owner: str, repo: str, issue_number: int): current_page = 1 while True: logging.info(f"Fetching page {current_page} of comments...") - comments = fetch_pull_request_comments(owner, repo, issue_number, current_page) + comments = fetch_issue_comments(owner, repo, issue_number, current_page) if not comments: # Break the loop if no more comments are found break all_comments.extend(comments) current_page += 1 - - logging.info(f"Found a total of {len(all_comments)} comments for issue {issue_number}.") + + logging.info( + f"Found a total of {len(all_comments)} comments for issue {issue_number}." + ) return all_comments diff --git a/dags/github_api_helpers/labels.py b/dags/github_api_helpers/labels.py index f556d7a8..a903bc37 100644 --- a/dags/github_api_helpers/labels.py +++ b/dags/github_api_helpers/labels.py @@ -1,6 +1,8 @@ -from .smart_proxy import get import logging +from .smart_proxy import get + + def fetch_repo_labels_page(owner: str, repo: str, page: int, per_page: int = 100): """ Fetches the labels for a specific repository in GitHub. @@ -17,7 +19,9 @@ def fetch_repo_labels_page(owner: str, repo: str, page: int, per_page: int = 100 response = get(endpoint, params=params) response_data = response.json() - logging.info(f"Found {len(response_data)} labels for {owner}/{repo} on page {page}. Labels: {response_data}") + logging.info( + f"Found {len(response_data)} labels for {owner}/{repo} on page {page}. 
Labels: {response_data}" + ) return response_data diff --git a/dags/github_api_helpers/orgs.py b/dags/github_api_helpers/orgs.py index fff424a5..5bdc9874 100644 --- a/dags/github_api_helpers/orgs.py +++ b/dags/github_api_helpers/orgs.py @@ -1,4 +1,5 @@ import logging + from .smart_proxy import get @@ -34,7 +35,9 @@ def fetch_org_members_page(org: str, page: int, per_page: int = 100): response = get(endpoint, params=params) response_data = response.json() - logging.info(f"Found {len(response_data)} members for organization {org} on page {page}. Members: {response_data}") + logging.info( + f"Found {len(response_data)} members for organization {org} on page {page}. Members: {response_data}" + ) return response_data diff --git a/dags/github_api_helpers/pull_requests.py b/dags/github_api_helpers/pull_requests.py index 25c76979..8a86a4fb 100644 --- a/dags/github_api_helpers/pull_requests.py +++ b/dags/github_api_helpers/pull_requests.py @@ -1,4 +1,5 @@ import logging + from .smart_proxy import get @@ -22,7 +23,9 @@ def fetch_pull_requests(owner: str, repo: str, page: int, per_page: int = 100): response = get(endpoint, params=params) response_data = response.json() - logging.info(f"Found {len(response_data)} pull requests for {owner}/{repo} on page {page}. Pull requests: {response_data}") + logging.info( + f"Found {len(response_data)} pull requests for {owner}/{repo} on page {page}. Pull requests: {response_data}" + ) return response_data @@ -49,7 +52,9 @@ def get_all_pull_requests(owner: str, repo: str): all_pull_requests.extend(pull_requests) current_page += 1 - logging.info(f"Found a total of {len(all_pull_requests)} pull requests for {owner}/{repo}.") + logging.info( + f"Found a total of {len(all_pull_requests)} pull requests for {owner}/{repo}." 
+ ) return all_pull_requests @@ -74,7 +79,9 @@ def fetch_pull_requests_commits( response = get(endpoint, params=params) response_data = response.json() - logging.info(f"Found {len(response_data)} commits for pull request {pull_number} on page {page}. Commits: {response_data}") + logging.info( + f"Found {len(response_data)} commits for pull request {pull_number} on page {page}. Commits: {response_data}" + ) return response_data @@ -101,7 +108,9 @@ def get_all_commits_of_pull_request(owner: str, repo: str, pull_number: int): all_commits.extend(commits) current_page += 1 - logging.info(f"Found a total of {len(all_commits)} commits for pull request {pull_number}.") + logging.info( + f"Found a total of {len(all_commits)} commits for pull request {pull_number}." + ) return all_commits @@ -125,7 +134,9 @@ def fetch_pull_request_comments( response = get(endpoint, params=params) response_data = response.json() - logging.info(f"Found {len(response_data)} comments for pull request {issue_number} on page {page}. Comments: {response_data}") + logging.info( + f"Found {len(response_data)} comments for pull request {issue_number} on page {page}. Comments: {response_data}" + ) return response_data @@ -149,7 +160,9 @@ def get_all_comments_of_pull_request(owner: str, repo: str, issue_number: int): all_comments.extend(comments) current_page += 1 - logging.info(f"Found a total of {len(all_comments)} comments for pull request {issue_number}.") + logging.info( + f"Found a total of {len(all_comments)} comments for pull request {issue_number}." + ) return all_comments @@ -173,7 +186,11 @@ def fetch_pull_request_review_comments( response = get(endpoint, params=params) response_data = response.json() - logging.info(f"Found {len(response_data)} review comments for pull request {pull_number} on page {page}. Comments: {response_data}") + msg = f"Found {len(response_data)} review comments" + msg += f"for pull request {pull_number} on page {page}." 
+ msg += f"Comments: {response_data}" + logging.info(msg) + return response_data @@ -198,8 +215,10 @@ def get_all_review_comments_of_pull_request(owner: str, repo: str, pull_number: break all_comments.extend(comments) current_page += 1 - - logging.info(f"Found a total of {len(all_comments)} review comments for pull request {pull_number}.") + + logging.info( + f"Found a total of {len(all_comments)} review comments for pull request {pull_number}." + ) return all_comments @@ -221,7 +240,11 @@ def fetch_review_comment_reactions( response = get(endpoint, params=params) response_data = response.json() - logging.info(f"Found {len(response_data)} reactions for review comment {comment_id} on page {page}. Reactions: {response_data}") + msg = f"Found {len(response_data)} reactions" + msg += f"for review comment {comment_id} on page {page}." + msg += f"Reactions: {response_data}" + logging.info(msg) + return response_data @@ -245,7 +268,9 @@ def get_all_reactions_of_review_comment(owner: str, repo: str, comment_id: int): all_reactions.extend(reactions) current_page += 1 - logging.info(f"Found a total of {len(all_reactions)} reactions for review comment {comment_id}.") + logging.info( + f"Found a total of {len(all_reactions)} reactions for review comment {comment_id}." + ) return all_reactions @@ -263,14 +288,13 @@ def fetch_comment_reactions( :return: A list of reactions for the specified issue comment. """ endpoint = f"https://api.github.com/repos/{owner}/{repo}/issues/comments/{comment_id}/reactions" - headers = { - "Accept": "application/vnd.github.squirrel-girl-preview+json" - } # Custom media type is required params = {"page": page, "per_page": per_page} - response = get(endpoint, headers=headers, params=params) + response = get(endpoint, params=params) response_data = response.json() - logging.info(f"Found {len(response_data)} reactions for comment {comment_id} on page {page}. 
Reactions: {response_data}") + logging.info( + f"Found {len(response_data)} reactions for comment {comment_id} on page {page}. Reactions: {response_data}" + ) return response_data @@ -293,8 +317,10 @@ def get_all_reactions_of_comment(owner: str, repo: str, comment_id: int): break all_reactions.extend(reactions) current_page += 1 - - logging.info(f"Found a total of {len(all_reactions)} reactions for comment {comment_id}.") + + logging.info( + f"Found a total of {len(all_reactions)} reactions for comment {comment_id}." + ) return all_reactions @@ -318,7 +344,9 @@ def fetch_pull_request_reviews( response = get(endpoint, params=params) response_data = response.json() - logging.info(f"Found {len(response_data)} reviews for pull request {pull_number} on page {page}. Reviews: {response_data}") + logging.info( + f"Found {len(response_data)} reviews for pull request {pull_number} on page {page}. Reviews: {response_data}" + ) return response_data @@ -341,8 +369,10 @@ def get_all_reviews_of_pull_request(owner: str, repo: str, pull_number: int): break all_reviews.extend(reviews) current_page += 1 - - logging.info(f"Found a total of {len(all_reviews)} reviews for pull request {pull_number}.") + + logging.info( + f"Found a total of {len(all_reviews)} reviews for pull request {pull_number}." + ) return all_reviews @@ -365,7 +395,9 @@ def fetch_pull_request_files_page( response = get(endpoint, params=params) response_data = response.json() - logging.info(f"Found {len(response_data)} files for pull request {pull_number} on page {page}. Files: {response_data}") + logging.info( + f"Found {len(response_data)} files for pull request {pull_number} on page {page}. 
Files: {response_data}" + ) return response_data diff --git a/dags/github_api_helpers/repos.py b/dags/github_api_helpers/repos.py index 0f075b89..781b0d49 100644 --- a/dags/github_api_helpers/repos.py +++ b/dags/github_api_helpers/repos.py @@ -1,4 +1,5 @@ import logging + from .smart_proxy import get @@ -17,7 +18,9 @@ def fetch_org_repos_page(org_name: str, page: int, per_page: int = 100): response = get(endpoint, params=params) response_data = response.json() - logging.info(f"Found {len(response_data)} repos for organization {org_name} on page {page}. Repos: {response_data}") + logging.info( + f"Found {len(response_data)} repos for organization {org_name} on page {page}. Repos: {response_data}" + ) return response_data @@ -42,7 +45,9 @@ def get_all_org_repos(org_name: str): all_repos.extend(repos) current_page += 1 - logging.info(f"Found a total of {len(all_repos)} repos for organization {org_name}.") + logging.info( + f"Found a total of {len(all_repos)} repos for organization {org_name}." + ) return all_repos @@ -62,7 +67,9 @@ def fetch_repo_contributors_page(owner: str, repo: str, page: int, per_page: int response = get(endpoint, params=params) response_data = response.json() - logging.info(f"Found {len(response_data)} contributors for {owner}/{repo} on page {page}. Contributors: {response_data}") + logging.info( + f"Found {len(response_data)} contributors for {owner}/{repo} on page {page}. Contributors: {response_data}" + ) return response_data @@ -88,5 +95,7 @@ def get_all_repo_contributors(owner: str, repo: str): all_contributors.extend(contributors) current_page += 1 - logging.info(f"Found a total of {len(all_contributors)} contributors for {owner}/{repo}.") + logging.info( + f"Found a total of {len(all_contributors)} contributors for {owner}/{repo}." 
+ ) return all_contributors diff --git a/dags/github_api_helpers/smart_proxy.py b/dags/github_api_helpers/smart_proxy.py index d6f8efd6..e1d2804d 100644 --- a/dags/github_api_helpers/smart_proxy.py +++ b/dags/github_api_helpers/smart_proxy.py @@ -1,7 +1,8 @@ import logging -import requests import random +import requests + def get(url: str, params=None): """ @@ -10,7 +11,7 @@ def get(url: str, params=None): :param url: URL for the new :class:`Request` object. :param params: (optional) Dictionary, list of tuples or bytes to send in the query string for the :class:`Request`. - :param \*\*kwargs: Optional arguments that ``request`` takes. + :param **kwargs: Optional arguments that ``request`` takes. :return: :class:`Response ` object :rtype: requests.Response """ diff --git a/dags/github_old_version.py b/dags/github_old_version.py deleted file mode 100644 index 1770656f..00000000 --- a/dags/github_old_version.py +++ /dev/null @@ -1,177 +0,0 @@ -from airflow import DAG -from airflow.operators.python_operator import PythonOperator -from datetime import datetime, timedelta -import requests - - -default_args = { - "owner": "MohammadTwin", - "start_date": datetime(2023, 11, 8), - "retries": 1, - "retry_delay": timedelta(minutes=1), -} - -dag = DAG( - "github_old_version", - default_args=default_args, - description="GitHub Data Extraction DAG", - schedule_interval=None, - catchup=False, -) - - -def get_github_repos(ti): - endpoint = "https://api.github.com/orgs/TogetherCrew/repos" - - response = requests.get(endpoint) - response_data = response.json() - - print("[response_data] ", response_data) - ti.xcom_push(key="github_repos", value=response_data) - - -get_repos_task = PythonOperator( - task_id="get_github_repos", - python_callable=get_github_repos, - provide_context=True, - dag=dag, -) - - -def get_pull_requests(owner: str, repo: str): - endpoint = f"https://api.github.com/repos/{owner}/{repo}/pulls" - - params = {"per_page": 100, "page": 1, "state": "all"} - response = 
requests.get(endpoint, params=params) - response_data = response.json() - - return response_data - - -def extract_pull_requests(ti): - prs_data = {} - - github_repos = ti.xcom_pull(key="github_repos", task_ids="get_github_repos") - for repo in github_repos: - prs = get_pull_requests(owner=repo["owner"]["login"], repo=repo["name"]) - prs_data[repo["id"]] = prs - - ti.xcom_push(key="github_prs", value=github_repos) - return prs_data - - -def transform_pull_requests(ti): - return None - - -def load_pull_requests(ti): - print("Loaded PR data into the destination:") - - -task_extract_pull_requests = PythonOperator( - task_id="extract_pull_requests", - python_callable=extract_pull_requests, - provide_context=True, - dag=dag, -) - -task_transform_pull_requests = PythonOperator( - task_id="transform_pull_requests", - python_callable=transform_pull_requests, - provide_context=True, - dag=dag, -) - -task_load_pull_requests = PythonOperator( - task_id="load_pull_requests", - python_callable=load_pull_requests, - provide_context=True, - dag=dag, -) - -( - get_repos_task - >> task_extract_pull_requests - >> task_transform_pull_requests - >> task_load_pull_requests -) - - -def extract_commits(ti): - github_repos = ti.xcom_pull(key="github_repos", task_ids="get_github_repos") - for repo in github_repos: - print("\n[repo] ", repo) - - return None - - -def transform_commits(ti): - return None - - -def load_commits(ti): - print("Loaded Commit data into the destination:") - - -task_extract_commits = PythonOperator( - task_id="extract_commits", - python_callable=extract_commits, - provide_context=True, - dag=dag, -) - -task_transform_commits = PythonOperator( - task_id="transform_commits", - python_callable=transform_commits, - provide_context=True, - dag=dag, -) - -task_load_commits = PythonOperator( - task_id="load_commits", - python_callable=load_commits, - provide_context=True, - dag=dag, -) - -get_repos_task >> task_extract_commits >> task_transform_commits >> task_load_commits - 
- -def extract_issues(ti): - github_repos = ti.xcom_pull(key="github_repos", task_ids="get_github_repos") - for repo in github_repos: - print("\n[repo] ", repo) - - return None - - -def transform_issues(ti): - return None - - -def load_issues(ti): - print("Loaded issues data into the destination:") - - -task_extract_issues = PythonOperator( - task_id="extract_issues", - python_callable=extract_issues, - provide_context=True, - dag=dag, -) - -task_transform_issues = PythonOperator( - task_id="transform_issues", - python_callable=transform_issues, - provide_context=True, - dag=dag, -) - -task_load_issues = PythonOperator( - task_id="load_issues", - python_callable=load_issues, - provide_context=True, - dag=dag, -) - -get_repos_task >> task_extract_issues >> task_transform_issues >> task_load_issues diff --git a/dags/neo4j_storage/__init__.py b/dags/neo4j_storage/__init__.py index 0a22ad3c..64c5cb55 100644 --- a/dags/neo4j_storage/__init__.py +++ b/dags/neo4j_storage/__init__.py @@ -1,15 +1,16 @@ +# flake8: noqa +from .comments import save_comment_to_neo4j, save_review_comment_to_neo4j +from .commits import save_commit_files_changes_to_neo4j, save_commit_to_neo4j +from .issues import save_issue_to_neo4j +from .labels import save_label_to_neo4j from .orgs import ( - save_orgs_to_neo4j, - save_org_member_to_neo4j, get_orgs_profile_from_neo4j, + save_org_member_to_neo4j, + save_orgs_to_neo4j, ) -from .repos import save_repo_to_neo4j, save_repo_contributors_to_neo4j from .pull_requests import ( + save_pr_files_changes_to_neo4j, save_pull_request_to_neo4j, save_review_to_neo4j, - save_pr_files_changes_to_neo4j, ) -from .issues import save_issue_to_neo4j -from .labels import save_label_to_neo4j -from .commits import save_commit_to_neo4j, save_commit_files_changes_to_neo4j -from .comments import save_review_comment_to_neo4j, save_comment_to_neo4j +from .repos import save_repo_contributors_to_neo4j, save_repo_to_neo4j diff --git a/dags/neo4j_storage/commits.py 
b/dags/neo4j_storage/commits.py index df11dfe7..354c4fc3 100644 --- a/dags/neo4j_storage/commits.py +++ b/dags/neo4j_storage/commits.py @@ -50,7 +50,8 @@ def save_commit_files_changes_to_neo4j( session.execute_write( lambda tx: tx.run( f""" - MATCH (repo:{Node.Repository.value} {{id: $repository_id}}), (c:{Node.Commit.value} {{sha: $commit_sha}}) + MATCH (repo:{Node.Repository.value} {{id: $repository_id}}), + (c:{Node.Commit.value} {{sha: $commit_sha}}) WITH repo, c UNWIND $file_changes AS file_change MERGE (f:{Node.File.value} {{sha: file_change.sha, filename: file_change.filename}}) diff --git a/dags/neo4j_storage/neo4j_connection.py b/dags/neo4j_storage/neo4j_connection.py index 5f699127..6e32e381 100644 --- a/dags/neo4j_storage/neo4j_connection.py +++ b/dags/neo4j_storage/neo4j_connection.py @@ -1,7 +1,7 @@ import os from dotenv import load_dotenv -from neo4j import GraphDatabase, Driver +from neo4j import Driver, GraphDatabase class Neo4jConnection: diff --git a/dags/neo4j_storage/neo4j_enums.py b/dags/neo4j_storage/neo4j_enums.py index 8b83f80b..1091bc88 100644 --- a/dags/neo4j_storage/neo4j_enums.py +++ b/dags/neo4j_storage/neo4j_enums.py @@ -2,7 +2,8 @@ class Node(Enum): - OrganizationProfile = "OrganizationProfile" # This node is created by the API, and we receive a list of organizations detail to extract data from + # This node is created by the API, and we receive a list of organizations detail to extract data from + OrganizationProfile = "OrganizationProfile" GitHubOrganization = "GitHubOrganization" GitHubUser = "GitHubUser" PullRequest = "PullRequest" diff --git a/dags/neo4j_storage/orgs.py b/dags/neo4j_storage/orgs.py index 5d95e154..1b8ba1f5 100644 --- a/dags/neo4j_storage/orgs.py +++ b/dags/neo4j_storage/orgs.py @@ -1,6 +1,7 @@ +from neo4j.time import DateTime as Neo4jDateTime + from .neo4j_connection import Neo4jConnection from .neo4j_enums import Node, Relationship -from neo4j.time import DateTime as Neo4jDateTime def 
get_orgs_profile_from_neo4j(): diff --git a/dags/neo4j_storage/pull_requests.py b/dags/neo4j_storage/pull_requests.py index 58db5200..86492b4a 100644 --- a/dags/neo4j_storage/pull_requests.py +++ b/dags/neo4j_storage/pull_requests.py @@ -62,7 +62,7 @@ def save_pull_request_to_neo4j(pr: dict, repository_id: str): f""" MERGE (pr:{Node.PullRequest.value} {{id: $pr.id}}) SET pr += $pr, pr.repository_id = $repository_id, pr.latestSavedAt = datetime() - + WITH pr MERGE (ghu:{Node.GitHubUser.value} {{id: $repo_creator.id}}) SET ghu += $repo_creator, ghu.latestSavedAt = datetime() @@ -88,7 +88,7 @@ def save_pull_request_to_neo4j(pr: dict, repository_id: str): driver.close() -def save_review_to_neo4j(pr_id: dict, review: dict): +def save_review_to_neo4j(pr_id: int, review: dict): neo4jConnection = Neo4jConnection() driver = neo4jConnection.connect_neo4j() @@ -129,7 +129,8 @@ def save_pr_files_changes_to_neo4j(pr_id: int, repository_id: str, file_changes: session.execute_write( lambda tx: tx.run( f""" - MATCH (repo:{Node.Repository.value} {{id: $repository_id}}), (pr:{Node.PullRequest.value} {{id: $pr_id}}) + MATCH (repo:{Node.Repository.value} {{id: $repository_id}}), + (pr:{Node.PullRequest.value} {{id: $pr_id}}) WITH repo, pr UNWIND $file_changes AS file_change MERGE (f:{Node.File.value} {{sha: file_change.sha, filename: file_change.filename}}) diff --git a/dags/some_info.txt b/dags/some_info.txt deleted file mode 100644 index a7e44177..00000000 --- a/dags/some_info.txt +++ /dev/null @@ -1,3 +0,0 @@ -- API for getting members of organizations seems doesn't work correctly, since returns very low members -- Some commits do not have a commiter (commiter: None), which seems odd -- We have a request_reviewer field in the PR, which represents a reviewer who has not yet reviewed the PR. 
Once the requested reviewer submits their review, they are no longer considered a requested reviewer \ No newline at end of file diff --git a/dags/tests/units/__init__.py b/dags/tests/units/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/dags/tests/units/test_labels_storage.py b/dags/tests/units/test_labels_storage.py new file mode 100644 index 00000000..922cce36 --- /dev/null +++ b/dags/tests/units/test_labels_storage.py @@ -0,0 +1,16 @@ +import unittest + +from neo4j_storage import save_label_to_neo4j + + +class TestSaveLabelToNeo4j(unittest.TestCase): + def test_save_label_to_neo4j(self): + # Define a sample label + sample_label = {"id": "123", "name": "SampleLabel"} + + # Call the function with the sample label + response = save_label_to_neo4j(sample_label) + + self.assertIsNone( + response, "The response of save_label_to_neo4j should be None" + ) diff --git a/docker-compose.test.yml b/docker-compose.test.yml new file mode 100644 index 00000000..0c917f1b --- /dev/null +++ b/docker-compose.test.yml @@ -0,0 +1,37 @@ +version: "3.9" + +services: + app: + build: + context: . 
+      target: test
+      dockerfile: Dockerfile
+    environment:
+      - PORT=3000
+      - MONGODB_HOST=mongo
+      - MONGODB_PORT=27017
+      - MONGODB_USER=root
+      - MONGODB_PASS=pass
+      - NEO4J_PROTOCOL=bolt
+      - NEO4J_HOST=neo4j
+      - NEO4J_PORT=7687
+      - NEO4J_USER=neo4j
+      - NEO4J_PASSWORD=password
+      - NEO4J_DB=neo4j
+    volumes:
+      - ./coverage:/project/coverage
+    depends_on:
+      neo4j:
+        condition: service_healthy
+  neo4j:
+    image: "neo4j:5.9.0"
+    environment:
+      - NEO4J_AUTH=neo4j/password
+      - NEO4J_PLUGINS=["apoc", "graph-data-science"]
+      - NEO4J_dbms_security_procedures_unrestricted=apoc.*,gds.*
+    healthcheck:
+      test: ["CMD", "wget", "http://localhost:7474"]
+      interval: 1m30s
+      timeout: 10s
+      retries: 2
+      start_period: 40s
diff --git a/docker-entrypoint.sh b/docker-entrypoint.sh
new file mode 100644
index 00000000..72eec50b
--- /dev/null
+++ b/docker-entrypoint.sh
@@ -0,0 +1,9 @@
+#!/usr/bin/env bash
+echo "change dir to dags"
+cd dags || exit
+
+python3 -m coverage run --omit=tests/* -m pytest tests
+
+cp .coverage ../.coverage
+cd .. || exit
+python3 -m coverage lcov -i -o coverage/lcov.info
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index 00966dc1..fac2f671 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1 +1,4 @@
-neo4j
\ No newline at end of file
+neo4j==5.14.1
+coverage==7.3.3
+pytest==7.4.3
+python-dotenv==1.0.0
\ No newline at end of file