From f28f706fa6b2f1796f2eb220bcdd57fa6f70a7cb Mon Sep 17 00:00:00 2001 From: Mohammad Twin Date: Thu, 14 Dec 2023 16:52:32 +0400 Subject: [PATCH 01/26] feat: basic CI/CD were added --- .github/workflows/production.yml | 12 ++++++++++++ .github/workflows/start.staging.yml | 9 +++++++++ 2 files changed, 21 insertions(+) create mode 100644 .github/workflows/production.yml create mode 100644 .github/workflows/start.staging.yml diff --git a/.github/workflows/production.yml b/.github/workflows/production.yml new file mode 100644 index 00000000..a1be27ba --- /dev/null +++ b/.github/workflows/production.yml @@ -0,0 +1,12 @@ +name: Production CI/CD Pipeline + +on: + push: + branches: + - main + +jobs: + ci: + uses: TogetherCrew/operations/.github/workflows/ci.yml@main + secrets: + CC_TEST_REPORTER_ID: ${{ secrets.CC_TEST_REPORTER_ID }} \ No newline at end of file diff --git a/.github/workflows/start.staging.yml b/.github/workflows/start.staging.yml new file mode 100644 index 00000000..842e3bda --- /dev/null +++ b/.github/workflows/start.staging.yml @@ -0,0 +1,9 @@ +name: Staging CI/CD Pipeline + +on: pull_request + +jobs: + ci: + uses: TogetherCrew/operations/.github/workflows/ci.yml@main + secrets: + CC_TEST_REPORTER_ID: ${{ secrets.CC_TEST_REPORTER_ID }} \ No newline at end of file From 99137eb0042a418b656211a9b9de60ceb9454949 Mon Sep 17 00:00:00 2001 From: Mohammad Twin Date: Thu, 14 Dec 2023 18:54:01 +0400 Subject: [PATCH 02/26] feat: docker-compose.test.yml file was added --- docker-compose.test.yml | 326 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 326 insertions(+) create mode 100644 docker-compose.test.yml diff --git a/docker-compose.test.yml b/docker-compose.test.yml new file mode 100644 index 00000000..a15c5f02 --- /dev/null +++ b/docker-compose.test.yml @@ -0,0 +1,326 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +# Basic Airflow cluster configuration for CeleryExecutor with Redis and PostgreSQL. +# +# WARNING: This configuration is for local development. Do not use it in a production deployment. +# +# This configuration supports basic configuration using environment variables or an .env file +# The following variables are supported: +# +# AIRFLOW_IMAGE_NAME - Docker image name used to run Airflow. +# Default: apache/airflow:2.7.3 +# AIRFLOW_UID - User ID in Airflow containers +# Default: 50000 +# AIRFLOW_PROJ_DIR - Base path to which all the files will be volumed. +# Default: . +# Those configurations are useful mostly in case of standalone testing/running Airflow in test/try-out mode +# +# _AIRFLOW_WWW_USER_USERNAME - Username for the administrator account (if requested). +# Default: airflow +# _AIRFLOW_WWW_USER_PASSWORD - Password for the administrator account (if requested). 
+# Default: airflow +# _PIP_ADDITIONAL_REQUIREMENTS - Additional PIP requirements to add when starting all containers. +# Use this option ONLY for quick checks. Installing requirements at container +# startup is done EVERY TIME the service is started. +# A better way is to build a custom image or extend the official image +# as described in https://airflow.apache.org/docs/docker-stack/build.html. +# Default: '' +# +# Feel free to modify this file to suit your needs. +--- +version: '3.8' +x-airflow-common: + &airflow-common + # In order to add custom dependencies or upgrade provider packages you can use your extended image. + # Comment the image line, place your Dockerfile in the directory where you placed the docker-compose.yaml + # and uncomment the "build" line below, Then run `docker-compose build` to build the images. + image: ${AIRFLOW_IMAGE_NAME:-apache/airflow:2.7.3} + # build: . + command: > + /bin/bash -c " + python3 -m coverage run --omit=tests/* -m pytest tests && + python3 -m coverage lcov -i -o coverage/lcov.info + " + environment: + &airflow-common-env + AIRFLOW__CORE__EXECUTOR: CeleryExecutor + AIRFLOW__DATABASE__SQL_ALCHEMY_CONN: postgresql+psycopg2://airflow:airflow@postgres/airflow + # For backward compatibility, with Airflow <2.3 + AIRFLOW__CORE__SQL_ALCHEMY_CONN: postgresql+psycopg2://airflow:airflow@postgres/airflow + AIRFLOW__CELERY__RESULT_BACKEND: db+postgresql://airflow:airflow@postgres/airflow + AIRFLOW__CELERY__BROKER_URL: redis://:@redis:6379/0 + AIRFLOW__CORE__FERNET_KEY: '' + AIRFLOW__CORE__DAGS_ARE_PAUSED_AT_CREATION: 'true' + AIRFLOW__CORE__LOAD_EXAMPLES: 'true' + AIRFLOW__API__AUTH_BACKENDS: 'airflow.api.auth.backend.basic_auth,airflow.api.auth.backend.session' + # yamllint disable rule:line-length + # Use simple http server on scheduler for health checks + # See https://airflow.apache.org/docs/apache-airflow/stable/administration-and-deployment/logging-monitoring/check-health.html#scheduler-health-check-server + # yamllint enable rule:line-length + AIRFLOW__SCHEDULER__ENABLE_HEALTH_CHECK: 'true' + # WARNING: Use _PIP_ADDITIONAL_REQUIREMENTS option ONLY for a quick checks + # for other purpose (development, test and especially production usage) build/extend Airflow image. 
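The `command:` override above replaces the normal Airflow entrypoint with a test run: a coverage-instrumented pytest invocation followed by an LCOV export, which is the report format the Code Climate reporter behind the workflows' `CC_TEST_REPORTER_ID` secret can consume. As a rough in-process sketch of that same shell pipeline using the coverage.py API (assuming coverage >= 6.3, the first release with `lcov_report`, and an existing `coverage/` directory as mounted by this compose file; this is illustrative, not repo code):

```python
import coverage
import pytest

cov = coverage.Coverage(omit=["tests/*"])   # mirrors --omit=tests/*
cov.start()
exit_code = pytest.main(["tests"])          # mirrors `-m pytest tests`
cov.stop()
cov.save()
# mirrors `coverage lcov -i -o coverage/lcov.info`; the coverage/ dir must exist
cov.lcov_report(outfile="coverage/lcov.info")
raise SystemExit(int(exit_code))
```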
+ # _PIP_ADDITIONAL_REQUIREMENTS: ${_PIP_ADDITIONAL_REQUIREMENTS:-} + _PIP_ADDITIONAL_REQUIREMENTS: neo4j + NEO4J_PROTOCOL: bolt + NEO4J_HOST: neo4j + NEO4J_PORT: 7687 + NEO4J_USER: neo4j + NEO4J_PASSWORD: neo4j123456 + volumes: + - ${AIRFLOW_PROJ_DIR:-.}/dags:/opt/airflow/dags + - ${AIRFLOW_PROJ_DIR:-.}/logs:/opt/airflow/logs + - ${AIRFLOW_PROJ_DIR:-.}/config:/opt/airflow/config + - ${AIRFLOW_PROJ_DIR:-.}/plugins:/opt/airflow/plugins + user: "${AIRFLOW_UID:-50000}:0" + depends_on: + &airflow-common-depends-on + redis: + condition: service_healthy + postgres: + condition: service_healthy + neo4j: + condition: service_healthy + +services: + postgres: + image: postgres:13 + environment: + POSTGRES_USER: airflow + POSTGRES_PASSWORD: airflow + POSTGRES_DB: airflow + volumes: + - postgres-db-volume:/var/lib/postgresql/data + healthcheck: + test: ["CMD", "pg_isready", "-U", "airflow"] + interval: 10s + retries: 5 + start_period: 5s + restart: always + + redis: + image: redis:latest + expose: + - 6379 + healthcheck: + test: ["CMD", "redis-cli", "ping"] + interval: 10s + timeout: 30s + retries: 50 + start_period: 30s + restart: always + + neo4j: + image: "neo4j:5.9.0" + environment: + - NEO4J_AUTH=neo4j/neo4j123456 + - NEO4J_PLUGINS=["apoc", "graph-data-science"] + - NEO4J_dbms_security_procedures_unrestricted=apoc.*,gds.* + healthcheck: + test: ["CMD" ,"wget", "http://localhost:7474"] + interval: 1m30s + timeout: 10s + retries: 2 + start_period: 40s + + airflow-webserver: + <<: *airflow-common + command: webserver + ports: + - "8080:8080" + healthcheck: + test: ["CMD", "curl", "--fail", "http://localhost:8080/health"] + interval: 30s + timeout: 10s + retries: 5 + start_period: 30s + restart: always + depends_on: + <<: *airflow-common-depends-on + airflow-init: + condition: service_completed_successfully + + airflow-scheduler: + <<: *airflow-common + command: scheduler + healthcheck: + test: ["CMD", "curl", "--fail", "http://localhost:8974/health"] + interval: 30s + timeout: 10s + retries: 5 + start_period: 30s + restart: always + depends_on: + <<: *airflow-common-depends-on + airflow-init: + condition: service_completed_successfully + + airflow-worker: + <<: *airflow-common + command: celery worker + healthcheck: + # yamllint disable rule:line-length + test: + - "CMD-SHELL" + - 'celery --app airflow.providers.celery.executors.celery_executor.app inspect ping -d "celery@$${HOSTNAME}" || celery --app airflow.executors.celery_executor.app inspect ping -d "celery@$${HOSTNAME}"' + interval: 30s + timeout: 10s + retries: 5 + start_period: 30s + environment: + <<: *airflow-common-env + # Required to handle warm shutdown of the celery workers properly + # See https://airflow.apache.org/docs/docker-stack/entrypoint.html#signal-propagation + DUMB_INIT_SETSID: "0" + restart: always + depends_on: + <<: *airflow-common-depends-on + airflow-init: + condition: service_completed_successfully + + airflow-triggerer: + <<: *airflow-common + command: triggerer + healthcheck: + test: ["CMD-SHELL", 'airflow jobs check --job-type TriggererJob --hostname "$${HOSTNAME}"'] + interval: 30s + timeout: 10s + retries: 5 + start_period: 30s + restart: always + depends_on: + <<: *airflow-common-depends-on + airflow-init: + condition: service_completed_successfully + + airflow-init: + <<: *airflow-common + entrypoint: /bin/bash + # yamllint disable rule:line-length + command: + - -c + - | + function ver() { + printf "%04d%04d%04d%04d" $${1//./ } + } + airflow_version=$$(AIRFLOW__LOGGING__LOGGING_LEVEL=INFO && gosu airflow 
airflow version) + airflow_version_comparable=$$(ver $${airflow_version}) + min_airflow_version=2.2.0 + min_airflow_version_comparable=$$(ver $${min_airflow_version}) + if (( airflow_version_comparable < min_airflow_version_comparable )); then + echo + echo -e "\033[1;31mERROR!!!: Too old Airflow version $${airflow_version}!\e[0m" + echo "The minimum Airflow version supported: $${min_airflow_version}. Only use this or higher!" + echo + exit 1 + fi + if [[ -z "${AIRFLOW_UID}" ]]; then + echo + echo -e "\033[1;33mWARNING!!!: AIRFLOW_UID not set!\e[0m" + echo "If you are on Linux, you SHOULD follow the instructions below to set " + echo "AIRFLOW_UID environment variable, otherwise files will be owned by root." + echo "For other operating systems you can get rid of the warning with manually created .env file:" + echo " See: https://airflow.apache.org/docs/apache-airflow/stable/howto/docker-compose/index.html#setting-the-right-airflow-user" + echo + fi + one_meg=1048576 + mem_available=$$(($$(getconf _PHYS_PAGES) * $$(getconf PAGE_SIZE) / one_meg)) + cpus_available=$$(grep -cE 'cpu[0-9]+' /proc/stat) + disk_available=$$(df / | tail -1 | awk '{print $$4}') + warning_resources="false" + if (( mem_available < 4000 )) ; then + echo + echo -e "\033[1;33mWARNING!!!: Not enough memory available for Docker.\e[0m" + echo "At least 4GB of memory required. You have $$(numfmt --to iec $$((mem_available * one_meg)))" + echo + warning_resources="true" + fi + if (( cpus_available < 2 )); then + echo + echo -e "\033[1;33mWARNING!!!: Not enough CPUS available for Docker.\e[0m" + echo "At least 2 CPUs recommended. You have $${cpus_available}" + echo + warning_resources="true" + fi + if (( disk_available < one_meg * 10 )); then + echo + echo -e "\033[1;33mWARNING!!!: Not enough Disk space available for Docker.\e[0m" + echo "At least 10 GBs recommended. You have $$(numfmt --to iec $$((disk_available * 1024 )))" + echo + warning_resources="true" + fi + if [[ $${warning_resources} == "true" ]]; then + echo + echo -e "\033[1;33mWARNING!!!: You have not enough resources to run Airflow (see above)!\e[0m" + echo "Please follow the instructions to increase amount of resources available:" + echo " https://airflow.apache.org/docs/apache-airflow/stable/howto/docker-compose/index.html#before-you-begin" + echo + fi + mkdir -p /sources/logs /sources/dags /sources/plugins + chown -R "${AIRFLOW_UID}:0" /sources/{logs,dags,plugins} + exec /entrypoint airflow version + # yamllint enable rule:line-length + environment: + <<: *airflow-common-env + _AIRFLOW_DB_MIGRATE: 'true' + _AIRFLOW_WWW_USER_CREATE: 'true' + _AIRFLOW_WWW_USER_USERNAME: ${_AIRFLOW_WWW_USER_USERNAME:-airflow} + _AIRFLOW_WWW_USER_PASSWORD: ${_AIRFLOW_WWW_USER_PASSWORD:-airflow} + _PIP_ADDITIONAL_REQUIREMENTS: '' + user: "0:0" + volumes: + - ${AIRFLOW_PROJ_DIR:-.}:/sources + + airflow-cli: + <<: *airflow-common + profiles: + - debug + environment: + <<: *airflow-common-env + CONNECTION_CHECK_MAX_COUNT: "0" + # Workaround for entrypoint issue. See: https://github.com/apache/airflow/issues/16252 + command: + - bash + - -c + - airflow + + # You can enable flower by adding "--profile flower" option e.g. docker-compose --profile flower up + # or by explicitly targeted on the command line e.g. docker-compose up flower. 
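The `ver()` helper in the init script above makes dotted version strings comparable by zero-padding each component with `printf "%04d%04d%04d%04d"`, so that `2.10.0` correctly sorts after `2.9.1`. The same idea in Python, as a small illustrative helper rather than anything in the repo, is simply component-wise tuple comparison:

```python
def ver(version: str) -> tuple[int, ...]:
    """Parse '2.7.3' -> (2, 7, 3); tuples compare component-wise."""
    return tuple(int(part) for part in version.split("."))

assert ver("2.7.3") >= ver("2.2.0")   # meets the minimum the script enforces
assert ver("2.10.0") > ver("2.9.1")   # numeric, not lexicographic, ordering
```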
+ # See: https://docs.docker.com/compose/profiles/ + flower: + <<: *airflow-common + command: celery flower + profiles: + - flower + ports: + - "5555:5555" + healthcheck: + test: ["CMD", "curl", "--fail", "http://localhost:5555/"] + interval: 30s + timeout: 10s + retries: 5 + start_period: 30s + restart: always + depends_on: + <<: *airflow-common-depends-on + airflow-init: + condition: service_completed_successfully + +volumes: + postgres-db-volume: From c174a10a4931e7e7f79c3336f6a965567fee9e9c Mon Sep 17 00:00:00 2001 From: Mohammad Twin Date: Fri, 15 Dec 2023 12:04:30 +0400 Subject: [PATCH 03/26] feat: two unneeded files were removed --- dags/businees-requremensts.txt | 40 ---------------------------------- dags/some_info.txt | 3 --- 2 files changed, 43 deletions(-) delete mode 100644 dags/businees-requremensts.txt delete mode 100644 dags/some_info.txt diff --git a/dags/businees-requremensts.txt b/dags/businees-requremensts.txt deleted file mode 100644 index 70fc4d76..00000000 --- a/dags/businees-requremensts.txt +++ /dev/null @@ -1,40 +0,0 @@ -1. PRs - 1. Title - OK - "title" field - 2. Description - OK - "title" field - 3. Author - OK - "user" field - 4. Date - OK - "created_at" field - 5. Commits (see below) ######## - OK - "_links.commits" - 6. Linked issues (see below) ######## - - 7. Comments (with author, date, reactions, and mentions) - -> 2ways -> 1. get all of the comments repository "_links.comments", 2. get all of the comments's PR - - "user" field - - "created_at" field - - "reactions" info - - "reactions.url" user reactions - 8. review_comments - - "user" field - - "created_at" field - - "reactions" info - - "reactions.url" user reactions - 9. status (open, in review, closed) - OK - "state" field - -2. Commits - 2. Description - "commit.message" field - 3. Author - OK - "author" field - 4. Date - OK - "commit.committer.date" field - 5. Scripts that were changed - OK - /repos/:owner/:repo/commits/:commit_sha - -3. Issues - 1. Title - OK - "title" field - 2. Description - OK - "body" field - 3. Author - OK - "user" field - 4. Date - OK - "created_at" field - 5. Assignees - OK - "assignees" field - 6. Labels - OK - "labels" field - 7. Projects - 8. Comments (with author, date, reactions and mentions) - -> 2ways -> 1. get all of the comments repository, 2. get all of the comments's PR - - "user" field - - "created_at" field - - "reactions" info - - "reactions.url" user reactions diff --git a/dags/some_info.txt b/dags/some_info.txt deleted file mode 100644 index a7e44177..00000000 --- a/dags/some_info.txt +++ /dev/null @@ -1,3 +0,0 @@ -- API for getting members of organizations seems doesn't work correctly, since returns very low members -- Some commits do not have a commiter (commiter: None), which seems odd -- We have a request_reviewer field in the PR, which represents a reviewer who has not yet reviewed the PR. 
Once the requested reviewer submits their review, they are no longer considered a requested reviewer \ No newline at end of file From 4abba635eaa5652e4bb910eb2152491c78a5643f Mon Sep 17 00:00:00 2001 From: Mohammad Twin Date: Fri, 15 Dec 2023 12:06:43 +0400 Subject: [PATCH 04/26] feat: unneeded hear was removed --- dags/github_api_helpers/pull_requests.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/dags/github_api_helpers/pull_requests.py b/dags/github_api_helpers/pull_requests.py index 94820416..f55aa6a6 100644 --- a/dags/github_api_helpers/pull_requests.py +++ b/dags/github_api_helpers/pull_requests.py @@ -233,11 +233,8 @@ def fetch_comment_reactions( :return: A list of reactions for the specified issue comment. """ endpoint = f"https://api.github.com/repos/{owner}/{repo}/issues/comments/{comment_id}/reactions" - headers = { - "Accept": "application/vnd.github.squirrel-girl-preview+json" - } # Custom media type is required params = {"page": page, "per_page": per_page} - response = get(endpoint, headers=headers, params=params) + response = get(endpoint, params=params) return response.json() From e08e266292c28a7f6cd592531560db0e7fe112c8 Mon Sep 17 00:00:00 2001 From: Mohammad Twin Date: Fri, 15 Dec 2023 12:34:32 +0400 Subject: [PATCH 05/26] feat: align code with Linter --- Readme.md | 2 +- dags/github.py | 37 +++++++++++++------------- dags/github_api_helpers/__init__.py | 22 +++++++-------- dags/github_api_helpers/commits.py | 1 - dags/github_api_helpers/issues.py | 3 +-- dags/github_api_helpers/orgs.py | 1 - dags/github_api_helpers/smart_proxy.py | 4 +-- dags/github_old_version.py | 6 ++--- dags/neo4j_storage/__init__.py | 16 +++++------ dags/neo4j_storage/commits.py | 3 ++- dags/neo4j_storage/neo4j_connection.py | 2 +- dags/neo4j_storage/neo4j_enums.py | 3 ++- dags/neo4j_storage/orgs.py | 3 ++- 13 files changed, 51 insertions(+), 52 deletions(-) diff --git a/Readme.md b/Readme.md index c51ca256..00b1be0b 100644 --- a/Readme.md +++ b/Readme.md @@ -15,4 +15,4 @@ The code can be linted by using the below command ```bash python -m black . -``` +``` \ No newline at end of file diff --git a/dags/github.py b/dags/github.py index 1667c186..fea2a9ae 100644 --- a/dags/github.py +++ b/dags/github.py @@ -17,42 +17,41 @@ # under the License. 
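Patch 04 above drops the `application/vnd.github.squirrel-girl-preview+json` Accept header: the reactions endpoints graduated out of API preview, so default headers suffice. A minimal stand-alone sketch of the same call with plain `requests` (the repo routes it through its `smart_proxy.get` helper instead; the `raise_for_status` call and the page/per_page defaults are my additions for illustration):

```python
import requests


def fetch_comment_reactions(
    owner: str, repo: str, comment_id: int, page: int = 1, per_page: int = 100
) -> list:
    """Fetch one page of reactions for an issue comment; no preview header needed."""
    endpoint = (
        f"https://api.github.com/repos/{owner}/{repo}"
        f"/issues/comments/{comment_id}/reactions"
    )
    params = {"page": page, "per_page": per_page}
    response = requests.get(endpoint, params=params)
    response.raise_for_status()  # the repo helper returns the response unchecked
    return response.json()
```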
"""Example DAG demonstrating the usage of dynamic task mapping.""" from __future__ import annotations + from datetime import datetime, timedelta from airflow import DAG from airflow.decorators import task - from github_api_helpers import ( - get_all_repo_issues_and_prs_comments, - get_all_reviews_of_pull_request, - get_all_repo_review_comments, - get_all_repo_contributors, - get_all_pull_request_files, - get_all_pull_requests, - get_all_org_members, - get_all_repo_labels, - get_all_org_repos, - get_all_org_repos, - fetch_org_details, fetch_commit_files, + fetch_org_details, get_all_commits, get_all_issues, + get_all_org_members, + get_all_org_repos, + get_all_pull_request_files, + get_all_pull_requests, + get_all_repo_contributors, + get_all_repo_issues_and_prs_comments, + get_all_repo_labels, + get_all_repo_review_comments, + get_all_reviews_of_pull_request, ) from neo4j_storage import ( - save_commit_files_changes_to_neo4j, - save_repo_contributors_to_neo4j, - save_pr_files_changes_to_neo4j, - save_review_comment_to_neo4j, get_orgs_profile_from_neo4j, - save_pull_request_to_neo4j, - save_org_member_to_neo4j, save_comment_to_neo4j, + save_commit_files_changes_to_neo4j, save_commit_to_neo4j, - save_review_to_neo4j, save_issue_to_neo4j, save_label_to_neo4j, + save_org_member_to_neo4j, save_orgs_to_neo4j, + save_pr_files_changes_to_neo4j, + save_pull_request_to_neo4j, + save_repo_contributors_to_neo4j, save_repo_to_neo4j, + save_review_comment_to_neo4j, + save_review_to_neo4j, ) with DAG( diff --git a/dags/github_api_helpers/__init__.py b/dags/github_api_helpers/__init__.py index 586482aa..4044b9d1 100644 --- a/dags/github_api_helpers/__init__.py +++ b/dags/github_api_helpers/__init__.py @@ -1,16 +1,16 @@ -from .repos import get_all_org_repos, get_all_repo_contributors -from .commits import get_all_commits, fetch_commit_details, fetch_commit_files -from .issues import get_all_issues, get_all_comments_of_issue +from .comments import get_all_repo_issues_and_prs_comments, get_all_repo_review_comments +from .commits import fetch_commit_details, fetch_commit_files, get_all_commits +from .issues import get_all_comments_of_issue, get_all_issues +from .labels import get_all_repo_labels +from .orgs import fetch_org_details, get_all_org_members from .pull_requests import ( - get_all_pull_requests, - get_all_commits_of_pull_request, get_all_comments_of_pull_request, - get_all_review_comments_of_pull_request, - get_all_reactions_of_review_comment, + get_all_commits_of_pull_request, + get_all_pull_request_files, + get_all_pull_requests, get_all_reactions_of_comment, + get_all_reactions_of_review_comment, + get_all_review_comments_of_pull_request, get_all_reviews_of_pull_request, - get_all_pull_request_files, ) -from .orgs import fetch_org_details, get_all_org_members -from .labels import get_all_repo_labels -from .comments import get_all_repo_review_comments, get_all_repo_issues_and_prs_comments +from .repos import get_all_org_repos, get_all_repo_contributors diff --git a/dags/github_api_helpers/commits.py b/dags/github_api_helpers/commits.py index 56d880e9..d1aeebde 100644 --- a/dags/github_api_helpers/commits.py +++ b/dags/github_api_helpers/commits.py @@ -1,4 +1,3 @@ -import requests from .smart_proxy import get diff --git a/dags/github_api_helpers/issues.py b/dags/github_api_helpers/issues.py index 19470d61..456dbceb 100644 --- a/dags/github_api_helpers/issues.py +++ b/dags/github_api_helpers/issues.py @@ -1,4 +1,3 @@ -import requests from .smart_proxy import get @@ -87,7 +86,7 @@ def 
get_all_comments_of_issue(owner: str, repo: str, issue_number: int): all_comments = [] current_page = 1 while True: - comments = fetch_pull_request_comments(owner, repo, issue_number, current_page) + comments = fetch_issue_comments(owner, repo, issue_number, current_page) if not comments: # Break the loop if no more comments are found break all_comments.extend(comments) diff --git a/dags/github_api_helpers/orgs.py b/dags/github_api_helpers/orgs.py index 83f458c0..429c58f9 100644 --- a/dags/github_api_helpers/orgs.py +++ b/dags/github_api_helpers/orgs.py @@ -1,4 +1,3 @@ -import requests from .smart_proxy import get diff --git a/dags/github_api_helpers/smart_proxy.py b/dags/github_api_helpers/smart_proxy.py index 1e98d54f..f34001a9 100644 --- a/dags/github_api_helpers/smart_proxy.py +++ b/dags/github_api_helpers/smart_proxy.py @@ -1,6 +1,6 @@ -import requests import random +import requests def get(url: str, params=None): """ @@ -9,7 +9,7 @@ def get(url: str, params=None): :param url: URL for the new :class:`Request` object. :param params: (optional) Dictionary, list of tuples or bytes to send in the query string for the :class:`Request`. - :param \*\*kwargs: Optional arguments that ``request`` takes. + :param **kwargs: Optional arguments that ``request`` takes. :return: :class:`Response ` object :rtype: requests.Response """ diff --git a/dags/github_old_version.py b/dags/github_old_version.py index 1770656f..3d233df2 100644 --- a/dags/github_old_version.py +++ b/dags/github_old_version.py @@ -1,8 +1,8 @@ -from airflow import DAG -from airflow.operators.python_operator import PythonOperator from datetime import datetime, timedelta -import requests +import requests +from airflow import DAG +from airflow.operators.python_operator import PythonOperator default_args = { "owner": "MohammadTwin", diff --git a/dags/neo4j_storage/__init__.py b/dags/neo4j_storage/__init__.py index 0a22ad3c..dd3991a2 100644 --- a/dags/neo4j_storage/__init__.py +++ b/dags/neo4j_storage/__init__.py @@ -1,15 +1,15 @@ +from .comments import save_comment_to_neo4j, save_review_comment_to_neo4j +from .commits import save_commit_files_changes_to_neo4j, save_commit_to_neo4j +from .issues import save_issue_to_neo4j +from .labels import save_label_to_neo4j from .orgs import ( - save_orgs_to_neo4j, - save_org_member_to_neo4j, get_orgs_profile_from_neo4j, + save_org_member_to_neo4j, + save_orgs_to_neo4j, ) -from .repos import save_repo_to_neo4j, save_repo_contributors_to_neo4j from .pull_requests import ( + save_pr_files_changes_to_neo4j, save_pull_request_to_neo4j, save_review_to_neo4j, - save_pr_files_changes_to_neo4j, ) -from .issues import save_issue_to_neo4j -from .labels import save_label_to_neo4j -from .commits import save_commit_to_neo4j, save_commit_files_changes_to_neo4j -from .comments import save_review_comment_to_neo4j, save_comment_to_neo4j +from .repos import save_repo_contributors_to_neo4j, save_repo_to_neo4j diff --git a/dags/neo4j_storage/commits.py b/dags/neo4j_storage/commits.py index df11dfe7..5f5e4486 100644 --- a/dags/neo4j_storage/commits.py +++ b/dags/neo4j_storage/commits.py @@ -50,7 +50,8 @@ def save_commit_files_changes_to_neo4j( session.execute_write( lambda tx: tx.run( f""" - MATCH (repo:{Node.Repository.value} {{id: $repository_id}}), (c:{Node.Commit.value} {{sha: $commit_sha}}) + MATCH (repo:{Node.Repository.value} {{id: $repository_id}}), + (c:{Node.Commit.value} {{sha: $commit_sha}}) WITH repo, c UNWIND $file_changes AS file_change MERGE (f:{Node.File.value} {{sha: file_change.sha, filename: 
file_change.filename}}) diff --git a/dags/neo4j_storage/neo4j_connection.py b/dags/neo4j_storage/neo4j_connection.py index 5f699127..6e32e381 100644 --- a/dags/neo4j_storage/neo4j_connection.py +++ b/dags/neo4j_storage/neo4j_connection.py @@ -1,7 +1,7 @@ import os from dotenv import load_dotenv -from neo4j import GraphDatabase, Driver +from neo4j import Driver, GraphDatabase class Neo4jConnection: diff --git a/dags/neo4j_storage/neo4j_enums.py b/dags/neo4j_storage/neo4j_enums.py index 8b83f80b..1091bc88 100644 --- a/dags/neo4j_storage/neo4j_enums.py +++ b/dags/neo4j_storage/neo4j_enums.py @@ -2,7 +2,8 @@ class Node(Enum): - OrganizationProfile = "OrganizationProfile" # This node is created by the API, and we receive a list of organizations detail to extract data from + # This node is created by the API, and we receive a list of organizations detail to extract data from + OrganizationProfile = "OrganizationProfile" GitHubOrganization = "GitHubOrganization" GitHubUser = "GitHubUser" PullRequest = "PullRequest" diff --git a/dags/neo4j_storage/orgs.py b/dags/neo4j_storage/orgs.py index 5d95e154..1b8ba1f5 100644 --- a/dags/neo4j_storage/orgs.py +++ b/dags/neo4j_storage/orgs.py @@ -1,6 +1,7 @@ +from neo4j.time import DateTime as Neo4jDateTime + from .neo4j_connection import Neo4jConnection from .neo4j_enums import Node, Relationship -from neo4j.time import DateTime as Neo4jDateTime def get_orgs_profile_from_neo4j(): From c15d7cae438b5e81700ff0d19d48eeb6fac6cd1e Mon Sep 17 00:00:00 2001 From: Mohammad Twin Date: Fri, 15 Dec 2023 12:44:19 +0400 Subject: [PATCH 06/26] feat: more lint formate --- Readme.md | 2 - dags/github.py | 8 +- dags/github_api_helpers/smart_proxy.py | 1 + dags/github_old_version.py | 177 ------------------------- dags/neo4j_storage/commits.py | 2 +- dags/neo4j_storage/pull_requests.py | 5 +- 6 files changed, 10 insertions(+), 185 deletions(-) delete mode 100644 dags/github_old_version.py diff --git a/Readme.md b/Readme.md index 00b1be0b..c5ae4ffe 100644 --- a/Readme.md +++ b/Readme.md @@ -2,7 +2,6 @@ ## Running the app - You can quickly launch the application using `Docker Compose`: ```bash @@ -10,7 +9,6 @@ docker-compose --profile flower up ``` ## Lint the code - The code can be linted by using the below command ```bash diff --git a/dags/github.py b/dags/github.py index fea2a9ae..e78a427f 100644 --- a/dags/github.py +++ b/dags/github.py @@ -66,18 +66,20 @@ def get_all_organization(): orgs = get_orgs_profile_from_neo4j() return orgs - #! 
for testing + # !for testing # toghether_crew_org = { # "id": 1, # "name": "TogetherCrew", - # "description": "TogetherCrew is a community of developers, designers, and creators who are passionate about building and learning together.", + # "description": """TogetherCrew is a community of developers, designers, and creators + # who are passionate about building and learning together.""", # "url": "", # "key": "" # } # rndao_org = { # "id": 2, # "name": "RnDAO", - # "description": "RnDAO is a community of developers, designers, and creators who are passionate about building and learning together.", + # "description": """RnDAO is a community of developers, designers, and creators + # who are passionate about building and learning together.""", # "url": "", # "key": "" # } diff --git a/dags/github_api_helpers/smart_proxy.py b/dags/github_api_helpers/smart_proxy.py index f34001a9..c7531e17 100644 --- a/dags/github_api_helpers/smart_proxy.py +++ b/dags/github_api_helpers/smart_proxy.py @@ -2,6 +2,7 @@ import requests + def get(url: str, params=None): """ Sends a GET request With Smart Proxy. diff --git a/dags/github_old_version.py b/dags/github_old_version.py deleted file mode 100644 index 3d233df2..00000000 --- a/dags/github_old_version.py +++ /dev/null @@ -1,177 +0,0 @@ -from datetime import datetime, timedelta - -import requests -from airflow import DAG -from airflow.operators.python_operator import PythonOperator - -default_args = { - "owner": "MohammadTwin", - "start_date": datetime(2023, 11, 8), - "retries": 1, - "retry_delay": timedelta(minutes=1), -} - -dag = DAG( - "github_old_version", - default_args=default_args, - description="GitHub Data Extraction DAG", - schedule_interval=None, - catchup=False, -) - - -def get_github_repos(ti): - endpoint = "https://api.github.com/orgs/TogetherCrew/repos" - - response = requests.get(endpoint) - response_data = response.json() - - print("[response_data] ", response_data) - ti.xcom_push(key="github_repos", value=response_data) - - -get_repos_task = PythonOperator( - task_id="get_github_repos", - python_callable=get_github_repos, - provide_context=True, - dag=dag, -) - - -def get_pull_requests(owner: str, repo: str): - endpoint = f"https://api.github.com/repos/{owner}/{repo}/pulls" - - params = {"per_page": 100, "page": 1, "state": "all"} - response = requests.get(endpoint, params=params) - response_data = response.json() - - return response_data - - -def extract_pull_requests(ti): - prs_data = {} - - github_repos = ti.xcom_pull(key="github_repos", task_ids="get_github_repos") - for repo in github_repos: - prs = get_pull_requests(owner=repo["owner"]["login"], repo=repo["name"]) - prs_data[repo["id"]] = prs - - ti.xcom_push(key="github_prs", value=github_repos) - return prs_data - - -def transform_pull_requests(ti): - return None - - -def load_pull_requests(ti): - print("Loaded PR data into the destination:") - - -task_extract_pull_requests = PythonOperator( - task_id="extract_pull_requests", - python_callable=extract_pull_requests, - provide_context=True, - dag=dag, -) - -task_transform_pull_requests = PythonOperator( - task_id="transform_pull_requests", - python_callable=transform_pull_requests, - provide_context=True, - dag=dag, -) - -task_load_pull_requests = PythonOperator( - task_id="load_pull_requests", - python_callable=load_pull_requests, - provide_context=True, - dag=dag, -) - -( - get_repos_task - >> task_extract_pull_requests - >> task_transform_pull_requests - >> task_load_pull_requests -) - - -def extract_commits(ti): - github_repos = 
ti.xcom_pull(key="github_repos", task_ids="get_github_repos") - for repo in github_repos: - print("\n[repo] ", repo) - - return None - - -def transform_commits(ti): - return None - - -def load_commits(ti): - print("Loaded Commit data into the destination:") - - -task_extract_commits = PythonOperator( - task_id="extract_commits", - python_callable=extract_commits, - provide_context=True, - dag=dag, -) - -task_transform_commits = PythonOperator( - task_id="transform_commits", - python_callable=transform_commits, - provide_context=True, - dag=dag, -) - -task_load_commits = PythonOperator( - task_id="load_commits", - python_callable=load_commits, - provide_context=True, - dag=dag, -) - -get_repos_task >> task_extract_commits >> task_transform_commits >> task_load_commits - - -def extract_issues(ti): - github_repos = ti.xcom_pull(key="github_repos", task_ids="get_github_repos") - for repo in github_repos: - print("\n[repo] ", repo) - - return None - - -def transform_issues(ti): - return None - - -def load_issues(ti): - print("Loaded issues data into the destination:") - - -task_extract_issues = PythonOperator( - task_id="extract_issues", - python_callable=extract_issues, - provide_context=True, - dag=dag, -) - -task_transform_issues = PythonOperator( - task_id="transform_issues", - python_callable=transform_issues, - provide_context=True, - dag=dag, -) - -task_load_issues = PythonOperator( - task_id="load_issues", - python_callable=load_issues, - provide_context=True, - dag=dag, -) - -get_repos_task >> task_extract_issues >> task_transform_issues >> task_load_issues diff --git a/dags/neo4j_storage/commits.py b/dags/neo4j_storage/commits.py index 5f5e4486..354c4fc3 100644 --- a/dags/neo4j_storage/commits.py +++ b/dags/neo4j_storage/commits.py @@ -50,7 +50,7 @@ def save_commit_files_changes_to_neo4j( session.execute_write( lambda tx: tx.run( f""" - MATCH (repo:{Node.Repository.value} {{id: $repository_id}}), + MATCH (repo:{Node.Repository.value} {{id: $repository_id}}), (c:{Node.Commit.value} {{sha: $commit_sha}}) WITH repo, c UNWIND $file_changes AS file_change diff --git a/dags/neo4j_storage/pull_requests.py b/dags/neo4j_storage/pull_requests.py index 58db5200..16f13f8c 100644 --- a/dags/neo4j_storage/pull_requests.py +++ b/dags/neo4j_storage/pull_requests.py @@ -62,7 +62,7 @@ def save_pull_request_to_neo4j(pr: dict, repository_id: str): f""" MERGE (pr:{Node.PullRequest.value} {{id: $pr.id}}) SET pr += $pr, pr.repository_id = $repository_id, pr.latestSavedAt = datetime() - + WITH pr MERGE (ghu:{Node.GitHubUser.value} {{id: $repo_creator.id}}) SET ghu += $repo_creator, ghu.latestSavedAt = datetime() @@ -129,7 +129,8 @@ def save_pr_files_changes_to_neo4j(pr_id: int, repository_id: str, file_changes: session.execute_write( lambda tx: tx.run( f""" - MATCH (repo:{Node.Repository.value} {{id: $repository_id}}), (pr:{Node.PullRequest.value} {{id: $pr_id}}) + MATCH (repo:{Node.Repository.value} {{id: $repository_id}}), + (pr:{Node.PullRequest.value} {{id: $pr_id}}) WITH repo, pr UNWIND $file_changes AS file_change MERGE (f:{Node.File.value} {{sha: file_change.sha, filename: file_change.filename}}) From 24f78df24344fd21c071733218c0e01cf34afdef Mon Sep 17 00:00:00 2001 From: Mohammad Twin Date: Fri, 15 Dec 2023 13:08:07 +0400 Subject: [PATCH 07/26] feat: more lint formate --- Readme.md | 1 + dags/github.py | 4 ++-- dags/neo4j_storage/pull_requests.py | 4 ++-- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/Readme.md b/Readme.md index c5ae4ffe..cda9d0c8 100644 --- a/Readme.md +++ b/Readme.md 
@@ -8,6 +8,7 @@ You can quickly launch the application using `Docker Compose`: docker-compose --profile flower up ``` + ## Lint the code The code can be linted by using the below command diff --git a/dags/github.py b/dags/github.py index e78a427f..fc848726 100644 --- a/dags/github.py +++ b/dags/github.py @@ -70,7 +70,7 @@ def get_all_organization(): # toghether_crew_org = { # "id": 1, # "name": "TogetherCrew", - # "description": """TogetherCrew is a community of developers, designers, and creators + # "description": """TogetherCrew is a community of developers, designers, and creators # who are passionate about building and learning together.""", # "url": "", # "key": "" @@ -78,7 +78,7 @@ def get_all_organization(): # rndao_org = { # "id": 2, # "name": "RnDAO", - # "description": """RnDAO is a community of developers, designers, and creators + # "description": """RnDAO is a community of developers, designers, and creators # who are passionate about building and learning together.""", # "url": "", # "key": "" diff --git a/dags/neo4j_storage/pull_requests.py b/dags/neo4j_storage/pull_requests.py index 16f13f8c..86492b4a 100644 --- a/dags/neo4j_storage/pull_requests.py +++ b/dags/neo4j_storage/pull_requests.py @@ -88,7 +88,7 @@ def save_pull_request_to_neo4j(pr: dict, repository_id: str): driver.close() -def save_review_to_neo4j(pr_id: dict, review: dict): +def save_review_to_neo4j(pr_id: int, review: dict): neo4jConnection = Neo4jConnection() driver = neo4jConnection.connect_neo4j() @@ -129,7 +129,7 @@ def save_pr_files_changes_to_neo4j(pr_id: int, repository_id: str, file_changes: session.execute_write( lambda tx: tx.run( f""" - MATCH (repo:{Node.Repository.value} {{id: $repository_id}}), + MATCH (repo:{Node.Repository.value} {{id: $repository_id}}), (pr:{Node.PullRequest.value} {{id: $pr_id}}) WITH repo, pr UNWIND $file_changes AS file_change From 95876950dbf0816b28c54d559b46fb93a389f6d9 Mon Sep 17 00:00:00 2001 From: Mohammad Twin Date: Fri, 15 Dec 2023 13:34:17 +0400 Subject: [PATCH 08/26] feat: ignore __init__.py files from liniting --- dags/github_api_helpers/__init__.py | 1 + dags/neo4j_storage/__init__.py | 1 + 2 files changed, 2 insertions(+) diff --git a/dags/github_api_helpers/__init__.py b/dags/github_api_helpers/__init__.py index 4044b9d1..f709f6e5 100644 --- a/dags/github_api_helpers/__init__.py +++ b/dags/github_api_helpers/__init__.py @@ -1,3 +1,4 @@ +# flake8: noqa from .comments import get_all_repo_issues_and_prs_comments, get_all_repo_review_comments from .commits import fetch_commit_details, fetch_commit_files, get_all_commits from .issues import get_all_comments_of_issue, get_all_issues diff --git a/dags/neo4j_storage/__init__.py b/dags/neo4j_storage/__init__.py index dd3991a2..64c5cb55 100644 --- a/dags/neo4j_storage/__init__.py +++ b/dags/neo4j_storage/__init__.py @@ -1,3 +1,4 @@ +# flake8: noqa from .comments import save_comment_to_neo4j, save_review_comment_to_neo4j from .commits import save_commit_files_changes_to_neo4j, save_commit_to_neo4j from .issues import save_issue_to_neo4j From 74f931cd7a73168d11e0f26c0ffdbb64a0ad0447 Mon Sep 17 00:00:00 2001 From: Mohammad Twin Date: Fri, 15 Dec 2023 13:47:10 +0400 Subject: [PATCH 09/26] feat: resolved --- Readme.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Readme.md b/Readme.md index cda9d0c8..a357ed2d 100644 --- a/Readme.md +++ b/Readme.md @@ -9,7 +9,7 @@ docker-compose --profile flower up ``` -## Lint the code +## Lint the code The code can be linted by using the below command ```bash From 
66e6cf9ffd7b1cf6abaa81b3446f1d5e3ac3fb6b Mon Sep 17 00:00:00 2001 From: Mohammad Twin Date: Fri, 15 Dec 2023 15:51:53 +0400 Subject: [PATCH 10/26] feat: docker-compose.test.yml file updated align with github ci/cd --- docker-compose.test.yml | 106 ++++++++++++++++++++-------------------- 1 file changed, 54 insertions(+), 52 deletions(-) diff --git a/docker-compose.test.yml b/docker-compose.test.yml index a15c5f02..088a0a20 100644 --- a/docker-compose.test.yml +++ b/docker-compose.test.yml @@ -45,60 +45,62 @@ # Feel free to modify this file to suit your needs. --- version: '3.8' -x-airflow-common: - &airflow-common - # In order to add custom dependencies or upgrade provider packages you can use your extended image. - # Comment the image line, place your Dockerfile in the directory where you placed the docker-compose.yaml - # and uncomment the "build" line below, Then run `docker-compose build` to build the images. - image: ${AIRFLOW_IMAGE_NAME:-apache/airflow:2.7.3} - # build: . - command: > - /bin/bash -c " - python3 -m coverage run --omit=tests/* -m pytest tests && - python3 -m coverage lcov -i -o coverage/lcov.info - " - environment: - &airflow-common-env - AIRFLOW__CORE__EXECUTOR: CeleryExecutor - AIRFLOW__DATABASE__SQL_ALCHEMY_CONN: postgresql+psycopg2://airflow:airflow@postgres/airflow - # For backward compatibility, with Airflow <2.3 - AIRFLOW__CORE__SQL_ALCHEMY_CONN: postgresql+psycopg2://airflow:airflow@postgres/airflow - AIRFLOW__CELERY__RESULT_BACKEND: db+postgresql://airflow:airflow@postgres/airflow - AIRFLOW__CELERY__BROKER_URL: redis://:@redis:6379/0 - AIRFLOW__CORE__FERNET_KEY: '' - AIRFLOW__CORE__DAGS_ARE_PAUSED_AT_CREATION: 'true' - AIRFLOW__CORE__LOAD_EXAMPLES: 'true' - AIRFLOW__API__AUTH_BACKENDS: 'airflow.api.auth.backend.basic_auth,airflow.api.auth.backend.session' - # yamllint disable rule:line-length - # Use simple http server on scheduler for health checks - # See https://airflow.apache.org/docs/apache-airflow/stable/administration-and-deployment/logging-monitoring/check-health.html#scheduler-health-check-server - # yamllint enable rule:line-length - AIRFLOW__SCHEDULER__ENABLE_HEALTH_CHECK: 'true' - # WARNING: Use _PIP_ADDITIONAL_REQUIREMENTS option ONLY for a quick checks - # for other purpose (development, test and especially production usage) build/extend Airflow image. - # _PIP_ADDITIONAL_REQUIREMENTS: ${_PIP_ADDITIONAL_REQUIREMENTS:-} - _PIP_ADDITIONAL_REQUIREMENTS: neo4j - NEO4J_PROTOCOL: bolt - NEO4J_HOST: neo4j - NEO4J_PORT: 7687 - NEO4J_USER: neo4j - NEO4J_PASSWORD: neo4j123456 - volumes: - - ${AIRFLOW_PROJ_DIR:-.}/dags:/opt/airflow/dags - - ${AIRFLOW_PROJ_DIR:-.}/logs:/opt/airflow/logs - - ${AIRFLOW_PROJ_DIR:-.}/config:/opt/airflow/config - - ${AIRFLOW_PROJ_DIR:-.}/plugins:/opt/airflow/plugins - user: "${AIRFLOW_UID:-50000}:0" - depends_on: - &airflow-common-depends-on - redis: - condition: service_healthy - postgres: - condition: service_healthy - neo4j: - condition: service_healthy services: + app: + &airflow-common + # In order to add custom dependencies or upgrade provider packages you can use your extended image. + # Comment the image line, place your Dockerfile in the directory where you placed the docker-compose.yaml + # and uncomment the "build" line below, Then run `docker-compose build` to build the images. + image: ${AIRFLOW_IMAGE_NAME:-apache/airflow:2.7.3} + # build: . 
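Patch 10 below moves the `&airflow-common` anchor from a top-level `x-airflow-common` extension field onto the `app` service itself. This works because YAML anchors can hang off any mapping node, and the later `<<: *airflow-common` merge keys resolve the same way at parse time. A minimal PyYAML illustration of that merge behaviour (illustrative only; Compose performs this resolution itself):

```python
import yaml  # PyYAML

doc = """
services:
  app: &airflow-common
    image: apache/airflow:2.7.3
    command: webserver
  airflow-worker:
    <<: *airflow-common
    command: celery worker
"""
data = yaml.safe_load(doc)
worker = data["services"]["airflow-worker"]
assert worker["image"] == "apache/airflow:2.7.3"  # inherited via the merge key
assert worker["command"] == "celery worker"       # explicit key wins over the anchor
```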
+ command: > + /bin/bash -c " + python3 -m coverage run --omit=tests/* -m pytest tests && + python3 -m coverage lcov -i -o coverage/lcov.info + " + environment: + &airflow-common-env + AIRFLOW__CORE__EXECUTOR: CeleryExecutor + AIRFLOW__DATABASE__SQL_ALCHEMY_CONN: postgresql+psycopg2://airflow:airflow@postgres/airflow + # For backward compatibility, with Airflow <2.3 + AIRFLOW__CORE__SQL_ALCHEMY_CONN: postgresql+psycopg2://airflow:airflow@postgres/airflow + AIRFLOW__CELERY__RESULT_BACKEND: db+postgresql://airflow:airflow@postgres/airflow + AIRFLOW__CELERY__BROKER_URL: redis://:@redis:6379/0 + AIRFLOW__CORE__FERNET_KEY: '' + AIRFLOW__CORE__DAGS_ARE_PAUSED_AT_CREATION: 'true' + AIRFLOW__CORE__LOAD_EXAMPLES: 'true' + AIRFLOW__API__AUTH_BACKENDS: 'airflow.api.auth.backend.basic_auth,airflow.api.auth.backend.session' + # yamllint disable rule:line-length + # Use simple http server on scheduler for health checks + # See https://airflow.apache.org/docs/apache-airflow/stable/administration-and-deployment/logging-monitoring/check-health.html#scheduler-health-check-server + # yamllint enable rule:line-length + AIRFLOW__SCHEDULER__ENABLE_HEALTH_CHECK: 'true' + # WARNING: Use _PIP_ADDITIONAL_REQUIREMENTS option ONLY for a quick checks + # for other purpose (development, test and especially production usage) build/extend Airflow image. + # _PIP_ADDITIONAL_REQUIREMENTS: ${_PIP_ADDITIONAL_REQUIREMENTS:-} + _PIP_ADDITIONAL_REQUIREMENTS: neo4j + NEO4J_PROTOCOL: bolt + NEO4J_HOST: neo4j + NEO4J_PORT: 7687 + NEO4J_USER: neo4j + NEO4J_PASSWORD: neo4j123456 + volumes: + - ${AIRFLOW_PROJ_DIR:-.}/dags:/opt/airflow/dags + - ${AIRFLOW_PROJ_DIR:-.}/logs:/opt/airflow/logs + - ${AIRFLOW_PROJ_DIR:-.}/config:/opt/airflow/config + - ${AIRFLOW_PROJ_DIR:-.}/plugins:/opt/airflow/plugins + user: "${AIRFLOW_UID:-50000}:0" + depends_on: + &airflow-common-depends-on + redis: + condition: service_healthy + postgres: + condition: service_healthy + neo4j: + condition: service_healthy + + postgres: image: postgres:13 environment: From fb5b7e49c4d9bc58c1c9913ad7d6dcec92cdf5c9 Mon Sep 17 00:00:00 2001 From: Mohammad Twin Date: Fri, 15 Dec 2023 18:49:07 +0400 Subject: [PATCH 11/26] feat: new docker-compose file was written --- TestDockerfile | 9 ++ docker-compose.test.yml | 327 +++------------------------------------- docker-entrypoint.sh | 4 + 3 files changed, 31 insertions(+), 309 deletions(-) create mode 100644 TestDockerfile create mode 100644 docker-entrypoint.sh diff --git a/TestDockerfile b/TestDockerfile new file mode 100644 index 00000000..67a72a0a --- /dev/null +++ b/TestDockerfile @@ -0,0 +1,9 @@ +# It's recommended that we use `bullseye` for Python (alpine isn't suitable as it conflcts with numpy) +FROM python:3.10-bullseye AS base +WORKDIR /project +COPY . . +RUN pip3 install -r requirements.txt + +FROM base AS test +RUN chmod +x docker-entrypoint.sh +CMD ["./docker-entrypoint.sh"] \ No newline at end of file diff --git a/docker-compose.test.yml b/docker-compose.test.yml index 088a0a20..8a6c6c55 100644 --- a/docker-compose.test.yml +++ b/docker-compose.test.yml @@ -1,137 +1,32 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# - -# Basic Airflow cluster configuration for CeleryExecutor with Redis and PostgreSQL. -# -# WARNING: This configuration is for local development. Do not use it in a production deployment. -# -# This configuration supports basic configuration using environment variables or an .env file -# The following variables are supported: -# -# AIRFLOW_IMAGE_NAME - Docker image name used to run Airflow. -# Default: apache/airflow:2.7.3 -# AIRFLOW_UID - User ID in Airflow containers -# Default: 50000 -# AIRFLOW_PROJ_DIR - Base path to which all the files will be volumed. -# Default: . -# Those configurations are useful mostly in case of standalone testing/running Airflow in test/try-out mode -# -# _AIRFLOW_WWW_USER_USERNAME - Username for the administrator account (if requested). -# Default: airflow -# _AIRFLOW_WWW_USER_PASSWORD - Password for the administrator account (if requested). -# Default: airflow -# _PIP_ADDITIONAL_REQUIREMENTS - Additional PIP requirements to add when starting all containers. -# Use this option ONLY for quick checks. Installing requirements at container -# startup is done EVERY TIME the service is started. -# A better way is to build a custom image or extend the official image -# as described in https://airflow.apache.org/docs/docker-stack/build.html. -# Default: '' -# -# Feel free to modify this file to suit your needs. ---- -version: '3.8' +version: "3.9" services: app: - &airflow-common - # In order to add custom dependencies or upgrade provider packages you can use your extended image. - # Comment the image line, place your Dockerfile in the directory where you placed the docker-compose.yaml - # and uncomment the "build" line below, Then run `docker-compose build` to build the images. - image: ${AIRFLOW_IMAGE_NAME:-apache/airflow:2.7.3} - # build: . - command: > - /bin/bash -c " - python3 -m coverage run --omit=tests/* -m pytest tests && - python3 -m coverage lcov -i -o coverage/lcov.info - " + build: + context: . 
+ target: test + dockerfile: TestDockerfile environment: - &airflow-common-env - AIRFLOW__CORE__EXECUTOR: CeleryExecutor - AIRFLOW__DATABASE__SQL_ALCHEMY_CONN: postgresql+psycopg2://airflow:airflow@postgres/airflow - # For backward compatibility, with Airflow <2.3 - AIRFLOW__CORE__SQL_ALCHEMY_CONN: postgresql+psycopg2://airflow:airflow@postgres/airflow - AIRFLOW__CELERY__RESULT_BACKEND: db+postgresql://airflow:airflow@postgres/airflow - AIRFLOW__CELERY__BROKER_URL: redis://:@redis:6379/0 - AIRFLOW__CORE__FERNET_KEY: '' - AIRFLOW__CORE__DAGS_ARE_PAUSED_AT_CREATION: 'true' - AIRFLOW__CORE__LOAD_EXAMPLES: 'true' - AIRFLOW__API__AUTH_BACKENDS: 'airflow.api.auth.backend.basic_auth,airflow.api.auth.backend.session' - # yamllint disable rule:line-length - # Use simple http server on scheduler for health checks - # See https://airflow.apache.org/docs/apache-airflow/stable/administration-and-deployment/logging-monitoring/check-health.html#scheduler-health-check-server - # yamllint enable rule:line-length - AIRFLOW__SCHEDULER__ENABLE_HEALTH_CHECK: 'true' - # WARNING: Use _PIP_ADDITIONAL_REQUIREMENTS option ONLY for a quick checks - # for other purpose (development, test and especially production usage) build/extend Airflow image. - # _PIP_ADDITIONAL_REQUIREMENTS: ${_PIP_ADDITIONAL_REQUIREMENTS:-} - _PIP_ADDITIONAL_REQUIREMENTS: neo4j - NEO4J_PROTOCOL: bolt - NEO4J_HOST: neo4j - NEO4J_PORT: 7687 - NEO4J_USER: neo4j - NEO4J_PASSWORD: neo4j123456 + - PORT=3000 + - MONGODB_HOST=mongo + - MONGODB_PORT=27017 + - MONGODB_USER=root + - MONGODB_PASS=pass + - NEO4J_PROTOCOL=bolt + - NEO4J_HOST=neo4j + - NEO4J_PORT=7687 + - NEO4J_USER=neo4j + - NEO4J_PASSWORD=password + - NEO4J_DB=neo4j volumes: - - ${AIRFLOW_PROJ_DIR:-.}/dags:/opt/airflow/dags - - ${AIRFLOW_PROJ_DIR:-.}/logs:/opt/airflow/logs - - ${AIRFLOW_PROJ_DIR:-.}/config:/opt/airflow/config - - ${AIRFLOW_PROJ_DIR:-.}/plugins:/opt/airflow/plugins - user: "${AIRFLOW_UID:-50000}:0" + - ./coverage:/project/coverage depends_on: - &airflow-common-depends-on - redis: - condition: service_healthy - postgres: - condition: service_healthy neo4j: condition: service_healthy - - - postgres: - image: postgres:13 - environment: - POSTGRES_USER: airflow - POSTGRES_PASSWORD: airflow - POSTGRES_DB: airflow - volumes: - - postgres-db-volume:/var/lib/postgresql/data - healthcheck: - test: ["CMD", "pg_isready", "-U", "airflow"] - interval: 10s - retries: 5 - start_period: 5s - restart: always - - redis: - image: redis:latest - expose: - - 6379 - healthcheck: - test: ["CMD", "redis-cli", "ping"] - interval: 10s - timeout: 30s - retries: 50 - start_period: 30s - restart: always - neo4j: image: "neo4j:5.9.0" environment: - - NEO4J_AUTH=neo4j/neo4j123456 + - NEO4J_AUTH=neo4j/password - NEO4J_PLUGINS=["apoc", "graph-data-science"] - NEO4J_dbms_security_procedures_unrestricted=apoc.*,gds.* healthcheck: @@ -140,189 +35,3 @@ services: timeout: 10s retries: 2 start_period: 40s - - airflow-webserver: - <<: *airflow-common - command: webserver - ports: - - "8080:8080" - healthcheck: - test: ["CMD", "curl", "--fail", "http://localhost:8080/health"] - interval: 30s - timeout: 10s - retries: 5 - start_period: 30s - restart: always - depends_on: - <<: *airflow-common-depends-on - airflow-init: - condition: service_completed_successfully - - airflow-scheduler: - <<: *airflow-common - command: scheduler - healthcheck: - test: ["CMD", "curl", "--fail", "http://localhost:8974/health"] - interval: 30s - timeout: 10s - retries: 5 - start_period: 30s - restart: always - depends_on: - <<: 
*airflow-common-depends-on - airflow-init: - condition: service_completed_successfully - - airflow-worker: - <<: *airflow-common - command: celery worker - healthcheck: - # yamllint disable rule:line-length - test: - - "CMD-SHELL" - - 'celery --app airflow.providers.celery.executors.celery_executor.app inspect ping -d "celery@$${HOSTNAME}" || celery --app airflow.executors.celery_executor.app inspect ping -d "celery@$${HOSTNAME}"' - interval: 30s - timeout: 10s - retries: 5 - start_period: 30s - environment: - <<: *airflow-common-env - # Required to handle warm shutdown of the celery workers properly - # See https://airflow.apache.org/docs/docker-stack/entrypoint.html#signal-propagation - DUMB_INIT_SETSID: "0" - restart: always - depends_on: - <<: *airflow-common-depends-on - airflow-init: - condition: service_completed_successfully - - airflow-triggerer: - <<: *airflow-common - command: triggerer - healthcheck: - test: ["CMD-SHELL", 'airflow jobs check --job-type TriggererJob --hostname "$${HOSTNAME}"'] - interval: 30s - timeout: 10s - retries: 5 - start_period: 30s - restart: always - depends_on: - <<: *airflow-common-depends-on - airflow-init: - condition: service_completed_successfully - - airflow-init: - <<: *airflow-common - entrypoint: /bin/bash - # yamllint disable rule:line-length - command: - - -c - - | - function ver() { - printf "%04d%04d%04d%04d" $${1//./ } - } - airflow_version=$$(AIRFLOW__LOGGING__LOGGING_LEVEL=INFO && gosu airflow airflow version) - airflow_version_comparable=$$(ver $${airflow_version}) - min_airflow_version=2.2.0 - min_airflow_version_comparable=$$(ver $${min_airflow_version}) - if (( airflow_version_comparable < min_airflow_version_comparable )); then - echo - echo -e "\033[1;31mERROR!!!: Too old Airflow version $${airflow_version}!\e[0m" - echo "The minimum Airflow version supported: $${min_airflow_version}. Only use this or higher!" - echo - exit 1 - fi - if [[ -z "${AIRFLOW_UID}" ]]; then - echo - echo -e "\033[1;33mWARNING!!!: AIRFLOW_UID not set!\e[0m" - echo "If you are on Linux, you SHOULD follow the instructions below to set " - echo "AIRFLOW_UID environment variable, otherwise files will be owned by root." - echo "For other operating systems you can get rid of the warning with manually created .env file:" - echo " See: https://airflow.apache.org/docs/apache-airflow/stable/howto/docker-compose/index.html#setting-the-right-airflow-user" - echo - fi - one_meg=1048576 - mem_available=$$(($$(getconf _PHYS_PAGES) * $$(getconf PAGE_SIZE) / one_meg)) - cpus_available=$$(grep -cE 'cpu[0-9]+' /proc/stat) - disk_available=$$(df / | tail -1 | awk '{print $$4}') - warning_resources="false" - if (( mem_available < 4000 )) ; then - echo - echo -e "\033[1;33mWARNING!!!: Not enough memory available for Docker.\e[0m" - echo "At least 4GB of memory required. You have $$(numfmt --to iec $$((mem_available * one_meg)))" - echo - warning_resources="true" - fi - if (( cpus_available < 2 )); then - echo - echo -e "\033[1;33mWARNING!!!: Not enough CPUS available for Docker.\e[0m" - echo "At least 2 CPUs recommended. You have $${cpus_available}" - echo - warning_resources="true" - fi - if (( disk_available < one_meg * 10 )); then - echo - echo -e "\033[1;33mWARNING!!!: Not enough Disk space available for Docker.\e[0m" - echo "At least 10 GBs recommended. 
You have $$(numfmt --to iec $$((disk_available * 1024 )))" - echo - warning_resources="true" - fi - if [[ $${warning_resources} == "true" ]]; then - echo - echo -e "\033[1;33mWARNING!!!: You have not enough resources to run Airflow (see above)!\e[0m" - echo "Please follow the instructions to increase amount of resources available:" - echo " https://airflow.apache.org/docs/apache-airflow/stable/howto/docker-compose/index.html#before-you-begin" - echo - fi - mkdir -p /sources/logs /sources/dags /sources/plugins - chown -R "${AIRFLOW_UID}:0" /sources/{logs,dags,plugins} - exec /entrypoint airflow version - # yamllint enable rule:line-length - environment: - <<: *airflow-common-env - _AIRFLOW_DB_MIGRATE: 'true' - _AIRFLOW_WWW_USER_CREATE: 'true' - _AIRFLOW_WWW_USER_USERNAME: ${_AIRFLOW_WWW_USER_USERNAME:-airflow} - _AIRFLOW_WWW_USER_PASSWORD: ${_AIRFLOW_WWW_USER_PASSWORD:-airflow} - _PIP_ADDITIONAL_REQUIREMENTS: '' - user: "0:0" - volumes: - - ${AIRFLOW_PROJ_DIR:-.}:/sources - - airflow-cli: - <<: *airflow-common - profiles: - - debug - environment: - <<: *airflow-common-env - CONNECTION_CHECK_MAX_COUNT: "0" - # Workaround for entrypoint issue. See: https://github.com/apache/airflow/issues/16252 - command: - - bash - - -c - - airflow - - # You can enable flower by adding "--profile flower" option e.g. docker-compose --profile flower up - # or by explicitly targeted on the command line e.g. docker-compose up flower. - # See: https://docs.docker.com/compose/profiles/ - flower: - <<: *airflow-common - command: celery flower - profiles: - - flower - ports: - - "5555:5555" - healthcheck: - test: ["CMD", "curl", "--fail", "http://localhost:5555/"] - interval: 30s - timeout: 10s - retries: 5 - start_period: 30s - restart: always - depends_on: - <<: *airflow-common-depends-on - airflow-init: - condition: service_completed_successfully - -volumes: - postgres-db-volume: diff --git a/docker-entrypoint.sh b/docker-entrypoint.sh new file mode 100644 index 00000000..ee92265c --- /dev/null +++ b/docker-entrypoint.sh @@ -0,0 +1,4 @@ +#!/usr/bin/env bash +cd dags +python3 -m coverage run --omit=tests/* -m pytest tests +python3 -m coverage lcov -i -o coverage/lcov.info \ No newline at end of file From 5932d95d9aab026ba757aa967f5ed9608b9ee908 Mon Sep 17 00:00:00 2001 From: Mohammad Twin Date: Fri, 15 Dec 2023 18:54:29 +0400 Subject: [PATCH 12/26] fix: manage code in a way that can clean the bash --- docker-entrypoint.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker-entrypoint.sh b/docker-entrypoint.sh index ee92265c..798ad40d 100644 --- a/docker-entrypoint.sh +++ b/docker-entrypoint.sh @@ -1,4 +1,4 @@ #!/usr/bin/env bash -cd dags +cd foo || exit python3 -m coverage run --omit=tests/* -m pytest tests python3 -m coverage lcov -i -o coverage/lcov.info \ No newline at end of file From 7c0f069a977013a644d63da7906947f8831fdb71 Mon Sep 17 00:00:00 2001 From: Mohammad Twin Date: Mon, 18 Dec 2023 11:54:40 +0400 Subject: [PATCH 13/26] feat: a basic unit test were added for saving labels into db --- dags/tests/units/__init__.py | 0 dags/tests/units/test_labels_storage.py | 16 ++++++++++++++++ 2 files changed, 16 insertions(+) create mode 100644 dags/tests/units/__init__.py create mode 100644 dags/tests/units/test_labels_storage.py diff --git a/dags/tests/units/__init__.py b/dags/tests/units/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/dags/tests/units/test_labels_storage.py b/dags/tests/units/test_labels_storage.py new file mode 100644 index 00000000..78f924e7 --- 
/dev/null +++ b/dags/tests/units/test_labels_storage.py @@ -0,0 +1,16 @@ +import unittest +# from unittest.mock import Mock, patch +from neo4j_storage import save_label_to_neo4j + +class TestSaveLabelToNeo4j(unittest.TestCase): + + # @patch('your_module.Neo4jConnection') # Replace 'your_module' with the actual name of your module + def test_save_label_to_neo4j(self): + + # Define a sample label + sample_label = {'id': '123', 'name': 'SampleLabel'} + + # Call the function with the sample label + response = save_label_to_neo4j(sample_label) + + self.assertIsNone(response, "The response of save_label_to_neo4j should be None") From 1efb76b6ab2714dc42feb0287e7365bd7e73661f Mon Sep 17 00:00:00 2001 From: Mohammad Twin Date: Mon, 18 Dec 2023 11:55:09 +0400 Subject: [PATCH 14/26] feat: a Dockerfile, docker-compose, etc. were added for managing tests on the CI --- TestDockerfile | 3 +-- docker-compose.test.yml | 1 - docker-entrypoint.sh | 4 +++- requirements.txt | 5 ++++- 4 files changed, 8 insertions(+), 5 deletions(-) diff --git a/TestDockerfile b/TestDockerfile index 67a72a0a..27e9a300 100644 --- a/TestDockerfile +++ b/TestDockerfile @@ -1,9 +1,8 @@ # It's recommended that we use `bullseye` for Python (alpine isn't suitable as it conflicts with numpy) -FROM python:3.10-bullseye AS base +FROM python:3.10-bullseye WORKDIR /project COPY . . RUN pip3 install -r requirements.txt -FROM base AS test RUN chmod +x docker-entrypoint.sh CMD ["./docker-entrypoint.sh"] \ No newline at end of file diff --git a/docker-compose.test.yml b/docker-compose.test.yml index 8a6c6c55..dcbf7b8b 100644 --- a/docker-compose.test.yml +++ b/docker-compose.test.yml @@ -4,7 +4,6 @@ services: app: build: context: . - target: test dockerfile: TestDockerfile environment: - PORT=3000 diff --git a/docker-entrypoint.sh b/docker-entrypoint.sh index 798ad40d..dd931a4a 100644 --- a/docker-entrypoint.sh +++ b/docker-entrypoint.sh @@ -1,4 +1,6 @@ #!/usr/bin/env bash -cd foo || exit +echo "Running tests" +cd dags || exit +echo "After cd dags" python3 -m coverage run --omit=tests/* -m pytest tests python3 -m coverage lcov -i -o coverage/lcov.info \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 00966dc1..fac2f671 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1,4 @@ -neo4j \ No newline at end of file +neo4j==5.14.1 +coverage==7.3.3 +pytest==7.4.3 +python-dotenv==1.0.0 \ No newline at end of file From 00fd27c10ccda563aba1471cf1e72e100cb90d1a Mon Sep 17 00:00:00 2001 From: Mohammad Twin Date: Mon, 18 Dec 2023 12:06:04 +0400 Subject: [PATCH 15/26] fix: lint fixes --- dags/tests/units/test_labels_storage.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/dags/tests/units/test_labels_storage.py b/dags/tests/units/test_labels_storage.py index 78f924e7..eea4fffa 100644 --- a/dags/tests/units/test_labels_storage.py +++ b/dags/tests/units/test_labels_storage.py @@ -1,16 +1,15 @@ import unittest -# from unittest.mock import Mock, patch from neo4j_storage import save_label_to_neo4j -class TestSaveLabelToNeo4j(unittest.TestCase): - # @patch('your_module.Neo4jConnection') # Replace 'your_module' with the actual name of your module +class TestSaveLabelToNeo4j(unittest.TestCase): def test_save_label_to_neo4j(self): - # Define a sample label - sample_label = {'id': '123', 'name': 'SampleLabel'} + sample_label = {"id": "123", "name": "SampleLabel"} # Call the function with the sample label response = save_label_to_neo4j(sample_label) - self.assertIsNone(response, "The response of
save_label_to_neo4j should be None") + self.assertIsNone( + response, "The response of save_label_to_neo4j should be None" + ) From bc5befa03259d943527de74e4c95f784b6106f7d Mon Sep 17 00:00:00 2001 From: Mohammad Twin Date: Mon, 18 Dec 2023 12:16:53 +0400 Subject: [PATCH 16/26] feat: aligned with lint --- dags/tests/units/test_labels_storage.py | 1 + 1 file changed, 1 insertion(+) diff --git a/dags/tests/units/test_labels_storage.py b/dags/tests/units/test_labels_storage.py index eea4fffa..922cce36 100644 --- a/dags/tests/units/test_labels_storage.py +++ b/dags/tests/units/test_labels_storage.py @@ -1,4 +1,5 @@ import unittest + from neo4j_storage import save_label_to_neo4j From c9d2254f2585e8dd500f8b764353ae4e1dedf4e0 Mon Sep 17 00:00:00 2001 From: Mohammad Twin Date: Mon, 18 Dec 2023 15:17:33 +0400 Subject: [PATCH 17/26] feat: docker-entrypoint.sh updated --- docker-entrypoint.sh | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/docker-entrypoint.sh b/docker-entrypoint.sh index dd931a4a..aea73932 100644 --- a/docker-entrypoint.sh +++ b/docker-entrypoint.sh @@ -1,6 +1,8 @@ #!/usr/bin/env bash -echo "Running tests" +echo "change dir to dags" cd dags || exit -echo "After cd dags" + python3 -m coverage run --omit=tests/* -m pytest tests + +cp .coverage ../.coverage python3 -m coverage lcov -i -o coverage/lcov.info \ No newline at end of file From d3444ffbc29b73e6844197275f36ba51b0f42dcd Mon Sep 17 00:00:00 2001 From: Mohammad Twin Date: Mon, 18 Dec 2023 15:30:40 +0400 Subject: [PATCH 18/26] fix: update docker-entrypoint.sh to save all changes --- docker-entrypoint.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/docker-entrypoint.sh b/docker-entrypoint.sh index aea73932..72eec50b 100644 --- a/docker-entrypoint.sh +++ b/docker-entrypoint.sh @@ -5,4 +5,5 @@ cd dags || exit python3 -m coverage run --omit=tests/* -m pytest tests cp .coverage ../.coverage +cd .. || exit python3 -m coverage lcov -i -o coverage/lcov.info \ No newline at end of file From b1f7e6b8de9590a02eabb767bcbd1853772bf160 Mon Sep 17 00:00:00 2001 From: Mohammad Twin Date: Mon, 18 Dec 2023 15:31:12 +0400 Subject: [PATCH 19/26] feat: add 'coverage' file to the .gitignore file --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index c73c1acb..63a32c88 100644 --- a/.gitignore +++ b/.gitignore @@ -55,6 +55,7 @@ coverage.xml .hypothesis/ .pytest_cache/ cover/ +coverage # Translations *.mo From b7218969d9b81601d0c8c4834510abb9b5478ff6 Mon Sep 17 00:00:00 2001 From: Mohammad Twin Date: Mon, 18 Dec 2023 15:46:11 +0400 Subject: [PATCH 20/26] feat: test stage was added to Dockerfile and docker-compose --- TestDockerfile => Dockerfile | 3 ++- docker-compose.test.yml | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) rename TestDockerfile => Dockerfile (81%) diff --git a/TestDockerfile b/Dockerfile similarity index 81% rename from TestDockerfile rename to Dockerfile index 27e9a300..67a72a0a 100644 --- a/TestDockerfile +++ b/Dockerfile @@ -1,8 +1,9 @@ # It's recommended that we use `bullseye` for Python (alpine isn't suitable as it conflicts with numpy) -FROM python:3.10-bullseye +FROM python:3.10-bullseye AS base WORKDIR /project COPY . .
RUN pip3 install -r requirements.txt +FROM base AS test RUN chmod +x docker-entrypoint.sh CMD ["./docker-entrypoint.sh"] \ No newline at end of file diff --git a/docker-compose.test.yml b/docker-compose.test.yml index dcbf7b8b..8a6c6c55 100644 --- a/docker-compose.test.yml +++ b/docker-compose.test.yml @@ -4,6 +4,7 @@ services: app: build: context: . + target: test dockerfile: TestDockerfile environment: - PORT=3000 From 5a2708c82c10f8800148c720a02b8a8ea2147586 Mon Sep 17 00:00:00 2001 From: Mohammad Twin Date: Mon, 18 Dec 2023 15:47:34 +0400 Subject: [PATCH 21/26] fix: typo was fixed --- docker-compose.test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker-compose.test.yml b/docker-compose.test.yml index 8a6c6c55..0c917f1b 100644 --- a/docker-compose.test.yml +++ b/docker-compose.test.yml @@ -5,7 +5,7 @@ services: build: context: . target: test - dockerfile: TestDockerfile + dockerfile: Dockerfile environment: - PORT=3000 - MONGODB_HOST=mongo From 174289d00d43d5bad522ab7a5ecf6a7cce275af4 Mon Sep 17 00:00:00 2001 From: Mohammad Twin Date: Mon, 18 Dec 2023 16:04:14 +0400 Subject: [PATCH 22/26] fix: remove production.yml file as we don't need that for now --- .github/workflows/production.yml | 12 ------------ 1 file changed, 12 deletions(-) delete mode 100644 .github/workflows/production.yml diff --git a/.github/workflows/production.yml b/.github/workflows/production.yml deleted file mode 100644 index a1be27ba..00000000 --- a/.github/workflows/production.yml +++ /dev/null @@ -1,12 +0,0 @@ -name: Production CI/CD Pipeline - -on: - push: - branches: - - main - -jobs: - ci: - uses: TogetherCrew/operations/.github/workflows/ci.yml@main - secrets: - CC_TEST_REPORTER_ID: ${{ secrets.CC_TEST_REPORTER_ID }} \ No newline at end of file From fdd22582ba49861f9e4426cefadd22ca7e77c57d Mon Sep 17 00:00:00 2001 From: Mohammad Twin Date: Tue, 19 Dec 2023 12:13:37 +0400 Subject: [PATCH 23/26] feat: add prod stage to the Dockerfile --- Dockerfile | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 67a72a0a..56eba672 100644 --- a/Dockerfile +++ b/Dockerfile @@ -6,4 +6,7 @@ RUN pip3 install -r requirements.txt FROM base AS test RUN chmod +x docker-entrypoint.sh -CMD ["./docker-entrypoint.sh"] \ No newline at end of file +CMD ["./docker-entrypoint.sh"] + +FROM base AS prod +CMD ["echo", "airflow dags should be running on airflow container"] \ No newline at end of file From 464137aee42abce7843c70535d6320ca332b5203 Mon Sep 17 00:00:00 2001 From: Mohammad Twin Date: Tue, 19 Dec 2023 12:18:27 +0400 Subject: [PATCH 24/26] feat: aligned code with lint --- dags/github_api_helpers/comments.py | 8 ++- dags/github_api_helpers/commits.py | 12 +++-- dags/github_api_helpers/issues.py | 14 +++-- dags/github_api_helpers/labels.py | 5 +- dags/github_api_helpers/orgs.py | 4 +- dags/github_api_helpers/pull_requests.py | 66 +++++++++++++++++------- dags/github_api_helpers/repos.py | 16 ++++-- 7 files changed, 92 insertions(+), 33 deletions(-) diff --git a/dags/github_api_helpers/comments.py b/dags/github_api_helpers/comments.py index dcb23358..4315fd8e 100644 --- a/dags/github_api_helpers/comments.py +++ b/dags/github_api_helpers/comments.py @@ -45,7 +45,9 @@ def fetch_repo_review_comments_page( ) ) - logging.info(f"Found {len(updated_response_data)} comments for {owner}/{repo} on page {page}. comments: {updated_response_data}") + logging.info( + f"Found {len(updated_response_data)} comments for {owner}/{repo} on page {page}.
comments: {updated_response_data}" + ) return updated_response_data @@ -117,7 +119,9 @@ def fetch_repo_issues_and_prs_comments_page( map(lambda x: {**x, **extract_type_from_comment_response(x)}, response_data) ) - logging.info(f"Found {len(updated_response_data)} comments for {owner}/{repo} on page {page}. comments: {updated_response_data}") + logging.info( + f"Found {len(updated_response_data)} comments for {owner}/{repo} on page {page}. comments: {updated_response_data}" + ) return updated_response_data diff --git a/dags/github_api_helpers/commits.py b/dags/github_api_helpers/commits.py index c3133c22..6bcc0709 100644 --- a/dags/github_api_helpers/commits.py +++ b/dags/github_api_helpers/commits.py @@ -18,7 +18,9 @@ def fetch_commits(owner: str, repo: str, page: int, per_page: int = 100): response = get(endpoint, params=params) response_data = response.json() - logging.info(f"Found {len(response_data)} commits for {owner}/{repo} on page {page}. Commits: {response_data}") + logging.info( + f"Found {len(response_data)} commits for {owner}/{repo} on page {page}. Commits: {response_data}" + ) return response_data @@ -61,7 +63,9 @@ def fetch_commit_details(owner: str, repo: str, commit_sha: str): response = get(endpoint) response_data = response.json() - logging.info(f"Found details for commit {commit_sha} of {owner}/{repo}: {response_data}") + logging.info( + f"Found details for commit {commit_sha} of {owner}/{repo}: {response_data}" + ) return response_data @@ -77,7 +81,9 @@ def fetch_commit_files(owner: str, repo: str, sha: str): logging.info(f"Fetching files changed in commit {sha} of {owner}/{repo}...") commit_details = fetch_commit_details(owner, repo, sha) if "files" in commit_details: - logging.info(f"Found {len(commit_details['files'])} files changed in commit {sha} of {owner}/{repo}.") + logging.info( + f"Found {len(commit_details['files'])} files changed in commit {sha} of {owner}/{repo}." + ) return commit_details["files"] else: logging.info(f"No files changed in commit {sha} of {owner}/{repo}.") diff --git a/dags/github_api_helpers/issues.py b/dags/github_api_helpers/issues.py index 6913d656..9cd113fc 100644 --- a/dags/github_api_helpers/issues.py +++ b/dags/github_api_helpers/issues.py @@ -27,7 +27,9 @@ def fetch_issues(owner: str, repo: str, page: int, per_page: int = 100): issues = [issue for issue in response_data if "pull_request" not in issue] is_more_issues = len(response_data) == per_page - logging.info(f"Found {len(issues)} issues for {owner}/{repo} on page {page}. Issues: {issues}") + logging.info( + f"Found {len(issues)} issues for {owner}/{repo} on page {page}. Issues: {issues}" + ) return issues, is_more_issues @@ -78,7 +80,9 @@ def fetch_issue_comments( response = get(endpoint, params=params) response_data = response.json() - logging.info(f"Found {len(response_data)} comments for issue {issue_number} on page {page}. Comments: {response_data}") + logging.info( + f"Found {len(response_data)} comments for issue {issue_number} on page {page}. Comments: {response_data}" + ) return response_data @@ -101,6 +105,8 @@ def get_all_comments_of_issue(owner: str, repo: str, issue_number: int): break all_comments.extend(comments) current_page += 1 - - logging.info(f"Found a total of {len(all_comments)} comments for issue {issue_number}.") + + logging.info( + f"Found a total of {len(all_comments)} comments for issue {issue_number}." 
+ ) return all_comments diff --git a/dags/github_api_helpers/labels.py b/dags/github_api_helpers/labels.py index f556d7a8..6f4480c7 100644 --- a/dags/github_api_helpers/labels.py +++ b/dags/github_api_helpers/labels.py @@ -1,6 +1,7 @@ from .smart_proxy import get import logging + def fetch_repo_labels_page(owner: str, repo: str, page: int, per_page: int = 100): """ Fetches the labels for a specific repository in GitHub. @@ -17,7 +18,9 @@ def fetch_repo_labels_page(owner: str, repo: str, page: int, per_page: int = 100 response = get(endpoint, params=params) response_data = response.json() - logging.info(f"Found {len(response_data)} labels for {owner}/{repo} on page {page}. Labels: {response_data}") + logging.info( + f"Found {len(response_data)} labels for {owner}/{repo} on page {page}. Labels: {response_data}" + ) return response_data diff --git a/dags/github_api_helpers/orgs.py b/dags/github_api_helpers/orgs.py index fff424a5..c8fd34d6 100644 --- a/dags/github_api_helpers/orgs.py +++ b/dags/github_api_helpers/orgs.py @@ -34,7 +34,9 @@ def fetch_org_members_page(org: str, page: int, per_page: int = 100): response = get(endpoint, params=params) response_data = response.json() - logging.info(f"Found {len(response_data)} members for organization {org} on page {page}. Members: {response_data}") + logging.info( + f"Found {len(response_data)} members for organization {org} on page {page}. Members: {response_data}" + ) return response_data diff --git a/dags/github_api_helpers/pull_requests.py b/dags/github_api_helpers/pull_requests.py index 75597594..a6736000 100644 --- a/dags/github_api_helpers/pull_requests.py +++ b/dags/github_api_helpers/pull_requests.py @@ -22,7 +22,9 @@ def fetch_pull_requests(owner: str, repo: str, page: int, per_page: int = 100): response = get(endpoint, params=params) response_data = response.json() - logging.info(f"Found {len(response_data)} pull requests for {owner}/{repo} on page {page}. Pull requests: {response_data}") + logging.info( + f"Found {len(response_data)} pull requests for {owner}/{repo} on page {page}. Pull requests: {response_data}" + ) return response_data @@ -49,7 +51,9 @@ def get_all_pull_requests(owner: str, repo: str): all_pull_requests.extend(pull_requests) current_page += 1 - logging.info(f"Found a total of {len(all_pull_requests)} pull requests for {owner}/{repo}.") + logging.info( + f"Found a total of {len(all_pull_requests)} pull requests for {owner}/{repo}." + ) return all_pull_requests @@ -74,7 +78,9 @@ def fetch_pull_requests_commits( response = get(endpoint, params=params) response_data = response.json() - logging.info(f"Found {len(response_data)} commits for pull request {pull_number} on page {page}. Commits: {response_data}") + logging.info( + f"Found {len(response_data)} commits for pull request {pull_number} on page {page}. Commits: {response_data}" + ) return response_data @@ -101,7 +107,9 @@ def get_all_commits_of_pull_request(owner: str, repo: str, pull_number: int): all_commits.extend(commits) current_page += 1 - logging.info(f"Found a total of {len(all_commits)} commits for pull request {pull_number}.") + logging.info( + f"Found a total of {len(all_commits)} commits for pull request {pull_number}." + ) return all_commits @@ -125,7 +133,9 @@ def fetch_pull_request_comments( response = get(endpoint, params=params) response_data = response.json() - logging.info(f"Found {len(response_data)} comments for pull request {issue_number} on page {page}. 
Comments: {response_data}") + logging.info( + f"Found {len(response_data)} comments for pull request {issue_number} on page {page}. Comments: {response_data}" + ) return response_data @@ -149,7 +159,9 @@ def get_all_comments_of_pull_request(owner: str, repo: str, issue_number: int): all_comments.extend(comments) current_page += 1 - logging.info(f"Found a total of {len(all_comments)} comments for pull request {issue_number}.") + logging.info( + f"Found a total of {len(all_comments)} comments for pull request {issue_number}." + ) return all_comments @@ -173,7 +185,9 @@ def fetch_pull_request_review_comments( response = get(endpoint, params=params) response_data = response.json() - logging.info(f"Found {len(response_data)} review comments for pull request {pull_number} on page {page}. Comments: {response_data}") + logging.info( + f"Found {len(response_data)} review comments for pull request {pull_number} on page {page}. Comments: {response_data}" + ) return response_data @@ -198,8 +212,10 @@ def get_all_review_comments_of_pull_request(owner: str, repo: str, pull_number: break all_comments.extend(comments) current_page += 1 - - logging.info(f"Found a total of {len(all_comments)} review comments for pull request {pull_number}.") + + logging.info( + f"Found a total of {len(all_comments)} review comments for pull request {pull_number}." + ) return all_comments @@ -221,7 +237,9 @@ def fetch_review_comment_reactions( response = get(endpoint, params=params) response_data = response.json() - logging.info(f"Found {len(response_data)} reactions for review comment {comment_id} on page {page}. Reactions: {response_data}") + logging.info( + f"Found {len(response_data)} reactions for review comment {comment_id} on page {page}. Reactions: {response_data}" + ) return response_data @@ -245,7 +263,9 @@ def get_all_reactions_of_review_comment(owner: str, repo: str, comment_id: int): all_reactions.extend(reactions) current_page += 1 - logging.info(f"Found a total of {len(all_reactions)} reactions for review comment {comment_id}.") + logging.info( + f"Found a total of {len(all_reactions)} reactions for review comment {comment_id}." + ) return all_reactions @@ -267,7 +287,9 @@ def fetch_comment_reactions( response = get(endpoint, params=params) response_data = response.json() - logging.info(f"Found {len(response_data)} reactions for comment {comment_id} on page {page}. Reactions: {response_data}") + logging.info( + f"Found {len(response_data)} reactions for comment {comment_id} on page {page}. Reactions: {response_data}" + ) return response_data @@ -290,8 +312,10 @@ def get_all_reactions_of_comment(owner: str, repo: str, comment_id: int): break all_reactions.extend(reactions) current_page += 1 - - logging.info(f"Found a total of {len(all_reactions)} reactions for comment {comment_id}.") + + logging.info( + f"Found a total of {len(all_reactions)} reactions for comment {comment_id}." + ) return all_reactions @@ -315,7 +339,9 @@ def fetch_pull_request_reviews( response = get(endpoint, params=params) response_data = response.json() - logging.info(f"Found {len(response_data)} reviews for pull request {pull_number} on page {page}. Reviews: {response_data}") + logging.info( + f"Found {len(response_data)} reviews for pull request {pull_number} on page {page}. 
Reviews: {response_data}" + ) return response_data @@ -338,8 +364,10 @@ def get_all_reviews_of_pull_request(owner: str, repo: str, pull_number: int): break all_reviews.extend(reviews) current_page += 1 - - logging.info(f"Found a total of {len(all_reviews)} reviews for pull request {pull_number}.") + + logging.info( + f"Found a total of {len(all_reviews)} reviews for pull request {pull_number}." + ) return all_reviews @@ -362,7 +390,9 @@ def fetch_pull_request_files_page( response = get(endpoint, params=params) response_data = response.json() - logging.info(f"Found {len(response_data)} files for pull request {pull_number} on page {page}. Files: {response_data}") + logging.info( + f"Found {len(response_data)} files for pull request {pull_number} on page {page}. Files: {response_data}" + ) return response_data diff --git a/dags/github_api_helpers/repos.py b/dags/github_api_helpers/repos.py index 0f075b89..ef2f8a45 100644 --- a/dags/github_api_helpers/repos.py +++ b/dags/github_api_helpers/repos.py @@ -17,7 +17,9 @@ def fetch_org_repos_page(org_name: str, page: int, per_page: int = 100): response = get(endpoint, params=params) response_data = response.json() - logging.info(f"Found {len(response_data)} repos for organization {org_name} on page {page}. Repos: {response_data}") + logging.info( + f"Found {len(response_data)} repos for organization {org_name} on page {page}. Repos: {response_data}" + ) return response_data @@ -42,7 +44,9 @@ def get_all_org_repos(org_name: str): all_repos.extend(repos) current_page += 1 - logging.info(f"Found a total of {len(all_repos)} repos for organization {org_name}.") + logging.info( + f"Found a total of {len(all_repos)} repos for organization {org_name}." + ) return all_repos @@ -62,7 +66,9 @@ def fetch_repo_contributors_page(owner: str, repo: str, page: int, per_page: int response = get(endpoint, params=params) response_data = response.json() - logging.info(f"Found {len(response_data)} contributors for {owner}/{repo} on page {page}. Contributors: {response_data}") + logging.info( + f"Found {len(response_data)} contributors for {owner}/{repo} on page {page}. Contributors: {response_data}" + ) return response_data @@ -88,5 +94,7 @@ def get_all_repo_contributors(owner: str, repo: str): all_contributors.extend(contributors) current_page += 1 - logging.info(f"Found a total of {len(all_contributors)} contributors for {owner}/{repo}.") + logging.info( + f"Found a total of {len(all_contributors)} contributors for {owner}/{repo}." 
+ ) return all_contributors From 6eacf8e80f5ac5f82d462fc90229ea6577422071 Mon Sep 17 00:00:00 2001 From: Mohammad Twin Date: Tue, 19 Dec 2023 12:33:28 +0400 Subject: [PATCH 25/26] feat: aligned with lint --- dags/github_api_helpers/comments.py | 1 + dags/github_api_helpers/commits.py | 1 + dags/github_api_helpers/issues.py | 1 + dags/github_api_helpers/labels.py | 3 ++- dags/github_api_helpers/orgs.py | 1 + dags/github_api_helpers/pull_requests.py | 1 + dags/github_api_helpers/repos.py | 1 + dags/github_api_helpers/smart_proxy.py | 3 ++- 8 files changed, 10 insertions(+), 2 deletions(-) diff --git a/dags/github_api_helpers/comments.py b/dags/github_api_helpers/comments.py index 4315fd8e..93880133 100644 --- a/dags/github_api_helpers/comments.py +++ b/dags/github_api_helpers/comments.py @@ -1,4 +1,5 @@ import logging + from .smart_proxy import get diff --git a/dags/github_api_helpers/commits.py b/dags/github_api_helpers/commits.py index 6bcc0709..d1dc487a 100644 --- a/dags/github_api_helpers/commits.py +++ b/dags/github_api_helpers/commits.py @@ -1,4 +1,5 @@ import logging + from .smart_proxy import get diff --git a/dags/github_api_helpers/issues.py b/dags/github_api_helpers/issues.py index 9cd113fc..44d6f4c6 100644 --- a/dags/github_api_helpers/issues.py +++ b/dags/github_api_helpers/issues.py @@ -1,4 +1,5 @@ import logging + from .smart_proxy import get diff --git a/dags/github_api_helpers/labels.py b/dags/github_api_helpers/labels.py index 6f4480c7..a903bc37 100644 --- a/dags/github_api_helpers/labels.py +++ b/dags/github_api_helpers/labels.py @@ -1,6 +1,7 @@ -from .smart_proxy import get import logging +from .smart_proxy import get + def fetch_repo_labels_page(owner: str, repo: str, page: int, per_page: int = 100): """ diff --git a/dags/github_api_helpers/orgs.py b/dags/github_api_helpers/orgs.py index c8fd34d6..5bdc9874 100644 --- a/dags/github_api_helpers/orgs.py +++ b/dags/github_api_helpers/orgs.py @@ -1,4 +1,5 @@ import logging + from .smart_proxy import get diff --git a/dags/github_api_helpers/pull_requests.py b/dags/github_api_helpers/pull_requests.py index a6736000..6ea5fb5d 100644 --- a/dags/github_api_helpers/pull_requests.py +++ b/dags/github_api_helpers/pull_requests.py @@ -1,4 +1,5 @@ import logging + from .smart_proxy import get diff --git a/dags/github_api_helpers/repos.py b/dags/github_api_helpers/repos.py index ef2f8a45..781b0d49 100644 --- a/dags/github_api_helpers/repos.py +++ b/dags/github_api_helpers/repos.py @@ -1,4 +1,5 @@ import logging + from .smart_proxy import get diff --git a/dags/github_api_helpers/smart_proxy.py b/dags/github_api_helpers/smart_proxy.py index a646e264..e1d2804d 100644 --- a/dags/github_api_helpers/smart_proxy.py +++ b/dags/github_api_helpers/smart_proxy.py @@ -1,7 +1,8 @@ import logging -import requests import random +import requests + def get(url: str, params=None): """ From 367289153c413d9dde860476857a572521f9592a Mon Sep 17 00:00:00 2001 From: Mohammad Twin Date: Tue, 19 Dec 2023 12:53:38 +0400 Subject: [PATCH 26/26] feat: shorten the lines --- dags/github_api_helpers/comments.py | 16 ++++++++++------ dags/github_api_helpers/pull_requests.py | 16 ++++++++++------ 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/dags/github_api_helpers/comments.py b/dags/github_api_helpers/comments.py index 93880133..8a07182f 100644 --- a/dags/github_api_helpers/comments.py +++ b/dags/github_api_helpers/comments.py @@ -46,9 +46,11 @@ def fetch_repo_review_comments_page( ) ) - logging.info( - f"Found {len(updated_response_data)} comments for
{owner}/{repo} on page {page}. comments: {updated_response_data}" - ) + msg = f"Found {len(updated_response_data)}" + msg += f" review comments for {owner}/{repo} on page {page}." + msg += f" comments: {updated_response_data}" + logging.info(msg) + return updated_response_data @@ -120,9 +122,11 @@ def fetch_repo_issues_and_prs_comments_page( map(lambda x: {**x, **extract_type_from_comment_response(x)}, response_data) ) - logging.info( - f"Found {len(updated_response_data)} comments for {owner}/{repo} on page {page}. comments: {updated_response_data}" - ) + msg = f"Found {len(updated_response_data)}" + msg += f" comments for {owner}/{repo} on page {page}." + msg += f" comments: {updated_response_data}" + logging.info(msg) + return updated_response_data diff --git a/dags/github_api_helpers/pull_requests.py b/dags/github_api_helpers/pull_requests.py index 6ea5fb5d..8a86a4fb 100644 --- a/dags/github_api_helpers/pull_requests.py +++ b/dags/github_api_helpers/pull_requests.py @@ -186,9 +186,11 @@ def fetch_pull_request_review_comments( response = get(endpoint, params=params) response_data = response.json() - logging.info( - f"Found {len(response_data)} review comments for pull request {pull_number} on page {page}. Comments: {response_data}" - ) + msg = f"Found {len(response_data)} review comments" + msg += f" for pull request {pull_number} on page {page}." + msg += f" Comments: {response_data}" + logging.info(msg) + return response_data @@ -238,9 +240,11 @@ def fetch_review_comment_reactions( response = get(endpoint, params=params) response_data = response.json() - logging.info( - f"Found {len(response_data)} reactions for review comment {comment_id} on page {page}. Reactions: {response_data}" - ) + msg = f"Found {len(response_data)} reactions" + msg += f" for review comment {comment_id} on page {page}." + msg += f" Reactions: {response_data}" + logging.info(msg) + return response_data
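
A note on the unit test introduced in [PATCH 13/26] and cleaned up in [PATCH 15/26]: the @patch hint removed there points at a natural next step, patching the Neo4j connection so the test never touches a live database. The sketch below assumes neo4j_storage exposes a Neo4jConnection that save_label_to_neo4j uses; that patch target is hypothetical and may differ in the real module.

    import unittest
    from unittest.mock import patch

    from neo4j_storage import save_label_to_neo4j


    class TestSaveLabelToNeo4jMocked(unittest.TestCase):
        # "neo4j_storage.Neo4jConnection" is an assumed patch target, not
        # a name confirmed by this patch series.
        @patch("neo4j_storage.Neo4jConnection")
        def test_save_label_does_not_hit_db(self, mock_connection):
            sample_label = {"id": "123", "name": "SampleLabel"}
            # The existing test expects None; the mocked variant keeps that
            # contract while isolating the test from Neo4j.
            self.assertIsNone(save_label_to_neo4j(sample_label))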
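
The get_all_* helpers touched in [PATCH 24/26] all repeat the same loop: fetch one page, stop when it comes back empty, otherwise extend the results and advance. A sketch of how that loop could be factored out; paginate and its callable argument are illustrative names, not existing repo code.

    from typing import Callable, List


    def paginate(fetch_page: Callable[[int], List[dict]]) -> List[dict]:
        # fetch_page takes a 1-based page number and returns one page of
        # items; an empty page signals the end, mirroring the loops above.
        items: List[dict] = []
        page = 1
        while True:
            batch = fetch_page(page)
            if not batch:
                break
            items.extend(batch)
            page += 1
        return items

    # Hypothetical wiring with one of the existing page fetchers:
    # all_prs = paginate(lambda page: fetch_pull_requests(owner, repo, page))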
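
On [PATCH 26/26]: concatenating message fragments with msg += makes it easy to drop the separating space between fragments, which is why the continuation fragments above carry a leading space. A minimal standard-library alternative is to collect the fragments and join them, so every boundary gets exactly one space; the values below are stand-ins for the real API response fields.

    import logging

    logging.basicConfig(level=logging.INFO)

    # Stand-in values for illustration only.
    response_data = [{"id": 1}, {"id": 2}]
    pull_number = 42
    page = 1

    parts = [
        f"Found {len(response_data)} review comments",
        f"for pull request {pull_number} on page {page}.",
        f"Comments: {response_data}",
    ]
    # str.join inserts exactly one space between fragments.
    logging.info(" ".join(parts))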