diff --git a/dags/github_api_helpers/comments.py b/dags/github_api_helpers/comments.py index 77c675a0..dcb23358 100644 --- a/dags/github_api_helpers/comments.py +++ b/dags/github_api_helpers/comments.py @@ -1,3 +1,4 @@ +import logging from .smart_proxy import get @@ -44,6 +45,7 @@ def fetch_repo_review_comments_page( ) ) + logging.info(f"Found {len(updated_response_data)} comments for {owner}/{repo} on page {page}. comments: {updated_response_data}") return updated_response_data @@ -55,10 +57,12 @@ def get_all_repo_review_comments(owner: str, repo: str): :param repo: The name of the repository. :return: A list of all review comments for the specified repository. """ + logging.info(f"Fetching all comments for {owner}/{repo}...") all_comments = [] current_page = 1 while True: + logging.info(f"Fetching page {current_page} of comments...") comments = fetch_repo_review_comments_page(owner, repo, current_page) if not comments: @@ -67,6 +71,7 @@ def get_all_repo_review_comments(owner: str, repo: str): all_comments.extend(comments) current_page += 1 + logging.info(f"Found a total of {len(all_comments)} comments for {owner}/{repo}.") return all_comments @@ -112,6 +117,7 @@ def fetch_repo_issues_and_prs_comments_page( map(lambda x: {**x, **extract_type_from_comment_response(x)}, response_data) ) + logging.info(f"Found {len(updated_response_data)} comments for {owner}/{repo} on page {page}. comments: {updated_response_data}") return updated_response_data @@ -123,10 +129,12 @@ def get_all_repo_issues_and_prs_comments(owner: str, repo: str): :param repo: The name of the repository. :return: A list of all review comments for the specified repository. """ + logging.info(f"Fetching all comments for {owner}/{repo}...") all_comments = [] current_page = 1 while True: + logging.info(f"Fetching page {current_page} of comments...") comments = fetch_repo_issues_and_prs_comments_page(owner, repo, current_page) if not comments: @@ -135,4 +143,5 @@ def get_all_repo_issues_and_prs_comments(owner: str, repo: str): all_comments.extend(comments) current_page += 1 + logging.info(f"Found a total of {len(all_comments)} comments for {owner}/{repo}.") return all_comments diff --git a/dags/github_api_helpers/commits.py b/dags/github_api_helpers/commits.py index 56d880e9..c3133c22 100644 --- a/dags/github_api_helpers/commits.py +++ b/dags/github_api_helpers/commits.py @@ -1,4 +1,4 @@ -import requests +import logging from .smart_proxy import get @@ -18,6 +18,7 @@ def fetch_commits(owner: str, repo: str, page: int, per_page: int = 100): response = get(endpoint, params=params) response_data = response.json() + logging.info(f"Found {len(response_data)} commits for {owner}/{repo} on page {page}. Commits: {response_data}") return response_data @@ -29,10 +30,12 @@ def get_all_commits(owner: str, repo: str): :param repo: The name of the repository. :return: A list of all commits for the specified repo. """ + logging.info(f"Fetching all commits for {owner}/{repo}...") all_commits = [] current_page = 1 while True: + logging.info(f"Fetching page {current_page} of commits...") commits = fetch_commits(owner, repo, current_page) if not commits: @@ -41,6 +44,7 @@ def get_all_commits(owner: str, repo: str): all_commits.extend(commits) current_page += 1 + logging.info(f"Found a total of {len(all_commits)} commits for {owner}/{repo}.") return all_commits @@ -55,7 +59,10 @@ def fetch_commit_details(owner: str, repo: str, commit_sha: str): """ endpoint = f"https://api.github.com/repos/{owner}/{repo}/commits/{commit_sha}" response = get(endpoint) - return response.json() + response_data = response.json() + + logging.info(f"Found details for commit {commit_sha} of {owner}/{repo}: {response_data}") + return response_data def fetch_commit_files(owner: str, repo: str, sha: str): @@ -67,8 +74,11 @@ def fetch_commit_files(owner: str, repo: str, sha: str): :param sha: The SHA identifier of the commit. :return: A list of files changed in the specified commit. """ + logging.info(f"Fetching files changed in commit {sha} of {owner}/{repo}...") commit_details = fetch_commit_details(owner, repo, sha) if "files" in commit_details: + logging.info(f"Found {len(commit_details['files'])} files changed in commit {sha} of {owner}/{repo}.") return commit_details["files"] else: + logging.info(f"No files changed in commit {sha} of {owner}/{repo}.") return [] diff --git a/dags/github_api_helpers/issues.py b/dags/github_api_helpers/issues.py index 19470d61..65c74c42 100644 --- a/dags/github_api_helpers/issues.py +++ b/dags/github_api_helpers/issues.py @@ -1,4 +1,4 @@ -import requests +import logging from .smart_proxy import get @@ -27,6 +27,7 @@ def fetch_issues(owner: str, repo: str, page: int, per_page: int = 100): issues = [issue for issue in response_data if "pull_request" not in issue] is_more_issues = len(response_data) == per_page + logging.info(f"Found {len(issues)} issues for {owner}/{repo} on page {page}. Issues: {issues}") return issues, is_more_issues @@ -38,10 +39,12 @@ def get_all_issues(owner: str, repo: str): :param repo: The name of the repository. :return: A list of all issues for the specified repo. """ + logging.info(f"Fetching all issues for {owner}/{repo}...") all_issues = [] current_page = 1 while True: + logging.info(f"Fetching page {current_page} of issues...") issues, is_more_issues = fetch_issues(owner, repo, current_page) all_issues.extend(issues) @@ -50,6 +53,7 @@ def get_all_issues(owner: str, repo: str): current_page += 1 + logging.info(f"Found a total of {len(all_issues)} issues for {owner}/{repo}.") return all_issues @@ -72,7 +76,10 @@ def fetch_issue_comments( ) params = {"page": page, "per_page": per_page} response = get(endpoint, params=params) - return response.json() + response_data = response.json() + + logging.info(f"Found {len(response_data)} comments for issue {issue_number} on page {page}. Comments: {response_data}") + return response_data def get_all_comments_of_issue(owner: str, repo: str, issue_number: int): @@ -84,12 +91,16 @@ def get_all_comments_of_issue(owner: str, repo: str, issue_number: int): :param issue_number: The number of the issue. :return: A list of all comments for the specified issue. """ + logging.info(f"Fetching all comments for issue {issue_number}...") all_comments = [] current_page = 1 while True: + logging.info(f"Fetching page {current_page} of comments...") comments = fetch_pull_request_comments(owner, repo, issue_number, current_page) if not comments: # Break the loop if no more comments are found break all_comments.extend(comments) current_page += 1 + + logging.info(f"Found a total of {len(all_comments)} comments for issue {issue_number}.") return all_comments diff --git a/dags/github_api_helpers/labels.py b/dags/github_api_helpers/labels.py index 7d0259da..f556d7a8 100644 --- a/dags/github_api_helpers/labels.py +++ b/dags/github_api_helpers/labels.py @@ -1,5 +1,5 @@ from .smart_proxy import get - +import logging def fetch_repo_labels_page(owner: str, repo: str, page: int, per_page: int = 100): """ @@ -17,6 +17,7 @@ def fetch_repo_labels_page(owner: str, repo: str, page: int, per_page: int = 100 response = get(endpoint, params=params) response_data = response.json() + logging.info(f"Found {len(response_data)} labels for {owner}/{repo} on page {page}. Labels: {response_data}") return response_data @@ -28,17 +29,19 @@ def get_all_repo_labels(owner: str, repo: str): :param repo: The name of the repository. :return: A list of labels for the specified repository. """ + logging.info(f"Fetching all labels for {owner}/{repo}...") all_labels = [] current_page = 1 while True: + logging.info(f"Fetching page {current_page} of labels...") labels = fetch_repo_labels_page(owner, repo, current_page) if not labels: break # No more labels to fetch - print("-> ", labels) all_labels.extend(labels) current_page += 1 + logging.info(f"Found a total of {len(all_labels)} labels for {owner}/{repo}.") return all_labels diff --git a/dags/github_api_helpers/orgs.py b/dags/github_api_helpers/orgs.py index 83f458c0..fff424a5 100644 --- a/dags/github_api_helpers/orgs.py +++ b/dags/github_api_helpers/orgs.py @@ -1,4 +1,4 @@ -import requests +import logging from .smart_proxy import get @@ -9,11 +9,13 @@ def fetch_org_details(org_name: str): :param org_name: The name of the organization. :return: A dict containing the details of the specified organization. """ + logging.info(f"Fetching details for organization {org_name}...") endpoint = f"https://api.github.com/orgs/{org_name}" response = get(endpoint) response_data = response.json() + logging.info(f"Found details for organization {org_name}: {response_data}") return response_data @@ -32,6 +34,7 @@ def fetch_org_members_page(org: str, page: int, per_page: int = 100): response = get(endpoint, params=params) response_data = response.json() + logging.info(f"Found {len(response_data)} members for organization {org} on page {page}. Members: {response_data}") return response_data @@ -42,10 +45,12 @@ def get_all_org_members(org: str): :param org: The name of the organization. :return: A list of members of the organization. """ + logging.info(f"Fetching all members for organization {org}...") all_members = [] current_page = 1 while True: + logging.info(f"Fetching page {current_page} of members...") members = fetch_org_members_page(org, current_page) if not members: @@ -54,4 +59,5 @@ def get_all_org_members(org: str): all_members.extend(members) current_page += 1 + logging.info(f"Found a total of {len(all_members)} members for organization {org}.") return all_members diff --git a/dags/github_api_helpers/pull_requests.py b/dags/github_api_helpers/pull_requests.py index 94820416..25c76979 100644 --- a/dags/github_api_helpers/pull_requests.py +++ b/dags/github_api_helpers/pull_requests.py @@ -1,3 +1,4 @@ +import logging from .smart_proxy import get @@ -21,6 +22,7 @@ def fetch_pull_requests(owner: str, repo: str, page: int, per_page: int = 100): response = get(endpoint, params=params) response_data = response.json() + logging.info(f"Found {len(response_data)} pull requests for {owner}/{repo} on page {page}. Pull requests: {response_data}") return response_data @@ -33,10 +35,12 @@ def get_all_pull_requests(owner: str, repo: str): :param pull_number: The number of the pull request. :return: A list of all commits for the specified pull request. """ + logging.info(f"Fetching all pull requests for {owner}/{repo}...") all_pull_requests = [] current_page = 1 while True: + logging.info(f"Fetching page {current_page} of pull requests...") pull_requests = fetch_pull_requests(owner, repo, current_page) if not pull_requests: @@ -45,6 +49,7 @@ def get_all_pull_requests(owner: str, repo: str): all_pull_requests.extend(pull_requests) current_page += 1 + logging.info(f"Found a total of {len(all_pull_requests)} pull requests for {owner}/{repo}.") return all_pull_requests @@ -69,6 +74,7 @@ def fetch_pull_requests_commits( response = get(endpoint, params=params) response_data = response.json() + logging.info(f"Found {len(response_data)} commits for pull request {pull_number} on page {page}. Commits: {response_data}") return response_data @@ -81,10 +87,12 @@ def get_all_commits_of_pull_request(owner: str, repo: str, pull_number: int): :param pull_number: The number of the pull request. :return: A list of all commits for the specified pull request. """ + logging.info(f"Fetching all commits for pull request {pull_number}...") all_commits = [] current_page = 1 while True: + logging.info(f"Fetching page {current_page} of commits...") commits = fetch_pull_requests_commits(owner, repo, pull_number, current_page) if not commits: @@ -93,6 +101,7 @@ def get_all_commits_of_pull_request(owner: str, repo: str, pull_number: int): all_commits.extend(commits) current_page += 1 + logging.info(f"Found a total of {len(all_commits)} commits for pull request {pull_number}.") return all_commits @@ -114,7 +123,10 @@ def fetch_pull_request_comments( ) params = {"page": page, "per_page": per_page} response = get(endpoint, params=params) - return response.json() + response_data = response.json() + + logging.info(f"Found {len(response_data)} comments for pull request {issue_number} on page {page}. Comments: {response_data}") + return response_data def get_all_comments_of_pull_request(owner: str, repo: str, issue_number: int): @@ -126,14 +138,18 @@ def get_all_comments_of_pull_request(owner: str, repo: str, issue_number: int): :param issue_number: The number of the issue. :return: A list of all comments for the specified issue. """ + logging.info(f"Fetching all comments for pull request {issue_number}...") all_comments = [] current_page = 1 while True: + logging.info(f"Fetching page {current_page} of comments...") comments = fetch_pull_request_comments(owner, repo, issue_number, current_page) if not comments: # Break the loop if no more comments are found break all_comments.extend(comments) current_page += 1 + + logging.info(f"Found a total of {len(all_comments)} comments for pull request {issue_number}.") return all_comments @@ -155,7 +171,10 @@ def fetch_pull_request_review_comments( ) params = {"page": page, "per_page": per_page} response = get(endpoint, params=params) - return response.json() + response_data = response.json() + + logging.info(f"Found {len(response_data)} review comments for pull request {pull_number} on page {page}. Comments: {response_data}") + return response_data def get_all_review_comments_of_pull_request(owner: str, repo: str, pull_number: int): @@ -167,9 +186,11 @@ def get_all_review_comments_of_pull_request(owner: str, repo: str, pull_number: :param pull_number: The number of the pull request. :return: A list of all review comments for the specified pull request. """ + logging.info(f"Fetching all review comments for pull request {pull_number}...") all_comments = [] current_page = 1 while True: + logging.info(f"Fetching page {current_page} of review comments...") comments = fetch_pull_request_review_comments( owner, repo, pull_number, current_page ) @@ -177,6 +198,8 @@ def get_all_review_comments_of_pull_request(owner: str, repo: str, pull_number: break all_comments.extend(comments) current_page += 1 + + logging.info(f"Found a total of {len(all_comments)} review comments for pull request {pull_number}.") return all_comments @@ -196,7 +219,10 @@ def fetch_review_comment_reactions( endpoint = f"https://api.github.com/repos/{owner}/{repo}/pulls/comments/{comment_id}/reactions" params = {"page": page, "per_page": per_page} response = get(endpoint, params=params) - return response.json() + response_data = response.json() + + logging.info(f"Found {len(response_data)} reactions for review comment {comment_id} on page {page}. Reactions: {response_data}") + return response_data def get_all_reactions_of_review_comment(owner: str, repo: str, comment_id: int): @@ -208,14 +234,18 @@ def get_all_reactions_of_review_comment(owner: str, repo: str, comment_id: int): :param comment_id: The ID of the comment. :return: A list of all reactions for the specified pull request comment. """ + logging.info(f"Fetching all reactions for review comment {comment_id}...") all_reactions = [] current_page = 1 while True: + logging.info(f"Fetching page {current_page} of reactions...") reactions = fetch_comment_reactions(owner, repo, comment_id, current_page) if not reactions: # Break the loop if no more reactions are found break all_reactions.extend(reactions) current_page += 1 + + logging.info(f"Found a total of {len(all_reactions)} reactions for review comment {comment_id}.") return all_reactions @@ -238,7 +268,10 @@ def fetch_comment_reactions( } # Custom media type is required params = {"page": page, "per_page": per_page} response = get(endpoint, headers=headers, params=params) - return response.json() + response_data = response.json() + + logging.info(f"Found {len(response_data)} reactions for comment {comment_id} on page {page}. Reactions: {response_data}") + return response_data def get_all_reactions_of_comment(owner: str, repo: str, comment_id: int): @@ -250,14 +283,18 @@ def get_all_reactions_of_comment(owner: str, repo: str, comment_id: int): :param comment_id: The ID of the comment. :return: A list of all reactions for the specified issue comment. """ + logging.info(f"Fetching all reactions for comment {comment_id}...") all_reactions = [] current_page = 1 while True: + logging.info(f"Fetching page {current_page} of reactions...") reactions = fetch_comment_reactions(owner, repo, comment_id, current_page) if not reactions: # Break the loop if no more reactions are found break all_reactions.extend(reactions) current_page += 1 + + logging.info(f"Found a total of {len(all_reactions)} reactions for comment {comment_id}.") return all_reactions @@ -279,7 +316,10 @@ def fetch_pull_request_reviews( ) params = {"page": page, "per_page": per_page} response = get(endpoint, params=params) - return response.json() + response_data = response.json() + + logging.info(f"Found {len(response_data)} reviews for pull request {pull_number} on page {page}. Reviews: {response_data}") + return response_data def get_all_reviews_of_pull_request(owner: str, repo: str, pull_number: int): @@ -291,14 +331,18 @@ def get_all_reviews_of_pull_request(owner: str, repo: str, pull_number: int): :param pull_number: The number of the pull request. :return: A list of all reviews for the specified pull request. """ + logging.info(f"Fetching all reviews for pull request {pull_number}...") all_reviews = [] current_page = 1 while True: + logging.info(f"Fetching page {current_page} of reviews...") reviews = fetch_pull_request_reviews(owner, repo, pull_number, current_page) if not reviews: # Break the loop if no more reviews are found break all_reviews.extend(reviews) current_page += 1 + + logging.info(f"Found a total of {len(all_reviews)} reviews for pull request {pull_number}.") return all_reviews @@ -321,6 +365,7 @@ def fetch_pull_request_files_page( response = get(endpoint, params=params) response_data = response.json() + logging.info(f"Found {len(response_data)} files for pull request {pull_number} on page {page}. Files: {response_data}") return response_data @@ -333,13 +378,16 @@ def get_all_pull_request_files(owner: str, repo: str, pull_number: int): :param pull_number: The number of the pull request. :return: A list of all files for the specified pull request. """ + logging.info(f"Fetching all files for pull request {pull_number}...") files = [] page = 1 while True: + logging.info(f"Fetching page {page} of files...") page_files = fetch_pull_request_files_page(owner, repo, pull_number, page) if not page_files: break files.extend(page_files) page += 1 + logging.info(f"Found a total of {len(files)} files for pull request {pull_number}.") return files diff --git a/dags/github_api_helpers/repos.py b/dags/github_api_helpers/repos.py index 0d1318f6..0f075b89 100644 --- a/dags/github_api_helpers/repos.py +++ b/dags/github_api_helpers/repos.py @@ -1,3 +1,4 @@ +import logging from .smart_proxy import get @@ -16,6 +17,7 @@ def fetch_org_repos_page(org_name: str, page: int, per_page: int = 100): response = get(endpoint, params=params) response_data = response.json() + logging.info(f"Found {len(response_data)} repos for organization {org_name} on page {page}. Repos: {response_data}") return response_data @@ -26,10 +28,12 @@ def get_all_org_repos(org_name: str): :param org_name: The name of the organization. :return: A list of repos for the specified organization. """ + logging.info(f"Fetching all repos for organization {org_name}...") all_repos = [] current_page = 1 while True: + logging.info(f"Fetching page {current_page} of repos...") repos = fetch_org_repos_page(org_name, current_page) if not repos: @@ -38,6 +42,7 @@ def get_all_org_repos(org_name: str): all_repos.extend(repos) current_page += 1 + logging.info(f"Found a total of {len(all_repos)} repos for organization {org_name}.") return all_repos @@ -57,6 +62,7 @@ def fetch_repo_contributors_page(owner: str, repo: str, page: int, per_page: int response = get(endpoint, params=params) response_data = response.json() + logging.info(f"Found {len(response_data)} contributors for {owner}/{repo} on page {page}. Contributors: {response_data}") return response_data @@ -68,10 +74,12 @@ def get_all_repo_contributors(owner: str, repo: str): :param repo: The name of the repository. :return: A list of contributors for the specified repository. """ + logging.info(f"Fetching all contributors for {owner}/{repo}...") all_contributors = [] current_page = 1 while True: + logging.info(f"Fetching page {current_page} of contributors...") contributors = fetch_repo_contributors_page(owner, repo, current_page) if not contributors: @@ -80,4 +88,5 @@ def get_all_repo_contributors(owner: str, repo: str): all_contributors.extend(contributors) current_page += 1 + logging.info(f"Found a total of {len(all_contributors)} contributors for {owner}/{repo}.") return all_contributors diff --git a/dags/github_api_helpers/smart_proxy.py b/dags/github_api_helpers/smart_proxy.py index 1e98d54f..d6f8efd6 100644 --- a/dags/github_api_helpers/smart_proxy.py +++ b/dags/github_api_helpers/smart_proxy.py @@ -1,3 +1,4 @@ +import logging import requests import random @@ -16,7 +17,7 @@ def get(url: str, params=None): random_port = random.randint(20001, 29980) proxy_url = f"http://spusfxy185:TwinTwinTwin@eu.dc.smartproxy.com:{random_port}" - print("proxy_url: ", proxy_url) + logging.info(f"Using proxy {proxy_url}") proxies = { "http": proxy_url, "https": proxy_url,