Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add gitlab merge request and issue collection #2658

Merged
merged 8 commits into from
Dec 4, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions augur/application/cli/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,9 +91,12 @@ def start(disable_collection, development, port):
logger.info("Deleting old task schedule")
os.remove("celerybeat-schedule.db")

celery_beat_process = None
celery_command = "celery -A augur.tasks.init.celery_app.celery_app beat -l debug"
celery_beat_process = subprocess.Popen(celery_command.split(" "))
with DatabaseSession(logger) as db_session:
config = AugurConfig(logger, db_session)
log_level = config.get_value("Logging", "log_level")
celery_beat_process = None
celery_command = f"celery -A augur.tasks.init.celery_app.celery_app beat -l {log_level.lower()}"
celery_beat_process = subprocess.Popen(celery_command.split(" "))

if not disable_collection:

Expand Down
79 changes: 76 additions & 3 deletions augur/application/db/data_parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -313,7 +313,7 @@ def extract_needed_pr_message_ref_data(comment: dict, pull_request_id: int, repo
def extract_needed_pr_data(pr, repo_id, tool_source, tool_version):


pr_dict = {
pr = {
'repo_id': repo_id,
'pr_url': pr['url'],
# 1-22-2022 inconsistent casting; sometimes int, sometimes float in bulk_insert
Expand Down Expand Up @@ -367,7 +367,7 @@ def extract_needed_pr_data(pr, repo_id, tool_source, tool_version):
'data_source': 'GitHub API'
}

return pr_dict
return pr

def extract_needed_issue_data(issue: dict, repo_id: int, tool_source: str, tool_version: str, data_source: str):

Expand Down Expand Up @@ -513,8 +513,81 @@ def extract_needed_pr_review_data(review, pull_request_id, repo_id, platform_id,

return review_row

def extract_needed_pr_data_from_gitlab_merge_request(pr, repo_id, tool_source, tool_version):
    """Map a GitLab merge request payload onto a pull request row dict.

    Fields with no value taken from the merge request payload are set to
    ``None`` so the row always carries the full set of keys.

    Args:
        pr: Merge request mapping. The keys read are ``web_url``, ``id``,
            ``iid``, ``state``, ``discussion_locked``, ``title``,
            ``description``, ``created_at``, ``updated_at``, ``closed_at``,
            ``merged_at``, ``merge_commit_sha`` and ``milestone``.
        repo_id: Identifier stored under ``repo_id``.
        tool_source: Value stored under ``tool_source``.
        tool_version: Value stored under ``tool_version``.

    Returns:
        A dict keyed by the pull request column names.

    Raises:
        KeyError: If ``pr`` lacks one of the keys listed above.
    """
    return {
        'repo_id': repo_id,
        # The web URL is used for both the API-style and the HTML url fields.
        'pr_url': pr['web_url'],
        'pr_src_id': pr['id'],
        'pr_src_node_id': None,
        'pr_html_url': pr['web_url'],
        'pr_diff_url': None,
        'pr_patch_url': None,
        'pr_issue_url': None,
        'pr_augur_issue_id': None,
        'pr_src_number': pr['iid'],
        'pr_src_state': pr['state'],
        'pr_src_locked': pr['discussion_locked'],
        'pr_src_title': pr['title'],
        # TODO: Add contributor logic for gitlab
        'pr_augur_contributor_id': None,
        'pr_body': pr['description'],
        'pr_created_at': pr['created_at'],
        'pr_updated_at': pr['updated_at'],
        'pr_closed_at': pr['closed_at'],
        'pr_merged_at': pr['merged_at'],
        'pr_merge_commit_sha': pr['merge_commit_sha'],
        'pr_teams': None,
        # A falsy milestone (None / empty) yields a None title.
        'pr_milestone': (pr['milestone'] or {}).get('title'),
        'pr_commits_url': None,
        'pr_review_comments_url': None,
        'pr_review_comment_url': None,
        'pr_comments_url': None,
        'pr_statuses_url': None,
        'pr_meta_head_id': None,
        'pr_meta_base_id': None,
        'pr_src_issue_url': None,
        'pr_src_comments_url': None,
        'pr_src_review_comments_url': None,
        'pr_src_commits_url': None,
        'pr_src_statuses_url': None,
        'pr_src_author_association': None,
        'tool_source': tool_source,
        'tool_version': tool_version,
        'data_source': 'Gitlab API',
    }


def extract_needed_issue_data_from_gitlab_issue(issue: dict, repo_id: int, tool_source: str, tool_version: str, data_source: str):
    """Map a GitLab issue payload onto an issue row dict.

    Fields with no value taken from the issue payload are set to ``None``.

    Args:
        issue: Issue mapping. The keys read are ``created_at``, ``title``,
            ``description`` (optional), ``user_notes_count``, ``updated_at``,
            ``closed_at``, ``_links`` (``project``, ``self``, ``notes``),
            ``state``, ``id``, ``iid`` and ``author`` (``id``).
        repo_id: Identifier stored under ``repo_id``.
        tool_source: Value stored under ``tool_source``.
        tool_version: Value stored under ``tool_version``.
        data_source: Value stored under ``data_source``.

    Returns:
        A dict keyed by the issue column names.

    Raises:
        KeyError: If a required key is missing from ``issue``.
    """
    return {
        "repo_id": repo_id,
        "reporter_id": None,
        "pull_request": None,
        "pull_request_id": None,
        "created_at": issue['created_at'],
        "issue_title": issue['title'],
        # The description is optional; a missing key becomes None.
        "issue_body": issue.get('description'),
        "comment_count": issue['user_notes_count'],
        "updated_at": issue['updated_at'],
        "closed_at": issue['closed_at'],
        "repository_url": issue['_links']['project'],
        "issue_url": issue['_links']['self'],
        "labels_url": None,
        "comments_url": issue['_links']['notes'],
        "events_url": None,
        # NOTE(review): html_url is filled from the same `self` link as issue_url.
        "html_url": issue['_links']['self'],
        "issue_state": issue['state'],
        "issue_node_id": None,
        "gh_issue_id": issue['id'],
        "gh_issue_number": issue['iid'],
        "gh_user_id": issue['author']['id'],
        "tool_source": tool_source,
        "tool_version": tool_version,
        "data_source": data_source,
    }

17 changes: 15 additions & 2 deletions augur/application/db/models/augur_operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -1232,10 +1232,22 @@ def insert(session, repo_id):
repo_git = repo.repo_git

collection_status_unique = ["repo_id"]
pr_issue_count = 0
github_weight = 0
if "github" in repo_git:

try:
pr_issue_count = get_repo_weight_by_issue(session.logger, repo_git)
#session.logger.info(f"date weight: {calculate_date_weight_from_timestamps(repo.repo_added, None)}")
github_weight = pr_issue_count - calculate_date_weight_from_timestamps(repo.repo_added, None)
except Exception as e:
pr_issue_count = None
github_weight = None
session.logger.error(
''.join(traceback.format_exception(None, e, e.__traceback__)))

try:
pr_issue_count = get_repo_weight_by_issue(session.logger, repo_git)
#session.logger.info(f"date weight: {calculate_date_weight_from_timestamps(repo.repo_added, None)}")
pr_issue_count = 0
github_weight = pr_issue_count - calculate_date_weight_from_timestamps(repo.repo_added, None)
except Exception as e:
pr_issue_count = None
Expand All @@ -1251,6 +1263,7 @@ def insert(session, repo_id):
"secondary_weight": github_weight,
"ml_weight": github_weight
}


result = session.insert_data(record, CollectionStatus, collection_status_unique, on_conflict_update=False)

Expand Down
2 changes: 1 addition & 1 deletion augur/tasks/github/pull_requests/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ def retrieve_all_pr_data(repo_git: str, logger, key_auth) -> None:

return all_data


def process_pull_requests(pull_requests, task_name, repo_id, logger, augur_db):

tool_source = "Pr Task"
Expand Down
2 changes: 1 addition & 1 deletion augur/tasks/github/util/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def parse_json_response(logger: logging.Logger, response: httpx.Response) -> dic
try:
return response.json()
except json.decoder.JSONDecodeError as e:
logger.warning(f"invalid return from GitHub. Response was: {response.text}. Exception: {e}")
logger.warning(f"invalid return. Response was: {response.text}. Exception: {e}")
return json.loads(json.dumps(response.text))

def get_repo_weight_by_issue(logger,repo_git):
Expand Down
12 changes: 6 additions & 6 deletions augur/tasks/gitlab/gitlab_api_key_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ class GitlabApiKeyHandler():
Attributes:
session (DatabaseSession): Database connection
logger (logging.Logger): Handles all logs
oauth_redis_key (str): The key where the github api keys are cached in redis
oauth_redis_key (str): The key where the gitlab api keys are cached in redis
redis_key_list (RedisList): Acts like a python list, and interacts directly with the redis cache
config_key (str): The api key that is stored in the users config table
key: (List[str]): List of keys retrieved from database or cache
Expand All @@ -46,21 +46,21 @@ def get_random_key(self):
"""Retrieves a random key from the list of keys

Returns:
A random github api key
A random gitlab api key
"""

return random.choice(self.keys)

def get_config_key(self) -> str:
"""Retrieves the users github api key from their config table
"""Retrieves the users gitlab api key from their config table

Returns:
Gitlab API key from config table
"""
return self.config.get_value("Keys", "gitlab_api_key")

def get_api_keys_from_database(self) -> List[str]:
"""Retieves all github api keys from database
"""Retrieves all gitlab api keys from database

Note:
It retrieves all the keys from the database except the one defined in the users config
Expand Down Expand Up @@ -131,7 +131,7 @@ def get_api_keys(self) -> List[str]:
self.redis_key_list.extend(valid_keys)

if not valid_keys:
raise NoValidKeysError("No valid github api keys found in the config or worker oauth table")
raise NoValidKeysError("No valid gitlab api keys found in the config or worker oauth table")


# shuffling the keys so not all processes get the same keys in the same order
Expand All @@ -152,7 +152,7 @@ def is_bad_api_key(self, client: httpx.Client, oauth_key: str) -> bool:

Args:
client: makes the http requests
oauth_key: github api key that is being tested
oauth_key: gitlab api key that is being tested

Returns:
True if key is bad. False if the key is good
Expand Down
Loading
Loading