From ee7e226b7e8f6209842a53b726b74e5eeee49410 Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Sun, 17 Mar 2024 21:32:07 -0500 Subject: [PATCH 1/3] Fixes for some metrics --- augur/api/metrics/deps.py | 4 ++-- augur/api/metrics/pull_request.py | 37 ++++++++++++++++--------------- 2 files changed, 21 insertions(+), 20 deletions(-) diff --git a/augur/api/metrics/deps.py b/augur/api/metrics/deps.py index 4f708cc40e..909ae4cb51 100644 --- a/augur/api/metrics/deps.py +++ b/augur/api/metrics/deps.py @@ -48,7 +48,7 @@ def deps(repo_group_id, repo_id=None, period='day', begin_date=None, end_date=No """) with current_app.engine.connect() as conn: - results = pd.read_sql(depsSQL, conn) + results = pd.read_sql(depsSQL, conn, params={'repo_id': repo_id}) else: @@ -73,7 +73,7 @@ def deps(repo_group_id, repo_id=None, period='day', begin_date=None, end_date=No """) with current_app.engine.connect() as conn: - results = pd.read_sql(depsSQL, conn) + results = pd.read_sql(depsSQL, conn, params={'repo_group_id': repo_group_id}) return results diff --git a/augur/api/metrics/pull_request.py b/augur/api/metrics/pull_request.py index 75116b5e54..3e5ba03143 100644 --- a/augur/api/metrics/pull_request.py +++ b/augur/api/metrics/pull_request.py @@ -3,14 +3,13 @@ Metrics that provide data about pull requests & their associated activity """ -import datetime +from datetime import datetime import sqlalchemy as s import pandas as pd from flask import current_app from augur.api.util import register_metric - @register_metric() def pull_requests_new(repo_group_id, repo_id=None, period='day', begin_date=None, end_date=None): """ @@ -38,7 +37,8 @@ def pull_requests_new(repo_group_id, repo_id=None, period='day', begin_date=None GROUP BY created_date """) - results = pd.read_sql(new_pull_requests_query, current_app.engine, params={'repo_id': repo_id, 'period': period, + with current_app.engine.connect() as conn: + results = pd.read_sql(new_pull_requests_query, conn, params={'repo_id': repo_id, 'period': period, 'begin_date': begin_date, 'end_date': end_date}) else: @@ -51,8 +51,9 @@ def pull_requests_new(repo_group_id, repo_id=None, period='day', begin_date=None GROUP BY created_date """) - results = pd.read_sql(new_pull_requests_query, current_app.engine, - params={'repo_group_id': repo_group_id, 'period': period, + with current_app.engine.connect() as conn: + results = pd.read_sql(new_pull_requests_query, conn, + params={'repo_group_id': repo_group_id, 'period': period, 'begin_date': begin_date, 'end_date': end_date}) @@ -73,7 +74,7 @@ def pull_requests_merge_contributor_new(repo_group_id, repo_id=None, period='day if not begin_date: begin_date = '1970-1-1 00:00:01' if not end_date: - end_date = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S') + end_date = datetime.now().strftime('%Y-%m-%d %H:%M:%S') if repo_id: commitNewContributor = s.sql.text(""" @@ -129,7 +130,7 @@ def pull_requests_closed_no_merge(repo_group_id, repo_id=None, period='day', beg if not begin_date: begin_date = '1970-1-1 00:00:01' if not end_date: - end_date = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S') + end_date = datetime.now().strftime('%Y-%m-%d %H:%M:%S') if repo_id: closedNoMerge = s.sql.text(""" @@ -181,7 +182,7 @@ def reviews(repo_group_id, repo_id=None, period='day', begin_date=None, end_date if not begin_date: begin_date = '1970-1-1' if not end_date: - end_date = datetime.datetime.now().strftime('%Y-%m-%d') + end_date = datetime.now().strftime('%Y-%m-%d') if not repo_id: reviews_SQL = s.sql.text(""" @@ -242,7 +243,7 @@ def reviews_accepted(repo_group_id, repo_id=None, period='day', begin_date=None, if not begin_date: begin_date = '1970-1-1' if not end_date: - end_date = datetime.datetime.now().strftime('%Y-%m-%d') + end_date = datetime.now().strftime('%Y-%m-%d') if not repo_id: reviews_accepted_SQL = s.sql.text(""" @@ -303,7 +304,7 @@ def reviews_declined(repo_group_id, repo_id=None, period='day', begin_date=None, if not begin_date: begin_date = '1970-1-1' if not end_date: - end_date = datetime.datetime.now().strftime('%Y-%m-%d') + end_date = datetime.now().strftime('%Y-%m-%d') if not repo_id: reviews_declined_SQL = s.sql.text(""" @@ -363,7 +364,7 @@ def review_duration(repo_group_id, repo_id=None, begin_date=None, end_date=None) if not begin_date: begin_date = '1970-1-1' if not end_date: - end_date = datetime.datetime.now().strftime('%Y-%m-%d') + end_date = datetime.now().strftime('%Y-%m-%d') if not repo_id: review_duration_SQL = s.sql.text(""" @@ -428,7 +429,7 @@ def pull_request_acceptance_rate(repo_group_id, repo_id=None, begin_date=None, e if not begin_date: begin_date = '1970-1-1 00:00:01' if not end_date: - end_date = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S') + end_date = datetime.now().strftime('%Y-%m-%d %H:%M:%S') if not repo_id: prAccRateSQL = s.sql.text(""" @@ -517,7 +518,7 @@ def pull_request_average_time_to_close(repo_group_id, repo_id=None, group_by='mo if not begin_date: begin_date = '1970-1-1' if not end_date: - end_date = datetime.datetime.now().strftime('%Y-%m-%d') + end_date = datetime.now().strftime('%Y-%m-%d') unit_options = ['year', 'month', 'week', 'day'] @@ -633,7 +634,7 @@ def pull_request_merged_status_counts(repo_group_id, repo_id=None, begin_date='1 if not begin_date: begin_date = '1970-1-1' if not end_date: - end_date = datetime.datetime.now().strftime('%Y-%m-%d') + end_date = datetime.now().strftime('%Y-%m-%d') unit_options = ['year', 'month', 'week', 'day'] @@ -741,7 +742,7 @@ def pull_request_average_commit_counts(repo_group_id, repo_id=None, group_by='mo if not begin_date: begin_date = '1970-1-1' if not end_date: - end_date = datetime.datetime.now().strftime('%Y-%m-%d') + end_date = datetime.now().strftime('%Y-%m-%d') unit_options = ['year', 'month', 'week', 'day'] @@ -852,7 +853,7 @@ def pull_request_average_event_counts(repo_group_id, repo_id=None, group_by='mon if not begin_date: begin_date = '1970-1-1' if not end_date: - end_date = datetime.datetime.now().strftime('%Y-%m-%d') + end_date = datetime.now().strftime('%Y-%m-%d') unit_options = ['year', 'month', 'week', 'day'] @@ -1019,7 +1020,7 @@ def pull_request_average_time_to_responses_and_close(repo_group_id, repo_id=None if not begin_date: begin_date = '1970-1-1' if not end_date: - end_date = datetime.datetime.now().strftime('%Y-%m-%d') + end_date = datetime.now().strftime('%Y-%m-%d') unit_options = ['year', 'month', 'week', 'day'] @@ -1135,7 +1136,7 @@ def pull_request_merged_status_counts(repo_group_id, repo_id=None, begin_date='1 """ if not end_date: - end_date = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S') + end_date = datetime.now().strftime('%Y-%m-%d %H:%M:%S') unit_options = ['year', 'month', 'week', 'day'] time_group_bys = [] From 13dbb88bb51ae739003b9f960a98b742ee33de7a Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Mon, 18 Mar 2024 18:19:30 -0500 Subject: [PATCH 2/3] Fix errors where df is empty --- augur/api/metrics/pull_request.py | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/augur/api/metrics/pull_request.py b/augur/api/metrics/pull_request.py index 3e5ba03143..447c9557ae 100644 --- a/augur/api/metrics/pull_request.py +++ b/augur/api/metrics/pull_request.py @@ -30,7 +30,7 @@ def pull_requests_new(repo_group_id, repo_id=None, period='day', begin_date=None if repo_id: new_pull_requests_query = s.sql.text(""" SELECT DATE_TRUNC(:period, pr_created_at) AS created_date, - COUNT(pr_id) AS new_pull_requests + COUNT(*) AS new_pull_requests FROM pull_requests WHERE repo_id = :repo_id AND pr_created_at BETWEEN :begin_date AND :end_date @@ -607,6 +607,11 @@ def pull_request_average_time_to_close(repo_group_id, repo_id=None, group_by='mo pr_all = pd.read_sql(pr_all_SQL, conn, params={'repo_id': repo_id, 'repo_group_id':repo_group_id, 'begin_date': begin_date, 'end_date': end_date}) + + + if pr_all.empty: + return [] + if not repo_id: pr_avg_time_to_close = pr_all.groupby(['merged_status', 'repo_id', 'repo_name', 'repo_group_id', 'repo_group_name'] + time_group_bys).mean().reset_index()[['merged_status', 'repo_id', 'repo_name', 'repo_group_id', 'repo_group_name'] + time_group_bys + ['average_{}_to_close'.format(time_unit)]] else: @@ -720,6 +725,10 @@ def pull_request_merged_status_counts(repo_group_id, repo_id=None, begin_date='1 pr_all = pd.read_sql(pr_all_SQL, conn, params={'repo_id': repo_id, 'repo_group_id':repo_group_id, 'begin_date': begin_date, 'end_date': end_date}) + + if pr_all.empty: + return [] + if not repo_id: pr_avg_time_between_responses = pr_all.groupby(['merged_status', 'repo_id', 'repo_name', 'repo_group_id', 'repo_group_name'] + time_group_bys).mean().reset_index()[['merged_status', 'repo_id', 'repo_name', 'repo_group_id', 'repo_group_name'] + time_group_bys + ['average_{}_between_responses'.format(time_unit)]] else: @@ -831,6 +840,10 @@ def pull_request_average_commit_counts(repo_group_id, repo_id=None, group_by='mo pr_all = pd.read_sql(pr_all_SQL, conn, params={'repo_id': repo_id, 'repo_group_id':repo_group_id, 'begin_date': begin_date, 'end_date': end_date}) + + if pr_all.empty: + return [] + if not repo_id: pr_avg_commit_counts = pr_all.groupby(['merged_status', 'repo_id', 'repo_name', 'repo_group_id', 'repo_group_name'] + time_group_bys).mean().reset_index()[['merged_status', 'repo_id', 'repo_name', 'repo_group_id', 'repo_group_name'] + time_group_bys + ['average_commits_per_pull_request']] else: @@ -997,6 +1010,10 @@ def pull_request_average_event_counts(repo_group_id, repo_id=None, group_by='mon for name in count_names.copy(): average_count_names.append('average_' + name) + + if pr_all.empty: + return [] + if not repo_id: pr_avg_event_counts = pr_all.groupby(['merged_status', 'repo_id', 'repo_name', 'repo_group_id', 'repo_group_name'] + time_group_bys).mean().reset_index()[['merged_status', 'repo_id', 'repo_name', 'repo_group_id', 'repo_group_name'] + time_group_bys + average_count_names] else: @@ -1116,6 +1133,9 @@ def pull_request_average_time_to_responses_and_close(repo_group_id, repo_id=None params={'repo_id': repo_id, 'repo_group_id':repo_group_id, 'begin_date': begin_date, 'end_date': end_date}) + if pr_all.empty: + return [] + if not repo_id: avg_pr_time_to_responses_and_close = pr_all.groupby(['merged_status', 'repo_id', 'repo_name', 'repo_group_id', 'repo_group_name'] + time_group_bys).mean().reset_index()[['merged_status', 'repo_id', 'repo_name', 'repo_group_id', 'repo_group_name'] + time_group_bys + ['average_{}_to_first_response'.format(time_unit), 'average_{}_to_last_response'.format(time_unit), 'average_{}_to_close'.format(time_unit)]] else: From df230e235a7c7199a1114209ac75d7f36dfb954f Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Mon, 18 Mar 2024 18:51:45 -0500 Subject: [PATCH 3/3] Fix api stuff --- augur/api/metrics/repo_meta.py | 4 ++++ augur/api/metrics/toss.py | 8 ++++---- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/augur/api/metrics/repo_meta.py b/augur/api/metrics/repo_meta.py index 256469abac..ffc8fc84ef 100644 --- a/augur/api/metrics/repo_meta.py +++ b/augur/api/metrics/repo_meta.py @@ -285,6 +285,10 @@ def nadia_project_labeling_badge(repo_group_id, repo_id=None): with current_app.engine.connect() as conn: raw_df = pd.read_sql(stars_count_SQL, conn) + + if raw_df.empty: + return {"status": "Not enough data"} + stargazers_count = int(raw_df.at[0,'stars']) repo_name = str(raw_df.at[0,'repo_name']) diff --git a/augur/api/metrics/toss.py b/augur/api/metrics/toss.py index 99c7683eb8..40a4a12b00 100644 --- a/augur/api/metrics/toss.py +++ b/augur/api/metrics/toss.py @@ -28,7 +28,7 @@ def toss_pull_request_acceptance_rate(repo_id, begin_date=None, end_date=None, g ( SELECT COUNT ( pull_request_events.pull_request_id ) AS num_approved, - repo_id + pull_requests.repo_id FROM pull_requests JOIN pull_request_events ON pull_request_events.pull_request_id = pull_requests.pull_request_id @@ -39,12 +39,12 @@ def toss_pull_request_acceptance_rate(repo_id, begin_date=None, end_date=None, g AND pull_request_events.created_at BETWEEN :begin_date AND :end_date GROUP BY - repo_id + pull_requests.repo_id ) merged JOIN ( SELECT COUNT ( pull_request_events.pull_request_id ) AS num_opened, - repo_id + pull_requests.repo_id FROM pull_requests JOIN pull_request_events ON pull_request_events.pull_request_id = pull_requests.pull_request_id @@ -54,7 +54,7 @@ def toss_pull_request_acceptance_rate(repo_id, begin_date=None, end_date=None, g AND pull_request_events.created_at BETWEEN :begin_date AND :end_date GROUP BY - repo_id + pull_requests.repo_id ) opened ON merged.repo_id = opened.repo_id """)