Skip to content

Commit

Permalink
Merge pull request oss-aspen#631 from JamesKunstle/update_packages
Browse files Browse the repository at this point in the history
Update packages
  • Loading branch information
cdolfi authored Feb 13, 2024
2 parents 71a921c + 779d6df commit 3af11ce
Show file tree
Hide file tree
Showing 15 changed files with 105 additions and 92 deletions.
4 changes: 2 additions & 2 deletions 8Knot/pages/affiliation/visualizations/commit_domains.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,10 +187,10 @@ def process_data(df: pd.DataFrame, num, start_date, end_date):
# creates df of domains and counts
df = pd.DataFrame(email_domains, columns=["domains"]).value_counts().to_frame().reset_index()

df = df.rename(columns={0: "occurrences"})
df = df.rename(columns={"count": "occurrences"})

# changes the name of the company if under a certain threshold
df.loc[df.occurrences <= num, "domains"] = "Other"
df.loc[df["occurrences"] <= num, "domains"] = "Other"

# groups others together for final counts
df = (
Expand Down
4 changes: 2 additions & 2 deletions 8Knot/pages/affiliation/visualizations/gh_org_affiliation.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ def process_data(df: pd.DataFrame, num, start_date, end_date):
df["company_name"] = df.index
df = df.reset_index()
df["company_name"] = df["company_name"].astype(str)
df = df.rename(columns={"index": "orginal_name", "cntrb_company": "contribution_count"})
df = df.rename(columns={"cntrb_company": "orginal_name", "count": "contribution_count"})

# applies fuzzy matching comparing all rows to each other
df["match"] = df.apply(lambda row: fuzzy_match(df, row["company_name"]), axis=1)
Expand All @@ -212,7 +212,7 @@ def process_data(df: pd.DataFrame, num, start_date, end_date):
)

# changes the name of the company if under a certain threshold
df.loc[df.contribution_count <= num, "company_name"] = "Other"
df.loc[df["contribution_count"] <= num, "company_name"] = "Other"

# groups others together for final counts
df = (
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@ def process_data(df: pd.DataFrame, num, start_date, end_date, email_filter):
# creates df of domains and counts
df = pd.DataFrame(email_domains, columns=["domains"]).value_counts().to_frame().reset_index()

df = df.rename(columns={0: "occurrences"})
df = df.rename(columns={"count": "occurrences"})

# changes the name of the organization if under a certain threshold
df.loc[df.occurrences <= num, "domains"] = "Other"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,7 @@ def process_data(df: pd.DataFrame, contributions, contributors, start_date, end_
# creates df of domains and counts
df = pd.DataFrame(email_domains, columns=["domains"]).value_counts().to_frame().reset_index()

df = df.rename(columns={0: "contributors"})
df = df.rename(columns={"count": "contributors"})

# changes the name of the org if under a certain threshold
df.loc[df.contributors <= contributors, "domains"] = "Other"
Expand Down
2 changes: 1 addition & 1 deletion 8Knot/pages/affiliation/visualizations/unqiue_domains.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ def process_data(df: pd.DataFrame, num, start_date, end_date):
# creates df of domains and counts
df = pd.DataFrame(email_domains, columns=["domains"]).value_counts().to_frame().reset_index()

df = df.rename(columns={0: "occurences"})
df = df.rename(columns={"count": "occurences"})

# changes the name of the company if under a certain threshold
df.loc[df.occurences <= num, "domains"] = "Other"
Expand Down
5 changes: 4 additions & 1 deletion 8Knot/pages/chaoss/visualizations/contrib_importance_pie.py
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,10 @@ def process_data(df: pd.DataFrame, action_type, top_k, patterns, start_date, end
df_sum = df[action_type].sum()

# calculate the remaining contributions by taking the difference of t_sum and df_sum
df = df.append({"cntrb_id": "Other", action_type: t_sum - df_sum}, ignore_index=True)
# dataframes no longer implement above 'append' interface as of Pandas 1.4.4
# create a single-entry dataframe that we can concatenate onto existing df
df_concat = pd.DataFrame(data={"cntrb_id": ["Other"], action_type: [t_sum - df_sum]})
df = pd.concat([df, df_concat], ignore_index=True)

return df

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -217,13 +217,7 @@ def process_data(df: pd.DataFrame, interval, assign_req, start_date, end_date):
df_contrib = df[df["assignment_action"] == "assigned"]

# count the assignments total for each contributor
df_contrib = (
df_contrib["assignee"]
.value_counts()
.to_frame()
.reset_index()
.rename(columns={"assignee": "count", "index": "assignee"})
)
df_contrib = df_contrib["assignee"].value_counts().to_frame().reset_index()

# create list of all contributors that meet the assignment requirement
contributors = df_contrib["assignee"][df_contrib["count"] >= assign_req].to_list()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -214,13 +214,7 @@ def process_data(df: pd.DataFrame, interval, assign_req, start_date, end_date):
df_contrib = df[df["assignment_action"] == "assigned"]

# count the assignments total for each contributor
df_contrib = (
df_contrib["assignee"]
.value_counts()
.to_frame()
.reset_index()
.rename(columns={"assignee": "count", "index": "assignee"})
)
df_contrib = df_contrib["assignee"].value_counts().to_frame().reset_index()

# create list of all contributors that meet the assignment requirement
contributors = df_contrib["assignee"][df_contrib["count"] >= assign_req].to_list()
Expand Down
5 changes: 3 additions & 2 deletions 8Knot/pages/contributions/visualizations/issues_over_time.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,15 +189,16 @@ def process_data(df: pd.DataFrame, interval, start_date, end_date):
created_range = pd.to_datetime(df["created_at"]).dt.to_period(interval).value_counts().sort_index()

# converts to data frame object and creates date column from period values
df_created = created_range.to_frame().reset_index().rename(columns={"index": "Date"})
df_created = created_range.to_frame().reset_index().rename(columns={"created_at": "Date", "count": "created_at"})

# converts date column to a datetime object, converts to string first to handle period information
# the period slice is to handle weekly corner case
df_created["Date"] = pd.to_datetime(df_created["Date"].astype(str).str[:period_slice])

# df for closed issues in time interval
closed_range = pd.to_datetime(df["closed_at"]).dt.to_period(interval).value_counts().sort_index()
df_closed = closed_range.to_frame().reset_index().rename(columns={"index": "Date"})
df_closed = closed_range.to_frame().reset_index().rename(columns={"closed_at": "Date", "count": "closed_at"})

df_closed["Date"] = pd.to_datetime(df_closed["Date"].astype(str).str[:period_slice])

# first and last elements of the dataframe are the
Expand Down
6 changes: 3 additions & 3 deletions 8Knot/pages/contributions/visualizations/pr_over_time.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,20 +165,20 @@ def process_data(df: pd.DataFrame, interval):
created_range = df["created_at"].dt.to_period(interval).value_counts().sort_index()

# converts to data frame object and created date column from period values
df_created = created_range.to_frame().reset_index().rename(columns={"index": "Date"})
df_created = created_range.to_frame().reset_index().rename(columns={"created_at": "Date", "count": "created_at"})

# converts date column to a datetime object, converts to string first to handle period information
# the period slice is to handle weekly corner case
df_created["Date"] = pd.to_datetime(df_created["Date"].astype(str).str[:period_slice])

# df for merged prs in time interval
merged_range = pd.to_datetime(df["merged_at"]).dt.to_period(interval).value_counts().sort_index()
df_merged = merged_range.to_frame().reset_index().rename(columns={"index": "Date"})
df_merged = merged_range.to_frame().reset_index().rename(columns={"merged_at": "Date", "count": "merged_at"})
df_merged["Date"] = pd.to_datetime(df_merged["Date"].astype(str).str[:period_slice])

# df for closed prs in time interval
closed_range = pd.to_datetime(df["closed_at"]).dt.to_period(interval).value_counts().sort_index()
df_closed = closed_range.to_frame().reset_index().rename(columns={"index": "Date"})
df_closed = closed_range.to_frame().reset_index().rename(columns={"closed_at": "Date", "count": "closed_at"})
df_closed["Date"] = pd.to_datetime(df_closed["Date"].astype(str).str[:period_slice])

# A single df created for plotting merged and closed as stacked bar chart
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -319,7 +319,10 @@ def process_data(df: pd.DataFrame, action_type, top_k, patterns, start_date, end
df_sum = df[action_type].sum()

# calculate the remaining contributions by taking the difference of t_sum and df_sum
df = df.append({"cntrb_id": "Other", action_type: t_sum - df_sum}, ignore_index=True)
# dataframes no longer implement above 'append' interface as of Pandas 1.4.4
# create a single-entry dataframe that we can concatenate onto existing df
df_concat = pd.DataFrame(data={"cntrb_id": ["Other"], action_type: [t_sum - df_sum]})
df = pd.concat([df, df_concat], ignore_index=True)

return df

Expand Down
2 changes: 1 addition & 1 deletion 8Knot/pages/contributors/visualizations/new_contributor.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ def process_data(df, interval):
created_range = pd.to_datetime(df["created_at"]).dt.to_period(interval).value_counts().sort_index()

# converts to data frame object and creates date column from period values
df_contribs = created_range.to_frame().reset_index().rename(columns={"index": "Date", "created_at": "contribs"})
df_contribs = created_range.to_frame().reset_index().rename(columns={"created_at": "Date", "count": "contribs"})

# converts date column to a datetime object, converts to string first to handle period information
df_contribs["Date"] = pd.to_datetime(df_contribs["Date"].astype(str))
Expand Down
7 changes: 0 additions & 7 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,6 @@ RUN pip3 install --no-cache-dir -U pip setuptools && \
# working directory.
COPY ./8Knot/ /opt/app-root/src/

# run flower
# CMD [ "celery", "-A", "app:celery_app", "flower" ]

# run worker
# CMD [ "celery", "-A", "app:celery_app", "worker", "--loglevel=INFO" ]

# run app
# Description of how to choose the number of workers and threads.
# common wisdom is (2*CPU)+1 workers:
# https://medium.com/building-the-system/gunicorn-3-means-of-concurrency-efbb547674b7
Expand Down
24 changes: 24 additions & 0 deletions requirements-base.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# top-level required modules w/o pinned dependencies
# DON'T ADD NEW DEPENDENCIES TO REQUIREMENTS.TXT MANUALLY
# ADD THEM TO REQUIREMENTS-BASE.TXT, THEN RUN
# pip3 freeze -r requirements-base.txt > requirements.txt
sqlalchemy
celery
dash
dash-bootstrap-components
dash-mantine-components
dash-bootstrap-templates
flask-login
flask
redis
uuid
plotly
psycopg2-binary
pandas
numpy
python-dateutil
fuzzywuzzy
python-Levenshtein
datetime
gunicorn
pyarrow
113 changes: 57 additions & 56 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,60 +1,61 @@
-i https://pypi.org/simple
amqp==5.1.1 ; python_version >= '3.6'
async-timeout==4.0.2 ; python_version >= '3.6'
billiard==3.6.4.0
celery==5.2.7
# celery-flower==1.*
cfgv==3.3.1 ; python_full_version >= '3.6.1'
click==8.1.3 ; python_version >= '3.7'
click-didyoumean==0.3.0 ; python_full_version >= '3.6.2' and python_full_version < '4.0.0'
click-plugins==1.1.1
click-repl==0.2.0
dash==2.7.0
# top-level required modules w/o pinned dependencies
# DON'T ADD NEW DEPENDENCIES TO REQUIREMENTS.TXT MANUALLY
# ADD THEM TO REQUIREMENTS-BASE.TXT, THEN RUN
# pip3 freeze -r requirements-base.txt > requirements.txt
SQLAlchemy==2.0.25
celery==5.3.6
dash==2.15.0
dash-bootstrap-components==1.5.0
dash-bootstrap-templates==1.0.7
dash-mantine-components==0.12.1
dash-bootstrap-templates==1.1.2
Flask-Login==0.6.3
Flask==3.0.2
redis==5.0.1
uuid==1.30
plotly==5.18.0
psycopg2-binary==2.9.9
pandas==2.2.0
numpy==1.26.4
python-dateutil==2.8.2
fuzzywuzzy==0.18.0
python-Levenshtein==0.24.0
DateTime==5.4
gunicorn==21.2.0
pyarrow==15.0.0
## The following requirements were added by pip freeze:
amqp==5.2.0
billiard==4.2.0
blinker==1.7.0
certifi==2024.2.2
charset-normalizer==3.3.2
click==8.1.7
click-didyoumean==0.3.0
click-plugins==1.1.1
click-repl==0.3.0
dash-core-components==2.0.0
dash-html-components==2.0.0
dash-table==5.0.0
deprecated==1.2.13 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
distlib==0.3.6
filelock==3.8.0 ; python_version >= '3.7'
flask==2.2.2 ; python_version >= '3.7'
gunicorn==20.1.0
identify==2.5.8 ; python_version >= '3.7'
itsdangerous==2.1.2 ; python_version >= '3.7'
jinja2==3.1.2 ; python_version >= '3.7'
kombu==5.2.4 ; python_version >= '3.7'
markupsafe==2.1.1 ; python_version >= '3.7'
nodeenv==1.7.0 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5, 3.6'
numpy==1.23.4
packaging==21.3 ; python_version >= '3.6'
pandas==1.5.1
patsy==0.5.3
platformdirs==2.5.3 ; python_version >= '3.7'
plotly==5.11.0 ; python_version >= '3.6'
plotly-express==0.4.1
pre-commit==2.20.0
prompt-toolkit==3.0.32 ; python_full_version >= '3.6.2'
psycopg2-binary
pyparsing==3.0.9 ; python_full_version >= '3.6.8'
python-dateutil==2.8.2 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
pytz==2022.6
pyyaml==6.0 ; python_version >= '3.6'
redis==4.3.4
rq==1.11.1
scipy==1.9.3 ; python_version >= '3.8'
six==1.16.0 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
sqlalchemy==1.4.43
statsmodels==0.13.5 ; python_version >= '3.7'
tenacity==8.1.0 ; python_version >= '3.6'
toml==0.10.2 ; python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'
vine==5.0.0 ; python_version >= '3.6'
wcwidth==0.2.5
werkzeug==2.2.2 ; python_version >= '3.7'
wrapt==1.14.1 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'
requests
dash-mantine-components
pyarrow
fuzzywuzzy
python-Levenshtein
flask-login
idna==3.6
importlib-metadata==7.0.1
itsdangerous==2.1.2
Jinja2==3.1.3
kombu==5.3.5
Levenshtein==0.24.0
MarkupSafe==2.1.5
nest-asyncio==1.6.0
packaging==23.2
prompt-toolkit==3.0.43
pytz==2024.1
rapidfuzz==3.6.1
requests==2.31.0
retrying==1.3.4
six==1.16.0
tenacity==8.2.3
typing_extensions==4.9.0
tzdata==2023.4
urllib3==2.2.0
vine==5.1.0
wcwidth==0.2.13
Werkzeug==3.0.1
zipp==3.17.0
zope.interface==6.1

0 comments on commit 3af11ce

Please sign in to comment.