-
Notifications
You must be signed in to change notification settings - Fork 0
/
utils_df.py
28 lines (22 loc) · 1.33 KB
/
utils_df.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
import pandas as pd
import numpy as np
import re
from constants import ColumnNames
import utils
def add_categories_to_df(df, categories_dict):
    """Fill blank entries of the 'category' column in place by keyword match.

    Entries of ``categories_dict`` are processed in iteration order. For each
    one, rows whose 'category' is still blank (empty after stripping
    whitespace) and whose ``ColumnNames.TEXT`` value contains any of the
    keywords (literal, case-insensitive) are assigned that category name.
    Rows that already have a category are never overwritten, so earlier
    entries take precedence over later ones.

    Args:
        df: DataFrame holding a string 'category' column and a
            ``ColumnNames.TEXT`` column. Modified in place.
        categories_dict: Mapping of category name to an iterable of keyword
            strings.

    Returns:
        None. The result is written to ``df['category']``.
    """
    for category_name, keywords in categories_dict.items():
        # An empty keyword produces an empty regex alternative, which matches
        # every string and would assign this category to all blank rows.
        usable_keywords = [keyword for keyword in keywords if keyword]
        if not usable_keywords:
            continue

        # Keywords are escaped so they are matched literally, not as regex.
        pattern = '|'.join(re.escape(keyword) for keyword in usable_keywords)

        # Recomputed on every pass: rows filled by an earlier category must
        # no longer count as blank.
        is_blank = df['category'].str.strip() == ''
        # na=False: rows with missing text never match.
        has_keyword = df[ColumnNames.TEXT].str.contains(pattern, case=False, na=False)

        df['category'] = np.where(is_blank & has_keyword, category_name, df['category'])
def delete_rows(df, to_del_list):
    """Return the rows of ``df`` whose text contains none of the given items.

    Matching is literal (items are regex-escaped) and case-insensitive against
    the ``ColumnNames.TEXT`` column. Rows whose text is missing are kept.

    Args:
        df: DataFrame with a ``ColumnNames.TEXT`` column. Not modified.
        to_del_list: Iterable of substrings; any row containing one of them
            is dropped. Empty strings are ignored.

    Returns:
        A new DataFrame without the matching rows.
    """
    needles = [item for item in to_del_list if item]
    if not needles:
        # An empty pattern matches every string, which would silently drop
        # every row; with nothing to delete, hand back the data unfiltered.
        return df.copy()

    pattern = '|'.join(re.escape(item) for item in needles)
    # na=False: rows with missing text are treated as non-matching and kept.
    should_drop = df[ColumnNames.TEXT].str.contains(pattern, case=False, na=False)
    return df[~should_drop]
def get_monthly_expense_df(df, df_grouped):
    """Sum amounts per (month, category) and order categories by total.

    Side effect: a string 'month' column (period formatted via
    ``to_period('M')``) is added to ``df`` itself.

    Args:
        df: Transaction-level DataFrame with ``ColumnNames.CATEGORY`` and
            ``ColumnNames.AMOUNT`` columns, plus whatever date column
            ``utils.get_date_col_as_datetime`` reads.
        df_grouped: DataFrame with ``ColumnNames.CATEGORY`` and
            ``ColumnNames.AMOUNT`` columns; its categories, sorted by amount
            descending, define the category ordering of the result.

    Returns:
        DataFrame with columns 'month', ``ColumnNames.CATEGORY`` (ordered
        categorical) and ``ColumnNames.AMOUNT`` (summed per group).
    """
    category_col = ColumnNames.CATEGORY
    amount_col = ColumnNames.AMOUNT

    # The helper's result is used through `.dt`, so it is presumably a
    # datetime Series aligned with df -- confirm against utils.
    dates = utils.get_date_col_as_datetime(df)
    df['month'] = dates.dt.to_period('M').astype(str)

    totals = df.groupby(['month', category_col])[amount_col].sum()
    monthly_expenses = totals.reset_index()

    # Largest category first, as ranked by df_grouped.
    ranked = df_grouped.sort_values(by=amount_col, ascending=False)
    category_order = ranked[category_col].tolist()

    # Categories missing from category_order become NaN in the categorical.
    monthly_expenses[category_col] = pd.Categorical(
        monthly_expenses[category_col],
        categories=category_order,
        ordered=True,
    )
    return monthly_expenses