From c3656edbc90b93d5e6e181aef542926ba39dc7e0 Mon Sep 17 00:00:00 2001 From: MuslemRahimi Date: Thu, 8 Aug 2024 13:47:45 +0200 Subject: [PATCH] update dashboard --- app/cron_corporate_lobbying.py | 47 ++++++++++++++++++++-------------- app/cron_dashboard.py | 2 ++ 2 files changed, 30 insertions(+), 19 deletions(-) diff --git a/app/cron_corporate_lobbying.py b/app/cron_corporate_lobbying.py index f1768bd..b98eca6 100644 --- a/app/cron_corporate_lobbying.py +++ b/app/cron_corporate_lobbying.py @@ -349,27 +349,28 @@ def save_json(symbol, data): json.dump(data, file) -def process_stock(stock, csv_files, reports_folder, threshold): - print(stock['name']) - year_totals = defaultdict(float) - stock_name_lower = stock['name'].lower() +def process_stocks_batch(stocks, csv_files, reports_folder, threshold): + all_df = pd.concat([pd.read_csv(os.path.join(reports_folder, csv_file), usecols=['ClientName', 'AmountReported', 'FilingYear']) for csv_file in csv_files]) + all_df['ClientName_lower'] = all_df['ClientName'].str.lower() - for csv_file in csv_files: - print(csv_file) - df = pd.read_csv(os.path.join(reports_folder, csv_file), usecols=['ClientName', 'AmountReported', 'FilingYear']) + results = {} + for stock in stocks: + print(stock['name']) + stock_name_lower = stock['name'].lower() - df['ClientName_lower'] = df['ClientName'].str.lower() - df['score'] = df['ClientName_lower'].apply(lambda x: process.extractOne(stock_name_lower, [x])[1]) + all_df['score'] = all_df['ClientName_lower'].apply(lambda x: process.extractOne(stock_name_lower, [x])[1]) + matched_df = all_df[all_df['score'] >= threshold] - matched_df = df[df['score'] >= threshold] + year_totals = matched_df.groupby('FilingYear')['AmountReported'].sum().to_dict() + all_res_list = [{'year': year, 'amount': amount} for year, amount in year_totals.items()] - year_totals.update(matched_df.groupby('FilingYear')['AmountReported'].sum().to_dict()) - - all_res_list = [{'year': year, 'amount': amount} for year, amount in year_totals.items()] + if all_res_list: + save_json(stock['symbol'], all_res_list) + print(f"Saved data for {stock['symbol']} ({len(all_res_list)} matches)") + + results[stock['symbol']] = all_res_list - if all_res_list: - save_json(stock['symbol'], all_res_list) - print(f"Saved data for {stock['symbol']} ({len(all_res_list)} matches)") + return results def create_dataset(): reports_folder = "json/corporate-lobbying/reports" @@ -381,11 +382,19 @@ def create_dataset(): cursor.execute("PRAGMA journal_mode = wal") cursor.execute("SELECT DISTINCT symbol, name FROM stocks WHERE marketCap >= 10E9 AND symbol NOT LIKE '%.%' AND symbol NOT LIKE '%-%'") stock_data = [{'symbol': row[0], 'name': row[1]} for row in cursor.fetchall()] - print(len(stock_data)) + print(f"Total stocks: {len(stock_data)}") con.close() - with concurrent.futures.ThreadPoolExecutor(max_workers=os.cpu_count()) as executor: - executor.map(lambda stock: process_stock(stock, csv_files, reports_folder, threshold), stock_data) + batch_size = 10 + stock_batches = [stock_data[i:i+batch_size] for i in range(0, len(stock_data), batch_size)] + + with concurrent.futures.ProcessPoolExecutor(max_workers=os.cpu_count()) as executor: + futures = [executor.submit(process_stocks_batch, batch, csv_files, reports_folder, threshold) for batch in stock_batches] + + for future in concurrent.futures.as_completed(futures): + results = future.result() + print(f"Processed batch with {len(results)} stocks") + if '__main__' == __name__: diff --git a/app/cron_dashboard.py b/app/cron_dashboard.py index e9e80ed..3408dc2 100644 --- a/app/cron_dashboard.py +++ b/app/cron_dashboard.py @@ -123,6 +123,7 @@ async def get_recent_earnings(session): try: symbol = item['ticker'] name = item['name'] + time = item['time'] eps_prior = float(item['eps_prior']) if item['eps_prior'] != '' else 0 eps_surprise = float(item['eps_surprise']) if item['eps_surprise'] != '' else 0 eps = float(item['eps']) if item['eps'] != '' else 0 @@ -135,6 +136,7 @@ async def get_recent_earnings(session): res_list.append({ 'symbol': symbol, 'name': name, + 'time': time, 'marketCap': market_cap, 'epsPrior':eps_prior, 'epsSurprise': eps_surprise,