From 0f44a75242f14df3a36c30843cf94cbaa08966aa Mon Sep 17 00:00:00 2001
From: MuslemRahimi
Date: Tue, 28 May 2024 15:34:43 +0200
Subject: [PATCH] update hedge fund db

---
Review notes (text in this area is ignored when the patch is applied):
- Line structure restored; indentation of context lines is reconstructed
  by convention and may need --ignore-whitespace to apply.
- create_institute_db.py: the symbol collections are sets, so the
  per-holding membership tests no longer scan three lists.
- cron_hedge_funds.py: legal suffixes are matched on whole words
  (substring removal turned "ALPHA" into "Aha" and its result depended on
  set iteration order); the market-value sort moved into SQL so NULL
  values cannot raise TypeError; the three copies of the row-to-JSON code
  share helpers; the connection is closed in a finally block.
- Post-image blob ids on the index lines of these two files are stale.
- Follow-up, not changed here because it is a context line: win_rate
  divides by len(performance_percentages), which is 0 when no holding
  matches a known symbol.

 app/create_institute_db.py |  53 +++++++++++++------
 app/cron_hedge_funds.py    | 105 +++++++++++++++++++++++++++++++++++++
 app/main.py                |   4 +-
 app/restart_json.py        |   2 +-
 4 files changed, 146 insertions(+), 18 deletions(-)
 create mode 100644 app/cron_hedge_funds.py

diff --git a/app/create_institute_db.py b/app/create_institute_db.py
index 0af4abe..0514d14 100755
--- a/app/create_institute_db.py
+++ b/app/create_institute_db.py
@@ -17,15 +17,29 @@
 # Filter out the specific RuntimeWarning
 warnings.filterwarnings("ignore", category=RuntimeWarning, message="invalid value encountered in scalar divide")
 
-conn = sqlite3.connect('stocks.db')
-cursor = conn.cursor()
+con = sqlite3.connect('stocks.db')
+etf_con = sqlite3.connect('etf.db')
+crypto_con = sqlite3.connect('crypto.db')
 
-# Execute the SQL query
-cursor.execute("SELECT symbol FROM stocks")
+cursor = con.cursor()
+cursor.execute("PRAGMA journal_mode = wal")
+cursor.execute("SELECT DISTINCT symbol FROM stocks")
+stock_symbols = {row[0] for row in cursor.fetchall()}
 
-# Fetch all the results into a list
-symbol_list = [row[0] for row in cursor.fetchall()]
-conn.close()
+etf_cursor = etf_con.cursor()
+etf_cursor.execute("PRAGMA journal_mode = wal")
+etf_cursor.execute("SELECT DISTINCT symbol FROM etfs")
+etf_symbols = {row[0] for row in etf_cursor.fetchall()}
+
+crypto_cursor = crypto_con.cursor()
+crypto_cursor.execute("PRAGMA journal_mode = wal")
+crypto_cursor.execute("SELECT DISTINCT symbol FROM cryptos")
+crypto_symbols = {row[0] for row in crypto_cursor.fetchall()}
+
+total_symbols = stock_symbols | etf_symbols | crypto_symbols
+con.close()
+etf_con.close()
+crypto_con.close()
 
 load_dotenv()
 
@@ -121,33 +135,42 @@ async def save_portfolio_data(self, session, cik):
 
                 if isinstance(parsed_data, list) and "https://financialmodelingprep.com/api/v4/institutional-ownership/portfolio-holdings?cik=" in url:
                     # Handle list response, save as JSON object
-                    parsed_data = [item for item in parsed_data if 'symbol' in item and item['symbol'] is not None and item['symbol'] in symbol_list] #symbol must be included in the database
+                    parsed_data = [
+                        {**item, 'type': ('stocks' if item['symbol'] in stock_symbols else
+                                          'crypto' if item['symbol'] in crypto_symbols else
+                                          'etf' if item['symbol'] in etf_symbols else None)}
+                        for item in parsed_data
+                        if 'symbol' in item and item['symbol'] is not None and item['symbol'] in total_symbols
+                    ]
+
                     portfolio_data['holdings'] = json.dumps(parsed_data)
 
                     number_of_stocks = len(parsed_data)
-                    total_market_value = sum(item['marketValue'] for item in parsed_data)
-                    avg_performance_percentage = sum(item['performancePercentage'] for item in parsed_data) / len(parsed_data)
-
+                    #total_market_value = sum(item['marketValue'] for item in parsed_data)
+                    #avg_performance_percentage = sum(item['performancePercentage'] for item in parsed_data) / len(parsed_data)
                     performance_percentages = [item.get("performancePercentage", 0) for item in parsed_data]
+
+
                     positive_performance_count = sum(1 for percentage in performance_percentages if percentage > 0)
                     win_rate = round(positive_performance_count / len(performance_percentages) * 100,2)
                     data_dict = {
                         'winRate': win_rate,
                         'numberOfStocks': number_of_stocks,
-                        'marketValue': total_market_value,
-                        'avgPerformancePercentage': avg_performance_percentage,
+                        #'marketValue': total_market_value,
                     }
 
                     portfolio_data.update(data_dict)
 
                 elif isinstance(parsed_data, list) and "https://financialmodelingprep.com/api/v4/institutional-ownership/portfolio-holdings-summary" in url:
                     # Handle list response, save as JSON object
+
                     portfolio_data['summary'] = json.dumps(parsed_data)
                     data_dict = {
                         #'numberOfStocks': parsed_data[0]['portfolioSize'],
-                        #'marketValue': parsed_data[0]['marketValue'],
+                        'marketValue': parsed_data[0]['marketValue'],
                         'averageHoldingPeriod': parsed_data[0]['averageHoldingPeriod'],
                         'turnover': parsed_data[0]['turnover'],
+                        'performancePercentage3year': parsed_data[0]['performancePercentage3year'],
                         #'performancePercentage': parsed_data[0]['performancePercentage']
                     }
                     portfolio_data.update(data_dict)
@@ -164,7 +187,7 @@ async def save_portfolio_data(self, session, cik):
 
             holdings_list = json.loads(portfolio_data['holdings'])
             symbols_to_check = {holding['symbol'] for holding in holdings_list[:3]} # Extract the first two symbols
-            symbols_not_in_list = not any(symbol in symbol_list for symbol in symbols_to_check)
+            symbols_not_in_list = not any(symbol in total_symbols for symbol in symbols_to_check)
 
 
             if symbols_not_in_list or 'industry' not in portfolio_data or len(json.loads(portfolio_data['industry'])) == 0:
diff --git a/app/cron_hedge_funds.py b/app/cron_hedge_funds.py
new file mode 100644
index 0000000..50657de
--- /dev/null
+++ b/app/cron_hedge_funds.py
@@ -0,0 +1,105 @@
+import sqlite3
+import os
+import json
+
+
+frontend_json_url = "../../frontend/src/lib/hedge-funds"
+
+# Columns exported for every institute, in SELECT order.
+INSTITUTE_COLUMNS = (
+    'cik', 'name', 'numberOfStocks', 'marketValue',
+    'winRate', 'turnover', 'performancePercentage3year',
+)
+SELECT_INSTITUTES = f"SELECT {', '.join(INSTITUTE_COLUMNS)} FROM institutes"
+
+# Legal-form suffixes dropped from display names. Matched against whole
+# words only, so names such as "ALPHA" or "INCOME" are left intact.
+REMOVE_WORDS = frozenset({'LLC', 'LP', 'LTD', 'INC', 'PLC'})
+# State markers that may be glued to a word, e.g. "INC/DE/".
+STATE_MARKERS = ('/DE/', '/MD/')
+# Acronyms that keep their upper-case spelling.
+PRESERVE_WORDS = frozenset({'FMR', 'MCF'})
+
+
+def format_company_name(company_name):
+    """Return a display name without legal suffixes, in title case.
+
+    Commas, periods and state markers are stripped from each word before
+    it is compared with REMOVE_WORDS, so "INC." and "LLC," are dropped as
+    well. Words that end up empty are skipped, which avoids stray spaces.
+    A missing (None or empty) name yields an empty string.
+    """
+    if not company_name:
+        return ''
+
+    formatted_words = []
+    for word in company_name.split():
+        if word in PRESERVE_WORDS:
+            formatted_words.append(word)
+            continue
+        cleaned = word.replace(',', '').replace('.', '')
+        for marker in STATE_MARKERS:
+            cleaned = cleaned.replace(marker, '')
+        if cleaned and cleaned.upper() not in REMOVE_WORDS:
+            formatted_words.append(cleaned.title())
+
+    return ' '.join(formatted_words)
+
+
+def _rows_to_records(rows):
+    """Turn rows selected with SELECT_INSTITUTES into JSON-ready dicts."""
+    records = []
+    for row in rows:
+        record = dict(zip(INSTITUTE_COLUMNS, row))
+        record['name'] = format_company_name(record['name'])
+        records.append(record)
+    return records
+
+
+def _export(con, query_suffix, file_name):
+    """Run SELECT_INSTITUTES plus *query_suffix* and dump the result as JSON."""
+    cursor = con.cursor()
+    cursor.execute(f"{SELECT_INSTITUTES} {query_suffix}")
+    records = _rows_to_records(cursor.fetchall())
+    with open(os.path.join(frontend_json_url, file_name), 'w') as file:
+        json.dump(records, file)
+
+
+def best_hedge_funds(con):
+    """Export the 50 large institutes with the highest win rate."""
+    _export(
+        con,
+        "WHERE marketValue > 200000000 AND numberOfStocks > 15 "
+        "ORDER BY winRate DESC LIMIT 50",
+        'best-hedge-funds.json',
+    )
+
+
+def worst_hedge_funds(con):
+    """Export the 50 large institutes with the lowest non-zero win rate."""
+    _export(
+        con,
+        "WHERE marketValue > 200000000 AND numberOfStocks > 15 AND winRate > 0 "
+        "ORDER BY winRate ASC LIMIT 50",
+        'worst-hedge-funds.json',
+    )
+
+
+def all_hedge_funds(con):
+    """Export every institute, largest market value first.
+
+    Sorting is done in SQL: SQLite orders NULL below every other value, so
+    institutes without a marketValue end up last instead of breaking a
+    Python-side sort that would compare None with numbers.
+    """
+    _export(con, "ORDER BY marketValue DESC", 'all-hedge-funds.json')
+
+
+if __name__ == '__main__':
+    con = sqlite3.connect('institute.db')
+    try:
+        #best_hedge_funds(con)
+        #worst_hedge_funds(con)
+        all_hedge_funds(con)
+    finally:
+        con.close()
diff --git a/app/main.py b/app/main.py
index 51faf5a..eb14037 100755
--- a/app/main.py
+++ b/app/main.py
@@ -1410,13 +1410,13 @@ async def get_hedge_funds_data(data: GetCIKData):
     cursor = con_inst.cursor()
 
     # Execute a SQL query to select the top 10 best performing cik entries by winRate
-    cursor.execute("SELECT cik, name, numberOfStocks, avgPerformancePercentage, averageHoldingPeriod, turnover, marketValue, winRate, holdings FROM institutes WHERE cik = ?", (cik,))
+    cursor.execute("SELECT cik, name, numberOfStocks, performancePercentage3year, averageHoldingPeriod, turnover, marketValue, winRate, holdings FROM institutes WHERE cik = ?", (cik,))
     cik_data = cursor.fetchall()
     res = [{
         'cik': row[0],
         'name': row[1],
         'numberOfStocks': row[2],
-        'avgPerformancePercentage': row[3],
+        'performancePercentage3year': row[3],
         'averageHoldingPeriod': row[4],
         'turnover': row[5],
         'marketValue': row[6],
diff --git a/app/restart_json.py b/app/restart_json.py
index 146fbe1..09be0f5 100755
--- a/app/restart_json.py
+++ b/app/restart_json.py
@@ -1060,7 +1060,7 @@ async def save_json_files():
 
     cursor = con.cursor()
     cursor.execute("PRAGMA journal_mode = wal")
-    cursor.execute("SELECT DISTINCT symbol FROM stocks WHERE symbol != ?", ('%5EGSPC',))
+    cursor.execute("SELECT DISTINCT symbol FROM stocks")
     symbols = [row[0] for row in cursor.fetchall()]
 
     etf_cursor = etf_con.cursor()