Skip to content

Commit

Permalink
update hedge fund db
Browse files Browse the repository at this point in the history
  • Loading branch information
MuslemRahimi committed May 28, 2024
1 parent 2a160b0 commit 0f44a75
Show file tree
Hide file tree
Showing 4 changed files with 146 additions and 18 deletions.
53 changes: 38 additions & 15 deletions app/create_institute_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,29 @@
# Filter out the specific RuntimeWarning
warnings.filterwarnings("ignore", category=RuntimeWarning, message="invalid value encountered in scalar divide")

conn = sqlite3.connect('stocks.db')
cursor = conn.cursor()
con = sqlite3.connect('stocks.db')
etf_con = sqlite3.connect('etf.db')
crypto_con = sqlite3.connect('crypto.db')

# Execute the SQL query
cursor.execute("SELECT symbol FROM stocks")
cursor = con.cursor()
cursor.execute("PRAGMA journal_mode = wal")
cursor.execute("SELECT DISTINCT symbol FROM stocks")
stock_symbols = [row[0] for row in cursor.fetchall()]

# Fetch all the results into a list
symbol_list = [row[0] for row in cursor.fetchall()]
conn.close()
etf_cursor = etf_con.cursor()
etf_cursor.execute("PRAGMA journal_mode = wal")
etf_cursor.execute("SELECT DISTINCT symbol FROM etfs")
etf_symbols = [row[0] for row in etf_cursor.fetchall()]

crypto_cursor = crypto_con.cursor()
crypto_cursor.execute("PRAGMA journal_mode = wal")
crypto_cursor.execute("SELECT DISTINCT symbol FROM cryptos")
crypto_symbols = [row[0] for row in crypto_cursor.fetchall()]

total_symbols = stock_symbols + etf_symbols + crypto_symbols
con.close()
etf_con.close()
crypto_con.close()


load_dotenv()
Expand Down Expand Up @@ -121,33 +135,42 @@ async def save_portfolio_data(self, session, cik):
if isinstance(parsed_data, list) and "https://financialmodelingprep.com/api/v4/institutional-ownership/portfolio-holdings?cik=" in url:
# Handle list response, save as JSON object

parsed_data = [item for item in parsed_data if 'symbol' in item and item['symbol'] is not None and item['symbol'] in symbol_list] #symbol must be included in the database
parsed_data = [
{**item, 'type': ('stocks' if item['symbol'] in stock_symbols else
'crypto' if item['symbol'] in crypto_symbols else
'etf' if item['symbol'] in etf_symbols else None)}
for item in parsed_data
if 'symbol' in item and item['symbol'] is not None and item['symbol'] in total_symbols
]

portfolio_data['holdings'] = json.dumps(parsed_data)


number_of_stocks = len(parsed_data)
total_market_value = sum(item['marketValue'] for item in parsed_data)
avg_performance_percentage = sum(item['performancePercentage'] for item in parsed_data) / len(parsed_data)

#total_market_value = sum(item['marketValue'] for item in parsed_data)
#avg_performance_percentage = sum(item['performancePercentage'] for item in parsed_data) / len(parsed_data)
performance_percentages = [item.get("performancePercentage", 0) for item in parsed_data]


positive_performance_count = sum(1 for percentage in performance_percentages if percentage > 0)
win_rate = round(positive_performance_count / len(performance_percentages) * 100,2)
data_dict = {
'winRate': win_rate,
'numberOfStocks': number_of_stocks,
'marketValue': total_market_value,
'avgPerformancePercentage': avg_performance_percentage,
#'marketValue': total_market_value,
}

portfolio_data.update(data_dict)

elif isinstance(parsed_data, list) and "https://financialmodelingprep.com/api/v4/institutional-ownership/portfolio-holdings-summary" in url:
# Handle list response, save as JSON object
portfolio_data['summary'] = json.dumps(parsed_data)
data_dict = {
#'numberOfStocks': parsed_data[0]['portfolioSize'],
#'marketValue': parsed_data[0]['marketValue'],
'marketValue': parsed_data[0]['marketValue'],
'averageHoldingPeriod': parsed_data[0]['averageHoldingPeriod'],
'turnover': parsed_data[0]['turnover'],
'performancePercentage3year': parsed_data[0]['performancePercentage3year'],
#'performancePercentage': parsed_data[0]['performancePercentage']
}
portfolio_data.update(data_dict)
Expand All @@ -164,7 +187,7 @@ async def save_portfolio_data(self, session, cik):
holdings_list = json.loads(portfolio_data['holdings'])

symbols_to_check = {holding['symbol'] for holding in holdings_list[:3]} # Extract the symbols of the first three holdings
symbols_not_in_list = not any(symbol in symbol_list for symbol in symbols_to_check)
symbols_not_in_list = not any(symbol in total_symbols for symbol in symbols_to_check)


if symbols_not_in_list or 'industry' not in portfolio_data or len(json.loads(portfolio_data['industry'])) == 0:
Expand Down
105 changes: 105 additions & 0 deletions app/cron_hedge_funds.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
import sqlite3
import os
import json


# Directory that the export functions in this module write their JSON files
# into. It is a relative path, so it is resolved against the current working
# directory of the process running this script.
frontend_json_url = "../../frontend/src/lib/hedge-funds"

def format_company_name(company_name):
    """Return a display-friendly version of an institute's registered name.

    The name is split on whitespace and each token is processed as follows:

    * commas, periods and the ``/DE/`` / ``/MD/`` markers are stripped from
      the token;
    * tokens listed in ``preserve_words`` are kept verbatim (upper case);
    * tokens that are nothing but a legal-entity suffix (``LLC``, ``LP``,
      ``LTD``, ``INC``, ``PLC``) are dropped, as are tokens left empty after
      stripping;
    * every remaining token is title-cased.

    Matching is case-sensitive, as before.

    Args:
        company_name: Raw name string. ``None`` or an empty string yields ``''``.

    Returns:
        The cleaned tokens joined by single spaces, with no leading or
        trailing whitespace.
    """
    if not company_name:
        return ''

    # Stripped wherever they occur inside a token. A tuple (not a set) keeps
    # the processing order, and therefore the result, deterministic.
    strip_markers = ('/DE/', '/MD/', ',', '.')
    # Dropped only when they make up the WHOLE token, so ordinary words that
    # merely contain these letters ("ALPINE", "PRINCIPAL") are left intact.
    legal_suffixes = {'LLC', 'LP', 'LTD', 'INC', 'PLC'}
    preserve_words = {'FMR', 'MCF'}

    formatted_words = []
    for word in company_name.split():
        for marker in strip_markers:
            word = word.replace(marker, '')

        if word in preserve_words:
            formatted_words.append(word)
        elif word and word not in legal_suffixes:
            formatted_words.append(word.title())
        # else: an empty token or a bare legal suffix -> omit it so that no
        # stray double/trailing spaces end up in the result.

    return ' '.join(formatted_words)


def best_hedge_funds(con):
    """Export the institutes with the highest win rate to a JSON file.

    Queries the ``institutes`` table through ``con`` for rows with
    ``marketValue > 200000000`` and ``numberOfStocks > 15``, ordered by
    ``winRate`` descending and capped at 50 rows, then writes them as a JSON
    list to ``best-hedge-funds.json`` under ``frontend_json_url``.

    Args:
        con: An open database connection providing ``cursor()``.
    """
    field_names = (
        'cik',
        'name',
        'numberOfStocks',
        'marketValue',
        'winRate',
        'turnover',
        'performancePercentage3year',
    )

    db_cursor = con.cursor()
    db_cursor.execute("SELECT cik, name, numberOfStocks, marketValue, winRate, turnover, performancePercentage3year FROM institutes WHERE marketValue > 200000000 AND numberOfStocks > 15 ORDER BY winRate DESC LIMIT 50")

    funds = []
    for record in db_cursor.fetchall():
        # Column order of the SELECT matches field_names one-to-one.
        fund = dict(zip(field_names, record))
        fund['name'] = format_company_name(fund['name'])
        funds.append(fund)

    with open(f"{frontend_json_url}/best-hedge-funds.json", 'w') as out_file:
        json.dump(funds, out_file)


def worst_hedge_funds(con):
    """Export the institutes with the lowest positive win rate to a JSON file.

    Queries the ``institutes`` table through ``con`` for rows with
    ``marketValue > 200000000``, ``numberOfStocks > 15`` and ``winRate > 0``,
    ordered by ``winRate`` ascending and capped at 50 rows, then writes them
    as a JSON list to ``worst-hedge-funds.json`` under ``frontend_json_url``.

    Args:
        con: An open database connection providing ``cursor()``.
    """
    field_names = (
        'cik',
        'name',
        'numberOfStocks',
        'marketValue',
        'winRate',
        'turnover',
        'performancePercentage3year',
    )

    db_cursor = con.cursor()
    db_cursor.execute("SELECT cik, name, numberOfStocks, marketValue, winRate, turnover, performancePercentage3year FROM institutes WHERE marketValue > 200000000 AND numberOfStocks > 15 AND winRate > 0 ORDER BY winRate ASC LIMIT 50")

    funds = []
    for record in db_cursor.fetchall():
        # Column order of the SELECT matches field_names one-to-one.
        fund = dict(zip(field_names, record))
        fund['name'] = format_company_name(fund['name'])
        funds.append(fund)

    with open(f"{frontend_json_url}/worst-hedge-funds.json", 'w') as out_file:
        json.dump(funds, out_file)


def all_hedge_funds(con):
    """Export every institute, largest market value first, to a JSON file.

    Reads all rows of the ``institutes`` table through ``con``, formats each
    name with ``format_company_name``, sorts the result by ``marketValue`` in
    descending order and writes it as a JSON list to ``all-hedge-funds.json``
    under ``frontend_json_url``.

    Args:
        con: An open database connection providing ``cursor()``.
    """
    cursor = con.cursor()

    cursor.execute("SELECT cik, name, numberOfStocks, marketValue, winRate, turnover, performancePercentage3year FROM institutes")
    all_ciks = cursor.fetchall()

    res_list = [{
        'cik': row[0],
        'name': format_company_name(row[1]),
        'numberOfStocks': row[2],
        'marketValue': row[3],
        'winRate': row[4],
        'turnover': row[5],
        'performancePercentage3year': row[6]
    } for row in all_ciks]

    # The query is unfiltered, so marketValue may be NULL (None in Python).
    # Comparing None with numbers raises TypeError, so rank missing values as
    # 0; the stored value itself is left untouched in the output.
    sorted_res_list = sorted(
        res_list,
        key=lambda x: x['marketValue'] or 0,
        reverse=True,
    )

    with open(f"{frontend_json_url}/all-hedge-funds.json", 'w') as file:
        json.dump(sorted_res_list, file)



if __name__ == '__main__':
    # Script entry point: regenerate the hedge-fund JSON export(s) from
    # institute.db (path is relative to the current working directory).
    con = sqlite3.connect('institute.db')
    try:
        # The best/worst exports are currently switched off; only the full
        # list is regenerated.
        #best_hedge_funds(con)
        #worst_hedge_funds(con)
        all_hedge_funds(con)
    finally:
        # Always release the database handle, even if an export fails.
        con.close()
4 changes: 2 additions & 2 deletions app/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -1410,13 +1410,13 @@ async def get_hedge_funds_data(data: GetCIKData):
cursor = con_inst.cursor()

# Execute a SQL query to fetch the stored metrics and holdings of the requested cik
cursor.execute("SELECT cik, name, numberOfStocks, avgPerformancePercentage, averageHoldingPeriod, turnover, marketValue, winRate, holdings FROM institutes WHERE cik = ?", (cik,))
cursor.execute("SELECT cik, name, numberOfStocks, performancePercentage3year, averageHoldingPeriod, turnover, marketValue, winRate, holdings FROM institutes WHERE cik = ?", (cik,))
cik_data = cursor.fetchall()
res = [{
'cik': row[0],
'name': row[1],
'numberOfStocks': row[2],
'avgPerformancePercentage': row[3],
'performancePercentage3year': row[3],
'averageHoldingPeriod': row[4],
'turnover': row[5],
'marketValue': row[6],
Expand Down
2 changes: 1 addition & 1 deletion app/restart_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -1060,7 +1060,7 @@ async def save_json_files():

cursor = con.cursor()
cursor.execute("PRAGMA journal_mode = wal")
cursor.execute("SELECT DISTINCT symbol FROM stocks WHERE symbol != ?", ('%5EGSPC',))
cursor.execute("SELECT DISTINCT symbol FROM stocks")
symbols = [row[0] for row in cursor.fetchall()]

etf_cursor = etf_con.cursor()
Expand Down

0 comments on commit 0f44a75

Please sign in to comment.