From 36d6695679ea84b5b3ed7de26c85fa12bb0abc0f Mon Sep 17 00:00:00 2001
From: MuslemRahimi
Date: Wed, 8 Jan 2025 15:49:19 +0100
Subject: [PATCH] bugfixing

---
 app/cron_options_gex.py | 433 ----------------------------------------
 app/cron_quote.py       |   2 +-
 app/test.py             | 128 +----------
 3 files changed, 9 insertions(+), 554 deletions(-)
 delete mode 100644 app/cron_options_gex.py

diff --git a/app/cron_options_gex.py b/app/cron_options_gex.py
deleted file mode 100644
index f44274e..0000000
--- a/app/cron_options_gex.py
+++ /dev/null
@@ -1,433 +0,0 @@
-import numpy as np
-from scipy.stats import norm
-from datetime import datetime, date, timedelta
-import pandas as pd
-from benzinga import financial_data
-import ujson
-from collections import defaultdict
-import sqlite3
-import os
-from dotenv import load_dotenv
-import math
-
-# Load API key from environment
-load_dotenv()
-api_key = os.getenv('BENZINGA_API_KEY')
-fin = financial_data.Benzinga(api_key)
-
-# Function to replace NaN with None in a dictionary
-def replace_nan_with_none(obj):
-    for key, value in obj.items():
-        if isinstance(value, float) and math.isnan(value):
-            obj[key] = None
-    return obj
-
-def save_json(symbol, data, file_path, filename=None):
-    cleaned_data = [replace_nan_with_none(item) for item in data]
-    if filename is None:
-        with open(f'{file_path}/{symbol}.json', 'w') as file:
-            ujson.dump(cleaned_data, file)
-    else:
-        with open(f'{file_path}/{filename}.json', 'w') as file:
-            ujson.dump(cleaned_data, file)
-
-
-# Define the keys to keep
-keys_to_keep = {'time', 'sentiment', 'execution_estimate', 'option_activity_type', 'price', 'underlying_price', 'cost_basis', 'strike_price', 'date', 'date_expiration', 'open_interest', 'put_call', 'volume'}
-
-def filter_data(item):
-    # Filter the item to keep only the specified keys and format fields
-    filtered_item = {key: value for key, value in item.items() if key in keys_to_keep}
-    filtered_item['type'] = filtered_item['option_activity_type'].capitalize()
-    filtered_item['sentiment'] = filtered_item['sentiment'].capitalize()
-    filtered_item['underlying_price'] = round(float(filtered_item['underlying_price']), 2)
-    filtered_item['put_call'] = 'Calls' if filtered_item['put_call'] == 'CALL' else 'Puts'
-    filtered_item['execution_estimate'] = filtered_item['execution_estimate'].replace('_', ' ').title()
-    return filtered_item
-
-
-def calculate_volatility(prices_df):
-    prices_df = prices_df.sort_values(by='date')
-    prices_df['return'] = prices_df['close'].pct_change()
-    returns = prices_df['return'].dropna()
-    return returns.std() * np.sqrt(252)
-
-def black_scholes_d1(S, K, T, r, sigma):
-    try:
-        if sigma <= 0 or np.sqrt(T) <= 0:
-            return 0
-        return (np.log(S / K) + (r + 0.5 * sigma**2) * T) / (sigma * np.sqrt(T))
-    except ZeroDivisionError:
-        return 0
-
-def black_scholes_d2(S, K, T, r, sigma):
-    return black_scholes_d1(S, K, T, r, sigma) - sigma * np.sqrt(T)
-
-def delta(S, K, T, r, sigma, option_type='CALL'):
-    d1 = black_scholes_d1(S, K, T, r, sigma)
-    return norm.cdf(d1) if option_type == 'CALL' else norm.cdf(d1) - 1
-
-def gamma(S, K, T, r, sigma):
-    try:
-        d1 = black_scholes_d1(S, K, T, r, sigma)
-        return norm.pdf(d1) / (S * sigma * np.sqrt(T)) if S > 0 and sigma > 0 and np.sqrt(T) > 0 else 0
-    except ZeroDivisionError:
-        return 0
-
-def compute_gex_and_dex(option_data, r=0.05, sigma=0.2):
-    """
-    Compute GEX (Gamma Exposure) and DEX (Delta Exposure) for the given option data.
- """ - timestamp = datetime.strptime(option_data['date'], "%Y-%m-%d") - - try: - S = float(option_data['underlying_price']) - K = float(option_data['strike_price']) - size = float(option_data['open_interest']) - expiration_date = datetime.strptime(option_data['date_expiration'], "%Y-%m-%d") - T = (expiration_date - timestamp).days / 365.0 - if T < 0: - return 0, 0, timestamp.date() # return 0 for both GEX and DEX if T is negative - elif T == 0: - T = 1 # Consider 0DTE options - - option_type = option_data['put_call'] - delta_value = delta(S, K, T, r, sigma, option_type) - gamma_value = gamma(S, K, T, r, sigma) - notional = size * S - - # Calculate GEX (Gamma Exposure) - gex = gamma_value * size * int(option_data['volume']) * S # gamma_value * notional - - # Calculate DEX (Delta Exposure) - dex = delta_value * size * S # delta_value * notional - - return gex, dex, timestamp.date() - except: - return 0, 0, timestamp.date() - -def compute_daily_gex_and_dex(option_data_list, volatility): - gex_dex_data = [] - for option_data in option_data_list: - gex, dex, trade_date = compute_gex_and_dex(option_data, sigma=volatility) - if gex != 0 or dex != 0: - gex_dex_data.append({'date': trade_date, 'gex': gex, 'dex': dex}) - - gex_dex_df = pd.DataFrame(gex_dex_data) - daily_gex_dex = gex_dex_df.groupby('date').agg({'gex': 'sum', 'dex': 'sum'}).reset_index() - daily_gex_dex['gex'] = round(daily_gex_dex['gex'], 0) - daily_gex_dex['dex'] = round(daily_gex_dex['dex'], 0) - daily_gex_dex['date'] = daily_gex_dex['date'].astype(str) - - return daily_gex_dex - -def calculate_otm_percentage(option_data_list): - otm_count = 0 - total_options = len(option_data_list) - - for option_data in option_data_list: - strike_price = float(option_data['strike_price']) - put_call = option_data['put_call'] - stock_price = float(option_data['stock_price']) # Get stock price for this option - - # Check if the option is out-of-the-money - if (put_call == 'CALL' and strike_price > stock_price) or (put_call == 'PUT' and strike_price < stock_price): - otm_count += 1 - - if total_options > 0: - return (otm_count / total_options) * 100 - else: - return 0 - - -def get_historical_option_data(option_data_list, df_price): - summary_data = [] - - for option_data in option_data_list: - try: - date = datetime.strptime(option_data['date'], "%Y-%m-%d").date() - expiration_date = datetime.strptime(option_data['date_expiration'], "%Y-%m-%d").date() - - open_interest = int(option_data.get('open_interest', 0)) - volume = int(option_data.get('volume', 0)) - strike_price = float(option_data.get('strike_price', 0)) - put_call = option_data.get('put_call', 'CALL') - sentiment = option_data.get('sentiment', 'NEUTRAL') - execution_estimate = option_data.get('execution_estimate', 'UNKNOWN') - - # Safely convert premium to float, default to 0 if missing or invalid - try: - premium = float(option_data.get('cost_basis', 0)) - except (TypeError, ValueError): - premium = 0 - - # Determine the stock price based on expiration date - if expiration_date > date.today(): - stock_price = df_price['close'].iloc[-1] # Latest stock price - else: - # Get the stock price on the option's date - stock_price_row = df_price[df_price['date'] == str(date)] - if not stock_price_row.empty: - stock_price = stock_price_row['close'].values[0] - else: - continue # Skip this option if the price isn't available for the date - - # Calculate Bull/Bear/Neutral premiums based on sentiment - if sentiment == 'BULLISH': - bull_premium = premium - bear_premium = 0 - neutral_premium = 0 - 
-            elif sentiment == 'BEARISH':
-                bull_premium = 0
-                bear_premium = premium
-                neutral_premium = 0
-            else:
-                bull_premium = 0
-                bear_premium = 0
-                neutral_premium = premium
-
-            # Categorize volume based on execution_estimate
-            bid_vol = volume if "bid" in execution_estimate.lower() else 0
-            ask_vol = volume if "ask" in execution_estimate.lower() else 0
-            midpoint_vol = volume if "midpoint" in execution_estimate.lower() else 0
-
-
-            # Append option data for later summarization
-            summary_data.append({
-                'date': date,
-                'open_interest': open_interest,
-                'c_vol': volume if put_call == 'CALL' else 0,
-                'p_vol': volume if put_call == 'PUT' else 0,
-                'bull_premium': bull_premium,
-                'bear_premium': bear_premium,
-                'neutral_premium': neutral_premium,
-                'bid_vol': bid_vol,
-                'ask_vol': ask_vol,
-                'midpoint_vol': midpoint_vol,
-                'put_call': put_call,
-                'strike_price': strike_price,
-                'stock_price': stock_price
-            })
-
-        except Exception as e:
-            print(f"Error processing option data: {e}")
-            continue
-
-    # Summarize by date
-    df_summary = pd.DataFrame(summary_data)
-
-    # Apply OTM percentage calculation for each day
-    daily_summary = df_summary.groupby('date').agg(
-        total_oi=('open_interest', 'sum'),
-        total_bull_prem=('bull_premium', 'sum'),
-        total_bear_prem=('bear_premium', 'sum'),
-        total_neutral_prem=('neutral_premium', 'sum'),
-        c_vol=('c_vol', 'sum'),
-        p_vol=('p_vol', 'sum'),
-        bid_vol=('bid_vol', 'sum'),
-        ask_vol=('ask_vol', 'sum'),
-        midpoint_vol=('midpoint_vol', 'sum')
-    ).reset_index()
-
-    # Calculate total volume
-    daily_summary['total_volume'] = daily_summary['c_vol'] + daily_summary['p_vol']
-    # Calculate bid/ask/midpoint ratios
-    # Check if total_volume > 0 before performing the calculations
-    daily_summary['bid_ratio'] = daily_summary.apply(
-        lambda row: round(row['bid_vol'] / row['total_volume'] * 100, 2) if row['total_volume'] > 0 else None, axis=1
-    )
-
-    daily_summary['ask_ratio'] = daily_summary.apply(
-        lambda row: round(row['ask_vol'] / row['total_volume'] * 100, 2) if row['total_volume'] > 0 else None, axis=1
-    )
-
-    daily_summary['midpoint_ratio'] = daily_summary.apply(
-        lambda row: round(row['midpoint_vol'] / row['total_volume'] * 100, 2) if row['total_volume'] > 0 else None, axis=1
-    )
-
-
-    # Calculate OTM percentage for each date and assign it to the daily_summary
-    daily_summary['otm_ratio'] = df_summary.groupby('date').apply(lambda df: round(calculate_otm_percentage(df.to_dict('records')), 1)).values
-
-    # Calculate Bull/Bear/Neutral ratios
-    try:
-        total_prem = daily_summary['total_bull_prem'] + daily_summary['total_bear_prem'] + daily_summary['total_neutral_prem']
-        daily_summary['bull_ratio'] = round(daily_summary['total_bull_prem'] / total_prem * 100, 2)
-        daily_summary['bear_ratio'] = round(daily_summary['total_bear_prem'] / total_prem * 100, 2)
-        daily_summary['neutral_ratio'] = round(daily_summary['total_neutral_prem'] / total_prem * 100, 2)
-    except:
-        daily_summary['bull_ratio'] = None
-        daily_summary['bear_ratio'] = None
-        daily_summary['neutral_ratio'] = None
-
-
-    # Format other fields
-    daily_summary['total_neutral_prem'] = round(daily_summary['total_neutral_prem'], 2)
-    daily_summary['date'] = daily_summary['date'].astype(str)
-    daily_summary = daily_summary.sort_values(by='date', ascending=False)
-    # Return the summarized dataframe
-    return daily_summary
-
-def get_options_chain(option_data_list):
-    # Convert raw data to DataFrame and ensure correct data types
-    df = pd.DataFrame(option_data_list)
-    type_conversions = {
-        'cost_basis': float,
-        'volume': int,
-        'open_interest': int,
-        'strike_price': float,
-        'date_expiration': str  # Ensuring date_expiration is initially a string
-    }
-    for col, dtype in type_conversions.items():
-        df[col] = df[col].astype(dtype)
-
-    # Convert 'date_expiration' to datetime
-    df['date_expiration'] = pd.to_datetime(df['date_expiration'])
-
-    # Filter out rows where 'date_expiration' is in the past
-    current_date = datetime.now()
-    df = df[df['date_expiration'] >= current_date]
-
-    # Calculate total premium during grouping
-    df['total_premium'] = df['cost_basis']
-
-    # Group and aggregate data
-    grouped = df.groupby(['date_expiration', 'strike_price', 'put_call']).agg(
-        total_open_interest=('open_interest', 'sum'),
-        total_volume=('volume', 'sum'),
-        total_premium=('total_premium', 'sum')
-    ).reset_index()
-
-    # Pivot the data for puts and calls
-    pivoted = grouped.pivot_table(
-        index=['date_expiration', 'strike_price'],
-        columns='put_call',
-        values=['total_open_interest', 'total_volume', 'total_premium'],
-        fill_value=0
-    ).reset_index()
-
-    # Flatten column names
-    pivoted.columns = [' '.join(col).strip() for col in pivoted.columns.values]
-
-    # Rename columns for clarity
-    new_column_names = {
-        'total_open_interest CALL': 'total_open_interest_call',
-        'total_open_interest PUT': 'total_open_interest_put',
-        'total_volume CALL': 'total_volume_call',
-        'total_volume PUT': 'total_volume_put',
-        'total_premium CALL': 'total_premium_call',
-        'total_premium PUT': 'total_premium_put'
-    }
-    pivoted = pivoted.rename(columns=new_column_names)
-
-    # Convert 'date_expiration' to string in ISO format
-    pivoted['date_expiration'] = pivoted['date_expiration'].dt.strftime('%Y-%m-%dT%H:%M:%S')
-
-    # Ensure we capture all relevant columns
-    columns_to_keep = ['strike_price'] + [col for col in pivoted.columns if col not in ['strike_price', 'date_expiration']]
-
-    # Construct the options chain
-    option_chain = pivoted.groupby('date_expiration').apply(
-        lambda x: x[columns_to_keep].to_dict(orient='records')
-    ).reset_index(name='chain')
-
-    return option_chain
-
-def get_data(ticker):
-    res_list = []
-
-    for page in range(0, 5000):
-        try:
-            data = fin.options_activity(date_from=start_date_str, date_to=end_date_str, company_tickers=ticker, page=page, pagesize=1000)
-            data = ujson.loads(fin.output(data))['option_activity']
-            filtered_data = [{key: value for key, value in item.items() if key not in ['description_extended', 'updated']} for item in data]
-            res_list += filtered_data
-        except:
-            break
-    return res_list
-
-
-# Define date range
-end_date = date.today()
-start_date = end_date - timedelta(180)
-end_date_str = end_date.strftime('%Y-%m-%d')
-start_date_str = start_date.strftime('%Y-%m-%d')
-
-# Connect to SQLite database
-stock_con = sqlite3.connect('stocks.db')
-etf_con = sqlite3.connect('etf.db')
-
-stock_cursor = stock_con.cursor()
-stock_cursor.execute("PRAGMA journal_mode = wal")
-stock_cursor.execute("SELECT DISTINCT symbol FROM stocks WHERE symbol NOT LIKE '%.%' AND marketCap >= 500E6")
-stock_symbols = [row[0] for row in stock_cursor.fetchall()]
-
-etf_cursor = etf_con.cursor()
-etf_cursor.execute("PRAGMA journal_mode = wal")
-etf_cursor.execute("SELECT DISTINCT symbol FROM etfs")
-etf_symbols = [row[0] for row in etf_cursor.fetchall()]
-
-total_symbols = stock_symbols + etf_symbols
-
-query_template = """
-    SELECT date, close, change_percent
-    FROM "{ticker}"
-    WHERE date BETWEEN ? AND ?
-""" - -# Process each symbol -for ticker in total_symbols: - try: - query = query_template.format(ticker=ticker) - df_price = pd.read_sql_query(query, stock_con if ticker in stock_symbols else etf_con, params=(start_date_str, end_date_str)).round(2) - df_price = df_price.rename(columns={"change_percent": "changesPercentage"}) - - volatility = calculate_volatility(df_price) - - ticker_data = get_data(ticker) - # Group ticker_data by 'date' and collect all items for each date - grouped_history = defaultdict(list) - for item in ticker_data: - try: - filtered_item = filter_data(item) - grouped_history[filtered_item['date']].append(filtered_item) - # Save each date's transactions separately - except: - pass - #save all single transaction from the daily date separately for faster performance of the end user. File would be too big. - for date, data in grouped_history.items(): - try: - # Create a filename based on ticker and date, e.g., "AAPL_2024-09-07.json" - filename = f"{ticker}-{date}" - - # Save the JSON to the specified folder for historical data - save_json(ticker, data, 'json/options-historical-data/history', filename) - except: - pass - - daily_historical_option_data = get_historical_option_data(ticker_data, df_price) - daily_historical_option_data = daily_historical_option_data.merge(df_price[['date', 'changesPercentage']], on='date', how='inner') - # Add "history" column containing all filtered items with the same date - #daily_historical_option_data['history'] = daily_historical_option_data['date'].apply(lambda x: grouped_history.get(x, [])) - - if not daily_historical_option_data.empty: - save_json(ticker, daily_historical_option_data.to_dict('records'), 'json/options-historical-data/companies') - - - option_chain_data = get_options_chain(ticker_data) - if not option_chain_data.empty: - save_json(ticker, option_chain_data.to_dict('records'), 'json/options-chain/companies') - - - daily_gex = compute_daily_gex_and_dex(ticker_data, volatility) - daily_gex = daily_gex.merge(df_price[['date', 'close']], on='date', how='inner') - if not daily_gex.empty: - save_json(ticker, daily_gex.to_dict('records'), 'json/options-gex/companies') - - except Exception as e: - print(e) - pass - -# Close the database connection -stock_con.close() -etf_con.close() \ No newline at end of file diff --git a/app/cron_quote.py b/app/cron_quote.py index d3c8dfa..59419e9 100755 --- a/app/cron_quote.py +++ b/app/cron_quote.py @@ -27,7 +27,7 @@ def delete_files_in_directory(directory): async def get_quote_of_stocks(ticker_list): ticker_str = ','.join(ticker_list) async with aiohttp.ClientSession() as session: - url = f"https://financialmodelingprep.com/stable/batch-quote?symbols={ticker_str}&apikey={api_key}" + url = f"https://financialmodelingprep.com/api/v3/quote/{ticker_str}?apikey={api_key}" async with session.get(url) as response: if response.status == 200: return await response.json() diff --git a/app/test.py b/app/test.py index 75293eb..e30b212 100644 --- a/app/test.py +++ b/app/test.py @@ -1,120 +1,8 @@ -import requests -import orjson -import re -from datetime import datetime -from dotenv import load_dotenv -import os -import sqlite3 -import time -from tqdm import tqdm - -load_dotenv() - -api_key = os.getenv('UNUSUAL_WHALES_API_KEY') - -# Connect to the databases -con = sqlite3.connect('stocks.db') -etf_con = sqlite3.connect('etf.db') -cursor = con.cursor() -cursor.execute("PRAGMA journal_mode = wal") -#cursor.execute("SELECT DISTINCT symbol FROM stocks WHERE symbol NOT LIKE '%.%' AND marketCap > 1E9") 
diff --git a/app/cron_quote.py b/app/cron_quote.py
index d3c8dfa..59419e9 100755
--- a/app/cron_quote.py
+++ b/app/cron_quote.py
@@ -27,7 +27,7 @@ def delete_files_in_directory(directory):
 async def get_quote_of_stocks(ticker_list):
     ticker_str = ','.join(ticker_list)
     async with aiohttp.ClientSession() as session:
-        url = f"https://financialmodelingprep.com/stable/batch-quote?symbols={ticker_str}&apikey={api_key}"
+        url = f"https://financialmodelingprep.com/api/v3/quote/{ticker_str}?apikey={api_key}"
         async with session.get(url) as response:
             if response.status == 200:
                 return await response.json()
diff --git a/app/test.py b/app/test.py
index 75293eb..e30b212 100644
--- a/app/test.py
+++ b/app/test.py
@@ -1,120 +1,8 @@
-import requests
-import orjson
-import re
-from datetime import datetime
-from dotenv import load_dotenv
-import os
-import sqlite3
-import time
-from tqdm import tqdm
-
-load_dotenv()
-
-api_key = os.getenv('UNUSUAL_WHALES_API_KEY')
-
-# Connect to the databases
-con = sqlite3.connect('stocks.db')
-etf_con = sqlite3.connect('etf.db')
-cursor = con.cursor()
-cursor.execute("PRAGMA journal_mode = wal")
-#cursor.execute("SELECT DISTINCT symbol FROM stocks WHERE symbol NOT LIKE '%.%' AND marketCap > 1E9")
-cursor.execute("SELECT DISTINCT symbol FROM stocks WHERE symbol NOT LIKE '%.%'")
-stocks_symbols = [row[0] for row in cursor.fetchall()]
-
-etf_cursor = etf_con.cursor()
-etf_cursor.execute("PRAGMA journal_mode = wal")
-#etf_cursor.execute("SELECT DISTINCT symbol FROM etfs WHERE marketCap > 1E9")
-etf_cursor.execute("SELECT DISTINCT symbol FROM etfs")
-etf_symbols = [row[0] for row in etf_cursor.fetchall()]
-
-con.close()
-etf_con.close()
-
-# Combine the lists of stock and ETF symbols
-total_symbols = stocks_symbols + etf_symbols
-
-print(len(total_symbols))
-
-def save_json(data, symbol, directory="json/hottest-contracts/companies"):
-    os.makedirs(directory, exist_ok=True)  # Ensure the directory exists
-    with open(f"{directory}/{symbol}.json", 'wb') as file:  # Use binary mode for orjson
-        file.write(orjson.dumps(data))
-
-
-def parse_option_symbol(option_symbol):
-    # Define regex pattern to match the symbol structure
-    match = re.match(r"([A-Z]+)(\d{6})([CP])(\d+)", option_symbol)
-    if not match:
-        raise ValueError(f"Invalid option_symbol format: {option_symbol}")
-
-    ticker, expiration, option_type, strike_price = match.groups()
-
-    # Convert expiration to datetime
-    date_expiration = datetime.strptime(expiration, "%y%m%d").date()
-
-    # Convert strike price to float
-    strike_price = int(strike_price) / 1000
-
-    return date_expiration, option_type, strike_price
-
-def safe_round(value, decimals=2):
-    try:
-        return round(float(value), decimals)
-    except (ValueError, TypeError):
-        return value
-
-
-def prepare_data(data, symbol):
-
-    res_list = []
-    for item in data:
-        if float(item['volume']) > 0:
-            # Parse option_symbol
-            date_expiration, option_type, strike_price = parse_option_symbol(item['option_symbol'])
-
-            # Round numerical and numerical-string values
-            new_item = {
-                key: safe_round(value) if isinstance(value, (int, float, str)) else value
-                for key, value in item.items()
-            }
-
-            # Add parsed fields
-            new_item['date_expiration'] = date_expiration
-            new_item['option_type'] = option_type
-            new_item['strike_price'] = strike_price
-
-            # Calculate open_interest_change
-            new_item['open_interest_change'] = safe_round(
-                new_item.get('open_interest', 0) - new_item.get('prev_oi', 0)
-            )
-
-            res_list.append(new_item)
-
-    if res_list:
-        save_json(res_list, symbol, "json/hottest-contracts/companies")
-
-
-counter = 0
-for symbol in tqdm(total_symbols):
-    try:
-
-        url = f"https://api.unusualwhales.com/api/stock/{symbol}/option-contracts"
-
-        headers = {
-            "Accept": "application/json, text/plain",
-            "Authorization": api_key
-        }
-
-        response = requests.get(url, headers=headers)
-        if response.status_code == 200:
-            data = response.json()['data']
-            prepare_data(data, symbol)
-        counter += 1
-        # After every 100 symbols, pause to stay under the rate limit
-        if counter == 100:
-            print("Sleeping...")
-            time.sleep(30)  # Sleep for 30 seconds
-            counter = 0
-
-    except Exception as e:
-        print(f"Error for {symbol}: {e}")
+import plotly.express as px
+fig = px.treemap(
+    names=["Eve", "Cain", "Seth", "Enos", "Noam", "Abel", "Awan", "Enoch", "Azura"],
+    parents=["", "Eve", "Eve", "Seth", "Seth", "Eve", "Eve", "Awan", "Eve"]
+)
+fig.update_traces(root_color="lightgrey")
+fig.update_layout(margin=dict(t=50, l=25, r=25, b=25))
+fig.show()
\ No newline at end of file
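
The cron_quote.py hunk reverts get_quote_of_stocks from the stable/batch-quote endpoint back to FMP's v3 quote endpoint, which takes comma-separated symbols in the URL path rather than a symbols query parameter. A minimal sketch of the reverted call shape, assuming aiohttp and an API key exposed through a hypothetical FMP_API_KEY environment variable:

import asyncio
import os

import aiohttp

async def get_quote_of_stocks(ticker_list):
    ticker_str = ','.join(ticker_list)
    # v3 quote endpoint: comma-separated symbols in the path, API key as a query param
    url = f"https://financialmodelingprep.com/api/v3/quote/{ticker_str}?apikey={os.getenv('FMP_API_KEY')}"
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as response:
            return await response.json() if response.status == 200 else []

# Example: asyncio.run(get_quote_of_stocks(["AAPL", "MSFT"]))
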
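
The removed test.py scraper also carried the OCC-style contract parser that splits a symbol into expiry, side, and strike. A self-contained sketch of the same parse_option_symbol logic, with a usage example (the symbol shown is hypothetical):

import re
from datetime import datetime

def parse_option_symbol(option_symbol):
    # OCC-style layout: TICKER + YYMMDD expiry + C/P + strike price * 1000
    match = re.match(r"([A-Z]+)(\d{6})([CP])(\d+)", option_symbol)
    if not match:
        raise ValueError(f"Invalid option_symbol format: {option_symbol}")
    ticker, expiration, option_type, strike_price = match.groups()
    date_expiration = datetime.strptime(expiration, "%y%m%d").date()
    return date_expiration, option_type, int(strike_price) / 1000

print(parse_option_symbol("AAPL250117C00190000"))
# -> (datetime.date(2025, 1, 17), 'C', 190.0)
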