Commit: update profile

MuslemRahimi committed Dec 13, 2024
1 parent 810b28b commit 407592f
Showing 3 changed files with 78 additions and 61 deletions.
app/cron_profile.py: 101 changes (45 additions & 56 deletions)
@@ -15,6 +15,15 @@
load_dotenv()
api_key = os.getenv('FMP_API_KEY')

query_template = """
SELECT
profile
FROM
stocks
WHERE
symbol = ?
"""

MONTH_MAP = {
'01': 'January', '02': 'February', '03': 'March', '04': 'April',
'05': 'May', '06': 'June', '07': 'July', '08': 'August',
@@ -34,46 +43,11 @@
'VA': 'Virginia', 'WA': 'Washington', 'WV': 'West Virginia', 'WI': 'Wisconsin', 'WY': 'Wyoming'
}

def extract_phone_and_state(business_address):
"""Extracts phone number and state from the business address string."""
# Regular expression to match phone numbers, including those with parentheses
phone_match = re.search(r'\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}', business_address)
phone = phone_match.group(0) if phone_match else ''

# Remove the phone number and extract the state and zip code
address_without_phone = re.sub(r'\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}', '', business_address).strip(', ')
parts = address_without_phone.split(',')
state_zip = parts[-1].strip() if len(parts) > 1 else ''

# Replace state abbreviation with full state name
state_zip_parts = state_zip.split()
if state_zip_parts:
city = state_zip_parts[0]
state_abbr = state_zip_parts[1]
zip_code = state_zip_parts[2] if len(state_zip_parts) > 2 else ''

# Capitalize the city properly (if needed)
city = city.title()

# Map state abbreviation to full state name
full_state_name = STATE_MAP.get(state_abbr, state_abbr)

# Format the final state string
state_formatted = f"{city} {full_state_name} {zip_code}".strip()
else:
state_formatted = state_zip

return phone, state_formatted


def format_address(address):
"""Formats the address string to proper capitalization."""
if not address:
return ''

# Replace multiple commas with a single comma and split by comma
parts = [part.strip().title() for part in address.replace(',,', ',').split(',')]
return ', '.join(parts)
async def save_json(symbol, data):
with open(f"json/profile/{symbol}.json", 'w') as file:
file.write(orjson.dumps(data).decode('utf-8'))

def custom_sort(entry):
title_lower = entry['position'].lower()
@@ -181,21 +155,34 @@ async def fetch_company_core_information(session, symbol):

company_info['fiscalYearRange'] = f"{month_name_start} - {month_name_end}"

# Format the mailing address
if 'mailingAddress' in company_info:
company_info['mailingAddress'] = format_address(company_info['mailingAddress'])

# Extract phone number and state from businessAddress
business_address = company_info.get('businessAddress')
if business_address:
phone, state = extract_phone_and_state(business_address)
company_info['phone'] = phone
company_info['state'] = state
keys_to_remove = ['businessAddress', 'mailingAddress','sicDescription', 'registrantName', 'stateOfIncorporation', 'fiscalYearEnd']

# Creating a new dictionary without the unwanted keys
company_info = {key: value for key, value in company_info.items() if key not in keys_to_remove}

return company_info

async def get_data(session, symbol):
async def get_data(session, symbol, con):
try:
df = pd.read_sql_query(query_template, con, params=(symbol,))
if df.empty:
return

data= df.to_dict(orient='records')[0]
company_profile = orjson.loads(data['profile'])[0]
if company_profile['state'] in STATE_MAP:
company_profile['state'] = STATE_MAP[company_profile['state']]



company_profile['ceo'] = company_profile['ceo'].replace("Ms.","").replace("Mr.","").replace("Mrs.","").replace("Ms","").replace("Mr","").strip()

keys_to_keep = ['currency', 'country', 'description', 'isin', 'cusip', 'sector','industry', 'ceo','website','fullTimeEmployees','address','city','state','ipoDate']

# Keep only the selected profile keys
company_profile = {key: value for key, value in company_profile.items() if key in keys_to_keep}

# Fetch SEC filings
filings = await fetch_sec_filings(session, symbol)

@@ -207,7 +194,11 @@ async def get_data(session, symbol):

#print(filings)
#print(executives)
print(core_info)
res = {**company_profile,**core_info, 'executives': executives, 'filings': filings}

if len(res) > 0:
await save_json(symbol, res)

except Exception as e:
print(f"Error processing {symbol}: {e}")

@@ -216,20 +207,17 @@ async def run():
con = sqlite3.connect('stocks.db')
cursor = con.cursor()
cursor.execute("PRAGMA journal_mode = wal")
cursor.execute("SELECT DISTINCT symbol FROM stocks")
cursor.execute("SELECT DISTINCT symbol FROM stocks WHERE symbol NOT LIKE '%.%'")
symbols = [row[0] for row in cursor.fetchall()]

# For testing, limit to AAPL
symbols = ['AAPL']
con.close()


async with aiohttp.ClientSession() as session:
tasks = []
for i, symbol in enumerate(tqdm(symbols), 1):
tasks.append(get_data(session, symbol))
tasks.append(get_data(session, symbol, con))

# Batch processing and rate limiting
if i % 300 == 0:
if i % 100 == 0:
await asyncio.gather(*tasks)
tasks = []
print(f'Processed {i} symbols, sleeping...')
@@ -239,6 +227,7 @@ async def run():
if tasks:
await asyncio.gather(*tasks)

con.close()

def main():
"""
app/main.py: 36 changes (32 additions & 4 deletions)
@@ -2076,7 +2076,6 @@ async def get_congress_rss_feed(api_key: str = Security(get_api_key)):
@app.post("/historical-sector-price")
async def historical_sector_price(data:FilterStockList, api_key: str = Security(get_api_key)):
data = data.dict()
print(data)
sector = data['filterList']
cache_key = f"history-price-sector-{sector}"
cached_result = redis_client.get(cache_key)
@@ -2647,7 +2646,7 @@ async def get_raw_options_flow_ticker(data:OptionsFlowData, request: Request, ap
pagesize = data.pagesize
page = data.page
cache_key = f"raw-options-flow-{ticker}-{start_date}-{end_date}-{pagesize}-{page}"
print(ticker, start_date, end_date, pagesize, page)
#print(ticker, start_date, end_date, pagesize, page)
cached_result = redis_client.get(cache_key)
if cached_result:
return StreamingResponse(
@@ -2828,7 +2827,6 @@ async def get_options_chain(data:TransactionId, api_key: str = Security(get_api_
@app.post("/options-historical-flow")
async def get_options_chain(data:HistoricalDate, api_key: str = Security(get_api_key)):
selected_date = data.date
print(selected_date)
cache_key = f"options-historical-flow-{selected_date}"
cached_result = redis_client.get(cache_key)
if cached_result:
@@ -4056,7 +4054,6 @@ async def get_statistics(data: FilterStockList, api_key: str = Security(get_api_
except:
res = []
data = orjson.dumps(res)
print(res)
compressed_data = gzip.compress(data)

redis_client.set(cache_key, compressed_data)
@@ -4100,6 +4097,37 @@ async def get_statistics(data: ParamsData, api_key: str = Security(get_api_key))
)


@app.post("/profile")
async def get_statistics(data: TickerData, api_key: str = Security(get_api_key)):
ticker = data.ticker.upper()
cache_key = f"profile-{ticker}"
cached_result = redis_client.get(cache_key)
if cached_result:
return StreamingResponse(
io.BytesIO(cached_result),
media_type="application/json",
headers={"Content-Encoding": "gzip"}
)

try:
with open(f"json/profile/{ticker}.json", 'rb') as file:
res = orjson.loads(file.read())
except:
res = {}

data = orjson.dumps(res)
compressed_data = gzip.compress(data)

redis_client.set(cache_key, compressed_data)
redis_client.expire(cache_key,3600*3600)

return StreamingResponse(
io.BytesIO(compressed_data),
media_type="application/json",
headers={"Content-Encoding": "gzip"}
)
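
A minimal client-side sketch for this new /profile route, assuming a TickerData-style JSON body and an API key sent as a request header (the base URL and header name below are assumptions, not taken from this diff):

import requests

resp = requests.post(
    "http://localhost:8000/profile",        # base URL is an assumption
    json={"ticker": "AAPL"},                # TickerData payload
    headers={"X-API-KEY": "your-api-key"},  # header name is an assumption
)
resp.raise_for_status()
profile = resp.json()  # requests transparently decodes the gzip-encoded response body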


@app.get("/newsletter")
async def get_newsletter():
try:
fastify/app.js: 2 changes (1 addition & 1 deletion)
@@ -212,7 +212,7 @@ fastify.register(async function (fastify) {
sendData();

// Start sending data periodically
sendInterval = setInterval(sendData, 5000);
sendInterval = setInterval(sendData, 1000);

// Handle client disconnect
connection.socket.on("close", () => {
