From 3754755ae1697224893a2e27ba071ec361f0ee08 Mon Sep 17 00:00:00 2001
From: MuslemRahimi
Date: Fri, 5 Jul 2024 21:03:03 +0200
Subject: [PATCH] add cron job government contract

---
 app/cron_government_contract.py | 125 ++++++++++++++++++++++++++++++++
 app/primary_cron_job.py         |  11 +++
 2 files changed, 136 insertions(+)
 create mode 100644 app/cron_government_contract.py

diff --git a/app/cron_government_contract.py b/app/cron_government_contract.py
new file mode 100644
index 0000000..69547ea
--- /dev/null
+++ b/app/cron_government_contract.py
@@ -0,0 +1,125 @@
+import os
+import requests
+import pandas as pd
+import time
+import ujson
+from datetime import datetime
+from tqdm import tqdm
+from collections import defaultdict
+
+
+start_date = '2015-01-01'
+end_date = datetime.today().strftime("%Y-%m-%d")
+
+# API endpoint for spending by award
+url = "https://api.usaspending.gov/api/v2/search/spending_by_award/"
+
+# Headers
+headers = {
+    "Content-Type": "application/json",
+}
+
+# Directory (relative to the working directory) that receives one JSON file per symbol
+OUTPUT_DIR = "json/government-contract"
+# Upper bound (exclusive) on the page number requested per company
+MAX_PAGES = 2000
+# Seconds to wait for the API before giving up on a request
+REQUEST_TIMEOUT = 30
+
+
+def save_json(symbol, data):
+    """Write `data` as JSON to OUTPUT_DIR/<symbol>.json, creating the directory if needed."""
+    os.makedirs(OUTPUT_DIR, exist_ok=True)
+    with open(f"{OUTPUT_DIR}/{symbol}.json", 'w') as file:
+        ujson.dump(data, file)
+
+
+def remove_duplicates(data, key):
+    """Return the items of `data` in order, keeping only the first item seen for each `item[key]`."""
+    seen = set()
+    new_data = []
+    for item in data:
+        if item[key] not in seen:
+            seen.add(item[key])
+            new_data.append(item)
+    return new_data
+
+
+def sum_contract(symbol, data):
+    """Aggregate contract amounts and counts per year and save the result for `symbol`.
+
+    Each entry needs a 'date' whose first four characters are the year; entries
+    whose 'amount' is None are skipped.
+    """
+    aggregated_data = {}
+    for entry in data:
+        expenses = entry.get('amount')
+        if expenses is None:
+            continue
+        year = entry['date'][:4]
+        totals = aggregated_data.setdefault(year, {
+            'year': year,
+            'amount': 0,
+            'numOfContracts': 0,
+        })
+        # Amounts are truncated to whole units before summing
+        totals['amount'] += int(expenses)
+        totals['numOfContracts'] += 1
+
+    save_json(symbol, list(aggregated_data.values()))
+
+
+def get_data(symbol, name):
+    """Fetch every contract award matching `name` and save yearly totals for `symbol`.
+
+    Pages are requested until the API returns an empty page, a request fails,
+    or MAX_PAGES is reached. Whatever was collected up to that point is
+    de-duplicated by award id and aggregated; nothing is written when it is empty.
+    """
+    res = []
+    for page in tqdm(range(1, MAX_PAGES)):
+        payload = {
+            "filters": {
+                "recipient_search_text": [name],
+                "time_period": [{"start_date": start_date, "end_date": end_date}],
+                "award_type_codes": ["A", "B", "C", "D"],  # Contract award types
+            },
+            "fields": ["Award ID", "Recipient Name", "Award Amount", "Last Modified Date"],
+            "page": page,
+            "limit": 100  # Adjust as needed
+        }
+        try:
+            response = requests.post(url, json=payload, headers=headers, timeout=REQUEST_TIMEOUT)
+            response.raise_for_status()
+            rows = [
+                {'id': item['Award ID'], 'amount': item['Award Amount'], 'date': item['Last Modified Date']}
+                for item in response.json()['results']
+            ]
+        except Exception as e:
+            # Best effort: keep what was fetched so far, but report why paging stopped
+            print(f"{symbol}: stopped at page {page}: {e}")
+            break
+        if not rows:
+            # An empty page means every matching award has been fetched
+            break
+        res += rows
+        time.sleep(1)
+
+    # Awards without a date cannot be sorted or assigned to a year
+    res = [row for row in res if row['date']]
+    sorted_res = sorted(res, key=lambda x: datetime.strptime(x['date'], '%Y-%m-%d'))
+    sorted_res = remove_duplicates(sorted_res, 'id')
+
+    if sorted_res:
+        sum_contract(symbol, sorted_res)
+
+
+company_data = [{'symbol': 'J', 'name': 'Jacobs Engineering'},{'symbol': 'CRWD', 'name': 'CrowdStrike'},{'symbol': 'FLR', 'name': 'Fluor'},{'symbol': 'GD', 'name': 'General Dynamics'},{'symbol': 'NOC', 'name': 'Northrop Grumman'},{'symbol': 'RTX', 'name': 'Raytheon Technologies'},{'symbol': 'LHX', 'name': 'L3Harris Technologies'},{'symbol': 'CAT', 'name': 'Caterpillar'},{'symbol': 'JNJ', 'name': 'Johnson & Johnson'},{'symbol': 'CVX', 'name': 'Chevron'},{'symbol': 'XOM', 'name': 'Exxon Mobil'},{'symbol': 'UNH', 'name': 'UnitedHealth'},{'symbol': 'PFE', 'name': 'Pfizer'},{'symbol': 'BAH', 'name': 'Booz Allen Hamilton'},{'symbol': 'NEE', 'name': 'NextEra'},{'symbol': 'LDOS', 'name': 'Leidos'},{'symbol': 'PLTR', 'name': 'Palantir'},{'symbol': 'HII', 'name': 'Huntington Ingalls'},{'symbol': 'CACI', 'name': 'CACI International'},{'symbol': 'SAIC', 'name': 'Science Applications'},{'symbol': 'BA', 'name': 'Boeing'},{'symbol': 'LMT', 'name': 'Lockheed Martin'}]
+for item in company_data:
+    symbol = item['symbol']
+    name = item['name']
+    try:
+        get_data(symbol, name)
+    except Exception as e:
+        # A failure for one company must not stop the remaining ones
+        print(f"{symbol}: {e}")
diff --git a/app/primary_cron_job.py b/app/primary_cron_job.py
index 62a4a64..26fef79 100755
--- a/app/primary_cron_job.py
+++ b/app/primary_cron_job.py
@@ -380,6 +380,15 @@ def run_options_net_flow():
     ]
     subprocess.run(command)
 
+def run_government_contract():
+    subprocess.run(["python3", "cron_government_contract.py"])
+    command = [
+        "sudo", "rsync", "-avz", "-e", "ssh",
+        "/root/backend/app/json/government-contract",
+        f"root@{useast_ip_address}:/root/backend/app/json"
+    ]
+    subprocess.run(command)
+
 # Create functions to run each schedule in a separate thread
 def run_threaded(job_func):
     job_thread = threading.Thread(target=job_func)
@@ -395,6 +404,8 @@ def run_threaded(job_func):
 schedule.every().day.at("06:30").do(run_threaded, run_pocketbase).tag('pocketbase_job')
 schedule.every().day.at("07:00").do(run_threaded, run_ta_rating).tag('ta_rating_job')
 
+schedule.every().day.at("07:30").do(run_threaded, run_government_contract).tag('government_contract_job')
+
 schedule.every().day.at("08:00").do(run_threaded, run_cron_insider_trading).tag('insider_trading_job')
 schedule.every().day.at("09:00").do(run_threaded, run_congress_trading).tag('congress_job')
 schedule.every().day.at("10:00").do(run_threaded, run_shareholders).tag('shareholders_job')