From 4bcbc00b4744bb61e9a5fd91f525c724fd30e651 Mon Sep 17 00:00:00 2001 From: MuslemRahimi Date: Wed, 21 Aug 2024 17:57:44 +0200 Subject: [PATCH] add top sectors to congress db --- app/cron_congress_trading.py | 84 +++++++++++++++++++++++++++--------- app/main.py | 2 +- 2 files changed, 65 insertions(+), 21 deletions(-) diff --git a/app/cron_congress_trading.py b/app/cron_congress_trading.py index ab2d5df..906cabe 100755 --- a/app/cron_congress_trading.py +++ b/app/cron_congress_trading.py @@ -6,8 +6,8 @@ import sqlite3 import pandas as pd import time import hashlib -from collections import defaultdict - +from collections import defaultdict, Counter +from tqdm import tqdm from dotenv import load_dotenv import os @@ -229,19 +229,47 @@ def create_politician_db(data, stock_symbols, stock_raw_data, etf_symbols, etf_r grouped_data[item['id']].append(item) - # Convert defaultdict to list grouped_data_list = list(grouped_data.values()) - for item in grouped_data_list: + + for item in tqdm(grouped_data_list): + # Sort items by 'transactionDate' item = sorted(item, key=lambda x: x['transactionDate'], reverse=True) + + # Calculate top sectors + sector_counts = Counter() + for holding in item: + symbol = holding['ticker'] + sector = next((entry['sector'] for entry in stock_raw_data if entry['symbol'] == symbol), None) + if sector: + sector_counts[sector] += 1 + + # Calculate the total number of holdings + total_holdings = sum(sector_counts.values()) + + # Calculate the percentage for each sector and get the top 5 + top_5_sectors_percentage = [ + {sector: round((count / total_holdings) * 100, 2)} + for sector, count in sector_counts.most_common(5) + ] + + # Prepare the data to save in the file + result = { + 'topSectors': top_5_sectors_percentage, + 'history': item + } + + # Save to JSON file with open(f"json/congress-trading/politician-db/{item[0]['id']}.json", 'w') as file: - ujson.dump(item, file) + ujson.dump(result, file) def create_search_list(): folder_path = 'json/congress-trading/politician-db/' - # Loop through all files in the folder + # Initialize the list that will hold the search data search_politician_list = [] + + # Loop through all files in the folder for filename in os.listdir(folder_path): # Check if the file is a JSON file if filename.endswith('.json'): @@ -249,19 +277,32 @@ def create_search_list(): # Open and read the JSON file with open(file_path, 'r') as file: data = ujson.load(file) - first_item = data[0] - if 'Senator' in first_item['representative']: - pass - else: - search_politician_list.append({ - 'representative': first_item['representative'], - 'id': first_item['id'], - 'totalTrades': len(data), - 'district': first_item['district'] if 'district' in first_item else '', - 'lastTrade': first_item['transactionDate'], - }) + + # Access the history, which is a list of transactions + history = data.get('history', []) + if not history: + continue # Skip if there is no history + + # Get the first item in the history list + first_item = history[0] + # Filter out senators (assuming you only want to process non-senators) + if 'Senator' in first_item['representative']: + continue + + # Create the politician search entry + search_politician_list.append({ + 'representative': first_item['representative'], + 'id': first_item['id'], + 'totalTrades': len(history), + 'district': first_item.get('district', ''), + 'lastTrade': first_item['transactionDate'], + }) + + # Sort the list by the 'lastTrade' date in descending order search_politician_list = sorted(search_politician_list, key=lambda x: x['lastTrade'], reverse=True) + + # Write the search list to a JSON file with open('json/congress-trading/search_list.json', 'w') as file: ujson.dump(search_politician_list, file) @@ -271,17 +312,20 @@ async def run(): con = sqlite3.connect('stocks.db') cursor = con.cursor() cursor.execute("PRAGMA journal_mode = wal") - cursor.execute("SELECT symbol, name FROM stocks WHERE symbol NOT LIKE '%.%'") + cursor.execute("SELECT symbol, name, sector FROM stocks WHERE symbol NOT LIKE '%.%'") stock_raw_data = cursor.fetchall() stock_raw_data = [{ 'symbol': row[0], 'name': row[1], + 'sector': row[2], } for row in stock_raw_data] stock_symbols = [item['symbol'] for item in stock_raw_data] con.close() + + etf_con = sqlite3.connect('etf.db') etf_cursor = etf_con.cursor() etf_cursor.execute("PRAGMA journal_mode = wal") @@ -307,7 +351,6 @@ async def run(): crypto_con.close() total_symbols = crypto_symbols +etf_symbols + stock_symbols - total_raw_data = stock_raw_data + etf_raw_data + crypto_raw_data chunk_size = 500 politician_list = [] @@ -319,13 +362,14 @@ async def run(): connector = aiohttp.TCPConnector(limit=100) # Adjust the limit as needed async with aiohttp.ClientSession(connector=connector) as session: - for i in range(0, len(total_symbols), chunk_size): + for i in tqdm(range(0, len(total_symbols), chunk_size)): symbols_chunk = total_symbols[i:i + chunk_size] data = await get_congress_data(symbols_chunk,session) politician_list +=data print('sleeping for 60 sec') await asyncio.sleep(60) # Wait for 60 seconds between chunks + create_politician_db(politician_list, stock_symbols, stock_raw_data, etf_symbols, etf_raw_data, crypto_symbols, crypto_raw_data) create_search_list() diff --git a/app/main.py b/app/main.py index b22b90b..2d8b623 100755 --- a/app/main.py +++ b/app/main.py @@ -2905,7 +2905,7 @@ async def get_politician_stats(data:PoliticianId, api_key: str = Security(get_ap with open(f"json/congress-trading/politician-db/{politician_id}.json", 'rb') as file: res_list = orjson.loads(file.read()) except: - res_list = [] + res_list = {} data = orjson.dumps(res_list) compressed_data = gzip.compress(data)