add top sectors to congress db

This commit is contained in:
MuslemRahimi 2024-08-21 17:57:44 +02:00
parent 6441fa3cfe
commit 4bcbc00b47
2 changed files with 65 additions and 21 deletions

View File

@ -6,8 +6,8 @@ import sqlite3
import pandas as pd import pandas as pd
import time import time
import hashlib import hashlib
from collections import defaultdict from collections import defaultdict, Counter
from tqdm import tqdm
from dotenv import load_dotenv from dotenv import load_dotenv
import os import os
@ -229,19 +229,47 @@ def create_politician_db(data, stock_symbols, stock_raw_data, etf_symbols, etf_r
grouped_data[item['id']].append(item) grouped_data[item['id']].append(item)
# Convert defaultdict to list # Convert defaultdict to list
grouped_data_list = list(grouped_data.values()) grouped_data_list = list(grouped_data.values())
for item in grouped_data_list:
for item in tqdm(grouped_data_list):
# Sort items by 'transactionDate'
item = sorted(item, key=lambda x: x['transactionDate'], reverse=True) item = sorted(item, key=lambda x: x['transactionDate'], reverse=True)
# Calculate top sectors
sector_counts = Counter()
for holding in item:
symbol = holding['ticker']
sector = next((entry['sector'] for entry in stock_raw_data if entry['symbol'] == symbol), None)
if sector:
sector_counts[sector] += 1
# Calculate the total number of holdings
total_holdings = sum(sector_counts.values())
# Calculate the percentage for each sector and get the top 5
top_5_sectors_percentage = [
{sector: round((count / total_holdings) * 100, 2)}
for sector, count in sector_counts.most_common(5)
]
# Prepare the data to save in the file
result = {
'topSectors': top_5_sectors_percentage,
'history': item
}
# Save to JSON file
with open(f"json/congress-trading/politician-db/{item[0]['id']}.json", 'w') as file: with open(f"json/congress-trading/politician-db/{item[0]['id']}.json", 'w') as file:
ujson.dump(item, file) ujson.dump(result, file)
def create_search_list(): def create_search_list():
folder_path = 'json/congress-trading/politician-db/' folder_path = 'json/congress-trading/politician-db/'
# Loop through all files in the folder # Initialize the list that will hold the search data
search_politician_list = [] search_politician_list = []
# Loop through all files in the folder
for filename in os.listdir(folder_path): for filename in os.listdir(folder_path):
# Check if the file is a JSON file # Check if the file is a JSON file
if filename.endswith('.json'): if filename.endswith('.json'):
@ -249,19 +277,32 @@ def create_search_list():
# Open and read the JSON file # Open and read the JSON file
with open(file_path, 'r') as file: with open(file_path, 'r') as file:
data = ujson.load(file) data = ujson.load(file)
first_item = data[0]
if 'Senator' in first_item['representative']: # Access the history, which is a list of transactions
pass history = data.get('history', [])
else: if not history:
search_politician_list.append({ continue # Skip if there is no history
'representative': first_item['representative'],
'id': first_item['id'], # Get the first item in the history list
'totalTrades': len(data), first_item = history[0]
'district': first_item['district'] if 'district' in first_item else '',
'lastTrade': first_item['transactionDate'],
})
# Filter out senators (assuming you only want to process non-senators)
if 'Senator' in first_item['representative']:
continue
# Create the politician search entry
search_politician_list.append({
'representative': first_item['representative'],
'id': first_item['id'],
'totalTrades': len(history),
'district': first_item.get('district', ''),
'lastTrade': first_item['transactionDate'],
})
# Sort the list by the 'lastTrade' date in descending order
search_politician_list = sorted(search_politician_list, key=lambda x: x['lastTrade'], reverse=True) search_politician_list = sorted(search_politician_list, key=lambda x: x['lastTrade'], reverse=True)
# Write the search list to a JSON file
with open('json/congress-trading/search_list.json', 'w') as file: with open('json/congress-trading/search_list.json', 'w') as file:
ujson.dump(search_politician_list, file) ujson.dump(search_politician_list, file)
@ -271,17 +312,20 @@ async def run():
con = sqlite3.connect('stocks.db') con = sqlite3.connect('stocks.db')
cursor = con.cursor() cursor = con.cursor()
cursor.execute("PRAGMA journal_mode = wal") cursor.execute("PRAGMA journal_mode = wal")
cursor.execute("SELECT symbol, name FROM stocks WHERE symbol NOT LIKE '%.%'") cursor.execute("SELECT symbol, name, sector FROM stocks WHERE symbol NOT LIKE '%.%'")
stock_raw_data = cursor.fetchall() stock_raw_data = cursor.fetchall()
stock_raw_data = [{ stock_raw_data = [{
'symbol': row[0], 'symbol': row[0],
'name': row[1], 'name': row[1],
'sector': row[2],
} for row in stock_raw_data] } for row in stock_raw_data]
stock_symbols = [item['symbol'] for item in stock_raw_data] stock_symbols = [item['symbol'] for item in stock_raw_data]
con.close() con.close()
etf_con = sqlite3.connect('etf.db') etf_con = sqlite3.connect('etf.db')
etf_cursor = etf_con.cursor() etf_cursor = etf_con.cursor()
etf_cursor.execute("PRAGMA journal_mode = wal") etf_cursor.execute("PRAGMA journal_mode = wal")
@ -307,7 +351,6 @@ async def run():
crypto_con.close() crypto_con.close()
total_symbols = crypto_symbols +etf_symbols + stock_symbols total_symbols = crypto_symbols +etf_symbols + stock_symbols
total_raw_data = stock_raw_data + etf_raw_data + crypto_raw_data
chunk_size = 500 chunk_size = 500
politician_list = [] politician_list = []
@ -319,13 +362,14 @@ async def run():
connector = aiohttp.TCPConnector(limit=100) # Adjust the limit as needed connector = aiohttp.TCPConnector(limit=100) # Adjust the limit as needed
async with aiohttp.ClientSession(connector=connector) as session: async with aiohttp.ClientSession(connector=connector) as session:
for i in range(0, len(total_symbols), chunk_size): for i in tqdm(range(0, len(total_symbols), chunk_size)):
symbols_chunk = total_symbols[i:i + chunk_size] symbols_chunk = total_symbols[i:i + chunk_size]
data = await get_congress_data(symbols_chunk,session) data = await get_congress_data(symbols_chunk,session)
politician_list +=data politician_list +=data
print('sleeping for 60 sec') print('sleeping for 60 sec')
await asyncio.sleep(60) # Wait for 60 seconds between chunks await asyncio.sleep(60) # Wait for 60 seconds between chunks
create_politician_db(politician_list, stock_symbols, stock_raw_data, etf_symbols, etf_raw_data, crypto_symbols, crypto_raw_data) create_politician_db(politician_list, stock_symbols, stock_raw_data, etf_symbols, etf_raw_data, crypto_symbols, crypto_raw_data)
create_search_list() create_search_list()

View File

@ -2905,7 +2905,7 @@ async def get_politician_stats(data:PoliticianId, api_key: str = Security(get_ap
with open(f"json/congress-trading/politician-db/{politician_id}.json", 'rb') as file: with open(f"json/congress-trading/politician-db/{politician_id}.json", 'rb') as file:
res_list = orjson.loads(file.read()) res_list = orjson.loads(file.read())
except: except:
res_list = [] res_list = {}
data = orjson.dumps(res_list) data = orjson.dumps(res_list)
compressed_data = gzip.compress(data) compressed_data = gzip.compress(data)