From 4bcbc00b4744bb61e9a5fd91f525c724fd30e651 Mon Sep 17 00:00:00 2001
From: MuslemRahimi <moslem_rahimi@hotmail.de>
Date: Wed, 21 Aug 2024 17:57:44 +0200
Subject: [PATCH] add top sectors to congress db

---
 app/cron_congress_trading.py | 84 +++++++++++++++++++++++++++---------
 app/main.py                  |  2 +-
 2 files changed, 65 insertions(+), 21 deletions(-)

diff --git a/app/cron_congress_trading.py b/app/cron_congress_trading.py
index ab2d5df..906cabe 100755
--- a/app/cron_congress_trading.py
+++ b/app/cron_congress_trading.py
@@ -6,8 +6,8 @@ import sqlite3
 import pandas as pd
 import time
 import hashlib
-from collections import defaultdict
-
+from collections import defaultdict, Counter
+from tqdm import tqdm
 from dotenv import load_dotenv
 import os
 
@@ -229,19 +229,47 @@ def create_politician_db(data, stock_symbols, stock_raw_data, etf_symbols, etf_r
 
         grouped_data[item['id']].append(item)
 
-
     # Convert defaultdict to list
     grouped_data_list = list(grouped_data.values())
-    for item in grouped_data_list:
+
+    for item in tqdm(grouped_data_list):
+        # Sort items by 'transactionDate' in descending order
         item = sorted(item, key=lambda x: x['transactionDate'], reverse=True)
+
+        # Calculate top sectors
+        sector_counts = Counter()
+        for holding in item:
+            symbol = holding['ticker']
+            sector = next((entry['sector'] for entry in stock_raw_data if entry['symbol'] == symbol), None)
+            if sector:
+                sector_counts[sector] += 1
+
+        # Total number of holdings with a known sector (the percentage denominator)
+        total_holdings = sum(sector_counts.values())
+
+        # Calculate the percentage for each sector and get the top 5
+        top_5_sectors_percentage = [
+            {sector: round((count / total_holdings) * 100, 2)}
+            for sector, count in sector_counts.most_common(5)
+        ]
+
+        # Prepare the data to save in the file
+        result = {
+            'topSectors': top_5_sectors_percentage,
+            'history': item
+        }
+
+        # Save to JSON file
         with open(f"json/congress-trading/politician-db/{item[0]['id']}.json", 'w') as file:
-            ujson.dump(item, file)
+            ujson.dump(result, file)
 
 
 def create_search_list():
     folder_path = 'json/congress-trading/politician-db/'
-    # Loop through all files in the folder
+    # Initialize the list that will hold the search data
     search_politician_list = []
+
+    # Loop through all files in the folder
     for filename in os.listdir(folder_path):
         # Check if the file is a JSON file
         if filename.endswith('.json'):
@@ -249,19 +277,32 @@ def create_search_list():
             # Open and read the JSON file
             with open(file_path, 'r') as file:
                 data = ujson.load(file)
-                first_item = data[0]
-                if 'Senator' in first_item['representative']:
-                    pass
-                else:
-                    search_politician_list.append({
-                        'representative': first_item['representative'],
-                        'id': first_item['id'],
-                        'totalTrades': len(data),
-                        'district': first_item['district'] if 'district' in first_item else '',
-                        'lastTrade': first_item['transactionDate'],
-                        })
+                
+                # Access the history, which is a list of transactions
+                history = data.get('history', [])
+                if not history:
+                    continue  # Skip if there is no history
+                
+                # Get the first item in the history list
+                first_item = history[0]
 
+                # Skip senators: they are not added to the search list
+                if 'Senator' in first_item['representative']:
+                    continue
+
+                # Create the politician search entry
+                search_politician_list.append({
+                    'representative': first_item['representative'],
+                    'id': first_item['id'],
+                    'totalTrades': len(history),
+                    'district': first_item.get('district', ''),
+                    'lastTrade': first_item['transactionDate'],
+                })
+
+    # Sort the list by the 'lastTrade' date in descending order
     search_politician_list = sorted(search_politician_list, key=lambda x: x['lastTrade'], reverse=True)
+
+    # Write the search list to a JSON file
     with open('json/congress-trading/search_list.json', 'w') as file:
         ujson.dump(search_politician_list, file)
 
@@ -271,17 +312,20 @@ async def run():
         con = sqlite3.connect('stocks.db')
         cursor = con.cursor()
         cursor.execute("PRAGMA journal_mode = wal")
-        cursor.execute("SELECT symbol, name FROM stocks WHERE symbol NOT LIKE '%.%'")
+        cursor.execute("SELECT symbol, name, sector FROM stocks WHERE symbol NOT LIKE '%.%'")
         stock_raw_data = cursor.fetchall()
         stock_raw_data = [{
             'symbol': row[0],
             'name': row[1],
+            'sector': row[2],
         } for row in stock_raw_data]
 
         stock_symbols = [item['symbol'] for item in stock_raw_data]
 
         con.close()
 
+
+
         etf_con = sqlite3.connect('etf.db')
         etf_cursor = etf_con.cursor()
         etf_cursor.execute("PRAGMA journal_mode = wal")
@@ -307,7 +351,6 @@ async def run():
         crypto_con.close()
 
         total_symbols = crypto_symbols +etf_symbols + stock_symbols
-        total_raw_data = stock_raw_data + etf_raw_data + crypto_raw_data
         chunk_size = 500
         politician_list = []
 
@@ -319,13 +362,14 @@ async def run():
         
         connector = aiohttp.TCPConnector(limit=100)  # Adjust the limit as needed
         async with aiohttp.ClientSession(connector=connector) as session:
-            for i in range(0, len(total_symbols), chunk_size):
+            for i in tqdm(range(0, len(total_symbols), chunk_size)):
                 symbols_chunk = total_symbols[i:i + chunk_size]
                 data = await get_congress_data(symbols_chunk,session)
                 politician_list +=data
                 print('sleeping for 60 sec')
                 await asyncio.sleep(60)  # Wait for 60 seconds between chunks
         
+        
         create_politician_db(politician_list, stock_symbols, stock_raw_data, etf_symbols, etf_raw_data, crypto_symbols, crypto_raw_data)
         create_search_list()
 
diff --git a/app/main.py b/app/main.py
index b22b90b..2d8b623 100755
--- a/app/main.py
+++ b/app/main.py
@@ -2905,7 +2905,7 @@ async def get_politician_stats(data:PoliticianId, api_key: str = Security(get_ap
         with open(f"json/congress-trading/politician-db/{politician_id}.json", 'rb') as file:
             res_list = orjson.loads(file.read())
     except:
-        res_list = []
+        res_list = {}
 
     data = orjson.dumps(res_list)
     compressed_data = gzip.compress(data)