From 5d9e1eb6088f15a894ee2554120bd2db2c7ac88e Mon Sep 17 00:00:00 2001
From: MuslemRahimi <moslem_rahimi@hotmail.de>
Date: Mon, 19 Aug 2024 13:18:08 +0200
Subject: [PATCH] update hedge fund cron job

---
 app/cron_hedge_funds.py  | 136 ++++++++++++++++++++++++---------------
 app/cron_shareholders.py |  81 -----------------------
 app/main.py              |  29 ++-------
 app/primary_cron_job.py  |  10 +++
 4 files changed, 102 insertions(+), 154 deletions(-)

diff --git a/app/cron_hedge_funds.py b/app/cron_hedge_funds.py
index 37cdacc..eec36a4 100644
--- a/app/cron_hedge_funds.py
+++ b/app/cron_hedge_funds.py
@@ -1,9 +1,15 @@
 import sqlite3
 import os
-import json
+import ujson
+import time
+from collections import Counter
+from tqdm import tqdm
 
-
-frontend_json_url = "../../frontend/src/lib/hedge-funds"
+keys_to_keep = [
+    "type", "securityName", "symbol", "weight", 
+    "changeInSharesNumberPercentage", "sharesNumber", 
+    "marketValue", "avgPricePaid", "putCallShare"
+]
 
 def format_company_name(company_name):
     remove_strings = [', LLC','LLC', ',', 'LP', 'LTD', 'LTD.', 'INC.', 'INC', '.', '/DE/','/MD/','PLC']
@@ -27,50 +33,6 @@ def format_company_name(company_name):
     return ' '.join(formatted_words)
 
 
-def best_hedge_funds(con):
-    
-    # Connect to the SQLite database
-    cursor = con.cursor()
-
-    # Execute a SQL query to select the top 10 best performing cik entries by winRate
-    cursor.execute("SELECT cik, name, numberOfStocks, marketValue, winRate, turnover, performancePercentage3year FROM institutes WHERE marketValue > 200000000 AND numberOfStocks > 15 ORDER BY winRate DESC LIMIT 50")
-    best_performing_ciks = cursor.fetchall()
-
-    res_list = [{
-        'cik': row[0],
-        'name': format_company_name(row[1]),
-        'numberOfStocks': row[2],
-        'marketValue': row[3],
-        'winRate': row[4],
-        'turnover': row[5],
-        'performancePercentage3year': row[6]
-    } for row in best_performing_ciks]
-
-    with open(f"json/hedge-funds/best-hedge-funds.json", 'w') as file:
-        json.dump(res_list, file)
-
-
-def worst_hedge_funds(con):
-    
-    # Connect to the SQLite database
-    cursor = con.cursor()
-
-    cursor.execute("SELECT cik, name, numberOfStocks, marketValue, winRate, turnover, performancePercentage3year FROM institutes WHERE marketValue > 200000000 AND numberOfStocks > 15 AND winRate > 0 ORDER BY winRate ASC LIMIT 50")
-    worst_performing_ciks = cursor.fetchall()
-
-    res_list = [{
-        'cik': row[0],
-        'name': format_company_name(row[1]),
-        'numberOfStocks': row[2],
-        'marketValue': row[3],
-        'winRate': row[4],
-        'turnover': row[5],
-        'performancePercentage3year': row[6]
-    } for row in worst_performing_ciks]
-
-    with open(f"json/hedge-funds/worst-hedge-funds.json", 'w') as file:
-        json.dump(res_list, file)
-
 
 def all_hedge_funds(con):
     
@@ -93,7 +55,7 @@ def all_hedge_funds(con):
     sorted_res_list = sorted(res_list, key=lambda x: x['marketValue'], reverse=True)
 
     with open(f"json/hedge-funds/all-hedge-funds.json", 'w') as file:
-        json.dump(sorted_res_list, file)
+        ujson.dump(sorted_res_list, file)
 
 
 def spy_performance():
@@ -106,7 +68,7 @@ def spy_performance():
     end_date = datetime.today().strftime('%Y-%m-%d')
 
     # Generate the range of dates with quarterly frequency
-    date_range = pd.date_range(start=start_date, end=end_date, freq='Q')
+    date_range = pd.date_range(start=start_date, end=end_date, freq='QE')
 
     # Convert the dates to the desired format (end of quarter dates)
     end_of_quarters = date_range.strftime('%Y-%m-%d').tolist()
@@ -129,11 +91,83 @@ def spy_performance():
         data.append({'date': original_date, 'price': close_price})
 
 
+def get_data(cik, stock_sectors):
+    """Write json/hedge-funds/companies/{cik}.json for a single institute.
+
+    Queries through the module-level `cursor` (created in the __main__ block)
+    and trims each holding to `keys_to_keep`. `stock_sectors` is a list of
+    {'symbol': ..., 'sector': ...} dicts. Returns None; writes nothing when
+    no row matches `cik`.
+    """
+    cursor.execute("SELECT cik, name, numberOfStocks, performancePercentage3year, performancePercentage5year, performanceSinceInceptionPercentage, averageHoldingPeriod, turnover, marketValue, winRate, holdings, summary FROM institutes WHERE cik = ?", (cik,))
+    row = cursor.fetchone()  # only the first matching row is used
+    if row is None:
+        return None  # Exit if no data is found
+
+    res = {
+        'cik': row[0],
+        'name': row[1],
+        'numberOfStocks': row[2],
+        'performancePercentage3year': row[3],
+        'performancePercentage5year': row[4],
+        'performanceSinceInceptionPercentage': row[5],
+        'averageHoldingPeriod': row[6],
+        'turnover': row[7],
+        'marketValue': row[8],
+        'winRate': row[9],
+        # Keep only the fields named in keys_to_keep.
+        'holdings': [
+            {key: holding[key] for key in keys_to_keep}
+            for holding in ujson.loads(row[10])
+        ],
+        'summary': ujson.loads(row[11]),
+    }
+
+    # Index sector by symbol once so each holding is an O(1) lookup; setdefault
+    # makes the first entry win when a symbol is listed more than once.
+    sector_by_symbol = {}
+    for item in stock_sectors:
+        sector_by_symbol.setdefault(item['symbol'], item['sector'])
+
+    # Holdings whose symbol has no (truthy) sector are left out, so the
+    # percentages below are shares of the classified holdings, not of all holdings.
+    sectors = (sector_by_symbol.get(holding['symbol']) for holding in res['holdings'])
+    sector_counts = Counter(sector for sector in sectors if sector)
+    total_holdings = sum(sector_counts.values())
+
+    # Top 5 sectors, each as a one-entry {sector: percent} dict.
+    res['topSectors'] = [
+        {sector: round((count / total_holdings) * 100, 2)}
+        for sector, count in sector_counts.most_common(5)
+    ]
+
+    with open(f"json/hedge-funds/companies/{cik}.json", 'w') as file:
+        ujson.dump(res, file)
 
 if __name__ == '__main__':
     con = sqlite3.connect('institute.db')
-    #best_hedge_funds(con)
-    #worst_hedge_funds(con)
+    # get_data() reads this module-level cursor, so create it before the loop below.
+    cursor = con.cursor()
+    cursor.execute("PRAGMA journal_mode = wal")
+    cursor.execute("SELECT DISTINCT cik FROM institutes")
+    cik_symbols = [row[0] for row in cursor.fetchall()]
+
+    # Load the symbol/sector rows once; the finally closes stocks.db even on error.
+    stock_con = sqlite3.connect('stocks.db')
+    try:
+        stock_sectors = [
+            {'symbol': row[0], 'sector': row[1]}
+            for row in stock_con.execute("SELECT DISTINCT symbol, sector FROM stocks")
+        ]
+    finally:
+        stock_con.close()
+
     all_hedge_funds(con)
     spy_performance()
+    for cik in tqdm(cik_symbols):
+        try:
+            get_data(cik, stock_sectors)
+        except Exception as e:
+            print(f"cik {cik}: {e!r}")  # keep going, but say which fund failed
+
     con.close()
\ No newline at end of file
diff --git a/app/cron_shareholders.py b/app/cron_shareholders.py
index 1236965..6bb3c99 100644
--- a/app/cron_shareholders.py
+++ b/app/cron_shareholders.py
@@ -5,89 +5,8 @@ import pandas as pd
 from tqdm import tqdm
 
 import requests
-from bs4 import BeautifulSoup
 import re
 
-class Short_Data:
-    def __init__(self, data):
-        self.short_interest_ratio_days_to_cover = data.get('shortInterestRatioDaysToCover')
-        self.short_percent_of_float = data.get('shortPercentOfFloat')
-        self.short_percent_increase_decrease = data.get('shortPercentIncreaseDecrease')
-        self.short_interest_current_shares_short = data.get('shortInterestCurrentSharesShort')
-        self.shares_float = data.get('sharesFloat')
-        self.short_interest_prior_shares_short = data.get('shortInterestPriorSharesShort')
-        self.percent_from_52_wk_high = data.get('percentFrom52WkHigh')
-        self.percent_from_50_day_ma = data.get('percentFrom50DayMa')
-        self.percent_from_200_day_ma = data.get('percentFrom200DayMa')
-        self.percent_from_52_wk_low = data.get('percentFrom52WkLow')
-        self.n_52_week_performance = data.get('n52WeekPerformance')
-        self.trading_volume_today_vs_avg = data.get('tradingVolumeTodayVsAvg')
-        self.trading_volume_today = data.get('tradingVolumeToday')
-        self.trading_volume_average = data.get('tradingVolumeAverage')
-        self.market_cap = data.get('marketCap')
-        self.percent_owned_by_insiders = data.get('percentOwnedByInsiders')
-        self.percent_owned_by_institutions = data.get('percentOwnedByInstitutions')
-        self.price = data.get('price')
-        self.name = data.get('name')
-        self.ticker = data.get('ticker')
-
-def camel_case(s):
-    s = re.sub(r'[^A-Za-z0-9 ]+', '', s)
-    s = s.replace('%', 'Percent')
-    s = re.sub(r'(\d)', r'n\1', s)
-    s = re.sub(r'(\d+)', '', s)
-    parts = s.split()
-    return parts[0].lower() + ''.join(word.capitalize() for word in parts[1:])
-
-def parse_stock_data(html):
-    soup = BeautifulSoup(html, 'html.parser')
-    table_rows = soup.select('div.inner_box_2 > table > tr')
-    parsed_data = {}
-    
-    for row in table_rows:
-        try:
-            key_element = row.select_one('td:nth-child(1)')
-            value_element = row.select_one('td:nth-child(2)')
-            if key_element and value_element:
-                key = camel_case(key_element.get_text().strip())
-                value = value_element.get_text().strip()
-
-                # Clean and convert value
-                if 'view' in value.lower():
-                    value = None
-                else:
-                    value = re.sub(r'[\s%,\$]', '', value)
-                    value = float(value) if value and value.replace('.', '', 1).isdigit() else value
-
-                if key:
-                    parsed_data[key] = value
-        except:
-            pass
-
-    # Add price, name, and ticker separately
-    price = float(table_rows[0].select_one('td:nth-child(2)').get_text().strip().replace('$', '') or 'NaN')
-    name = table_rows[0].select_one('td').get_text().strip()
-    ticker = table_rows[1].select_one('td').get_text().strip()
-
-    parsed_data.update({
-        'price': price,
-        'name': name,
-        'ticker': ticker
-    })
-
-    return Short_Data(parsed_data) if name.lower() != 'not available - try again' else None
-
-def shortsqueeze(ticker=''):
-    try:
-        url = f'https://shortsqueeze.com/?symbol={ticker}'
-        response = requests.get(url, allow_redirects=False)
-        if response.status_code == 200:
-            return parse_stock_data(response.text)
-        else:
-            return None
-    except Exception as e:
-        print(f"An error occurred: {e}")
-        return None
 
 
 async def save_as_json(symbol, data):
diff --git a/app/main.py b/app/main.py
index d1c87b0..0e75bb5 100755
--- a/app/main.py
+++ b/app/main.py
@@ -1380,29 +1380,14 @@ async def get_hedge_funds_data(data: GetCIKData, api_key: str = Security(get_api
             headers={"Content-Encoding": "gzip"}
         )
     
-    cursor = con_inst.cursor()
+    try:  # NOTE(review): cik looks request-supplied; confirm it cannot contain '/' or '..'
+        with open(f"json/hedge-funds/companies/{cik}.json", 'rb') as file:
+            res = orjson.loads(file.read())
+    except (OSError, ValueError):  # missing/unreadable file, or invalid JSON
+        res = []
 
-    # Execute a SQL query to select the top 10 best performing cik entries by winRate
-    cursor.execute("SELECT cik, name, numberOfStocks, performancePercentage3year, performancePercentage5year, performanceSinceInceptionPercentage, averageHoldingPeriod, turnover, marketValue, winRate, holdings, summary FROM institutes WHERE cik = ?", (cik,))
-    cik_data = cursor.fetchall()
-    res = [{
-        'cik': row[0],
-        'name': row[1],
-        'numberOfStocks': row[2],
-        'performancePercentage3year': row[3],
-        'performancePercentage5year': row[4],
-        'performanceSinceInceptionPercentage': row[5],
-        'averageHoldingPeriod': row[6],
-        'turnover': row[7],
-        'marketValue': row[8],
-        'winRate': row[9],
-        'holdings': orjson.loads(row[10]),
-        'summary': orjson.loads(row[11]),
-    } for row in cik_data]
-
-
-    res_json = orjson.dumps(res[0])
-    compressed_data = gzip.compress(res_json)
+    res = orjson.dumps(res)
+    compressed_data = gzip.compress(res)
 
     redis_client.set(cache_key, compressed_data)
     redis_client.expire(cache_key, 3600 * 3600) # Set cache expiration time to Infinity
diff --git a/app/primary_cron_job.py b/app/primary_cron_job.py
index a93c2ca..6493b3d 100755
--- a/app/primary_cron_job.py
+++ b/app/primary_cron_job.py
@@ -452,6 +452,15 @@ def run_government_contract():
     ]
     run_command(command)
 
+def run_hedge_fund():
+    # Run cron_hedge_funds.py, then rsync json/hedge-funds to the useast host.
+    run_command(["python3", "cron_hedge_funds.py"])
+    destination = f"root@{useast_ip_address}:/root/backend/app/json"
+    run_command([
+        "sudo", "rsync", "-avz", "-e", "ssh",
+        "/root/backend/app/json/hedge-funds", destination,
+    ])
+
 def run_dashboard():
     run_command(["python3", "cron_dashboard.py"])
     command = [
@@ -508,6 +517,7 @@ schedule.every().day.at("06:00").do(run_threaded, run_historical_price).tag('his
 schedule.every().day.at("06:30").do(run_threaded, run_pocketbase).tag('pocketbase_job')
 
 schedule.every().day.at("07:00").do(run_threaded, run_ta_rating).tag('ta_rating_job')
+schedule.every().day.at("07:00").do(run_threaded, run_hedge_fund).tag('hedge_fund_job')
 schedule.every().day.at("07:30").do(run_threaded, run_government_contract).tag('government_contract_job')
 schedule.every().day.at("07:30").do(run_threaded, run_financial_statements).tag('financial_statements_job')