From 5d9e1eb6088f15a894ee2554120bd2db2c7ac88e Mon Sep 17 00:00:00 2001 From: MuslemRahimi Date: Mon, 19 Aug 2024 13:18:08 +0200 Subject: [PATCH] update hedge fund cron job --- app/cron_hedge_funds.py | 136 ++++++++++++++++++++++++--------------- app/cron_shareholders.py | 81 ----------------------- app/main.py | 29 ++------- app/primary_cron_job.py | 10 +++ 4 files changed, 102 insertions(+), 154 deletions(-) diff --git a/app/cron_hedge_funds.py b/app/cron_hedge_funds.py index 37cdacc..eec36a4 100644 --- a/app/cron_hedge_funds.py +++ b/app/cron_hedge_funds.py @@ -1,9 +1,15 @@ import sqlite3 import os -import json +import ujson +import time +from collections import Counter +from tqdm import tqdm - -frontend_json_url = "../../frontend/src/lib/hedge-funds" +keys_to_keep = [ + "type", "securityName", "symbol", "weight", + "changeInSharesNumberPercentage", "sharesNumber", + "marketValue", "avgPricePaid", "putCallShare" +] def format_company_name(company_name): remove_strings = [', LLC','LLC', ',', 'LP', 'LTD', 'LTD.', 'INC.', 'INC', '.', '/DE/','/MD/','PLC'] @@ -27,50 +33,6 @@ def format_company_name(company_name): return ' '.join(formatted_words) -def best_hedge_funds(con): - - # Connect to the SQLite database - cursor = con.cursor() - - # Execute a SQL query to select the top 10 best performing cik entries by winRate - cursor.execute("SELECT cik, name, numberOfStocks, marketValue, winRate, turnover, performancePercentage3year FROM institutes WHERE marketValue > 200000000 AND numberOfStocks > 15 ORDER BY winRate DESC LIMIT 50") - best_performing_ciks = cursor.fetchall() - - res_list = [{ - 'cik': row[0], - 'name': format_company_name(row[1]), - 'numberOfStocks': row[2], - 'marketValue': row[3], - 'winRate': row[4], - 'turnover': row[5], - 'performancePercentage3year': row[6] - } for row in best_performing_ciks] - - with open(f"json/hedge-funds/best-hedge-funds.json", 'w') as file: - json.dump(res_list, file) - - -def worst_hedge_funds(con): - - # Connect to the SQLite database - cursor = con.cursor() - - cursor.execute("SELECT cik, name, numberOfStocks, marketValue, winRate, turnover, performancePercentage3year FROM institutes WHERE marketValue > 200000000 AND numberOfStocks > 15 AND winRate > 0 ORDER BY winRate ASC LIMIT 50") - worst_performing_ciks = cursor.fetchall() - - res_list = [{ - 'cik': row[0], - 'name': format_company_name(row[1]), - 'numberOfStocks': row[2], - 'marketValue': row[3], - 'winRate': row[4], - 'turnover': row[5], - 'performancePercentage3year': row[6] - } for row in worst_performing_ciks] - - with open(f"json/hedge-funds/worst-hedge-funds.json", 'w') as file: - json.dump(res_list, file) - def all_hedge_funds(con): @@ -93,7 +55,7 @@ def all_hedge_funds(con): sorted_res_list = sorted(res_list, key=lambda x: x['marketValue'], reverse=True) with open(f"json/hedge-funds/all-hedge-funds.json", 'w') as file: - json.dump(sorted_res_list, file) + ujson.dump(sorted_res_list, file) def spy_performance(): @@ -106,7 +68,7 @@ def spy_performance(): end_date = datetime.today().strftime('%Y-%m-%d') # Generate the range of dates with quarterly frequency - date_range = pd.date_range(start=start_date, end=end_date, freq='Q') + date_range = pd.date_range(start=start_date, end=end_date, freq='QE') # Convert the dates to the desired format (end of quarter dates) end_of_quarters = date_range.strftime('%Y-%m-%d').tolist() @@ -129,11 +91,83 @@ def spy_performance(): data.append({'date': original_date, 'price': close_price}) +def get_data(cik, stock_sectors): + cursor.execute("SELECT cik, name, numberOfStocks, performancePercentage3year, performancePercentage5year, performanceSinceInceptionPercentage, averageHoldingPeriod, turnover, marketValue, winRate, holdings, summary FROM institutes WHERE cik = ?", (cik,)) + cik_data = cursor.fetchall() + res = [{ + 'cik': row[0], + 'name': row[1], + 'numberOfStocks': row[2], + 'performancePercentage3year': row[3], + 'performancePercentage5year': row[4], + 'performanceSinceInceptionPercentage': row[5], + 'averageHoldingPeriod': row[6], + 'turnover': row[7], + 'marketValue': row[8], + 'winRate': row[9], + 'holdings': ujson.loads(row[10]), + 'summary': ujson.loads(row[11]), + } for row in cik_data] + + if not res: + return None # Exit if no data is found + + res = res[0] #latest data + + filtered_holdings = [ + {key: holding[key] for key in keys_to_keep} + for holding in res['holdings'] + ] + + res['holdings'] = filtered_holdings + + # Cross-reference symbols in holdings with stock_sectors to determine sectors + sector_counts = Counter() + for holding in res['holdings']: + symbol = holding['symbol'] + sector = next((item['sector'] for item in stock_sectors if item['symbol'] == symbol), None) + if sector: + sector_counts[sector] += 1 + + # Calculate the total number of holdings + total_holdings = sum(sector_counts.values()) + + # Calculate the percentage for each sector and get the top 5 + top_5_sectors_percentage = [ + {sector: round((count / total_holdings) * 100, 2)} + for sector, count in sector_counts.most_common(5) + ] + + # Add the top 5 sectors information to the result + res['topSectors'] = top_5_sectors_percentage + if res: + with open(f"json/hedge-funds/companies/{cik}.json", 'w') as file: + ujson.dump(res, file) if __name__ == '__main__': con = sqlite3.connect('institute.db') - #best_hedge_funds(con) - #worst_hedge_funds(con) + stock_con = sqlite3.connect('stocks.db') + + cursor = con.cursor() + cursor.execute("PRAGMA journal_mode = wal") + cursor.execute("SELECT DISTINCT cik FROM institutes") + cik_symbols = [row[0] for row in cursor.fetchall()] + + try: + stock_cursor = stock_con.cursor() + stock_cursor.execute("SELECT DISTINCT symbol, sector FROM stocks") + stock_sectors = [{'symbol': row[0], 'sector': row[1]} for row in stock_cursor.fetchall()] + finally: + # Ensure that the cursor and connection are closed even if an error occurs + stock_cursor.close() + stock_con.close() + all_hedge_funds(con) spy_performance() + for cik in tqdm(cik_symbols): + try: + get_data(cik, stock_sectors) + except Exception as e: + print(e) + con.close() \ No newline at end of file diff --git a/app/cron_shareholders.py b/app/cron_shareholders.py index 1236965..6bb3c99 100644 --- a/app/cron_shareholders.py +++ b/app/cron_shareholders.py @@ -5,89 +5,8 @@ import pandas as pd from tqdm import tqdm import requests -from bs4 import BeautifulSoup import re -class Short_Data: - def __init__(self, data): - self.short_interest_ratio_days_to_cover = data.get('shortInterestRatioDaysToCover') - self.short_percent_of_float = data.get('shortPercentOfFloat') - self.short_percent_increase_decrease = data.get('shortPercentIncreaseDecrease') - self.short_interest_current_shares_short = data.get('shortInterestCurrentSharesShort') - self.shares_float = data.get('sharesFloat') - self.short_interest_prior_shares_short = data.get('shortInterestPriorSharesShort') - self.percent_from_52_wk_high = data.get('percentFrom52WkHigh') - self.percent_from_50_day_ma = data.get('percentFrom50DayMa') - self.percent_from_200_day_ma = data.get('percentFrom200DayMa') - self.percent_from_52_wk_low = data.get('percentFrom52WkLow') - self.n_52_week_performance = data.get('n52WeekPerformance') - self.trading_volume_today_vs_avg = data.get('tradingVolumeTodayVsAvg') - self.trading_volume_today = data.get('tradingVolumeToday') - self.trading_volume_average = data.get('tradingVolumeAverage') - self.market_cap = data.get('marketCap') - self.percent_owned_by_insiders = data.get('percentOwnedByInsiders') - self.percent_owned_by_institutions = data.get('percentOwnedByInstitutions') - self.price = data.get('price') - self.name = data.get('name') - self.ticker = data.get('ticker') - -def camel_case(s): - s = re.sub(r'[^A-Za-z0-9 ]+', '', s) - s = s.replace('%', 'Percent') - s = re.sub(r'(\d)', r'n\1', s) - s = re.sub(r'(\d+)', '', s) - parts = s.split() - return parts[0].lower() + ''.join(word.capitalize() for word in parts[1:]) - -def parse_stock_data(html): - soup = BeautifulSoup(html, 'html.parser') - table_rows = soup.select('div.inner_box_2 > table > tr') - parsed_data = {} - - for row in table_rows: - try: - key_element = row.select_one('td:nth-child(1)') - value_element = row.select_one('td:nth-child(2)') - if key_element and value_element: - key = camel_case(key_element.get_text().strip()) - value = value_element.get_text().strip() - - # Clean and convert value - if 'view' in value.lower(): - value = None - else: - value = re.sub(r'[\s%,\$]', '', value) - value = float(value) if value and value.replace('.', '', 1).isdigit() else value - - if key: - parsed_data[key] = value - except: - pass - - # Add price, name, and ticker separately - price = float(table_rows[0].select_one('td:nth-child(2)').get_text().strip().replace('$', '') or 'NaN') - name = table_rows[0].select_one('td').get_text().strip() - ticker = table_rows[1].select_one('td').get_text().strip() - - parsed_data.update({ - 'price': price, - 'name': name, - 'ticker': ticker - }) - - return Short_Data(parsed_data) if name.lower() != 'not available - try again' else None - -def shortsqueeze(ticker=''): - try: - url = f'https://shortsqueeze.com/?symbol={ticker}' - response = requests.get(url, allow_redirects=False) - if response.status_code == 200: - return parse_stock_data(response.text) - else: - return None - except Exception as e: - print(f"An error occurred: {e}") - return None async def save_as_json(symbol, data): diff --git a/app/main.py b/app/main.py index d1c87b0..0e75bb5 100755 --- a/app/main.py +++ b/app/main.py @@ -1380,29 +1380,14 @@ async def get_hedge_funds_data(data: GetCIKData, api_key: str = Security(get_api headers={"Content-Encoding": "gzip"} ) - cursor = con_inst.cursor() + try: + with open(f"json/hedge-funds/companies/{cik}.json", 'rb') as file: + res = orjson.loads(file.read()) + except: + res = [] - # Execute a SQL query to select the top 10 best performing cik entries by winRate - cursor.execute("SELECT cik, name, numberOfStocks, performancePercentage3year, performancePercentage5year, performanceSinceInceptionPercentage, averageHoldingPeriod, turnover, marketValue, winRate, holdings, summary FROM institutes WHERE cik = ?", (cik,)) - cik_data = cursor.fetchall() - res = [{ - 'cik': row[0], - 'name': row[1], - 'numberOfStocks': row[2], - 'performancePercentage3year': row[3], - 'performancePercentage5year': row[4], - 'performanceSinceInceptionPercentage': row[5], - 'averageHoldingPeriod': row[6], - 'turnover': row[7], - 'marketValue': row[8], - 'winRate': row[9], - 'holdings': orjson.loads(row[10]), - 'summary': orjson.loads(row[11]), - } for row in cik_data] - - - res_json = orjson.dumps(res[0]) - compressed_data = gzip.compress(res_json) + res = orjson.dumps(res) + compressed_data = gzip.compress(res) redis_client.set(cache_key, compressed_data) redis_client.expire(cache_key, 3600 * 3600) # Set cache expiration time to Infinity diff --git a/app/primary_cron_job.py b/app/primary_cron_job.py index a93c2ca..6493b3d 100755 --- a/app/primary_cron_job.py +++ b/app/primary_cron_job.py @@ -452,6 +452,15 @@ def run_government_contract(): ] run_command(command) +def run_hedge_fund(): + run_command(["python3", "cron_hedge_funds.py"]) + command = [ + "sudo", "rsync", "-avz", "-e", "ssh", + "/root/backend/app/json/hedge-funds", + f"root@{useast_ip_address}:/root/backend/app/json" + ] + run_command(command) + def run_dashboard(): run_command(["python3", "cron_dashboard.py"]) command = [ @@ -508,6 +517,7 @@ schedule.every().day.at("06:00").do(run_threaded, run_historical_price).tag('his schedule.every().day.at("06:30").do(run_threaded, run_pocketbase).tag('pocketbase_job') schedule.every().day.at("07:00").do(run_threaded, run_ta_rating).tag('ta_rating_job') +schedule.every().day.at("07:00").do(run_threaded, run_hedge_fund).tag('hedge_fund_job') schedule.every().day.at("07:30").do(run_threaded, run_government_contract).tag('government_contract_job') schedule.every().day.at("07:30").do(run_threaded, run_financial_statements).tag('financial_statements_job')