From fe574eb0158049cf80207226916ae076e0be37ff Mon Sep 17 00:00:00 2001 From: MuslemRahimi Date: Sun, 9 Jun 2024 18:37:54 +0200 Subject: [PATCH] add cron job && endpoint share statistics --- app/cron_share_statistics.py | 81 ++++++++++++++++++++++ app/cron_shareholders.py | 130 ++++++++++++++++++++++++++--------- app/main.py | 19 +++++ app/primary_cron_job.py | 12 ++++ 4 files changed, 209 insertions(+), 33 deletions(-) create mode 100644 app/cron_share_statistics.py diff --git a/app/cron_share_statistics.py b/app/cron_share_statistics.py new file mode 100644 index 0000000..233cea2 --- /dev/null +++ b/app/cron_share_statistics.py @@ -0,0 +1,81 @@ +import ujson +import sqlite3 +import asyncio +import pandas as pd +from tqdm import tqdm +from datetime import datetime + + + +async def save_as_json(symbol, data): + with open(f"json/share-statistics/{symbol}.json", 'w') as file: + ujson.dump(data, file) + + +query_template = f""" + SELECT + historicalShares + FROM + stocks + WHERE + symbol = ? 
+""" + + +def filter_data_quarterly(data): + # Generate a range of quarter-end dates from the start to the end date + start_date = data[0]['date'] + end_date = datetime.today().strftime('%Y-%m-%d') + quarter_ends = pd.date_range(start=start_date, end=end_date, freq='QE').strftime('%Y-%m-%d').tolist() + + # Filter data to keep only entries with dates matching quarter-end dates + filtered_data = [entry for entry in data if entry['date'] in quarter_ends] + + return filtered_data + + +async def get_data(ticker, con): + + try: + df = pd.read_sql_query(query_template, con, params=(ticker,)) + shareholder_statistics = ujson.loads(df.to_dict()['historicalShares'][0]) + # Keys to keep + keys_to_keep = ["date","floatShares", "outstandingShares"] + + # Create new list with only the specified keys and convert floatShares and outstandingShares to integers + shareholder_statistics = [ + {key: int(d[key]) if key in ["floatShares", "outstandingShares"] else d[key] + for key in keys_to_keep} + for d in shareholder_statistics + ] + + shareholder_statistics = sorted(shareholder_statistics, key=lambda x: datetime.strptime(x['date'], '%Y-%m-%d'), reverse=False) + # Filter out only quarter-end dates + shareholder_statistics = filter_data_quarterly(shareholder_statistics) + except Exception as e: + #print(e) + shareholder_statistics = [] + + return shareholder_statistics + + +async def run(): + + con = sqlite3.connect('stocks.db') + + cursor = con.cursor() + cursor.execute("PRAGMA journal_mode = wal") + cursor.execute("SELECT DISTINCT symbol FROM stocks") + stock_symbols = [row[0] for row in cursor.fetchall()] + + for ticker in tqdm(stock_symbols): + shareholder_statistics = await get_data(ticker, con) + if len(shareholder_statistics) > 0: + await save_as_json(ticker, shareholder_statistics) + + con.close() + +try: + asyncio.run(run()) +except Exception as e: + print(e) diff --git a/app/cron_shareholders.py b/app/cron_shareholders.py index ff9f272..1236965 100644 --- 
a/app/cron_shareholders.py +++ b/app/cron_shareholders.py @@ -4,37 +4,110 @@ import asyncio import pandas as pd from tqdm import tqdm +import requests +from bs4 import BeautifulSoup +import re + +class Short_Data: + def __init__(self, data): + self.short_interest_ratio_days_to_cover = data.get('shortInterestRatioDaysToCover') + self.short_percent_of_float = data.get('shortPercentOfFloat') + self.short_percent_increase_decrease = data.get('shortPercentIncreaseDecrease') + self.short_interest_current_shares_short = data.get('shortInterestCurrentSharesShort') + self.shares_float = data.get('sharesFloat') + self.short_interest_prior_shares_short = data.get('shortInterestPriorSharesShort') + self.percent_from_52_wk_high = data.get('percentFrom52WkHigh') + self.percent_from_50_day_ma = data.get('percentFrom50DayMa') + self.percent_from_200_day_ma = data.get('percentFrom200DayMa') + self.percent_from_52_wk_low = data.get('percentFrom52WkLow') + self.n_52_week_performance = data.get('n52WeekPerformance') + self.trading_volume_today_vs_avg = data.get('tradingVolumeTodayVsAvg') + self.trading_volume_today = data.get('tradingVolumeToday') + self.trading_volume_average = data.get('tradingVolumeAverage') + self.market_cap = data.get('marketCap') + self.percent_owned_by_insiders = data.get('percentOwnedByInsiders') + self.percent_owned_by_institutions = data.get('percentOwnedByInstitutions') + self.price = data.get('price') + self.name = data.get('name') + self.ticker = data.get('ticker') + +def camel_case(s): + s = re.sub(r'[^A-Za-z0-9 ]+', '', s) + s = s.replace('%', 'Percent') + s = re.sub(r'(\d)', r'n\1', s) + s = re.sub(r'(\d+)', '', s) + parts = s.split() + return parts[0].lower() + ''.join(word.capitalize() for word in parts[1:]) + +def parse_stock_data(html): + soup = BeautifulSoup(html, 'html.parser') + table_rows = soup.select('div.inner_box_2 > table > tr') + parsed_data = {} + + for row in table_rows: + try: + key_element = row.select_one('td:nth-child(1)') + 
value_element = row.select_one('td:nth-child(2)') + if key_element and value_element: + key = camel_case(key_element.get_text().strip()) + value = value_element.get_text().strip() + + # Clean and convert value + if 'view' in value.lower(): + value = None + else: + value = re.sub(r'[\s%,\$]', '', value) + value = float(value) if value and value.replace('.', '', 1).isdigit() else value + + if key: + parsed_data[key] = value + except: + pass + + # Add price, name, and ticker separately + price = float(table_rows[0].select_one('td:nth-child(2)').get_text().strip().replace('$', '') or 'NaN') + name = table_rows[0].select_one('td').get_text().strip() + ticker = table_rows[1].select_one('td').get_text().strip() + + parsed_data.update({ + 'price': price, + 'name': name, + 'ticker': ticker + }) + + return Short_Data(parsed_data) if name.lower() != 'not available - try again' else None + +def shortsqueeze(ticker=''): + try: + url = f'https://shortsqueeze.com/?symbol={ticker}' + response = requests.get(url, allow_redirects=False) + if response.status_code == 200: + return parse_stock_data(response.text) + else: + return None + except Exception as e: + print(f"An error occurred: {e}") + return None -query_template = """ - SELECT - analyst_estimates, income - FROM - stocks - WHERE - symbol = ? -""" async def save_as_json(symbol, data): with open(f"json/shareholders/{symbol}.json", 'w') as file: ujson.dump(data, file) -async def get_data(ticker, etf_symbols, con, etf_con): - if ticker in etf_symbols: - table_name = 'etfs' - else: - table_name = 'stocks' +query_template = f""" + SELECT + shareholders + FROM + stocks + WHERE + symbol = ? +""" + +async def get_data(ticker, con): - query_template = f""" - SELECT - shareholders - FROM - {table_name} - WHERE - symbol = ? 
- """ try: - df = pd.read_sql_query(query_template, etf_con if table_name == 'etfs' else con, params=(ticker,)) + df = pd.read_sql_query(query_template, con, params=(ticker,)) shareholders_list = ujson.loads(df.to_dict()['shareholders'][0]) # Keys to keep keys_to_keep = ["cik","ownership", "investorName", "weight", "sharesNumber", "marketValue"] @@ -54,27 +127,18 @@ async def get_data(ticker, etf_symbols, con, etf_con): async def run(): con = sqlite3.connect('stocks.db') - etf_con = sqlite3.connect('etf.db') cursor = con.cursor() cursor.execute("PRAGMA journal_mode = wal") cursor.execute("SELECT DISTINCT symbol FROM stocks") stock_symbols = [row[0] for row in cursor.fetchall()] - etf_cursor = etf_con.cursor() - etf_cursor.execute("PRAGMA journal_mode = wal") - etf_cursor.execute("SELECT DISTINCT symbol FROM etfs") - etf_symbols = [row[0] for row in etf_cursor.fetchall()] - - total_symbols = stock_symbols + etf_symbols - - for ticker in tqdm(total_symbols): - shareholders_list = await get_data(ticker, etf_symbols, con, etf_con) + for ticker in tqdm(stock_symbols): + shareholders_list = await get_data(ticker, con) if len(shareholders_list) > 0: await save_as_json(ticker, shareholders_list) con.close() - etf_con.close() try: asyncio.run(run()) diff --git a/app/main.py b/app/main.py index 9ee4b86..e2bc540 100755 --- a/app/main.py +++ b/app/main.py @@ -2719,6 +2719,25 @@ async def get_enterprise_values(data:TickerData): redis_client.expire(cache_key, 3600*3600) # Set cache expiration time to 1 day return res + +@app.post("/share-statistics") +async def get_enterprise_values(data:TickerData): + ticker = data.ticker.upper() + cache_key = f"share-statistics-{ticker}" + cached_result = redis_client.get(cache_key) + if cached_result: + return ujson.loads(cached_result) + try: + with open(f"json/share-statistics/{ticker}.json", 'r') as file: + res = ujson.load(file) + except: + res = [] + + redis_client.set(cache_key, ujson.dumps(res)) + redis_client.expire(cache_key, 
3600*24) # Set cache expiration time to 1 day + return res + + @app.post("/politician-stats") async def get_politician_stats(data:PoliticianId): politician_id = data.politicianId.lower() diff --git a/app/primary_cron_job.py b/app/primary_cron_job.py index 75d7d52..c82e364 100755 --- a/app/primary_cron_job.py +++ b/app/primary_cron_job.py @@ -88,6 +88,17 @@ def run_shareholders(): ] subprocess.run(command) +def run_share_statistics(): + week = datetime.today().weekday() + if week <= 5: + subprocess.run(["python3", "cron_share_statistics.py"]) + command = [ + "sudo", "rsync", "-avz", "-e", "ssh", + "/root/backend/app/json/share-statistics", + f"root@{useast_ip_address}:/root/backend/app/json" + ] + subprocess.run(command) + def run_cron_market_movers(): week = datetime.today().weekday() if week <= 4: @@ -271,6 +282,7 @@ schedule.every().day.at("07:00").do(run_threaded, run_ta_rating).tag('ta_rating_ schedule.every().day.at("08:00").do(run_threaded, run_cron_insider_trading).tag('insider_trading_job') schedule.every().day.at("09:00").do(run_threaded, run_congress_trading).tag('congress_job') schedule.every().day.at("10:00").do(run_threaded, run_shareholders).tag('shareholders_job') +schedule.every().day.at("10:15").do(run_threaded, run_share_statistics).tag('share_statistics_job') schedule.every().day.at("13:30").do(run_threaded, run_stockdeck).tag('stockdeck_job') schedule.every().day.at("13:40").do(run_threaded, run_analyst_estimate).tag('analyst_estimate_job')