From d48f92f786adcbffb6bab7c19fd760f33111d4f1 Mon Sep 17 00:00:00 2001 From: MuslemRahimi Date: Sun, 18 Aug 2024 22:35:13 +0200 Subject: [PATCH] optimize share statistics --- app/cron_share_statistics.py | 160 ++++++++++++++++------------------- 1 file changed, 74 insertions(+), 86 deletions(-) diff --git a/app/cron_share_statistics.py b/app/cron_share_statistics.py index 17b7db1..6ca1cd8 100644 --- a/app/cron_share_statistics.py +++ b/app/cron_share_statistics.py @@ -5,108 +5,96 @@ import pandas as pd from tqdm import tqdm from datetime import datetime import yfinance as yf -import time +# Constants +JSON_DIR = "json/" +QUARTERLY_FREQ = 'QE' -async def save_as_json(symbol, forward_pe_dict, short_dict): - with open(f"json/share-statistics/{symbol}.json", 'w') as file: - ujson.dump(short_dict, file) - with open(f"json/forward-pe/{symbol}.json", 'w') as file: - ujson.dump(forward_pe_dict, file) - - -query_template = f""" - SELECT - historicalShares - FROM - stocks - WHERE - symbol = ? +# SQL Query +QUERY_TEMPLATE = """ + SELECT historicalShares + FROM stocks + WHERE symbol = ? """ -def filter_data_quarterly(data): - # Generate a range of quarter-end dates from the start to the end date - start_date = data[0]['date'] - end_date = datetime.today().strftime('%Y-%m-%d') - quarter_ends = pd.date_range(start=start_date, end=end_date, freq='QE').strftime('%Y-%m-%d').tolist() +def filter_quarterly_data(data): + """Filter data to keep only quarter-end dates.""" + quarter_ends = pd.date_range(start=data[0]['date'], end=datetime.now(), freq=QUARTERLY_FREQ).strftime('%Y-%m-%d').tolist() + return [entry for entry in data if entry['date'] in quarter_ends] - # Filter data to keep only entries with dates matching quarter-end dates - filtered_data = [entry for entry in data if entry['date'] in quarter_ends] - - return filtered_data - -def get_yahoo_data(ticker, outstanding_shares, float_shares): +def get_yahoo_finance_data(ticker, shares): + """Fetch and process Yahoo Finance data.""" try: - data_dict = yf.Ticker(ticker).info - forward_pe = round(data_dict['forwardPE'],2) - short_outstanding_percent = round((data_dict['sharesShort']/outstanding_shares)*100,2) - short_float_percent = round((data_dict['sharesShort']/float_shares)*100,2) - return {'forwardPE': forward_pe}, {'sharesShort': data_dict['sharesShort'], 'shortRatio': data_dict['shortRatio'], 'sharesShortPriorMonth': data_dict['sharesShortPriorMonth'], 'shortOutStandingPercent': short_outstanding_percent, 'shortFloatPercent': short_float_percent} - except: - return {'forwardPE': 0}, {'sharesShort': 0, 'shortRatio': 0, 'sharesShortPriorMonth': 0, 'shortOutStandingPercent': 0, 'shortFloatPercent': 0} - - -async def get_data(ticker, con): - - try: - df = pd.read_sql_query(query_template, con, params=(ticker,)) - shareholder_statistics = ujson.loads(df.to_dict()['historicalShares'][0]) - # Keys to keep - keys_to_keep = ["date","floatShares", "outstandingShares"] - - # Create new list with only the specified keys and convert floatShares and outstandingShares to integers - shareholder_statistics = [ - {key: int(d[key]) if key in ["floatShares", "outstandingShares"] else d[key] - for key in keys_to_keep} - for d in shareholder_statistics - ] - - shareholder_statistics = sorted(shareholder_statistics, key=lambda x: datetime.strptime(x['date'], '%Y-%m-%d'), reverse=False) - - latest_outstanding_shares = shareholder_statistics[-1]['outstandingShares'] - latest_float_shares = shareholder_statistics[-1]['floatShares'] - - # Filter out only quarter-end dates - historical_shares = filter_data_quarterly(shareholder_statistics) - - forward_pe_data, short_data = get_yahoo_data(ticker, latest_outstanding_shares, latest_float_shares) - short_data = {**short_data, 'latestOutstandingShares': latest_outstanding_shares, 'latestFloatShares': latest_float_shares,'historicalShares': historical_shares} + info = yf.Ticker(ticker).info + return { + 'forwardPE': round(info.get('forwardPE', 0), 2), + 'short': { + 'shares': info.get('sharesShort', 0), + 'ratio': info.get('shortRatio', 0), + 'priorMonth': info.get('sharesShortPriorMonth', 0), + 'outstandingPercent': round((info.get('sharesShort', 0) / shares['outstandingShares']) * 100, 2), + 'floatPercent': round((info.get('sharesShort', 0) / shares['floatShares']) * 100, 2) + } + } except Exception as e: - print(e) - short_data = {} - forward_pe_data = {} + #print(ticker) + #print(e) + #print("============") + return {'forwardPE': 0, 'short': {k: 0 for k in ['shares', 'ratio', 'priorMonth', 'outstandingPercent', 'floatPercent']}} - return forward_pe_data, short_data +async def save_json(symbol, data): + """Save data to JSON files.""" + for key, path in [("forwardPE", f"{JSON_DIR}forward-pe/{symbol}.json"), ("short", f"{JSON_DIR}share-statistics/{symbol}.json")]: + with open(path, 'w') as file: + ujson.dump(data.get(key, {}), file) +async def process_ticker(ticker, con): + """Process a single ticker.""" + try: + df = pd.read_sql_query(QUERY_TEMPLATE, con, params=(ticker,)) + stats = ujson.loads(df.to_dict()['historicalShares'][0]) + + # Filter and convert data + filtered_stats = [ + {k: int(v) if k in ["floatShares", "outstandingShares"] else v + for k, v in d.items() if k in ["date", "floatShares", "outstandingShares"]} + for d in sorted(stats, key=lambda x: datetime.strptime(x['date'], '%Y-%m-%d')) + ] + + latest_shares = filtered_stats[-1] + + quarterly_stats = filter_quarterly_data(filtered_stats) + + data = get_yahoo_finance_data(ticker, latest_shares) + data['short'].update({ + 'latestOutstandingShares': latest_shares['outstandingShares'], + 'latestFloatShares': latest_shares['floatShares'], + 'historicalShares': quarterly_stats + }) + + await save_json(ticker, data) + return True + except Exception as e: + print(f"Error processing {ticker}: {e}") + return False async def run(): - + """Main function to process all tickers.""" con = sqlite3.connect('stocks.db') - - cursor = con.cursor() - cursor.execute("PRAGMA journal_mode = wal") - cursor.execute("SELECT DISTINCT symbol FROM stocks WHERE symbol NOT LIKE '%.%'") - stock_symbols = [row[0] for row in cursor.fetchall()] + con.execute("PRAGMA journal_mode = wal") - counter = 0 + with con: + stock_symbols = [row[0] for row in con.execute("SELECT DISTINCT symbol FROM stocks WHERE symbol NOT LIKE '%.%'")] + processed = 0 for ticker in tqdm(stock_symbols): - try: - forward_pe_dict, short_dict = await get_data(ticker, con) - if forward_pe_dict.keys() and short_dict.keys(): - await save_as_json(ticker, forward_pe_dict, short_dict) - - counter += 1 - if counter % 50 == 0: - print(f"Processed {counter} tickers, waiting for 60 seconds...") + if await process_ticker(ticker, con): + processed += 1 + if processed % 50 == 0: + print(f"Processed {processed} tickers, waiting for 60 seconds...") await asyncio.sleep(60) - except Exception as e: - print(ticker, e) - con.close() -try: - asyncio.run(run()) -except Exception as e: - print(e) +if __name__ == "__main__": + asyncio.run(run()) \ No newline at end of file