Bug fix: share statistics generation

This commit is contained in:
MuslemRahimi 2025-04-16 00:05:05 +02:00
parent e1ed1e68a7
commit 5a3211205a

View File

@ -1,107 +1,134 @@
import orjson
import sqlite3
import asyncio
import pandas as pd
from tqdm import tqdm
from datetime import datetime
import yfinance as yf
import csv
from io import StringIO
from pathlib import Path
import requests

# Calendar year used when selecting the analyst EPS estimate for forward P/E.
next_year = datetime.now().year + 1
async def save_as_json(symbol, forward_pe_dict, short_dict):
    """Persist the share-statistics and forward-PE snapshots for *symbol*.

    Writes orjson-encoded bytes to json/share-statistics/<symbol>.json and
    json/forward-pe/<symbol>.json (files are opened in binary mode because
    orjson.dumps returns bytes).
    """
    with open(f"json/share-statistics/{symbol}.json", 'wb') as file:
        file.write(orjson.dumps(short_dict))
    with open(f"json/forward-pe/{symbol}.json", 'wb') as file:
        file.write(orjson.dumps(forward_pe_dict))
# Preload the stock-screener dump once at import time and index it by symbol
# so per-ticker lookups in get_data() are O(1).
with open("json/stock-screener/data.json", 'rb') as file:
    stock_screener_data = orjson.loads(file.read())
stock_screener_data_dict = {item['symbol']: item for item in stock_screener_data}
def calculate_forward_pe(symbol):
    """Return next year's forward P/E for *symbol*, or None if unavailable.

    Reads the latest price from json/quote/<symbol>.json and the analyst EPS
    estimate whose 'date' equals the module-level ``next_year`` from
    json/analyst-estimate/<symbol>.json. Any missing file, malformed JSON,
    missing price, or non-numeric EPS yields None instead of raising.
    """
    estimates_path = Path("json/analyst-estimate") / f"{symbol}.json"
    quote_path = Path("json/quote") / f"{symbol}.json"
    try:
        with estimates_path.open('rb') as file:
            estimates = orjson.loads(file.read())
        with quote_path.open('rb') as file:
            price_data = orjson.loads(file.read())
        price = price_data.get('price')
        estimate_item = next((item for item in estimates if item.get('date') == next_year), None)
        # price may legitimately be absent from the quote file; without this
        # guard, `price / eps` would raise an uncaught TypeError on None.
        if estimate_item and price is not None:
            eps = estimate_item.get('estimatedEpsAvg')
            if eps:  # skips both None and 0 (division by zero)
                return round(price / eps, 2)
    # TypeError included so a non-numeric price/eps degrades to None as well.
    except (FileNotFoundError, ValueError, KeyError, TypeError):
        return None
    return None
def download_csv_data(url):
    """Download the FINRA short-interest CSV at *url* and return its text.

    Raises requests.HTTPError for non-2xx responses (propagated to the
    caller, which treats the download as best-effort).
    """
    # A timeout is mandatory: requests.get with no timeout can block the
    # whole batch job indefinitely on a stalled connection.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    return response.text
def parse_csv_data(csv_text):
    """Parse FINRA's pipe-delimited short-interest CSV into row dicts.

    The first line is treated as the header; each subsequent line becomes a
    dict keyed by the header column names.
    """
    reader = csv.DictReader(StringIO(csv_text), delimiter='|')
    return list(reader)
def get_short_data(ticker, outstanding_shares, float_shares, record_dict):
    """Build the short-interest dict for *ticker* from FINRA CSV records.

    Returns all-None values when the ticker has no FINRA row. Malformed or
    missing numeric fields degrade to 0 / 0.0; zero share counts yield 0%
    instead of dividing by zero.
    """
    row = record_dict.get(ticker.upper())
    if not row:
        return {'sharesShort': None, 'shortRatio': None, 'sharesShortPriorMonth': None,
                'shortOutStandingPercent': None, 'shortFloatPercent': None}

    def _to_int(value):
        # csv.DictReader yields None for missing columns and '' for empty
        # ones; int(None) raises TypeError, which the original except
        # ValueError did not catch — both must degrade to 0.
        try:
            return int(value)
        except (ValueError, TypeError):
            return 0

    def _to_float(value):
        try:
            return float(value)
        except (ValueError, TypeError):
            return 0.0

    shares_short = _to_int(row.get('currentShortPositionQuantity', 0))
    shares_short_prior = _to_int(row.get('previousShortPositionQuantity', 0))
    short_ratio = _to_float(row.get('daysToCoverQuantity', 0))

    # Guard the denominators: screener data can carry 0/None share counts.
    short_outstanding_percent = round((shares_short / outstanding_shares) * 100, 2) if outstanding_shares else 0
    short_float_percent = round((shares_short / float_shares) * 100, 2) if float_shares else 0

    return {
        'sharesShort': shares_short,
        'shortRatio': short_ratio,
        'sharesShortPriorMonth': shares_short_prior,
        'shortOutStandingPercent': short_outstanding_percent,
        'shortFloatPercent': short_float_percent
    }
async def get_data(ticker, record_dict):
    """Assemble (forward_pe_dict, short_dict) for *ticker*.

    Share counts come from the preloaded stock-screener index; short-interest
    figures come from the FINRA *record_dict*. Returns ({}, {}) on any
    error (e.g. ticker missing from the screener data) so the caller can
    skip writing output for this symbol.
    """
    try:
        screener_item = stock_screener_data_dict[ticker]
        latest_outstanding_shares = screener_item['sharesOutStanding']
        latest_float_shares = screener_item['floatShares']
        forward_pe_dict = {'forwardPE': calculate_forward_pe(ticker)}
        short_data = get_short_data(ticker, latest_outstanding_shares,
                                    latest_float_shares, record_dict)
        return forward_pe_dict, short_data
    except Exception as e:
        print(e)
        return {}, {}
async def run():
    """Fetch FINRA short-interest data and regenerate per-symbol JSON files."""
    # NOTE(review): the biweekly snapshot date is hard-coded — this URL goes
    # stale every cycle; consider deriving the latest publication date.
    url = "https://cdn.finra.org/equity/otcmarket/biweekly/shrt20250228.csv"
    record_dict = {}
    try:
        csv_text = download_csv_data(url)
        for row in parse_csv_data(csv_text):
            # `or ''` guards rows where the column is missing entirely
            # (DictReader yields None), so one bad row can't abort the
            # whole record_dict build with an AttributeError.
            symbol_code = (row.get('symbolCode') or '').strip().upper()
            if symbol_code:
                record_dict[symbol_code] = row
    except Exception as e:
        # Best-effort: continue with an empty record_dict so forward-PE
        # output is still produced when the FINRA download fails.
        print(f"Error processing CSV data: {e}")

    con = sqlite3.connect('stocks.db')
    try:
        cursor = con.cursor()
        cursor.execute("PRAGMA journal_mode = wal")
        cursor.execute("SELECT DISTINCT symbol FROM stocks WHERE symbol NOT LIKE '%.%'")
        stock_symbols = [row[0] for row in cursor.fetchall()]
        # Testing mode
        #stock_symbols = ['NVDA','AAPL']
        for ticker in tqdm(stock_symbols):
            try:
                forward_pe_dict, short_dict = await get_data(ticker, record_dict)
                if forward_pe_dict and short_dict:
                    await save_as_json(ticker, forward_pe_dict, short_dict)
            except Exception as e:
                print(f"Error processing {ticker}: {e}")
    finally:
        # Ensure the DB handle is released even if the symbol loop raises.
        con.close()
try: try:
asyncio.run(run()) asyncio.run(run())