bugfixing share statistics

MuslemRahimi 2025-04-16 00:05:05 +02:00
parent e1ed1e68a7
commit 5a3211205a

@@ -1,107 +1,134 @@
import orjson
import sqlite3
import asyncio
import pandas as pd
from tqdm import tqdm
from datetime import datetime
import time
import csv
from io import StringIO
from pathlib import Path
import requests

# Analyst EPS estimates are keyed by calendar year; forward P/E uses next year's estimate
next_year = datetime.now().year + 1
async def save_as_json(symbol, forward_pe_dict, short_dict):
    with open(f"json/share-statistics/{symbol}.json", 'wb') as file:
        file.write(orjson.dumps(short_dict))
    with open(f"json/forward-pe/{symbol}.json", 'wb') as file:
        file.write(orjson.dumps(forward_pe_dict))

# Load the pre-computed stock screener data once and index it by symbol
with open("json/stock-screener/data.json", 'rb') as file:
    stock_screener_data = orjson.loads(file.read())
stock_screener_data_dict = {item['symbol']: item for item in stock_screener_data}
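# Each screener record is assumed to be a flat per-symbol dict; the only fields read
# below are 'sharesOutStanding' and 'floatShares' (see get_data).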
# SQL template for pulling the stored historicalShares JSON blob for one symbol
query_template = """
    SELECT
        historicalShares
    FROM
        stocks
    WHERE
        symbol = ?
"""
def filter_data_quarterly(data):
    # Generate a range of quarter-end dates from the first data point to today
    start_date = data[0]['date']
    end_date = datetime.today().strftime('%Y-%m-%d')
    quarter_ends = pd.date_range(start=start_date, end=end_date, freq='QE').strftime('%Y-%m-%d').tolist()
    # Keep only entries whose dates fall on a quarter end
    filtered_data = [entry for entry in data if entry['date'] in quarter_ends]
    return filtered_data

def calculate_forward_pe(symbol):
    estimates_path = Path("json/analyst-estimate") / f"{symbol}.json"
    quote_path = Path("json/quote") / f"{symbol}.json"
    try:
        with estimates_path.open('rb') as file:
            estimates = orjson.loads(file.read())
        with quote_path.open('rb') as file:
            price_data = orjson.loads(file.read())
        price = price_data.get('price')
        # Forward P/E = current price / next year's average estimated EPS
        estimate_item = next((item for item in estimates if item.get('date') == next_year), None)
        if estimate_item:
            eps = estimate_item.get('estimatedEpsAvg')
            if eps and eps != 0:
                return round(price / eps, 2)
    except (FileNotFoundError, ValueError, KeyError):
        return None
    return None

def download_csv_data(url):
    response = requests.get(url)
    response.raise_for_status()
    return response.text
def parse_csv_data(csv_text):
    # The FINRA short interest file is pipe-delimited with a header row
    csv_file = StringIO(csv_text)
    reader = csv.DictReader(csv_file, delimiter='|')
    return list(reader)
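# Usage sketch (illustrative; assumes network access to the FINRA URL used in run()):
#   records = parse_csv_data(download_csv_data(url))
#   records[0]['symbolCode']  # each row is a dict keyed by the CSV header fields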
def get_short_data(ticker, outstanding_shares, float_shares, record_dict):
    row = record_dict.get(ticker.upper())
    if not row:
        return {'sharesShort': None, 'shortRatio': None, 'sharesShortPriorMonth': None,
                'shortOutStandingPercent': None, 'shortFloatPercent': None}
    # CSV values arrive as strings; fall back to 0 when a field is missing or malformed
    try:
        shares_short = int(row.get('currentShortPositionQuantity', 0))
    except ValueError:
        shares_short = 0
    try:
        shares_short_prior = int(row.get('previousShortPositionQuantity', 0))
    except ValueError:
        shares_short_prior = 0
    try:
        short_ratio = float(row.get('daysToCoverQuantity', 0))
    except ValueError:
        short_ratio = 0.0
    short_outstanding_percent = round((shares_short / outstanding_shares) * 100, 2) if outstanding_shares else 0
    short_float_percent = round((shares_short / float_shares) * 100, 2) if float_shares else 0
    return {
        'sharesShort': shares_short,
        'shortRatio': short_ratio,
        'sharesShortPriorMonth': shares_short_prior,
        'shortOutStandingPercent': short_outstanding_percent,
        'shortFloatPercent': short_float_percent
    }
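# Example (made-up numbers, field names as read above): given
#   record_dict['NVDA'] = {'currentShortPositionQuantity': '250000000',
#                          'previousShortPositionQuantity': '240000000',
#                          'daysToCoverQuantity': '1.2'}
# get_short_data('nvda', 24_000_000_000, 2_300_000_000, record_dict) returns
# sharesShort=250000000, shortRatio=1.2, shortOutStandingPercent=1.04, shortFloatPercent=10.87.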
async def get_data(ticker, record_dict):
    try:
        latest_outstanding_shares = stock_screener_data_dict[ticker]['sharesOutStanding']
        latest_float_shares = stock_screener_data_dict[ticker]['floatShares']
        forward_pe = calculate_forward_pe(ticker)
        forward_pe_dict = {'forwardPE': forward_pe}
        short_data = get_short_data(ticker, latest_outstanding_shares, latest_float_shares, record_dict)
        return forward_pe_dict, short_data
    except Exception as e:
        print(e)
        return {}, {}
async def run():
    # FINRA bi-weekly short interest file (pipe-delimited CSV)
    url = "https://cdn.finra.org/equity/otcmarket/biweekly/shrt20250228.csv"
    record_dict = {}
    try:
        csv_text = download_csv_data(url)
        records = parse_csv_data(csv_text)
        # Index the FINRA rows by ticker symbol for fast lookup
        for row in records:
            symbol_code = row.get('symbolCode', '').strip().upper()
            if symbol_code:
                record_dict[symbol_code] = row
    except Exception as e:
        print(f"Error processing CSV data: {e}")

    # The database is only needed for the list of symbols to process
    con = sqlite3.connect('stocks.db')
    cursor = con.cursor()
    cursor.execute("PRAGMA journal_mode = wal")
    cursor.execute("SELECT DISTINCT symbol FROM stocks WHERE symbol NOT LIKE '%.%'")
    stock_symbols = [row[0] for row in cursor.fetchall()]
    con.close()

    # Testing mode
    #stock_symbols = ['NVDA','AAPL']
    for ticker in tqdm(stock_symbols):
        try:
            forward_pe_dict, short_dict = await get_data(ticker, record_dict)
            if forward_pe_dict and short_dict:
                await save_as_json(ticker, forward_pe_dict, short_dict)
        except Exception as e:
            print(f"Error processing {ticker}: {e}")

try:
    asyncio.run(run())
except Exception as e:
    print(e)
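# Spot-check sketch (illustrative; assumes a run has produced output for AAPL):
#   with open("json/share-statistics/AAPL.json", "rb") as f:
#       print(orjson.loads(f.read()))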