backend/app/cron_options_stats.py
2025-01-27 21:25:24 +01:00

252 lines
9.6 KiB
Python

from __future__ import print_function
import asyncio
import aiohttp
import time
import intrinio_sdk as intrinio
from intrinio_sdk.rest import ApiException
from datetime import datetime, timedelta
import ast
import orjson
from tqdm import tqdm
import aiohttp
from concurrent.futures import ThreadPoolExecutor
import sqlite3
from dotenv import load_dotenv
import os
import re
# Load variables from a local .env file into the environment before the API
# key is read below.
load_dotenv()
api_key = os.getenv('INTRINIO_API_KEY')  # None when the variable is not set
# Hand the key to the Intrinio SDK client; the raw key is also sent directly
# as a Bearer token by get_price_batch_realtime.
intrinio.ApiClient().set_api_key(api_key)
#intrinio.ApiClient().allow_retries(True)
# Date captured once, when the module is loaded.
current_date = datetime.now().date()
# The next five values are forwarded as query parameters by
# get_price_batch_realtime; all of them are currently empty strings.
source = ''
show_stats = ''
stock_price_source = ''
model = ''
show_extended_price = ''
# NOTE(review): after, before, include_related_symbols, page_size,
# MAX_CONCURRENT_REQUESTS and BATCH_SIZE are not referenced by any code visible
# in this file; main() batches contracts in groups of 250, not BATCH_SIZE.
after = datetime.today().strftime('%Y-%m-%d')  # today's date as YYYY-MM-DD
before = '2100-12-31'
include_related_symbols = False
page_size = 5000
MAX_CONCURRENT_REQUESTS = 50 # Adjust based on API rate limits
BATCH_SIZE = 1500
def get_expiration_date(option_symbol):
    """Return the expiration date encoded in an option contract symbol.

    The symbol must begin with uppercase letters, followed by a six-digit
    ``YYMMDD`` expiration, a ``C`` or ``P`` flag and at least one digit,
    for example ``AAPL250117C00150000``.

    Args:
        option_symbol: contract symbol string to parse.

    Returns:
        datetime.date: the expiration date.

    Raises:
        ValueError: if the symbol does not begin with that layout, or the six
            digits do not form a valid date.
    """
    parsed = re.match(r"([A-Z]+)(\d{6})([CP])(\d+)", option_symbol)
    if parsed is None:
        raise ValueError(f"Invalid option_symbol format: {option_symbol}")
    # Group 2 holds the YYMMDD expiration; the other groups only validate
    # the overall layout of the symbol.
    yymmdd = parsed.group(2)
    return datetime.strptime(yymmdd, "%y%m%d").date()
# Database connection and symbol retrieval
def _fetch_symbols(db_path, query):
    """Run ``query`` against ``db_path`` and return the first column of every row."""
    con = sqlite3.connect(db_path)
    try:
        cursor = con.cursor()
        cursor.execute("PRAGMA journal_mode = wal")
        cursor.execute(query)
        return [row[0] for row in cursor.fetchall()]
    finally:
        # ``with sqlite3.connect(...)`` only commits or rolls back; it never
        # closes the connection, so close it explicitly to avoid leaking it.
        con.close()


def get_total_symbols():
    """Return every stock and ETF symbol stored in the local SQLite databases.

    Stock symbols are read from the ``stocks`` table of ``stocks.db``
    (symbols containing a dot are excluded) and ETF symbols from the ``etfs``
    table of ``etf.db``. Both paths are relative to the current working
    directory. Each database is switched to WAL journal mode before querying.

    Returns:
        list: the distinct stock symbols followed by the distinct ETF symbols.

    Raises:
        sqlite3.Error: if a database cannot be opened or queried.
    """
    stocks_symbols = _fetch_symbols(
        'stocks.db',
        "SELECT DISTINCT symbol FROM stocks WHERE symbol NOT LIKE '%.%'",
    )
    etf_symbols = _fetch_symbols('etf.db', "SELECT DISTINCT symbol FROM etfs")
    return stocks_symbols + etf_symbols
def get_tickers_from_directory():
    """List the tickers that have a historical options JSON file on disk.

    Scans ``json/options-historical-data/companies`` (relative to the current
    working directory) and returns every ``*.json`` file name with the
    ``.json`` text removed. Any failure, including a missing directory, is
    printed and produces an empty list.

    Returns:
        list: ticker strings, in the order ``os.listdir`` yields the files.
    """
    directory = "json/options-historical-data/companies"
    tickers = []
    try:
        # A missing directory is reported through the same handler as every
        # other failure, so raise instead of returning early.
        if not os.path.exists(directory):
            raise FileNotFoundError(f"The directory '{directory}' does not exist.")
        for entry in os.listdir(directory):
            if entry.endswith(".json"):
                tickers.append(entry.replace(".json", ""))
    except Exception as e:
        print(f"An error occurred: {e}")
        return []
    return tickers
def get_contracts_from_directory(symbol):
    """Return the contract identifiers stored on disk for ``symbol``.

    Lists ``json/all-options-contracts/<symbol>/`` (relative to the current
    working directory) and returns every ``*.json`` file name with the
    ``.json`` text removed.

    Args:
        symbol: ticker whose contract directory should be listed.

    Returns:
        list: contract identifiers, or an empty list when the directory is
        missing or cannot be read.
    """
    directory = f"json/all-options-contracts/{symbol}/"
    try:
        filenames = os.listdir(directory)
    except (OSError, ValueError):
        # OSError: directory missing, not a directory, or unreadable.
        # ValueError: path rejected outright (e.g. embedded NUL character).
        # A bare ``except`` would also swallow KeyboardInterrupt/SystemExit.
        return []
    return [file.replace(".json", "") for file in filenames if file.endswith(".json")]
def save_json(data, symbol):
    """Write ``data`` as JSON to ``json/options-stats/companies/<symbol>.json``.

    The target directory is created when it does not exist and an existing
    file for the symbol is overwritten.

    Args:
        data: object to serialize with ``orjson.dumps``.
        symbol: ticker used as the file name.

    Raises:
        orjson.JSONEncodeError: if ``data`` cannot be serialized.
        OSError: if the directory or file cannot be written.
    """
    directory = "json/options-stats/companies"
    # Serialize before opening the file: opening in 'wb' truncates it, so a
    # serialization failure would otherwise wipe the previously saved stats.
    payload = orjson.dumps(data)
    os.makedirs(directory, exist_ok=True)
    with open(f"{directory}/{symbol}.json", 'wb') as file:
        file.write(payload)
def safe_round(value):
    """Round ``value`` to two decimals when it can be converted to a float.

    Args:
        value: number, numeric string, or any other object.

    Returns:
        The value converted to ``float`` and rounded to two decimal places,
        or ``value`` itself, unchanged, when ``float(value)`` raises
        ``ValueError`` or ``TypeError``.
    """
    try:
        number = float(value)
    except (ValueError, TypeError):
        # Not numeric: hand the input back untouched.
        return value
    return round(number, 2)
def _aggregate_contracts(contracts_data):
    """Fold per-contract quotes into premium, volume, open-interest and GEX totals."""
    res_dict = {
        'total_premium': 0, 'call_premium': 0, 'put_premium': 0,
        'volume': 0, 'call_volume': 0, 'put_volume': 0,
        'gex': 0,
        'total_open_interest': 0, 'call_open_interest': 0, 'put_open_interest': 0,
        'iv_list': [],
        'time': None
    }
    for item in contracts_data:
        try:
            price_data = item.get('price', {})
            stats_data = item.get('stats', {})
            option_data = item.get('option', {})
            option_type = option_data.get('type', '').lower()

            raw_volume = price_data.get('volume')
            volume = int(raw_volume) if raw_volume is not None else 0
            raw_open_interest = price_data.get('open_interest')
            open_interest = int(raw_open_interest) if raw_open_interest is not None else 0
            last_price = price_data.get('last', 0) or 0
            # One contract covers 100 shares, hence the multiplier.
            premium = int(volume * last_price * 100)
            implied_volatility = stats_data.get('implied_volatility')
            gamma = stats_data.get('gamma', 0) or 0

            # Update metrics
            res_dict['gex'] += gamma * open_interest * 100
            res_dict['total_premium'] += premium
            res_dict['volume'] += volume
            res_dict['total_open_interest'] += open_interest

            # Anything that is not explicitly a call is counted as a put, so
            # the call and put buckets always add up to the totals.
            side = 'call' if option_type == 'call' else 'put'
            res_dict[f'{side}_premium'] += premium
            res_dict[f'{side}_volume'] += volume
            res_dict[f'{side}_open_interest'] += open_interest

            if implied_volatility is not None:
                res_dict['iv_list'].append(implied_volatility)

            # Handle timestamp: the last contract carrying one wins.
            timestamp_str = price_data.get('ask_timestamp')
            if timestamp_str:
                try:
                    # ``datetime`` is the class here (from datetime import
                    # datetime), so call fromisoformat on it directly.
                    dt = datetime.fromisoformat(timestamp_str.replace('Z', '+00:00'))
                    res_dict['time'] = dt.strftime("%Y-%m-%d")
                except (AttributeError, TypeError, ValueError):
                    res_dict['time'] = timestamp_str[:10]  # Fallback to string slicing
        except Exception as e:
            # Best effort: a malformed contract is reported and skipped.
            print(f"Error processing contract: {e}")
            continue
    return res_dict


async def get_price_batch_realtime(symbol, contract_list):
    """Fetch realtime prices for a batch of contracts and aggregate them.

    Posts ``contract_list`` to the Intrinio realtime batch endpoint, using the
    module-level API key and query parameters, then sums the returned quotes.

    Args:
        symbol: ticker the contracts belong to. Not used by the request;
            kept so existing callers keep working.
        contract_list: contract identifiers to send in the request body.

    Returns:
        dict: ``total_premium``, ``call_premium``, ``put_premium``, ``volume``,
        ``call_volume``, ``put_volume``, ``gex``, ``total_open_interest``,
        ``call_open_interest``, ``put_open_interest``, ``iv_list`` (the
        implied volatilities that were present) and ``time`` (``YYYY-MM-DD``
        taken from the last ask timestamp seen, or ``None``).

    Raises:
        aiohttp.ClientError: on connection failures or an HTTP status >= 400.
    """
    # API Configuration
    api_url = "https://api-v2.intrinio.com/options/prices/realtime/batch"
    headers = {
        "Authorization": f"Bearer {api_key}"
    }
    params = {
        "source": source,
        "show_stats": show_stats,
        "stock_price_source": stock_price_source,
        "model": model,
        "show_extended_price": show_extended_price
    }
    body = {
        "contracts": contract_list
    }
    # Make API request
    async with aiohttp.ClientSession() as session:
        async with session.post(api_url, headers=headers, params=params, json=body) as response:
            # Fail loudly on HTTP errors; otherwise an error payload without a
            # 'contracts' key would be aggregated into all-zero statistics.
            response.raise_for_status()
            response_data = await response.json()
    # 'contracts' may be absent or null; treat both as an empty batch.
    contracts_data = response_data.get('contracts') or []
    return _aggregate_contracts(contracts_data)
async def main():
    """Aggregate realtime option statistics for every ticker and save them.

    Tickers are taken from the historical-data directory; when fewer than 3000
    are found there, the list from the SQLite databases is used instead. For
    each ticker with contracts on disk, prices are fetched in batches of 250
    contracts, summed, enriched with average IV, put/call ratio and the change
    in open interest versus the stored historical entry, and written with
    ``save_json``. A failure for one ticker is printed and does not stop the
    run.
    """
    total_symbols = get_tickers_from_directory()
    if len(total_symbols) < 3000:
        total_symbols = get_total_symbols()
    print(f"Number of tickers: {len(total_symbols)}")

    # Metrics that are simply summed across batches.
    summed_keys = (
        'total_premium', 'call_premium', 'put_premium',
        'volume', 'call_volume', 'put_volume',
        'gex',
        'total_open_interest', 'call_open_interest', 'put_open_interest',
    )

    for symbol in tqdm(total_symbols):
        try:
            contract_list = get_contracts_from_directory(symbol)
            if not contract_list:
                continue

            # Initialize aggregated results dictionary
            aggregated_results = {key: 0 for key in summed_keys}
            aggregated_results['iv_list'] = []
            aggregated_results['time'] = None

            # Process batches of 250 contracts
            for i in range(0, len(contract_list), 250):
                batch = contract_list[i:i + 250]
                batch_results = await get_price_batch_realtime(symbol, batch)
                # Aggregate results
                for key in summed_keys:
                    aggregated_results[key] += batch_results[key]
                aggregated_results['iv_list'].extend(batch_results['iv_list'])
                # Keep the last known date: a batch without any timestamp must
                # not reset a date obtained from an earlier batch to None.
                if batch_results['time'] is not None:
                    aggregated_results['time'] = batch_results['time']

            # Calculate final metrics
            iv_list = aggregated_results['iv_list']
            aggregated_results['iv'] = round((sum(iv_list) / len(iv_list) * 100), 2) if iv_list else 0
            call_volume = aggregated_results['call_volume']
            aggregated_results['putCallRatio'] = (
                round(aggregated_results['put_volume'] / call_volume, 2) if call_volume > 0 else 0
            )

            # Load previous data and calculate changes
            with open(f"json/options-historical-data/companies/{symbol}.json", "r") as file:
                past_data = orjson.loads(file.read())
            # Entry whose date matches the quote date; the first entry is used
            # when no date matches.
            index = next(
                (i for i, item in enumerate(past_data) if item['date'] == aggregated_results['time']),
                0,
            )
            previous_open_interest = past_data[index]['total_open_interest']
            iv_rank = past_data[index]['iv_rank']

            total_open_interest = aggregated_results['total_open_interest']
            if previous_open_interest:
                changes_percentage = round((total_open_interest / previous_open_interest - 1) * 100, 2)
            else:
                # No usable baseline (0 or missing): report 0 like the other
                # ratio metrics instead of failing and skipping the save.
                changes_percentage = 0
            aggregated_results['changesPercentageOI'] = changes_percentage
            aggregated_results['changeOI'] = total_open_interest - (previous_open_interest or 0)
            # ivRank is copied from the historical entry, not aggregated here.
            aggregated_results['ivRank'] = iv_rank

            # Remove the temporary iv_list before saving
            del aggregated_results['iv_list']
            # Save aggregated results
            save_json(aggregated_results, symbol)
        except Exception as e:
            print(f"Error processing {symbol}: {e}")


if __name__ == "__main__":
    asyncio.run(main())