From f3b842e0337136a5eaada71e63158fddf8376aa2 Mon Sep 17 00:00:00 2001 From: MuslemRahimi Date: Wed, 22 Jan 2025 21:43:39 +0100 Subject: [PATCH] update cron jobs --- app/cron_options_historical_volume.py | 217 ++++++++++++++----------- app/cron_options_single_contract.py | 6 +- app/cron_options_stats.py | 226 +++++++++++++++++++------- app/test.py | 24 +-- 4 files changed, 302 insertions(+), 171 deletions(-) diff --git a/app/cron_options_historical_volume.py b/app/cron_options_historical_volume.py index 8cbe383..0a98c9a 100644 --- a/app/cron_options_historical_volume.py +++ b/app/cron_options_historical_volume.py @@ -29,29 +29,42 @@ def save_json(data, symbol): with open(f"{directory_path}/{symbol}.json", 'wb') as file: # Use binary mode for orjson file.write(orjson.dumps(data)) +def safe_round(value, decimals=2): + try: + return round(float(value), decimals) + except (ValueError, TypeError): + return value + + +def calculate_iv_rank_for_all(data): + # Extract all IV values + iv_values = [entry['iv'] for entry in data if 'iv' in entry] + + if not iv_values: + return None # No IV data available + + # Compute highest and lowest IV + highest_iv = max(iv_values) + lowest_iv = min(iv_values) + + # Calculate IV Rank for each entry + for entry in data: + if 'iv' in entry: + iv = entry['iv'] + if highest_iv == lowest_iv: + entry['iv_rank'] = 100.0 # If all IVs are the same, rank is 100% + else: + entry['iv_rank'] = round(((iv - lowest_iv) / (highest_iv - lowest_iv)) * 100,2) + else: + entry['iv_rank'] = None # Handle missing IV + + return data + + def prepare_data(data, symbol): - res_list = [] - + data = [entry for entry in data if entry['call_volume'] != 0 or entry['put_volume'] != 0] - data = sorted(data, key=lambda x: x['date']) - for i in range(1, len(data)): - try: - current_open_interest = data[i]['total_open_interest'] - previous_open_interest = data[i-1]['total_open_interest'] - changes_percentage_oi = round((current_open_interest/previous_open_interest -1)*100,2) - data[i]['changesPercentageOI'] = changes_percentage_oi - data[i]['changeOI'] = current_open_interest-previous_open_interest - except: - data[i]['changesPercentageOI'] = None - data[i]['changeOI'] = None - - data = sorted(data, key=lambda x: x['date'], reverse=True) - - if data: - save_json(data,symbol) - ''' - start_date_str = data[-1]['date'] end_date_str = data[0]['date'] @@ -63,6 +76,8 @@ def prepare_data(data, symbol): df_change_dict = df_price.set_index('date')['changesPercentage'].to_dict() df_close_dict = df_price.set_index('date')['close'].to_dict() + res_list = [] + for item in data: try: # Round numerical and numerical-string values @@ -74,47 +89,56 @@ def prepare_data(data, symbol): # Add parsed fields new_item['volume'] = round(new_item['call_volume'] + new_item['put_volume'], 2) new_item['putCallRatio'] = round(new_item['put_volume']/new_item['call_volume'],2) - new_item['avgVolumeRatio'] = round(new_item['volume'] / (round(new_item['avg_30_day_call_volume'] + new_item['avg_30_day_put_volume'], 2)), 2) + #new_item['avgVolumeRatio'] = round(new_item['volume'] / (round(new_item['avg_30_day_call_volume'] + new_item['avg_30_day_put_volume'], 2)), 2) new_item['total_premium'] = round(new_item['call_premium'] + new_item['put_premium'], 2) - new_item['net_premium'] = round(new_item['net_call_premium'] - new_item['net_put_premium'],2) + #new_item['net_premium'] = round(new_item['net_call_premium'] - new_item['net_put_premium'],2) new_item['total_open_interest'] = round(new_item['call_open_interest'] + new_item['put_open_interest'], 2) - - bearish_premium = float(item['bearish_premium']) - bullish_premium = float(item['bullish_premium']) - neutral_premium = calculate_neutral_premium(item) + + #bearish_premium = float(item['bearish_premium']) + #bullish_premium = float(item['bullish_premium']) + #neutral_premium = calculate_neutral_premium(item) + ''' new_item['premium_ratio'] = [ safe_round(bearish_premium), neutral_premium, safe_round(bullish_premium) ] - + ''' # Add changesPercentage if the date exists in df_change_dict if item['date'] in df_change_dict: - new_item['changesPercentage'] = df_change_dict[item['date']] + new_item['changesPercentage'] = float(df_change_dict[item['date']]) + else: + new_item['changesPercentage'] = None + if item['date'] in df_close_dict: - new_item['price'] = df_close_dict[item['date']] + new_item['price'] = float(df_close_dict[item['date']]) + else: + new_item['price'] = None res_list.append(new_item) - except: - pass + except Exception as e: + print(e) + res_list = sorted(res_list, key=lambda x: x['date']) + for i in range(1, len(res_list)): try: current_open_interest = res_list[i]['total_open_interest'] previous_open_interest = res_list[i-1]['total_open_interest'] changes_percentage_oi = round((current_open_interest/previous_open_interest -1)*100,2) res_list[i]['changesPercentageOI'] = changes_percentage_oi + res_list[i]['changeOI'] = current_open_interest-previous_open_interest except: res_list[i]['changesPercentageOI'] = None + res_list[i]['changeOI'] = None res_list = sorted(res_list, key=lambda x: x['date'],reverse=True) if res_list: save_json(res_list, symbol) - ''' def get_contracts_from_directory(directory: str): @@ -137,9 +161,10 @@ def get_contracts_from_directory(directory): return [f.split('.')[0] for f in os.listdir(directory) if f.endswith('.json')] + def aggregate_data_by_date(total_symbols): data_by_date = defaultdict(lambda: { - "date": "", # Add date field to the dictionary + "date": "", "call_volume": 0, "put_volume": 0, "call_open_interest": 0, @@ -148,6 +173,8 @@ def aggregate_data_by_date(total_symbols): "call_net_premium": 0, "put_premium": 0, "put_net_premium": 0, + "iv": 0, # Sum of implied volatilities + "iv_count": 0, # Count of entries for IV }) for symbol in tqdm(total_symbols): @@ -164,49 +191,37 @@ def aggregate_data_by_date(total_symbols): file_path = os.path.join(contract_dir, f"{item}.json") with open(file_path, "r") as file: data = orjson.loads(file.read()) + option_type = data.get('optionType', None) if option_type not in ['call', 'put']: continue + for entry in data.get('history', []): date = entry.get('date') - volume = entry.get('volume',0) - open_interest = entry.get('open_interest',0) - total_premium = entry.get('total_premium',0) - - - if volume is None: - volume = 0 - if open_interest is None: - open_interest = 0 - if total_premium is None: - total_premium = 0 - - + volume = entry.get('volume', 0) or 0 + open_interest = entry.get('open_interest', 0) or 0 + total_premium = entry.get('total_premium', 0) or 0 + implied_volatility = entry.get('implied_volatility', 0) or 0 + if date: - data_by_date[date]["date"] = date # Store the date in the dictionary + daily_data = data_by_date[date] + daily_data["date"] = date + if option_type == 'call': - if volume is not None: - data_by_date[date]["call_volume"] += int(volume) - if open_interest is not None: - data_by_date[date]["call_open_interest"] += int(open_interest) - if total_premium is not None: - data_by_date[date]["call_premium"] += int(total_premium) - + daily_data["call_volume"] += int(volume) + daily_data["call_open_interest"] += int(open_interest) + daily_data["call_premium"] += int(total_premium) elif option_type == 'put': - if volume is not None: - data_by_date[date]["put_volume"] += int(volume) - if open_interest is not None: - data_by_date[date]["put_open_interest"] += int(open_interest) - if total_premium is not None: - data_by_date[date]["put_premium"] += int(total_premium) + daily_data["put_volume"] += int(volume) + daily_data["put_open_interest"] += int(open_interest) + daily_data["put_premium"] += int(total_premium) + daily_data["iv"] += round(implied_volatility, 2) + daily_data["iv_count"] += 1 + try: - data_by_date[date]["putCallRatio"] = round(data_by_date[date]["put_volume"]/data_by_date[date]["call_volume"],2) - except: - data_by_date[date]["putCallRatio"] = None - - data_by_date[date]["volume"] = data_by_date[date]["call_volume"] + data_by_date[date]["put_volume"] - data_by_date[date]["total_open_interest"] = data_by_date[date]["call_open_interest"] + data_by_date[date]["put_open_interest"] - + daily_data["putCallRatio"] = round(daily_data["put_volume"] / daily_data["call_volume"], 2) + except ZeroDivisionError: + daily_data["putCallRatio"] = None except Exception as e: print(f"Error processing contract {item} for {symbol}: {e}") @@ -214,35 +229,43 @@ def aggregate_data_by_date(total_symbols): except Exception as e: print(f"Error processing symbol {symbol}: {e}") continue - - # Convert to list of dictionaries and sort by date - data = list(data_by_date.values()) - - data = prepare_data(data,symbol) - - - -if __name__ == '__main__': - # Connect to the databases - con = sqlite3.connect('stocks.db') - etf_con = sqlite3.connect('etf.db') - cursor = con.cursor() - cursor.execute("PRAGMA journal_mode = wal") - #cursor.execute("SELECT DISTINCT symbol FROM stocks WHERE symbol NOT LIKE '%.%' AND marketCap > 1E9") - cursor.execute("SELECT DISTINCT symbol FROM stocks WHERE symbol NOT LIKE '%.%'") - stocks_symbols = [row[0] for row in cursor.fetchall()] - - etf_cursor = etf_con.cursor() - etf_cursor.execute("PRAGMA journal_mode = wal") - #etf_cursor.execute("SELECT DISTINCT symbol FROM etfs WHERE marketCap > 1E9") - etf_cursor.execute("SELECT DISTINCT symbol FROM etfs") - etf_symbols = [row[0] for row in etf_cursor.fetchall()] - total_symbols = stocks_symbols + etf_symbols - - - total_symbols = ['AA'] - data = aggregate_data_by_date(total_symbols) + # Convert to list of dictionaries and sort by date + data = list(data_by_date.values()) + for daily_data in data: + # Compute the average IV if there are valid entries + if daily_data["iv_count"] > 0: + daily_data["iv"] = round(daily_data["iv"] / daily_data["iv_count"], 2) + else: + daily_data["iv"] = None # Or set it to 0 if you prefer - con.close() - etf_con.close() \ No newline at end of file + data = sorted(data, key=lambda x: x['date'], reverse=True) + data = calculate_iv_rank_for_all(data) + data = prepare_data(data, symbol) + + + + +# Connect to the databases +con = sqlite3.connect('stocks.db') +etf_con = sqlite3.connect('etf.db') +cursor = con.cursor() +cursor.execute("PRAGMA journal_mode = wal") +#cursor.execute("SELECT DISTINCT symbol FROM stocks WHERE symbol NOT LIKE '%.%' AND marketCap > 1E9") +cursor.execute("SELECT DISTINCT symbol FROM stocks WHERE symbol NOT LIKE '%.%'") +stocks_symbols = [row[0] for row in cursor.fetchall()] + +etf_cursor = etf_con.cursor() +etf_cursor.execute("PRAGMA journal_mode = wal") +#etf_cursor.execute("SELECT DISTINCT symbol FROM etfs WHERE marketCap > 1E9") +etf_cursor.execute("SELECT DISTINCT symbol FROM etfs") +etf_symbols = [row[0] for row in etf_cursor.fetchall()] +total_symbols = stocks_symbols + etf_symbols + + + +total_symbols = ['AA'] +data = aggregate_data_by_date(total_symbols) + +con.close() +etf_con.close() \ No newline at end of file diff --git a/app/cron_options_single_contract.py b/app/cron_options_single_contract.py index 46e4718..48c64a7 100644 --- a/app/cron_options_single_contract.py +++ b/app/cron_options_single_contract.py @@ -122,6 +122,9 @@ async def get_single_contract_eod_data(symbol, contract_id, semaphore): key_data = {k: v for k, v in response._option.__dict__.items() if isinstance(v, (str, int, float, bool, list, dict, type(None)))} history = [] + + + if response and hasattr(response, '_prices'): for price in response._prices: history.append({ @@ -181,7 +184,8 @@ async def get_single_contract_eod_data(symbol, contract_id, semaphore): res_list[i]['total_premium'] = 0 res_list[i]['net_premium'] = 0 - + + data = {'expiration': key_data['_expiration'], 'strike': key_data['_strike'], 'optionType': key_data['_type'], 'history': res_list} await save_json(data, symbol, contract_id) diff --git a/app/cron_options_stats.py b/app/cron_options_stats.py index f641fce..e2ea4e1 100644 --- a/app/cron_options_stats.py +++ b/app/cron_options_stats.py @@ -1,13 +1,42 @@ -import aiohttp +from __future__ import print_function import asyncio +import time +import intrinio_sdk as intrinio +from intrinio_sdk.rest import ApiException +from datetime import datetime, timedelta +import ast import orjson +from tqdm import tqdm +import aiohttp +from concurrent.futures import ThreadPoolExecutor +import sqlite3 from dotenv import load_dotenv import os -import sqlite3 load_dotenv() -api_key = os.getenv('UNUSUAL_WHALES_API_KEY') +api_key = os.getenv('INTRINIO_API_KEY') + +intrinio.ApiClient().set_api_key(api_key) +#intrinio.ApiClient().allow_retries(True) + +source = '' +show_stats = '' +stock_price_source = '' +model = '' +show_extended_price = '' + + +after = datetime.today().strftime('%Y-%m-%d') +before = '2100-12-31' +include_related_symbols = False +page_size = 5000 +MAX_CONCURRENT_REQUESTS = 50 # Adjust based on API rate limits +BATCH_SIZE = 1500 + + + + # Database connection and symbol retrieval def get_total_symbols(): @@ -40,6 +69,18 @@ def get_tickers_from_directory(): print(f"An error occurred: {e}") return [] +def get_contracts_from_directory(symbol): + directory = f"json/all-options-contracts/{symbol}/" + try: + # Ensure the directory exists + if not os.path.exists(directory): + raise FileNotFoundError(f"The directory '{directory}' does not exist.") + # Get all tickers from filenames + return [file.replace(".json", "") for file in os.listdir(directory) if file.endswith(".json")] + + except: + return [] + def save_json(data, symbol): directory = "json/options-stats/companies" os.makedirs(directory, exist_ok=True) @@ -54,86 +95,147 @@ def safe_round(value): return value -def calculate_neutral_premium(data_item): - call_premium = float(data_item['call_premium']) - put_premium = float(data_item['put_premium']) - bearish_premium = float(data_item['bearish_premium']) - bullish_premium = float(data_item['bullish_premium']) +def get_all_expirations(symbol): + response = intrinio.OptionsApi().get_options_expirations_eod( + symbol, + after=after, + before=before, + include_related_symbols=include_related_symbols + ) + data = (response.__dict__).get('_expirations') + return data - total_premiums = bearish_premium + bullish_premium - observed_premiums = call_premium + put_premium - neutral_premium = observed_premiums - total_premiums - - return safe_round(neutral_premium) +async def get_options_chain(symbol, expiration, semaphore): + async with semaphore: + try: + # Run the synchronous API call in a thread pool since intrinio doesn't support async + loop = asyncio.get_event_loop() + with ThreadPoolExecutor() as pool: + response = await loop.run_in_executor( + pool, + lambda: intrinio.OptionsApi().get_options_chain_eod( + symbol, + expiration, + include_related_symbols=include_related_symbols + ) + ) + contracts = set() + for item in response.chain: + try: + contracts.add(item.option.code) + except Exception as e: + print(f"Error processing contract in {expiration}: {e}") + return contracts + + except Exception as e: + print(f"Error fetching chain for {expiration}: {e}") + return set() -def prepare_data(data): +async def get_price_batch_realtime(symbol,contract_list): + body = { + "contracts": contract_list + } + + + response = intrinio.OptionsApi().get_options_prices_batch_realtime(body, source=source, show_stats=show_stats, stock_price_source=stock_price_source, model=model, show_extended_price=show_extended_price) + data = response.__dict__ + data = data['_contracts'] + + res_dict = {'total_premium': 0, 'call_premium': 0, 'put_premium': 0, + 'volume': 0, 'call_volume': 0, 'put_volume': 0, 'gex': 0, 'dex': 0, + 'total_open_interest': 0, 'call_open_interest': 0, 'put_open_interest': 0,} + + time = None + iv_list = [] for item in data: try: - symbol = item['ticker'] - bearish_premium = float(item['bearish_premium']) - bullish_premium = float(item['bullish_premium']) - neutral_premium = calculate_neutral_premium(item) + price_data = (item.__dict__)['_price'].__dict__ + stats_data = (item.__dict__)['_stats'].__dict__ + option_data = (item.__dict__)['_option'].__dict__ + option_type = ((item.__dict__)['_option'].__dict__)['_type'] + + volume = int(price_data['_volume']) if price_data['_volume'] != None else 0 + total_open_interest = int(price_data['_open_interest']) if price_data['_open_interest'] != None else 0 + last_price = price_data['_last'] if price_data['_last'] != None else 0 + premium = int(volume * last_price * 100) + implied_volatility = stats_data['_implied_volatility'] - new_item = { - key: safe_round(value) - for key, value in item.items() - if key != 'in_out_flow' - } + gamma = stats_data['_gamma'] if stats_data['_gamma'] != None else 0 + delta = stats_data['_delta'] if stats_data['_delta'] != None else 0 - new_item['premium_ratio'] = [ - safe_round(bearish_premium), - neutral_premium, - safe_round(bullish_premium) - ] - new_item['open_interest_change'] = ( - new_item['total_open_interest'] - - (new_item.get('prev_call_oi', 0) + new_item.get('prev_put_oi', 0)) - if 'total_open_interest' in new_item else None - ) + res_dict['gex'] += gamma * total_open_interest * 100 + res_dict['dex'] += delta * total_open_interest * 100 - if new_item: - save_json(new_item, symbol) + res_dict['total_premium'] += premium + res_dict['volume'] += volume + res_dict['total_open_interest'] += total_open_interest + + if option_type == 'call': + res_dict['call_premium'] += premium + res_dict['call_volume'] += volume + res_dict['call_open_interest'] += total_open_interest + else: + res_dict['put_premium'] += premium + res_dict['put_volume'] += volume + res_dict['put_open_interest'] += total_open_interest + + iv_list.append(implied_volatility) + + time = price_data['_ask_timestamp'].strftime("%Y-%m-%d") except: pass + res_dict['iv'] = round((sum(iv_list) / len(iv_list)*100),2) if iv_list else 0 + res_dict['putCallRatio'] = round(res_dict['put_volume'] / res_dict['call_volume'],2) if res_dict['call_volume'] > 0 else 0 + + with open("json/options-historical-data/companies/AA.json", "r") as file: + past_data = orjson.loads(file.read()) + index = next((i for i, item in enumerate(past_data) if item['date'] == time), 0) + previous_open_interest = past_data[index]['total_open_interest'] -async def fetch_data(session, chunk): - chunk_str = ",".join(chunk) - url = "https://api.unusualwhales.com/api/screener/stocks" - params = {"ticker": chunk_str} - headers = { - "Accept": "application/json, text/plain", - "Authorization": api_key - } + res_dict['changesPercentageOI'] = round((res_dict['total_open_interest']/previous_open_interest-1)*100,2) + res_dict['changeOI'] = res_dict['total_open_interest'] - previous_open_interest - try: - async with session.get(url, headers=headers, params=params) as response: - json_data = await response.json() - data = json_data.get('data', []) - prepare_data(data) - print(f"Processed chunk with {len(data)} results.") - except Exception as e: - print(f"Exception fetching chunk {chunk_str}: {e}") + if res_dict: + save_json(res_dict, symbol) + + + + + +async def prepare_dataset(symbol): + expiration_list = get_all_expirations(symbol) + + semaphore = asyncio.Semaphore(MAX_CONCURRENT_REQUESTS) + + # Create tasks for all expirations + tasks = [get_options_chain(symbol, expiration, semaphore) for expiration in expiration_list] + # Show progress bar for completed tasks + contract_sets = set() + for task in tqdm(asyncio.as_completed(tasks), total=len(tasks), desc="Processing expirations"): + contracts = await task + contract_sets.update(contracts) + + # Convert final set to list + contract_list = list(contract_sets) async def main(): + ''' total_symbols = get_tickers_from_directory() if len(total_symbols) < 3000: total_symbols = get_total_symbols() print(f"Number of tickers: {len(total_symbols)}") - chunk_size = 50 - chunks = [total_symbols[i:i + chunk_size] for i in range(0, len(total_symbols), chunk_size)] - async with aiohttp.ClientSession() as session: - for i in range(0, len(chunks), 100): # Process 100 chunks at a time - try: - tasks = [fetch_data(session, chunk) for chunk in chunks[i:i + 100]] - await asyncio.gather(*tasks) - print("Processed 100 chunks. Sleeping for 60 seconds...") - await asyncio.sleep(60) # Avoid API rate limits - except: - pass + total_symbols = ['AA'] + for symbol in total_symbols: + await prepare_dataset(symbol) + ''' + symbol = 'AA' + contract_list = get_contracts_from_directory(symbol) + + await get_price_batch_realtime(symbol, contract_list) if __name__ == "__main__": diff --git a/app/test.py b/app/test.py index a34c555..f0f3c71 100644 --- a/app/test.py +++ b/app/test.py @@ -16,15 +16,17 @@ intrinio.ApiClient().allow_retries(True) -#identifier = 'AA250321C00045000' +source = 'delayed' +start_date = '' +start_time = '' +end_date = '' +end_time = '' +timezone = 'UTC' +page_size = 100 +min_size = 100 +security = 'AAPL' +next_page = '' -symbol = 'MSFT' -strike = 95 -source = '' -stock_price_source = '' -model = '' -show_extended_price = '' -include_related_symbols = False - -response = intrinio.OptionsApi().get_option_strikes_realtime(symbol, strike, source=source, stock_price_source=stock_price_source, model=model, show_extended_price=show_extended_price, include_related_symbols=include_related_symbols) -print(response) \ No newline at end of file +response = intrinio.OptionsApi().get_option_trades(source=source, start_date=start_date, start_time=start_time, end_date=end_date, end_time=end_time, timezone=timezone, page_size=page_size, min_size=min_size, security=security, next_page=next_page) +print(response) + \ No newline at end of file