bugfixing iv
This commit is contained in:
parent
abbf7dc579
commit
3c3b7a6e45
@ -62,11 +62,89 @@ def save_json(data, symbol):
|
|||||||
file.write(orjson.dumps(serializable_data))
|
file.write(orjson.dumps(serializable_data))
|
||||||
|
|
||||||
|
|
||||||
|
def is_outlier(value, values, n_sigmas=3):
|
||||||
|
"""
|
||||||
|
Detect if a value is an outlier using the z-score method
|
||||||
|
|
||||||
|
Args:
|
||||||
|
value: The value to check
|
||||||
|
values: List of values to compare against
|
||||||
|
n_sigmas: Number of standard deviations to use as threshold (default: 3)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
bool: True if the value is an outlier, False otherwise
|
||||||
|
"""
|
||||||
|
if value is None or not values:
|
||||||
|
return False
|
||||||
|
|
||||||
|
values = [v for v in values if v is not None]
|
||||||
|
if not values:
|
||||||
|
return False
|
||||||
|
|
||||||
|
mean = np.mean(values)
|
||||||
|
std = np.std(values)
|
||||||
|
|
||||||
|
if std == 0:
|
||||||
|
return False
|
||||||
|
|
||||||
|
z_score = abs((value - mean) / std)
|
||||||
|
return z_score > n_sigmas
|
||||||
|
|
||||||
|
def clean_iv_data(data):
|
||||||
|
"""
|
||||||
|
Clean IV data by handling outliers
|
||||||
|
|
||||||
|
Args:
|
||||||
|
data: List of dictionaries containing IV values
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
List of dictionaries with cleaned IV values
|
||||||
|
"""
|
||||||
|
# Extract IV values
|
||||||
|
iv_values = [item.get('iv') for item in data]
|
||||||
|
|
||||||
|
# Create a copy of the data to modify
|
||||||
|
cleaned_data = []
|
||||||
|
|
||||||
|
window_size = 20 # Rolling window size for outlier detection
|
||||||
|
|
||||||
|
for i, item in enumerate(data):
|
||||||
|
cleaned_item = item.copy()
|
||||||
|
iv = item.get('iv')
|
||||||
|
|
||||||
|
if iv is not None:
|
||||||
|
# Get a window of IV values centered around the current point
|
||||||
|
start_idx = max(0, i - window_size // 2)
|
||||||
|
end_idx = min(len(data), i + window_size // 2)
|
||||||
|
window_values = [data[j].get('iv') for j in range(start_idx, end_idx)]
|
||||||
|
|
||||||
|
# Check if the current IV is an outlier
|
||||||
|
if is_outlier(iv, window_values):
|
||||||
|
# Replace outlier with the median of nearby non-outlier values
|
||||||
|
non_outlier_values = [
|
||||||
|
v for v in window_values
|
||||||
|
if v is not None and not is_outlier(v, window_values)
|
||||||
|
]
|
||||||
|
|
||||||
|
if non_outlier_values:
|
||||||
|
cleaned_item['iv'] = round(np.median(non_outlier_values), 2)
|
||||||
|
else:
|
||||||
|
cleaned_item['iv'] = None
|
||||||
|
else:
|
||||||
|
cleaned_item['iv'] = round(iv, 2)
|
||||||
|
|
||||||
|
cleaned_data.append(cleaned_item)
|
||||||
|
|
||||||
|
return cleaned_data
|
||||||
|
|
||||||
def compute_realized_volatility(data, window_size=20):
|
def compute_realized_volatility(data, window_size=20):
|
||||||
"""
|
"""
|
||||||
Compute the realized volatility of stock prices over a rolling window.
|
Compute the realized volatility of stock prices over a rolling window.
|
||||||
Realized volatility is the annualized standard deviation of log returns of stock prices.
|
Realized volatility is the annualized standard deviation of log returns of stock prices.
|
||||||
"""
|
"""
|
||||||
|
# First clean the IV data
|
||||||
|
data = clean_iv_data(data)
|
||||||
|
|
||||||
# Sort data by date (oldest first)
|
# Sort data by date (oldest first)
|
||||||
data = sorted(data, key=lambda x: x['date'])
|
data = sorted(data, key=lambda x: x['date'])
|
||||||
|
|
||||||
@ -143,6 +221,7 @@ if __name__ == '__main__':
|
|||||||
directory_path = "json/implied-volatility"
|
directory_path = "json/implied-volatility"
|
||||||
total_symbols = stocks_symbols + etf_symbols + index_symbols
|
total_symbols = stocks_symbols + etf_symbols + index_symbols
|
||||||
|
|
||||||
|
|
||||||
for symbol in tqdm(total_symbols):
|
for symbol in tqdm(total_symbols):
|
||||||
try:
|
try:
|
||||||
with open(f"json/options-historical-data/companies/{symbol}.json", "r") as file:
|
with open(f"json/options-historical-data/companies/{symbol}.json", "r") as file:
|
||||||
|
|||||||
@ -1,12 +1,9 @@
|
|||||||
import requests
|
|
||||||
import orjson
|
import orjson
|
||||||
import re
|
|
||||||
from datetime import datetime,timedelta
|
from datetime import datetime,timedelta
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
import os
|
import os
|
||||||
import sqlite3
|
import sqlite3
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import time
|
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
|
|
||||||
@ -36,8 +33,57 @@ def safe_round(value, decimals=2):
|
|||||||
|
|
||||||
|
|
||||||
def aggregate_data_by_date(symbol):
|
def aggregate_data_by_date(symbol):
|
||||||
data_by_date = defaultdict(lambda: {
|
# Pre-load price data and create lookup dictionary for better performance
|
||||||
"date": "",
|
with open(f"json/historical-price/max/{symbol}.json", "r") as file:
|
||||||
|
price_list = {p['time']: p['close'] for p in orjson.loads(file.read())}
|
||||||
|
|
||||||
|
# Use dict instead of defaultdict for better performance
|
||||||
|
data_by_date = {}
|
||||||
|
|
||||||
|
today = datetime.today().date()
|
||||||
|
one_year_ago = today - timedelta(days=365)
|
||||||
|
one_year_ago_str = one_year_ago.strftime('%Y-%m-%d')
|
||||||
|
|
||||||
|
contract_dir = f"json/all-options-contracts/{symbol}"
|
||||||
|
contract_list = get_contracts_from_directory(contract_dir)
|
||||||
|
|
||||||
|
if not contract_list:
|
||||||
|
return []
|
||||||
|
|
||||||
|
for item in tqdm(contract_list):
|
||||||
|
try:
|
||||||
|
file_path = os.path.join(contract_dir, f"{item}.json")
|
||||||
|
with open(file_path, "r") as file:
|
||||||
|
data = orjson.loads(file.read())
|
||||||
|
|
||||||
|
option_type = data.get('optionType')
|
||||||
|
if option_type not in ['call', 'put']:
|
||||||
|
continue
|
||||||
|
|
||||||
|
is_call = option_type == 'call'
|
||||||
|
|
||||||
|
for entry in data.get('history', []):
|
||||||
|
date = entry.get('date')
|
||||||
|
#if date < one_year_ago_str:
|
||||||
|
# continue
|
||||||
|
|
||||||
|
spot_price = price_list.get(date)
|
||||||
|
if not spot_price:
|
||||||
|
continue
|
||||||
|
|
||||||
|
volume = entry.get('volume', 0) or 0
|
||||||
|
open_interest = entry.get('open_interest', 0) or 0
|
||||||
|
total_premium = entry.get('total_premium', 0) or 0
|
||||||
|
implied_volatility = entry.get('implied_volatility', 0) or 0
|
||||||
|
gamma = entry.get('gamma', 0) or 0
|
||||||
|
delta = entry.get('delta', 0) or 0
|
||||||
|
|
||||||
|
gex = open_interest * gamma * spot_price
|
||||||
|
dex = open_interest * delta * spot_price
|
||||||
|
|
||||||
|
if date not in data_by_date:
|
||||||
|
data_by_date[date] = {
|
||||||
|
"date": date,
|
||||||
"call_volume": 0,
|
"call_volume": 0,
|
||||||
"put_volume": 0,
|
"put_volume": 0,
|
||||||
"call_open_interest": 0,
|
"call_open_interest": 0,
|
||||||
@ -48,105 +94,43 @@ def aggregate_data_by_date(symbol):
|
|||||||
"put_gex": 0,
|
"put_gex": 0,
|
||||||
"call_dex": 0,
|
"call_dex": 0,
|
||||||
"put_dex": 0,
|
"put_dex": 0,
|
||||||
"iv": 0.0, # Sum of implied volatilities
|
"iv": [],
|
||||||
"iv_count": 0, # Count of entries for IV
|
"iv_count": 0,
|
||||||
})
|
}
|
||||||
|
|
||||||
# Calculate cutoff date (1 year ago)
|
|
||||||
today = datetime.today().date()
|
|
||||||
one_year_ago = today - timedelta(days=365)
|
|
||||||
one_year_ago_str = one_year_ago.strftime('%Y-%m-%d')
|
|
||||||
|
|
||||||
contract_dir = f"json/all-options-contracts/{symbol}"
|
|
||||||
contract_list = get_contracts_from_directory(contract_dir)
|
|
||||||
|
|
||||||
with open(f"json/historical-price/max/{symbol}.json","r") as file:
|
|
||||||
price_list = orjson.loads(file.read())
|
|
||||||
|
|
||||||
if len(contract_list) > 0:
|
|
||||||
for item in tqdm(contract_list):
|
|
||||||
try:
|
|
||||||
file_path = os.path.join(contract_dir, f"{item}.json")
|
|
||||||
with open(file_path, "r") as file:
|
|
||||||
data = orjson.loads(file.read())
|
|
||||||
|
|
||||||
option_type = data.get('optionType', None)
|
|
||||||
if option_type not in ['call', 'put']:
|
|
||||||
continue
|
|
||||||
|
|
||||||
for entry in data.get('history', []):
|
|
||||||
date = entry.get('date')
|
|
||||||
|
|
||||||
# Skip entries older than one year
|
|
||||||
if date < one_year_ago_str:
|
|
||||||
continue
|
|
||||||
|
|
||||||
volume = entry.get('volume', 0) or 0
|
|
||||||
open_interest = entry.get('open_interest', 0) or 0
|
|
||||||
total_premium = entry.get('total_premium', 0) or 0
|
|
||||||
implied_volatility = entry.get('implied_volatility', 0) or 0
|
|
||||||
gamma = entry.get('gamma',0) or 0
|
|
||||||
delta = entry.get('delta',0) or 0
|
|
||||||
|
|
||||||
# Find the matching date in price_list
|
|
||||||
matching_price = next((p for p in price_list if p.get('time') == date), 0)
|
|
||||||
|
|
||||||
if matching_price:
|
|
||||||
spot_price = matching_price['close']
|
|
||||||
else:
|
|
||||||
continue
|
|
||||||
|
|
||||||
gex = open_interest * gamma * spot_price
|
|
||||||
dex = open_interest * delta * spot_price
|
|
||||||
|
|
||||||
|
|
||||||
daily_data = data_by_date[date]
|
daily_data = data_by_date[date]
|
||||||
daily_data["date"] = date
|
|
||||||
|
|
||||||
if option_type == 'call':
|
# Use conditional indexing instead of if-else
|
||||||
daily_data["call_volume"] += int(volume)
|
type_prefix = 'call_' if is_call else 'put_'
|
||||||
daily_data["call_open_interest"] += int(open_interest)
|
daily_data[f"{type_prefix}volume"] += int(volume)
|
||||||
daily_data["call_premium"] += int(total_premium)
|
daily_data[f"{type_prefix}open_interest"] += int(open_interest)
|
||||||
daily_data["call_gex"] += round(gex,2)
|
daily_data[f"{type_prefix}premium"] += int(total_premium)
|
||||||
daily_data["call_dex"] += round(dex,2)
|
daily_data[f"{type_prefix}gex"] += round(gex, 2)
|
||||||
elif option_type == 'put':
|
daily_data[f"{type_prefix}dex"] += round(dex, 2)
|
||||||
daily_data["put_volume"] += int(volume)
|
|
||||||
daily_data["put_open_interest"] += int(open_interest)
|
|
||||||
daily_data["put_premium"] += int(total_premium)
|
|
||||||
daily_data["put_gex"] += round(gex,2)
|
|
||||||
daily_data["put_dex"] += round(dex,2)
|
|
||||||
|
|
||||||
# Aggregate IV for both calls and puts
|
daily_data["iv"].append(round(implied_volatility, 2))
|
||||||
daily_data["iv"] += round(implied_volatility, 2)
|
|
||||||
daily_data["iv_count"] += 1
|
daily_data["iv_count"] += 1
|
||||||
|
|
||||||
# Calculate put/call ratio
|
|
||||||
try:
|
try:
|
||||||
daily_data["putCallRatio"] = round(daily_data["put_volume"] / daily_data["call_volume"], 2)
|
daily_data["putCallRatio"] = round(daily_data["put_volume"] / daily_data["call_volume"], 2)
|
||||||
except ZeroDivisionError:
|
except ZeroDivisionError:
|
||||||
daily_data["putCallRatio"] = None
|
daily_data["putCallRatio"] = None
|
||||||
|
|
||||||
except Exception as e:
|
except:
|
||||||
print(f"Error processing {item}: {e}")
|
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Convert to list and calculate average IV
|
# Convert to list and calculate median IV
|
||||||
data = []
|
data = list(data_by_date.values())
|
||||||
for date, daily in data_by_date.items():
|
|
||||||
if daily['iv_count'] > 0:
|
# Use vectorized operations with pandas for IV calculations
|
||||||
daily['iv'] = round(daily['iv'] / daily['iv_count'], 2)
|
df = pd.DataFrame(data)
|
||||||
else:
|
df['iv'] = df.apply(lambda x: round(float(pd.Series(x['iv']).median()), 2) if x['iv_count'] > 0 else None, axis=1)
|
||||||
daily['iv'] = None
|
|
||||||
data.append(daily)
|
|
||||||
|
|
||||||
# Sort and calculate IV Rank
|
# Sort and calculate IV Rank
|
||||||
|
data = df.to_dict('records')
|
||||||
data = sorted(data, key=lambda x: x['date'])
|
data = sorted(data, key=lambda x: x['date'])
|
||||||
data = calculate_iv_rank_for_all(data)
|
data = calculate_iv_rank_for_all(data)
|
||||||
data = sorted(data, key=lambda x: x['date'], reverse=True)
|
return sorted(data, key=lambda x: x['date'], reverse=True)
|
||||||
|
|
||||||
return data
|
|
||||||
else:
|
|
||||||
return []
|
|
||||||
|
|
||||||
def calculate_iv_rank_for_all(data):
|
def calculate_iv_rank_for_all(data):
|
||||||
if not data:
|
if not data:
|
||||||
@ -199,87 +183,64 @@ def calculate_iv_rank_for_all(data):
|
|||||||
|
|
||||||
|
|
||||||
def prepare_data(data, symbol):
|
def prepare_data(data, symbol):
|
||||||
|
# Filter data first to reduce processing
|
||||||
data = [entry for entry in data if entry['call_volume'] != 0 or entry['put_volume'] != 0]
|
data = [entry for entry in data if entry['call_volume'] != 0 or entry['put_volume'] != 0]
|
||||||
|
if not data:
|
||||||
|
return
|
||||||
|
|
||||||
start_date_str = data[-1]['date']
|
start_date_str = data[-1]['date']
|
||||||
end_date_str = data[0]['date']
|
end_date_str = data[0]['date']
|
||||||
|
|
||||||
query = query_template.format(ticker=symbol)
|
# Determine query connection
|
||||||
if symbol in stocks_symbols:
|
query_con = (con if symbol in stocks_symbols else
|
||||||
query_con = con
|
etf_con if symbol in etf_symbols else
|
||||||
elif symbol in etf_symbols:
|
index_con)
|
||||||
query_con = etf_con
|
|
||||||
else:
|
# Use pandas efficient reading and processing
|
||||||
query_con = index_con
|
df_price = pd.read_sql_query(
|
||||||
|
query_template.format(ticker=symbol),
|
||||||
|
query_con,
|
||||||
|
params=(start_date_str, end_date_str)
|
||||||
|
).round(2)
|
||||||
|
|
||||||
df_price = pd.read_sql_query(query, query_con, params=(start_date_str, end_date_str)).round(2)
|
|
||||||
df_price = df_price.rename(columns={"change_percent": "changesPercentage"})
|
df_price = df_price.rename(columns={"change_percent": "changesPercentage"})
|
||||||
|
price_lookup = df_price.set_index('date').to_dict('index')
|
||||||
# Convert the DataFrame to a dictionary for quick lookups by date
|
|
||||||
df_change_dict = df_price.set_index('date')['changesPercentage'].to_dict()
|
|
||||||
df_close_dict = df_price.set_index('date')['close'].to_dict()
|
|
||||||
|
|
||||||
res_list = []
|
res_list = []
|
||||||
|
|
||||||
for item in data:
|
for item in data:
|
||||||
try:
|
try:
|
||||||
# Round numerical and numerical-string values
|
|
||||||
new_item = {
|
new_item = {
|
||||||
key: safe_round(value) if isinstance(value, (int, float, str)) else value
|
key: safe_round(value) if isinstance(value, (int, float, str)) else value
|
||||||
for key, value in item.items()
|
for key, value in item.items()
|
||||||
}
|
}
|
||||||
|
|
||||||
# Add parsed fields
|
# Calculate derived fields
|
||||||
new_item['volume'] = round(new_item['call_volume'] + new_item['put_volume'], 2)
|
new_item.update({
|
||||||
new_item['putCallRatio'] = round(new_item['put_volume']/new_item['call_volume'],2)
|
'volume': new_item['call_volume'] + new_item['put_volume'],
|
||||||
#new_item['avgVolumeRatio'] = round(new_item['volume'] / (round(new_item['avg_30_day_call_volume'] + new_item['avg_30_day_put_volume'], 2)), 2)
|
'putCallRatio': round(new_item['put_volume'] / new_item['call_volume'], 2),
|
||||||
new_item['total_premium'] = round(new_item['call_premium'] + new_item['put_premium'], 2)
|
'total_premium': new_item['call_premium'] + new_item['put_premium'],
|
||||||
#new_item['net_premium'] = round(new_item['net_call_premium'] - new_item['net_put_premium'],2)
|
'total_open_interest': new_item['call_open_interest'] + new_item['put_open_interest']
|
||||||
new_item['total_open_interest'] = round(new_item['call_open_interest'] + new_item['put_open_interest'], 2)
|
})
|
||||||
|
|
||||||
|
# Get price data from lookup
|
||||||
#bearish_premium = float(item['bearish_premium'])
|
if price_data := price_lookup.get(item['date']):
|
||||||
#bullish_premium = float(item['bullish_premium'])
|
new_item['changesPercentage'] = float(price_data['changesPercentage'])
|
||||||
#neutral_premium = calculate_neutral_premium(item)
|
new_item['price'] = float(price_data['close'])
|
||||||
'''
|
|
||||||
new_item['premium_ratio'] = [
|
|
||||||
safe_round(bearish_premium),
|
|
||||||
neutral_premium,
|
|
||||||
safe_round(bullish_premium)
|
|
||||||
]
|
|
||||||
'''
|
|
||||||
|
|
||||||
# Add changesPercentage if the date exists in df_change_dict
|
|
||||||
if item['date'] in df_change_dict:
|
|
||||||
new_item['changesPercentage'] = float(df_change_dict[item['date']])
|
|
||||||
else:
|
else:
|
||||||
new_item['changesPercentage'] = None
|
new_item['changesPercentage'] = None
|
||||||
|
|
||||||
if item['date'] in df_close_dict:
|
|
||||||
new_item['price'] = float(df_close_dict[item['date']])
|
|
||||||
else:
|
|
||||||
new_item['price'] = None
|
new_item['price'] = None
|
||||||
|
|
||||||
res_list.append(new_item)
|
res_list.append(new_item)
|
||||||
except:
|
except:
|
||||||
pass
|
continue
|
||||||
|
|
||||||
|
# Calculate OI changes using vectorized operations
|
||||||
|
df = pd.DataFrame(res_list)
|
||||||
|
df = df.sort_values('date')
|
||||||
|
df['changeOI'] = df['total_open_interest'].diff()
|
||||||
|
df['changesPercentageOI'] = (df['total_open_interest'].pct_change() * 100).round(2)
|
||||||
|
|
||||||
res_list = sorted(res_list, key=lambda x: x['date'])
|
res_list = df.sort_values('date', ascending=False).to_dict('records')
|
||||||
|
|
||||||
for i in range(1, len(res_list)):
|
|
||||||
try:
|
|
||||||
current_open_interest = res_list[i]['total_open_interest']
|
|
||||||
previous_open_interest = res_list[i-1]['total_open_interest']
|
|
||||||
changes_percentage_oi = round((current_open_interest/previous_open_interest -1)*100,2)
|
|
||||||
res_list[i]['changesPercentageOI'] = changes_percentage_oi
|
|
||||||
res_list[i]['changeOI'] = current_open_interest-previous_open_interest
|
|
||||||
except:
|
|
||||||
res_list[i]['changesPercentageOI'] = None
|
|
||||||
res_list[i]['changeOI'] = None
|
|
||||||
|
|
||||||
res_list = sorted(res_list, key=lambda x: x['date'],reverse=True)
|
|
||||||
|
|
||||||
if res_list:
|
if res_list:
|
||||||
save_json(res_list, symbol)
|
save_json(res_list, symbol)
|
||||||
|
|||||||
@ -57,7 +57,6 @@ intrinio.ApiClient().allow_retries(True)
|
|||||||
|
|
||||||
after = (datetime.today()- timedelta(days=365)).strftime('%Y-%m-%d')
|
after = (datetime.today()- timedelta(days=365)).strftime('%Y-%m-%d')
|
||||||
before = '2100-12-31'
|
before = '2100-12-31'
|
||||||
N_year_ago = datetime.now() - timedelta(days=365)
|
|
||||||
include_related_symbols = False
|
include_related_symbols = False
|
||||||
page_size = 5000
|
page_size = 5000
|
||||||
MAX_CONCURRENT_REQUESTS = 100 # Adjust based on API rate limits
|
MAX_CONCURRENT_REQUESTS = 100 # Adjust based on API rate limits
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user