update ai model

parent 96c96254fc
commit 3b70c93d28
@@ -4,7 +4,7 @@ import aiohttp
 import aiofiles
 import sqlite3
 from datetime import datetime
-from ml_models.fundamental_predictor import FundamentalPredictor
+from ml_models.score_model import ScorePredictor
 import yfinance as yf
 from collections import defaultdict
 import pandas as pd
@@ -22,7 +22,7 @@ import gc
 gc.enable()
 
 async def save_json(symbol, data):
-    with open(f"json/fundamental-predictor-analysis/{symbol}.json", 'wb') as file:
+    with open(f"json/ai-score/{symbol}.json", 'wb') as file:
         file.write(orjson.dumps(data))
 
 
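A quick note on save_json: orjson.dumps returns bytes, which is why the file is opened in 'wb' mode. A minimal round-trip sketch (the load_json helper below is hypothetical, not part of the commit):

    import orjson

    def load_json(symbol):
        # Read back what save_json wrote; orjson.loads accepts the raw bytes directly.
        with open(f"json/ai-score/{symbol}.json", "rb") as file:
            return orjson.loads(file.read())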
@@ -31,11 +31,6 @@ def trend_intensity(close, window=20):
     std = close.rolling(window=window).std()
     return ((close - ma) / std).abs().rolling(window=window).mean()
 
-def fisher_transform(high, low, window=10):
-    value = (high + low) / 2
-    norm_value = (2 * ((value - value.rolling(window=window).min()) /
-                  (value.rolling(window=window).max() - value.rolling(window=window).min())) - 1)
-    return 0.5 * np.log((1 + norm_value) / (1 - norm_value))
 
 def calculate_fdi(high, low, close, window=30):
     n1 = (np.log(high.rolling(window=window).max() - low.rolling(window=window).min()) -
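The surviving indicator helpers (trend_intensity, calculate_fdi) operate on pandas Series of OHLC data; fisher_transform is dropped here along with its df['fisher'] feature further down. A rough usage sketch on toy data, assuming the helpers are imported from this module (in the script the columns come from yfinance history):

    import numpy as np
    import pandas as pd

    rng = np.random.default_rng(0)
    df = pd.DataFrame({
        "high":  rng.uniform(101, 105, 200),
        "low":   rng.uniform(95, 99, 200),
        "close": rng.uniform(98, 103, 200),
    })

    df["tii"] = trend_intensity(df["close"])                       # rolling trend-intensity score
    df["fdi"] = calculate_fdi(df["high"], df["low"], df["close"])  # fractal dimension index
    print(df[["tii", "fdi"]].dropna().head())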
@@ -185,8 +180,6 @@ async def download_data(ticker, con, start_date, end_date):
         df['volatility_ratio'] = df['close'].rolling(window=30).std() / df['close'].rolling(window=60).std()
 
         df['fdi'] = calculate_fdi(df['high'], df['low'], df['close'])
-        #df['hurst'] = df['close'].rolling(window=100).apply(hurst_exponent)
-        df['fisher'] = fisher_transform(df['high'], df['low'])
         df['tii'] = trend_intensity(df['close'])
 
 
@@ -196,7 +189,7 @@ async def download_data(ticker, con, start_date, end_date):
             'adi', 'cmf', 'emv', 'fi', 'williams', 'stoch','sma_crossover',
             'volatility','daily_return','cumulative_return', 'roc','avg_volume_30d',
             'rolling_rsi','rolling_stoch_rsi', 'ema_crossover','ichimoku_a','ichimoku_b',
-            'atr','kama','rocr','ppo','volatility_ratio','vwap','tii','fdi','fisher'
+            'atr','kama','rocr','ppo','volatility_ratio','vwap','tii','fdi'
         ]
 
         # Match each combined data entry with the closest available stock price in df
@@ -229,7 +222,6 @@ async def download_data(ticker, con, start_date, end_date):
         combined_data = sorted(combined_data, key=lambda x: x['date'])
         # Convert combined data into a DataFrame
         df_combined = pd.DataFrame(combined_data).dropna()
-
         key_elements = [
             'revenue',
             'costOfRevenue',
@@ -275,37 +267,30 @@ async def download_data(ticker, con, start_date, end_date):
             'propertyPlantEquipmentNet',
             'ownersEarnings',
         ]
 
         # Compute ratios for all combinations of key elements
+        new_columns = {}
 
+        # Loop over combinations of column pairs
         for num, denom in combinations(key_elements, 2):
-            # Compute ratio num/denom
+            # Compute ratio and reverse ratio
+            ratio = df_combined[num] / df_combined[denom]
+            reverse_ratio = df_combined[denom] / df_combined[num]
 
+            # Define column names for both ratios
             column_name = f'{num}_to_{denom}'
-            try:
-                ratio = df_combined[num] / df_combined[denom]
-                # Check for valid ratio
-                df_combined[column_name] = np.where((ratio != 0) &
-                                                    (ratio != np.inf) &
-                                                    (ratio != -np.inf) &
-                                                    (~np.isnan(ratio)),
-                                                    ratio, 0)
-            except Exception as e:
-                print(f"Error calculating {column_name}: {e}")
-                df_combined[column_name] = 0
-
-            # Compute reverse ratio denom/num
             reverse_column_name = f'{denom}_to_{num}'
-            try:
-                reverse_ratio = df_combined[denom] / df_combined[num]
-                # Check for valid reverse ratio
-                df_combined[reverse_column_name] = np.where((reverse_ratio != 0) &
-                                                            (reverse_ratio != np.inf) &
-                                                            (reverse_ratio != -np.inf) &
-                                                            (~np.isnan(reverse_ratio)),
-                                                            reverse_ratio, 0)
-            except Exception as e:
-                print(f"Error calculating {reverse_column_name}: {e}")
-                df_combined[reverse_column_name] = 0
+
+            # Store the new columns in the dictionary, replacing invalid values with 0
+            new_columns[column_name] = np.nan_to_num(ratio, nan=0, posinf=0, neginf=0)
+            new_columns[reverse_column_name] = np.nan_to_num(reverse_ratio, nan=0, posinf=0, neginf=0)
+
+        # Add all new columns to the original DataFrame at once
+        df_combined = pd.concat([df_combined, pd.DataFrame(new_columns)], axis=1)
+
+        # To defragment the DataFrame, make a copy
+        df_combined = df_combined.copy()
 
         # Create 'Target' column based on price change
         df_combined['Target'] = ((df_combined['price'].shift(-1) - df_combined['price']) / df_combined['price'] > 0).astype(int)
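What changed here: the old loop wrote every np.where-guarded ratio column straight into df_combined, which is slow and fragments the frame; the new version collects the sanitized ratios in a plain dict and concatenates them once, with np.nan_to_num zeroing out NaN and +/-inf from divisions by zero. A minimal sketch of the same pattern on toy data (column names here are illustrative):

    from itertools import combinations
    import numpy as np
    import pandas as pd

    # Toy stand-in for df_combined; the real frame holds one row of fundamentals per report date.
    df = pd.DataFrame({"revenue": [10.0, 12.0, 0.0], "costOfRevenue": [4.0, 0.0, 3.0]})

    new_columns = {}
    for num, denom in combinations(df.columns, 2):
        ratio = df[num] / df[denom]              # may yield inf (division by zero) or NaN
        reverse_ratio = df[denom] / df[num]
        new_columns[f"{num}_to_{denom}"] = np.nan_to_num(ratio, nan=0, posinf=0, neginf=0)
        new_columns[f"{denom}_to_{num}"] = np.nan_to_num(reverse_ratio, nan=0, posinf=0, neginf=0)

    # A single concat instead of hundreds of per-column inserts keeps the frame defragmented.
    df = pd.concat([df, pd.DataFrame(new_columns, index=df.index)], axis=1).copy()
    print(df)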
@@ -314,7 +299,7 @@ async def download_data(ticker, con, start_date, end_date):
         df_combined = df_combined.dropna()
         df_combined = df_combined.where(~df_combined.isin([np.inf, -np.inf]), 0)
         df_copy = df_combined.copy()
-        #print(df_copy[['date','revenue','ownersEarnings','revenuePerShare']])
+
         return df_copy
 
     except Exception as e:
@@ -327,14 +312,14 @@ async def process_symbol(ticker, con, start_date, end_date):
     test_size = 0.2
     start_date = datetime(1995, 1, 1).strftime("%Y-%m-%d")
     end_date = datetime.today().strftime("%Y-%m-%d")
-    predictor = FundamentalPredictor()
+    predictor = ScorePredictor()
     df = await download_data(ticker, con, start_date, end_date)
     split_size = int(len(df) * (1-test_size))
     test_data = df.iloc[split_size:]
     best_features = [col for col in df.columns if col not in ['date','price','Target']]
     data, prediction_list = predictor.evaluate_model(test_data[best_features], test_data['Target'])
 
-
+    print(data)
     '''
     output_list = [{'date': date, 'price': price, 'prediction': prediction, 'target': target}
                    for (date, price,target), prediction in zip(test_data[['date', 'price','Target']].iloc[-6:].values, prediction_list[-6:])]
@@ -380,19 +365,19 @@ async def train_process(tickers, con):
     print('======Train Set Datapoints======')
     print(len(df_train))
 
-    predictor = FundamentalPredictor()
+    predictor = ScorePredictor()
     #print(selected_features)
     selected_features = [col for col in df_train if col not in ['price','date','Target']]
-    best_features = predictor.feature_selection(df_train[selected_features], df_train['Target'],k=100)
-    print(best_features)
-    predictor.train_model(df_train[best_features], df_train['Target'])
+    #best_features = predictor.feature_selection(df_train[selected_features], df_train['Target'],k=5)
+    #print(best_features)
+    predictor.train_model(df_train[selected_features], df_train['Target'])
     predictor.evaluate_model(df_test[best_features], df_test['Target'])
 
 async def test_process(con):
     test_size = 0.2
     start_date = datetime(1995, 1, 1).strftime("%Y-%m-%d")
     end_date = datetime.today().strftime("%Y-%m-%d")
-    predictor = FundamentalPredictor()
+    predictor = ScorePredictor()
     df = await download_data('GME', con, start_date, end_date)
     split_size = int(len(df) * (1-test_size))
     test_data = df.iloc[split_size:]
@@ -405,25 +390,26 @@ async def run():
     #Train first model
 
     con = sqlite3.connect('stocks.db')
 
     cursor = con.cursor()
     cursor.execute("PRAGMA journal_mode = wal")
     cursor.execute("SELECT DISTINCT symbol FROM stocks WHERE marketCap >= 10E9 AND symbol NOT LIKE '%.%'")
-    stock_symbols = [row[0] for row in cursor.fetchall()] #['AAPL','GME','LLY','NVDA'] #
+    stock_symbols = ['DHR','ABT','TXN','LIN','RIO','FCX','ECL','NVO','GOOGL','NFLX','SAP','UNH','JNJ','ABBV','MRK','PLD','NEE','DUK','AMT','EQIX','META','DOV','NWN','PG','PH','MMM','AWR','YYAI','PPSI','VYX','XP','BWXT','OLED','ROIC','NKE','LMT','PAYX','GME','AMD','AAPL','NVDA','PLTR'] #[row[0] for row in cursor.fetchall()]
+    stock_symbols = list(set(stock_symbols))
     print('Number of Stocks')
     print(len(stock_symbols))
-    await train_process(stock_symbols, con)
+    #await train_process(stock_symbols, con)
 
 
     #Prediction Steps for all stock symbols
-    '''
+
     cursor.execute("SELECT DISTINCT symbol FROM stocks WHERE marketCap >= 1E9")
     stock_symbols = [row[0] for row in cursor.fetchall()]
 
     total_symbols = ['GME'] #stock_symbols
 
     print(f"Total tickers: {len(total_symbols)}")
-    start_date = datetime(2000, 1, 1).strftime("%Y-%m-%d")
+    start_date = datetime(1995, 1, 1).strftime("%Y-%m-%d")
     end_date = datetime.today().strftime("%Y-%m-%d")
 
     chunk_size = len(total_symbols)# // 100 # Divide the list into N chunks
@@ -434,7 +420,7 @@ async def run():
             tasks.append(process_symbol(ticker, con, start_date, end_date))
 
         await asyncio.gather(*tasks)
-    '''
+
     con.close()
 
 try:
@@ -3950,7 +3950,7 @@ async def get_fomc_impact(api_key: str = Security(get_api_key)):
         compressed_data = gzip.compress(data)
 
         redis_client.set(cache_key, compressed_data)
-        redis_client.expire(cache_key,3600*3600)
+        redis_client.expire(cache_key,5*60)
 
         return StreamingResponse(
             io.BytesIO(compressed_data),
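The cache TTL for this endpoint drops from 3600*3600 seconds (roughly 150 days) to 5*60 seconds, i.e. five minutes. If the set-and-expire pair should ever be issued atomically, redis-py can do both in one call; a tiny sketch using the same redis_client and cache_key:

    # Equivalent to set() followed by expire(), issued as a single SETEX command (5-minute TTL).
    redis_client.setex(cache_key, 5 * 60, compressed_data)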
BIN  app/ml_models/__pycache__/score_model.cpython-310.pyc  (new file, binary file not shown)
@@ -7,8 +7,8 @@ from xgboost import XGBClassifier
 from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score, accuracy_score
 from sklearn.model_selection import train_test_split
 from sklearn.preprocessing import MinMaxScaler, StandardScaler
-from keras.models import Sequential
-from keras.layers import LSTM, Dense, Conv1D, Dropout, BatchNormalization, MaxPooling1D, Bidirectional
+from keras.models import Sequential, Model
+from keras.layers import Input, Multiply, Reshape, LSTM, Dense, Conv1D, Dropout, BatchNormalization, GlobalAveragePooling1D, MaxPooling1D, Bidirectional
 from keras.optimizers import Adam
 from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
 from keras.models import load_model
@@ -31,39 +31,47 @@ import time
 
 class FundamentalPredictor:
     def __init__(self):
-        self.model = self.build_model()
         self.scaler = MinMaxScaler()
+        self.model = self.build_model()
 
     def build_model(self):
         clear_session()
-        model = Sequential()
 
-        model.add(Dense(1000, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
-        model.add(Dropout(0.2))
-        model.add(BatchNormalization())
+        # Input layer
+        inputs = Input(shape=(2139,))
 
-        model.add(Dense(2000, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
-        model.add(Dropout(0.2))
-        model.add(BatchNormalization())
+        # First dense layer
+        x = Dense(1024, activation='relu', kernel_regularizer=regularizers.l2(0.01))(inputs)
+        x = Dropout(0.3)(x)
+        x = BatchNormalization()(x)
 
-        model.add(Dense(3000, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
-        model.add(Dropout(0.2))
-        model.add(BatchNormalization())
+        # Additional dense layers
+        for units in [512,256, 256]:
+            x = Dense(units, activation='relu', kernel_regularizer=regularizers.l2(0.01))(x)
+            x = Dropout(0.2)(x)
+            x = BatchNormalization()(x)
 
-        model.add(Dense(2000, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
-        model.add(Dropout(0.2))
-        model.add(BatchNormalization())
+        # Reshape for attention mechanism
+        x = Reshape((256, 1))(x)
 
-        model.add(Dense(1000, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
-        model.add(Dropout(0.2))
-        model.add(BatchNormalization())
-        model.add(Dense(500, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
+        # Attention mechanism
+        attention = Dense(256, activation='relu')(x)
+        attention = Dense(1, activation='softmax')(attention)
 
-        # Output layer for binary classification
-        model.add(Dense(1, activation='sigmoid'))
+        # Apply attention
+        x = Multiply()([x, attention])
 
-        # Optimizer with a lower learning rate and scheduler
-        optimizer = Adam(learning_rate=0.1)
+        # Global average pooling
+        x = GlobalAveragePooling1D()(x)
+
+        # Output layer
+        outputs = Dense(1, activation='sigmoid')(x)
+
+        # Create the model
+        model = Model(inputs=inputs, outputs=outputs)
+
+        # Optimizer with a lower learning rate
+        optimizer = Adam(learning_rate=0.1, clipnorm = 1.0)
 
         # Compile the model
         model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
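The Sequential stack of wide dense layers is replaced by a functional-API model: the 256-unit embedding is reshaped into a (256, 1) sequence, scored by a small attention head, re-weighted with Multiply, pooled with GlobalAveragePooling1D, and squashed to a single sigmoid output. A shape-only sketch of that wiring (the dense stack is collapsed to one layer here; 2139 is the feature count hard-coded in the diff), with one caveat on the softmax axis:

    from keras.layers import Dense, GlobalAveragePooling1D, Input, Multiply, Reshape
    from keras.models import Model

    inputs = Input(shape=(2139,))
    x = Dense(256, activation='relu')(inputs)               # stand-in for the 1024->512->256->256 stack
    x = Reshape((256, 1))(x)                                # treat the 256 features as "timesteps"

    attention = Dense(256, activation='relu')(x)            # (batch, 256, 256)
    attention = Dense(1, activation='softmax')(attention)   # (batch, 256, 1)
    # Caveat: softmax over a last axis of size 1 always outputs 1.0, so this Multiply is
    # effectively a pass-through; a softmax across the 256 axis would produce real weights.

    x = Multiply()([x, attention])                          # (batch, 256, 1)
    x = GlobalAveragePooling1D()(x)                         # (batch, 1)
    outputs = Dense(1, activation='sigmoid')(x)

    Model(inputs, outputs).summary()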
@@ -87,10 +95,10 @@ class FundamentalPredictor:
         checkpoint = ModelCheckpoint('ml_models/weights/fundamental_weights/weights.keras',
                                      save_best_only=True, save_freq = 1,
                                      monitor='val_loss', mode='min')
-        early_stopping = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
-        reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, min_lr=0.00001)
+        early_stopping = EarlyStopping(monitor='val_loss', patience=70, restore_best_weights=True)
+        reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=60, min_lr=0.00001)
 
-        self.model.fit(X_train, y_train, epochs=100_000, batch_size=64,
+        self.model.fit(X_train, y_train, epochs=100_000, batch_size=32,
                        validation_split=0.1, callbacks=[checkpoint, early_stopping, reduce_lr])
         self.model.save('ml_models/weights/fundamental_weights/weights.keras')
 
app/ml_models/score_model.py  (new file, 137 lines)
@@ -0,0 +1,137 @@
+import yfinance as yf
+import pandas as pd
+from datetime import datetime, timedelta
+from sklearn.ensemble import RandomForestClassifier
+import numpy as np
+from xgboost import XGBClassifier
+from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score, accuracy_score
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import MinMaxScaler, StandardScaler
+from keras.models import Sequential, Model
+from keras.layers import Input, Multiply, Reshape, LSTM, Dense, Conv1D, Dropout, BatchNormalization, GlobalAveragePooling1D, MaxPooling1D, Bidirectional
+from keras.optimizers import Adam
+from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
+from keras.models import load_model
+from sklearn.feature_selection import SelectKBest, f_classif
+from tensorflow.keras.backend import clear_session
+from keras import regularizers
+from keras.layers import Layer
+
+
+from tqdm import tqdm
+from collections import defaultdict
+import asyncio
+import aiohttp
+import aiofiles
+import pickle
+import time
+
+# Based on the paper: https://arxiv.org/pdf/1603.00751
+
+
+class ScorePredictor:
+    def __init__(self):
+        self.scaler = MinMaxScaler()
+        self.model = self.build_model()
+
+    def build_model(self):
+        clear_session()
+
+        # Input layer
+        inputs = Input(shape=(2139,))
+
+        # First dense layer
+        x = Dense(1024, activation='relu', kernel_regularizer=regularizers.l2(0.01))(inputs)
+        x = Dropout(0.3)(x)
+        x = BatchNormalization()(x)
+
+        # Additional dense layers
+        for units in [512,256, 256]:
+            x = Dense(units, activation='relu', kernel_regularizer=regularizers.l2(0.01))(x)
+            x = Dropout(0.2)(x)
+            x = BatchNormalization()(x)
+
+        # Reshape for attention mechanism
+        x = Reshape((256, 1))(x)
+
+        # Attention mechanism
+        attention = Dense(256, activation='relu')(x)
+        attention = Dense(1, activation='softmax')(attention)
+
+        # Apply attention
+        x = Multiply()([x, attention])
+
+        # Global average pooling
+        x = GlobalAveragePooling1D()(x)
+
+        # Output layer
+        outputs = Dense(1, activation='sigmoid')(x)
+
+        # Create the model
+        model = Model(inputs=inputs, outputs=outputs)
+
+        # Optimizer with a lower learning rate
+        optimizer = Adam(learning_rate=0.1, clipnorm = 1.0)
+
+        # Compile the model
+        model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
+
+        return model
+
+    def preprocess_data(self, X):
+        # X = X.applymap(lambda x: 9999 if x == 0 else x) # Replace 0 with 9999 as suggested in the paper
+        X = np.where(np.isinf(X), np.nan, X)
+        X = np.nan_to_num(X)
+        X = self.scaler.fit_transform(X)
+        return X
+
+    def reshape_for_lstm(self, X):
+        return X.reshape((X.shape[0], X.shape[1], 1))
+
+    def train_model(self, X_train, y_train):
+        X_train = self.preprocess_data(X_train)
+        #X_train = self.reshape_for_lstm(X_train)
+
+        checkpoint = ModelCheckpoint('ml_models/weights/fundamental_weights/weights.keras',
+                                     save_best_only=True, save_freq = 1,
+                                     monitor='val_loss', mode='min')
+        early_stopping = EarlyStopping(monitor='val_loss', patience=70, restore_best_weights=True)
+        reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=60, min_lr=0.00001)
+
+        self.model.fit(X_train, y_train, epochs=100_000, batch_size=32,
+                       validation_split=0.1, callbacks=[checkpoint, early_stopping, reduce_lr])
+        self.model.save('ml_models/weights/fundamental_weights/weights.keras')
+
+    def evaluate_model(self, X_test, y_test):
+        X_test = self.preprocess_data(X_test)
+        X_test = self.reshape_for_lstm(X_test)
+
+        self.model = load_model('ml_models/weights/fundamental_weights/weights.keras')
+
+        test_predictions = self.model.predict(X_test).flatten()
+
+        test_predictions[test_predictions >= 0.5] = 1
+        test_predictions[test_predictions < 0.5] = 0
+
+        test_precision = precision_score(y_test, test_predictions)
+        test_accuracy = accuracy_score(y_test, test_predictions)
+
+        print("Test Set Metrics:")
+        print(f"Precision: {round(test_precision * 100)}%")
+        print(f"Accuracy: {round(test_accuracy * 100)}%")
+
+        next_value_prediction = 1 if test_predictions[-1] >= 0.5 else 0
+        return {'accuracy': round(test_accuracy * 100),
+                'precision': round(test_precision * 100),
+                'sentiment': 'Bullish' if next_value_prediction == 1 else 'Bearish'}, test_predictions
+
+    def feature_selection(self, X_train, y_train, k=100):
+        print('feature selection:')
+        print(X_train.shape, y_train.shape)
+        selector = SelectKBest(score_func=f_classif, k=k)
+        selector.fit(X_train, y_train)
+
+        selector.transform(X_train)
+        selected_features = [col for i, col in enumerate(X_train.columns) if selector.get_support()[i]]
+
+        return selected_features
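For orientation, a rough end-to-end sketch of how the training script drives ScorePredictor (mirroring train_process/test_process; df is assumed to be the frame returned by download_data, the checkpoint directory is assumed to exist, and the hard-coded Input(shape=(2139,)) means the frame must supply exactly 2139 feature columns):

    predictor = ScorePredictor()

    features = [col for col in df.columns if col not in ['price', 'date', 'Target']]
    split = int(len(df) * 0.8)
    df_train, df_test = df.iloc[:split], df.iloc[split:]

    predictor.train_model(df_train[features], df_train['Target'])
    metrics, predictions = predictor.evaluate_model(df_test[features], df_test['Target'])
    print(metrics)  # e.g. {'accuracy': ..., 'precision': ..., 'sentiment': 'Bullish' or 'Bearish'}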