modify model

MuslemRahimi 2024-10-01 21:18:26 +02:00
parent d0b5cd5aaa
commit c8159047f0
3 changed files with 54 additions and 46 deletions

View File

@@ -46,13 +46,13 @@ async def download_data(ticker, con, start_date, end_date):
     statements = [
         f"json/financial-statements/ratios/quarter/{ticker}.json",
         f"json/financial-statements/key-metrics/quarter/{ticker}.json",
-        f"json/financial-statements/cash-flow-statement/quarter/{ticker}.json",
-        f"json/financial-statements/income-statement/quarter/{ticker}.json",
-        f"json/financial-statements/balance-sheet-statement/quarter/{ticker}.json",
+        #f"json/financial-statements/cash-flow-statement/quarter/{ticker}.json",
+        #f"json/financial-statements/income-statement/quarter/{ticker}.json",
+        #f"json/financial-statements/balance-sheet-statement/quarter/{ticker}.json",
         f"json/financial-statements/income-statement-growth/quarter/{ticker}.json",
         f"json/financial-statements/balance-sheet-statement-growth/quarter/{ticker}.json",
         f"json/financial-statements/cash-flow-statement-growth/quarter/{ticker}.json",
-        f"json/financial-statements/owner-earnings/quarter/{ticker}.json",
+        #f"json/financial-statements/owner-earnings/quarter/{ticker}.json",
     ]
     # Helper function to load JSON data asynchronously
@@ -81,34 +81,34 @@ async def download_data(ticker, con, start_date, end_date):
     key_metrics = await filter_data(key_metrics, ignore_keys)
-    cashflow = await load_json_from_file(statements[2])
-    cashflow = await filter_data(cashflow, ignore_keys)
-    income = await load_json_from_file(statements[3])
-    income = await filter_data(income, ignore_keys)
-    balance = await load_json_from_file(statements[4])
-    balance = await filter_data(balance, ignore_keys)
-    income_growth = await load_json_from_file(statements[5])
+    #cashflow = await load_json_from_file(statements[2])
+    #cashflow = await filter_data(cashflow, ignore_keys)
+    #income = await load_json_from_file(statements[3])
+    #income = await filter_data(income, ignore_keys)
+    #balance = await load_json_from_file(statements[4])
+    #balance = await filter_data(balance, ignore_keys)
+    income_growth = await load_json_from_file(statements[2])
     income_growth = await filter_data(income_growth, ignore_keys)
-    balance_growth = await load_json_from_file(statements[6])
+    balance_growth = await load_json_from_file(statements[3])
     balance_growth = await filter_data(balance_growth, ignore_keys)
-    cashflow_growth = await load_json_from_file(statements[7])
+    cashflow_growth = await load_json_from_file(statements[4])
     cashflow_growth = await filter_data(cashflow_growth, ignore_keys)
-    owner_earnings = await load_json_from_file(statements[8])
-    owner_earnings = await filter_data(owner_earnings, ignore_keys)
+    #owner_earnings = await load_json_from_file(statements[8])
+    #owner_earnings = await filter_data(owner_earnings, ignore_keys)
     # Combine all the data
     combined_data = defaultdict(dict)
     # Merge the data based on 'date'
-    for entries in zip(ratios, key_metrics, cashflow, income, balance, income_growth, balance_growth, cashflow_growth, owner_earnings):
+    for entries in zip(ratios, key_metrics, income_growth, balance_growth, cashflow_growth):
         for entry in entries:
             date = entry['date']
             for key, value in entry.items():
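Note: with the raw cash-flow, income, balance-sheet and owner-earnings statements commented out, only five lists are zipped together in the merge above. A minimal, self-contained sketch of that date-keyed merge (toy records with made-up field names; real entries carry many more fields), which also shows the caveat that zip() stops at the shortest input list:

    from collections import defaultdict

    # Toy stand-ins for the five statement lists loaded above.
    ratios = [{'date': '2024-06-30', 'currentRatio': 1.2}]
    key_metrics = [{'date': '2024-06-30', 'marketCap': 3.1e12}]
    income_growth = [{'date': '2024-06-30', 'growthRevenue': 0.05}]
    balance_growth = [{'date': '2024-06-30', 'growthTotalAssets': 0.02}]
    cashflow_growth = [{'date': '2024-06-30', 'growthFreeCashFlow': 0.04}]

    combined_data = defaultdict(dict)
    # zip() truncates to the shortest list, so quarters missing from any source are dropped.
    for entries in zip(ratios, key_metrics, income_growth, balance_growth, cashflow_growth):
        for entry in entries:
            date = entry['date']
            for key, value in entry.items():
                combined_data[date][key] = value

    # One merged record per quarter, fields from all five sources keyed by 'date'.
    print(dict(combined_data))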
@@ -141,8 +141,8 @@ async def download_data(ticker, con, start_date, end_date):
     df['daily_return'] = df['close'].pct_change()
     df['cumulative_return'] = (1 + df['daily_return']).cumprod() - 1
     df['volume_change'] = df['volume'].pct_change()
-    df['roc'] = df['close'].pct_change(periods=30) * 100 # 12-day ROC
-    df['avg_volume_30d'] = df['volume'].rolling(window=30).mean()
+    df['roc'] = df['close'].pct_change(periods=60)
+    df['avg_volume'] = df['volume'].rolling(window=60).mean()
     df['drawdown'] = df['close'] / df['close'].rolling(window=252).max() - 1
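Note: besides widening the window from 30 to 60 periods, the new 'roc' drops the "* 100" scaling, so it is a fraction rather than a percentage, and the rolling volume mean is renamed from 'avg_volume_30d' to 'avg_volume'. A small pandas sketch on synthetic data (column names as in the diff, data made up):

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({
        'close': np.linspace(100, 160, 300),
        'volume': np.random.default_rng(0).integers(1_000, 5_000, 300),
    })
    df['roc'] = df['close'].pct_change(periods=60)            # e.g. 0.12 means +12% over 60 rows
    df['avg_volume'] = df['volume'].rolling(window=60).mean() # 60-row rolling mean, NaN for the first 59
    print(df[['roc', 'avg_volume']].dropna().head())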
@@ -159,9 +159,9 @@ async def download_data(ticker, con, start_date, end_date):
     df['obv'] = OnBalanceVolumeIndicator(close=df['close'], volume=df['volume']).on_balance_volume()
     df['vpt'] = VolumePriceTrendIndicator(close=df['close'], volume=df['volume']).volume_price_trend()
-    df['rsi'] = rsi(df["close"], window=30)
+    df['rsi'] = rsi(df["close"], window=60)
     df['rolling_rsi'] = df['rsi'].rolling(window=10).mean()
-    df['stoch_rsi'] = stochrsi_k(df['close'], window=30, smooth1=3, smooth2=3)
+    df['stoch_rsi'] = stochrsi_k(df['close'], window=60, smooth1=3, smooth2=3)
     df['rolling_stoch_rsi'] = df['stoch_rsi'].rolling(window=10).mean()
     df['adi'] = acc_dist_index(high=df['high'], low=df['low'], close=df['close'], volume=df['volume'])
@@ -186,7 +186,7 @@ async def download_data(ticker, con, start_date, end_date):
         'rsi', 'macd', 'macd_signal', 'macd_hist', 'adx', 'adx_pos', 'adx_neg',
         'cci', 'mfi', 'nvi', 'obv', 'vpt', 'stoch_rsi', 'bb_width',
         'adi', 'cmf', 'emv', 'fi', 'williams', 'stoch', 'sma_crossover',
-        'volatility', 'daily_return', 'cumulative_return', 'roc', 'avg_volume_30d',
+        'volatility', 'daily_return', 'cumulative_return', 'roc', 'avg_volume',
         'rolling_rsi', 'rolling_stoch_rsi', 'ema_crossover', 'ichimoku_a', 'ichimoku_b',
         'atr', 'kama', 'rocr', 'ppo', 'volatility_ratio', 'vwap', 'tii', 'fdi', 'drawdown',
         'volume_change'
@@ -236,7 +236,6 @@ async def download_data(ticker, con, start_date, end_date):
         'freeCashFlow',
         'incomeBeforeTax',
         'incomeTaxExpense',
-        'epsdiluted',
         'debtRepayment',
         'dividendsPaid',
         'depreciationAndAmortization',
@@ -345,6 +344,7 @@ async def warm_start_training(tickers, con):
     predictor = ScorePredictor()
     selected_features = [col for col in df_train if col not in ['price', 'date', 'Target']]
     predictor.warm_start_training(df_train[selected_features], df_train['Target'])
+    predictor.evaluate_model(df_train[selected_features], df_train['Target'])
     return predictor
@@ -373,25 +373,30 @@ async def fine_tune_and_evaluate(ticker, con, start_date, end_date):
         res = {'score': data['score']}
         await save_json(ticker, res)
         print(f"Saved results for {ticker}")
+        gc.collect()
     except Exception as e:
         print(f"Error processing {ticker}: {e}")
+    finally:
+        # Ensure any remaining cleanup if necessary
+        if 'predictor' in locals():
+            del predictor # Explicitly delete the predictor to aid garbage collection

 async def run():
-    train_mode = True # Set this to False for fine-tuning and evaluation
+    train_mode = False # Set this to False for fine-tuning and evaluation
     con = sqlite3.connect('stocks.db')
     cursor = con.cursor()
     cursor.execute("PRAGMA journal_mode = wal")
     if train_mode:
         # Warm start training
-        warm_start_symbols = ['META', 'NFLX','GOOG','TSLA','AWR','AMD','NVDA']
+        cursor.execute("SELECT DISTINCT symbol FROM stocks WHERE marketCap >= 10E9 AND symbol NOT LIKE '%.%' AND symbol NOT LIKE '%-%'")
+        warm_start_symbols = [row[0] for row in cursor.fetchall()]
         print('Warm Start Training for:', warm_start_symbols)
         predictor = await warm_start_training(warm_start_symbols, con)
     else:
         # Fine-tuning and evaluation for all stocks
         cursor.execute("SELECT DISTINCT symbol FROM stocks WHERE marketCap >= 1E9 AND symbol NOT LIKE '%.%'")
-        stock_symbols = ['NVDA'] #[row[0] for row in cursor.fetchall()]
+        stock_symbols = [row[0] for row in cursor.fetchall()]
         print(f"Total tickers for fine-tuning: {len(stock_symbols)}")
         start_date = datetime(1995, 1, 1).strftime("%Y-%m-%d")
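Note: the per-ticker cleanup added here (gc.collect() plus an explicit del in the finally block) matters now that fine-tuning iterates over every symbol returned by the query instead of just 'NVDA'. A stripped-down sketch of the pattern in isolation (the predictor construction and the fine-tune/predict steps are placeholders, not the repo's actual code):

    import gc

    def process_ticker(ticker):
        try:
            predictor = object()  # placeholder for loading and fine-tuning a ScorePredictor
            # ... fine-tune, predict, save results ...
            print(f"Saved results for {ticker}")
            gc.collect()  # reclaim memory between tickers
        except Exception as e:
            print(f"Error processing {ticker}: {e}")
        finally:
            # The guard avoids a NameError if the exception fired before 'predictor' was bound.
            if 'predictor' in locals():
                del predictor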

View File

@@ -8,7 +8,7 @@ from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_sco
 from sklearn.model_selection import train_test_split
 from sklearn.preprocessing import MinMaxScaler, StandardScaler
 from keras.models import Sequential, Model
-from keras.layers import Input, Multiply, Reshape, LSTM, Dense, Conv1D, Dropout, BatchNormalization, GlobalAveragePooling1D, MaxPooling1D, Bidirectional
+from keras.layers import Input, Multiply, Reshape, LSTM, Dense, Dropout, BatchNormalization, GlobalAveragePooling1D, MaxPooling1D, Bidirectional
 from keras.optimizers import Adam
 from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
 from keras.models import load_model
@@ -62,17 +62,18 @@ class ScorePredictor:
     def build_model(self):
         clear_session()
-        inputs = Input(shape=(335,))
-        x = Dense(512, activation='elu')(inputs)
-        x = Dropout(0.2)(x)
-        x = BatchNormalization()(x)
-        for units in [64, 32]:
-            x = Dense(units, activation='elu')(x)
-            x = Dropout(0.2)(x)
-            x = BatchNormalization()(x)
-        x = Reshape((32, 1))(x)
+        inputs = Input(shape=(231,))
+        x = Dense(128, activation='leaky_relu')(inputs)
+        x = BatchNormalization()(x)
+        x = Dropout(0.2)(x)
+        for units in [64, 32, 16]:
+            x = Dense(units, activation='leaky_relu')(x)
+            x = BatchNormalization()(x)
+            x = Dropout(0.2)(x)
+        x = Reshape((16, 1))(x)
         x, _ = SelfAttention()(x)
         outputs = Dense(2, activation='softmax')(x)
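Note: the input width shrinks from 335 to 231 features, in line with the raw statement files dropped from download_data above, and the funnel is now 128 → 64 → 32 → 16 with leaky_relu and BatchNormalization before Dropout. A self-contained sketch of the revised shape flow; Keras's built-in Attention layer and a pooling step stand in for the repo's custom SelfAttention layer, which is not shown in this diff and may handle the sequence axis differently:

    from keras.models import Model
    from keras.layers import (Input, Dense, BatchNormalization, Dropout,
                              Reshape, Attention, GlobalAveragePooling1D)

    inputs = Input(shape=(231,))
    x = Dense(128, activation='leaky_relu')(inputs)
    x = BatchNormalization()(x)
    x = Dropout(0.2)(x)
    for units in [64, 32, 16]:
        x = Dense(units, activation='leaky_relu')(x)
        x = BatchNormalization()(x)
        x = Dropout(0.2)(x)
    x = Reshape((16, 1))(x)          # treat the final 16 features as a pseudo-sequence
    x = Attention()([x, x])          # stand-in for the custom SelfAttention layer
    x = GlobalAveragePooling1D()(x)  # collapse the sequence axis before the softmax head
    outputs = Dense(2, activation='softmax')(x)
    model = Model(inputs, outputs)
    model.summary()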
@@ -93,8 +94,8 @@ class ScorePredictor:
         self.model = self.build_model()
         checkpoint = ModelCheckpoint(self.warm_start_model_path, save_best_only=True, save_freq=1, monitor='val_loss', mode='min')
-        early_stopping = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
-        reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, min_lr=0.001)
+        early_stopping = EarlyStopping(monitor='val_loss', patience=50, restore_best_weights=True)
+        reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=30, min_lr=0.001)
         self.model.fit(X_train, y_train, epochs=100_000, batch_size=32, validation_split=0.1, callbacks=[checkpoint, early_stopping, reduce_lr])
         self.model.save(self.warm_start_model_path)
@@ -102,15 +103,17 @@ class ScorePredictor:
     def fine_tune_model(self, X_train, y_train):
         X_train = self.preprocess_data(X_train)
+        #batch_size = min(64, max(16, len(X_train) // 10))
         if self.model is None:
             self.model = load_model(self.warm_start_model_path, custom_objects={'SelfAttention': SelfAttention})
-        early_stopping = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
-        reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, min_lr=0.0001)
-        self.model.fit(X_train, y_train, epochs=100, batch_size=32, validation_split=0.1, callbacks=[early_stopping, reduce_lr])
-        print("Model fine-tuned (not saved).")
+        #early_stopping = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
+        #reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, min_lr=0.01)
+        self.model.fit(X_train, y_train, epochs=150, batch_size=16, validation_split=0.1)
+        print("Model fine-tuned")

     def evaluate_model(self, X_test, y_test):
         X_test = self.preprocess_data(X_test)
@@ -121,19 +124,19 @@ class ScorePredictor:
         test_predictions = self.model.predict(X_test)
         class_1_probabilities = test_predictions[:, 1]
         binary_predictions = (class_1_probabilities >= 0.5).astype(int)
-        print(test_predictions)
+        #print(test_predictions)
         test_precision = precision_score(y_test, binary_predictions)
         test_accuracy = accuracy_score(y_test, binary_predictions)
         print("Test Set Metrics:")
         print(f"Precision: {round(test_precision * 100)}%")
         print(f"Accuracy: {round(test_accuracy * 100)}%")
-        thresholds = [0.8, 0.75, 0.7, 0.6, 0.5, 0.45, 0.4, 0.35, 0.3, 0.2]
+        print(pd.DataFrame({'y_test': y_test, 'y_pred': binary_predictions}))
+        thresholds = [0.8, 0.75, 0.7, 0.6, 0.5, 0.45, 0.4, 0.35, 0.3, 0]
         scores = [10, 9, 8, 7, 6, 5, 4, 3, 2, 1]
         last_prediction_prob = class_1_probabilities[-1]
-        score = 0
+        score = None
         print(f"Last prediction probability: {last_prediction_prob}")
         for threshold, value in zip(thresholds, scores):
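Note: with the last threshold lowered from 0.2 to 0, every probability now clears some threshold, so the new score = None default should only survive if the loop never runs. A hedged sketch of the probability-to-score mapping, assuming the loop body (cut off in this hunk) assigns the score of the first threshold the probability reaches:

    thresholds = [0.8, 0.75, 0.7, 0.6, 0.5, 0.45, 0.4, 0.35, 0.3, 0]
    scores = [10, 9, 8, 7, 6, 5, 4, 3, 2, 1]

    def probability_to_score(prob):
        for threshold, value in zip(thresholds, scores):
            if prob >= threshold:
                return value
        return None  # unreachable now that the floor threshold is 0

    print(probability_to_score(0.62))  # 7
    print(probability_to_score(0.10))  # 1 (the old 0.2 floor would have left this unscored)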