modify model
This commit is contained in:
parent d0b5cd5aaa
commit c8159047f0
@@ -46,13 +46,13 @@ async def download_data(ticker, con, start_date, end_date):
     statements = [
         f"json/financial-statements/ratios/quarter/{ticker}.json",
         f"json/financial-statements/key-metrics/quarter/{ticker}.json",
-        f"json/financial-statements/cash-flow-statement/quarter/{ticker}.json",
-        f"json/financial-statements/income-statement/quarter/{ticker}.json",
-        f"json/financial-statements/balance-sheet-statement/quarter/{ticker}.json",
+        #f"json/financial-statements/cash-flow-statement/quarter/{ticker}.json",
+        #f"json/financial-statements/income-statement/quarter/{ticker}.json",
+        #f"json/financial-statements/balance-sheet-statement/quarter/{ticker}.json",
         f"json/financial-statements/income-statement-growth/quarter/{ticker}.json",
         f"json/financial-statements/balance-sheet-statement-growth/quarter/{ticker}.json",
         f"json/financial-statements/cash-flow-statement-growth/quarter/{ticker}.json",
-        f"json/financial-statements/owner-earnings/quarter/{ticker}.json",
+        #f"json/financial-statements/owner-earnings/quarter/{ticker}.json",
     ]

     # Helper function to load JSON data asynchronously
@@ -81,34 +81,34 @@ async def download_data(ticker, con, start_date, end_date):
     key_metrics = await filter_data(key_metrics, ignore_keys)


-    cashflow = await load_json_from_file(statements[2])
-    cashflow = await filter_data(cashflow, ignore_keys)
+    #cashflow = await load_json_from_file(statements[2])
+    #cashflow = await filter_data(cashflow, ignore_keys)

-    income = await load_json_from_file(statements[3])
-    income = await filter_data(income, ignore_keys)
+    #income = await load_json_from_file(statements[3])
+    #income = await filter_data(income, ignore_keys)

-    balance = await load_json_from_file(statements[4])
-    balance = await filter_data(balance, ignore_keys)
+    #balance = await load_json_from_file(statements[4])
+    #balance = await filter_data(balance, ignore_keys)

-    income_growth = await load_json_from_file(statements[5])
+    income_growth = await load_json_from_file(statements[2])
     income_growth = await filter_data(income_growth, ignore_keys)

-    balance_growth = await load_json_from_file(statements[6])
+    balance_growth = await load_json_from_file(statements[3])
     balance_growth = await filter_data(balance_growth, ignore_keys)


-    cashflow_growth = await load_json_from_file(statements[7])
+    cashflow_growth = await load_json_from_file(statements[4])
     cashflow_growth = await filter_data(cashflow_growth, ignore_keys)

-    owner_earnings = await load_json_from_file(statements[8])
-    owner_earnings = await filter_data(owner_earnings, ignore_keys)
+    #owner_earnings = await load_json_from_file(statements[8])
+    #owner_earnings = await filter_data(owner_earnings, ignore_keys)


     # Combine all the data
     combined_data = defaultdict(dict)

     # Merge the data based on 'date'
-    for entries in zip(ratios, key_metrics, cashflow, income, balance, income_growth, balance_growth, cashflow_growth, owner_earnings):
+    for entries in zip(ratios, key_metrics, income_growth, balance_growth, cashflow_growth):
         for entry in entries:
             date = entry['date']
             for key, value in entry.items():
@@ -141,8 +141,8 @@ async def download_data(ticker, con, start_date, end_date):
     df['daily_return'] = df['close'].pct_change()
     df['cumulative_return'] = (1 + df['daily_return']).cumprod() - 1
     df['volume_change'] = df['volume'].pct_change()
-    df['roc'] = df['close'].pct_change(periods=30) * 100 # 12-day ROC
-    df['avg_volume_30d'] = df['volume'].rolling(window=30).mean()
+    df['roc'] = df['close'].pct_change(periods=60)
+    df['avg_volume'] = df['volume'].rolling(window=60).mean()
     df['drawdown'] = df['close'] / df['close'].rolling(window=252).max() - 1

@@ -159,9 +159,9 @@ async def download_data(ticker, con, start_date, end_date):
     df['obv'] = OnBalanceVolumeIndicator(close=df['close'], volume=df['volume']).on_balance_volume()
     df['vpt'] = VolumePriceTrendIndicator(close=df['close'], volume=df['volume']).volume_price_trend()

-    df['rsi'] = rsi(df["close"], window=30)
+    df['rsi'] = rsi(df["close"], window=60)
     df['rolling_rsi'] = df['rsi'].rolling(window=10).mean()
-    df['stoch_rsi'] = stochrsi_k(df['close'], window=30, smooth1=3, smooth2=3)
+    df['stoch_rsi'] = stochrsi_k(df['close'], window=60, smooth1=3, smooth2=3)
     df['rolling_stoch_rsi'] = df['stoch_rsi'].rolling(window=10).mean()

     df['adi'] = acc_dist_index(high=df['high'],low=df['low'],close=df['close'],volume=df['volume'])
@@ -186,7 +186,7 @@ async def download_data(ticker, con, start_date, end_date):
         'rsi', 'macd', 'macd_signal', 'macd_hist', 'adx', 'adx_pos', 'adx_neg',
         'cci', 'mfi', 'nvi', 'obv', 'vpt', 'stoch_rsi','bb_width',
         'adi', 'cmf', 'emv', 'fi', 'williams', 'stoch','sma_crossover',
-        'volatility','daily_return','cumulative_return', 'roc','avg_volume_30d',
+        'volatility','daily_return','cumulative_return', 'roc','avg_volume',
         'rolling_rsi','rolling_stoch_rsi', 'ema_crossover','ichimoku_a','ichimoku_b',
         'atr','kama','rocr','ppo','volatility_ratio','vwap','tii','fdi','drawdown',
         'volume_change'
@@ -236,7 +236,6 @@ async def download_data(ticker, con, start_date, end_date):
         'freeCashFlow',
         'incomeBeforeTax',
         'incomeTaxExpense',
-        'epsdiluted',
         'debtRepayment',
         'dividendsPaid',
         'depreciationAndAmortization',
@@ -345,7 +344,8 @@ async def warm_start_training(tickers, con):
     predictor = ScorePredictor()
     selected_features = [col for col in df_train if col not in ['price', 'date', 'Target']]
     predictor.warm_start_training(df_train[selected_features], df_train['Target'])
+    predictor.evaluate_model(df_train[selected_features], df_train['Target'])

     return predictor

 async def fine_tune_and_evaluate(ticker, con, start_date, end_date):
@@ -373,25 +373,30 @@ async def fine_tune_and_evaluate(ticker, con, start_date, end_date):
         res = {'score': data['score']}
         await save_json(ticker, res)
         print(f"Saved results for {ticker}")
+        gc.collect()
     except Exception as e:
         print(f"Error processing {ticker}: {e}")
+    finally:
+        # Ensure any remaining cleanup if necessary
+        if 'predictor' in locals():
+            del predictor  # Explicitly delete the predictor to aid garbage collection

 async def run():
-    train_mode = True # Set this to False for fine-tuning and evaluation
+    train_mode = False # Set this to False for fine-tuning and evaluation
     con = sqlite3.connect('stocks.db')
     cursor = con.cursor()
     cursor.execute("PRAGMA journal_mode = wal")

     if train_mode:
         # Warm start training
-        warm_start_symbols = ['META', 'NFLX','GOOG','TSLA','AWR','AMD','NVDA']
+        cursor.execute("SELECT DISTINCT symbol FROM stocks WHERE marketCap >= 10E9 AND symbol NOT LIKE '%.%' AND symbol NOT LIKE '%-%'")
+        warm_start_symbols = [row[0] for row in cursor.fetchall()]
         print('Warm Start Training for:', warm_start_symbols)
         predictor = await warm_start_training(warm_start_symbols, con)
     else:
         # Fine-tuning and evaluation for all stocks
         cursor.execute("SELECT DISTINCT symbol FROM stocks WHERE marketCap >= 1E9 AND symbol NOT LIKE '%.%'")
-        stock_symbols = ['NVDA'] #[row[0] for row in cursor.fetchall()]
+        stock_symbols = [row[0] for row in cursor.fetchall()]

     print(f"Total tickers for fine-tuning: {len(stock_symbols)}")
     start_date = datetime(1995, 1, 1).strftime("%Y-%m-%d")
Binary file not shown.
@@ -8,7 +8,7 @@ from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_sco
 from sklearn.model_selection import train_test_split
 from sklearn.preprocessing import MinMaxScaler, StandardScaler
 from keras.models import Sequential, Model
-from keras.layers import Input, Multiply, Reshape, LSTM, Dense, Conv1D, Dropout, BatchNormalization, GlobalAveragePooling1D, MaxPooling1D, Bidirectional
+from keras.layers import Input, Multiply, Reshape, LSTM, Dense, Dropout, BatchNormalization, GlobalAveragePooling1D, MaxPooling1D, Bidirectional
 from keras.optimizers import Adam
 from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
 from keras.models import load_model
@@ -62,17 +62,18 @@ class ScorePredictor:
     def build_model(self):
         clear_session()

-        inputs = Input(shape=(335,))
-        x = Dense(512, activation='elu')(inputs)
-        x = Dropout(0.2)(x)
+        inputs = Input(shape=(231,))
+        x = Dense(128, activation='leaky_relu')(inputs)
         x = BatchNormalization()(x)
+        x = Dropout(0.2)(x)

-        for units in [64, 32]:
-            x = Dense(units, activation='elu')(x)
-            x = Dropout(0.2)(x)
+        for units in [64, 32, 16]:
+            x = Dense(units, activation='leaky_relu')(x)
             x = BatchNormalization()(x)
+            x = Dropout(0.2)(x)

-        x = Reshape((32, 1))(x)
+        x = Reshape((16, 1))(x)
         x, _ = SelfAttention()(x)
         outputs = Dense(2, activation='softmax')(x)

@@ -93,8 +94,8 @@ class ScorePredictor:
         self.model = self.build_model()

         checkpoint = ModelCheckpoint(self.warm_start_model_path, save_best_only=True, save_freq=1, monitor='val_loss', mode='min')
-        early_stopping = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
-        reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, min_lr=0.001)
+        early_stopping = EarlyStopping(monitor='val_loss', patience=50, restore_best_weights=True)
+        reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=30, min_lr=0.001)

         self.model.fit(X_train, y_train, epochs=100_000, batch_size=32, validation_split=0.1, callbacks=[checkpoint, early_stopping, reduce_lr])
         self.model.save(self.warm_start_model_path)
@@ -102,15 +103,17 @@ class ScorePredictor:

     def fine_tune_model(self, X_train, y_train):
         X_train = self.preprocess_data(X_train)
+        #batch_size = min(64, max(16, len(X_train) // 10))

         if self.model is None:
             self.model = load_model(self.warm_start_model_path, custom_objects={'SelfAttention': SelfAttention})

-        early_stopping = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
-        reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, min_lr=0.0001)
+        #early_stopping = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
+        #reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, min_lr=0.01)

+        self.model.fit(X_train, y_train, epochs=150, batch_size=16, validation_split=0.1)
+        print("Model fine-tuned")

-        self.model.fit(X_train, y_train, epochs=100, batch_size=32, validation_split=0.1, callbacks=[early_stopping, reduce_lr])
-        print("Model fine-tuned (not saved).")

     def evaluate_model(self, X_test, y_test):
         X_test = self.preprocess_data(X_test)
@@ -121,19 +124,19 @@ class ScorePredictor:
         test_predictions = self.model.predict(X_test)
         class_1_probabilities = test_predictions[:, 1]
         binary_predictions = (class_1_probabilities >= 0.5).astype(int)
-        print(test_predictions)
+        #print(test_predictions)
         test_precision = precision_score(y_test, binary_predictions)
         test_accuracy = accuracy_score(y_test, binary_predictions)

         print("Test Set Metrics:")
         print(f"Precision: {round(test_precision * 100)}%")
         print(f"Accuracy: {round(test_accuracy * 100)}%")
-        thresholds = [0.8, 0.75, 0.7, 0.6, 0.5, 0.45, 0.4, 0.35, 0.3, 0.2]
+        print(pd.DataFrame({'y_test': y_test, 'y_pred': binary_predictions}))
+        thresholds = [0.8, 0.75, 0.7, 0.6, 0.5, 0.45, 0.4, 0.35, 0.3, 0]
         scores = [10, 9, 8, 7, 6, 5, 4, 3, 2, 1]

         last_prediction_prob = class_1_probabilities[-1]
-        score = 0
+        score = None
         print(f"Last prediction probability: {last_prediction_prob}")

         for threshold, value in zip(thresholds, scores):