bugfixing ai model

This commit is contained in:
MuslemRahimi 2024-09-30 14:23:09 +02:00
parent 3b70c93d28
commit 5a220c85dd
3 changed files with 78 additions and 51 deletions

View File

@ -22,7 +22,7 @@ import gc
gc.enable() gc.enable()
async def save_json(symbol, data): async def save_json(symbol, data):
with open(f"json/ai-score/{symbol}.json", 'wb') as file: with open(f"json/ai-score/companies/{symbol}.json", 'wb') as file:
file.write(orjson.dumps(data)) file.write(orjson.dumps(data))
@ -317,23 +317,35 @@ async def process_symbol(ticker, con, start_date, end_date):
split_size = int(len(df) * (1-test_size)) split_size = int(len(df) * (1-test_size))
test_data = df.iloc[split_size:] test_data = df.iloc[split_size:]
best_features = [col for col in df.columns if col not in ['date','price','Target']] best_features = [col for col in df.columns if col not in ['date','price','Target']]
data, prediction_list = predictor.evaluate_model(test_data[best_features], test_data['Target']) data = predictor.evaluate_model(test_data[best_features], test_data['Target'])
print(data)
'''
output_list = [{'date': date, 'price': price, 'prediction': prediction, 'target': target}
for (date, price,target), prediction in zip(test_data[['date', 'price','Target']].iloc[-6:].values, prediction_list[-6:])]
'''
#print(output_list)
if len(data) != 0: if len(data) != 0:
if data['precision'] >= 50 and data['accuracy'] >= 50: if data['precision'] >= 50 and data['accuracy'] >= 50:
await save_json(ticker, data) res = {'score': data['score']}
await save_json(ticker, res)
except Exception as e: except Exception as e:
print(e) print(e)
async def chunked_gather(tickers, con, start_date, end_date, chunk_size=10):
    """Run ``download_data`` for every ticker in sequential batches.

    The tickers are sliced into consecutive batches of at most ``chunk_size``
    items. The ``download_data`` coroutines of one batch are awaited together
    with ``asyncio.gather`` before the next batch is started, so no more than
    ``chunk_size`` downloads are pending at the same time.

    Args:
        tickers: Sequence of ticker symbols; must support ``len()`` and slicing.
        con: Forwarded unchanged to ``download_data``.
        start_date: Forwarded unchanged to ``download_data``.
        end_date: Forwarded unchanged to ``download_data``.
        chunk_size: Maximum number of tickers per batch; must be at least 1.

    Returns:
        A flat list holding one ``download_data`` result per ticker, in the
        same order as ``tickers``.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
    """
    # A negative step makes range() empty, which would silently drop every
    # ticker and return []; a zero step fails late with an unrelated message.
    # Reject both up front with a clear error instead.
    if chunk_size < 1:
        raise ValueError(f"chunk_size must be >= 1, got {chunk_size}")

    results = []
    for offset in range(0, len(tickers), chunk_size):
        batch = tickers[offset:offset + chunk_size]
        # Wait for the whole batch to finish before scheduling the next one.
        batch_results = await asyncio.gather(
            *(download_data(ticker, con, start_date, end_date) for ticker in batch)
        )
        results.extend(batch_results)
    return results
#Train mode #Train mode
async def train_process(tickers, con): async def train_process(tickers, con):
tickers = list(set(tickers)) tickers = list(set(tickers))
@ -345,8 +357,8 @@ async def train_process(tickers, con):
df_train = pd.DataFrame() df_train = pd.DataFrame()
df_test = pd.DataFrame() df_test = pd.DataFrame()
tasks = [download_data(ticker, con, start_date, end_date) for ticker in tickers] dfs = await chunked_gather(tickers, con, start_date, end_date, chunk_size=10)
dfs = await asyncio.gather(*tasks)
for df in dfs: for df in dfs:
try: try:
split_size = int(len(df) * (1-test_size)) split_size = int(len(df) * (1-test_size))
@ -373,17 +385,6 @@ async def train_process(tickers, con):
predictor.train_model(df_train[selected_features], df_train['Target']) predictor.train_model(df_train[selected_features], df_train['Target'])
predictor.evaluate_model(df_test[best_features], df_test['Target']) predictor.evaluate_model(df_test[best_features], df_test['Target'])
async def test_process(con):
    """Evaluate the saved model on the most recent 20% of GME rows.

    Downloads the 'GME' data from 1995-01-01 up to today via
    ``download_data``, keeps the trailing 20% of rows as the hold-out set,
    and passes every column except 'price', 'date' and 'Target' as features
    to ``ScorePredictor.evaluate_model`` together with the 'Target' column.

    Args:
        con: Forwarded unchanged to ``download_data``.

    Returns:
        None; the value returned by ``evaluate_model`` is discarded.
    """
    holdout_fraction = 0.2
    first_day = datetime(1995, 1, 1).strftime("%Y-%m-%d")
    today = datetime.today().strftime("%Y-%m-%d")
    model = ScorePredictor()

    # NOTE(review): presumably a pandas DataFrame (it is sliced with .iloc) - confirm.
    frame = await download_data('GME', con, first_day, today)

    # Rows from the cut-off position onwards form the hold-out slice.
    cutoff = int(len(frame) * (1 - holdout_fraction))
    holdout = frame.iloc[cutoff:]

    # Everything that is not bookkeeping or the label is used as a feature.
    non_feature_columns = ['price', 'date', 'Target']
    feature_columns = [
        name for name in holdout if name not in non_feature_columns
    ]
    model.evaluate_model(holdout[feature_columns], holdout['Target'])
async def run(): async def run():
@ -393,20 +394,21 @@ async def run():
cursor = con.cursor() cursor = con.cursor()
cursor.execute("PRAGMA journal_mode = wal") cursor.execute("PRAGMA journal_mode = wal")
cursor.execute("SELECT DISTINCT symbol FROM stocks WHERE marketCap >= 10E9 AND symbol NOT LIKE '%.%'") cursor.execute("SELECT DISTINCT symbol FROM stocks WHERE marketCap >= 1E9 AND symbol NOT LIKE '%.%'")
stock_symbols = ['DHR','ABT','TXN','LIN','RIO','FCX','ECL','NVO','GOOGL','NFLX','SAP','UNH','JNJ','ABBV','MRK','PLD','NEE','DUK','AMT','EQIX','META','DOV','NWN','PG','PH','MMM','AWR','YYAI','PPSI','VYX','XP','BWXT','OLED','ROIC','NKE','LMT','PAYX','GME','AMD','AAPL','NVDA','PLTR'] #[row[0] for row in cursor.fetchall()] stock_symbols = [row[0] for row in cursor.fetchall()] #['DHR','ABT','TXN','LIN','RIO','FCX','ECL','NVO','GOOGL','NFLX','SAP','UNH','JNJ','ABBV','MRK','PLD','NEE','DUK','AMT','EQIX','META','DOV','NWN','PG','PH','MMM','AWR','YYAI','PPSI','VYX','XP','BWXT','OLED','ROIC','NKE','LMT','PAYX','GME','AMD','AAPL','NVDA','PLTR']
stock_symbols = list(set(stock_symbols)) stock_symbols = list(set(stock_symbols))
print('Number of Stocks') print('Number of Stocks')
print(len(stock_symbols)) print(len(stock_symbols))
#await train_process(stock_symbols, con) await train_process(stock_symbols, con)
#Prediction Steps for all stock symbols #Prediction Steps for all stock symbols
#cursor.execute("SELECT DISTINCT symbol FROM stocks WHERE marketCap >= 1E9")
cursor.execute("SELECT DISTINCT symbol FROM stocks WHERE marketCap >= 1E9") #stock_symbols = [row[0] for row in cursor.fetchall()]
stock_symbols = [row[0] for row in cursor.fetchall()] total_symbols = stock_symbols
total_symbols = ['GME'] #stock_symbols
print(f"Total tickers: {len(total_symbols)}") print(f"Total tickers: {len(total_symbols)}")
start_date = datetime(1995, 1, 1).strftime("%Y-%m-%d") start_date = datetime(1995, 1, 1).strftime("%Y-%m-%d")

View File

@ -41,12 +41,12 @@ class ScorePredictor:
inputs = Input(shape=(2139,)) inputs = Input(shape=(2139,))
# First dense layer # First dense layer
x = Dense(1024, activation='relu', kernel_regularizer=regularizers.l2(0.01))(inputs) x = Dense(2048, activation='relu', kernel_regularizer=regularizers.l2(0.01))(inputs)
x = Dropout(0.3)(x) x = Dropout(0.3)(x)
x = BatchNormalization()(x) x = BatchNormalization()(x)
# Additional dense layers # Additional dense layers
for units in [512,256, 256]: for units in [1024,512, 256, 256]:
x = Dense(units, activation='relu', kernel_regularizer=regularizers.l2(0.01))(x) x = Dense(units, activation='relu', kernel_regularizer=regularizers.l2(0.01))(x)
x = Dropout(0.2)(x) x = Dropout(0.2)(x)
x = BatchNormalization()(x) x = BatchNormalization()(x)
@ -64,17 +64,17 @@ class ScorePredictor:
# Global average pooling # Global average pooling
x = GlobalAveragePooling1D()(x) x = GlobalAveragePooling1D()(x)
# Output layer # Output layer (for class probabilities)
outputs = Dense(1, activation='sigmoid')(x) outputs = Dense(2, activation='softmax')(x) # Two neurons for class probabilities with softmax
# Create the model # Create the model
model = Model(inputs=inputs, outputs=outputs) model = Model(inputs=inputs, outputs=outputs)
# Optimizer with a lower learning rate # Optimizer with a lower learning rate
optimizer = Adam(learning_rate=0.1, clipnorm = 1.0) optimizer = Adam(learning_rate=0.001, clipnorm=1.0)
# Compile the model # Compile the model
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy']) model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
return model return model
@ -92,38 +92,63 @@ class ScorePredictor:
X_train = self.preprocess_data(X_train) X_train = self.preprocess_data(X_train)
#X_train = self.reshape_for_lstm(X_train) #X_train = self.reshape_for_lstm(X_train)
checkpoint = ModelCheckpoint('ml_models/weights/fundamental_weights/weights.keras', checkpoint = ModelCheckpoint('ml_models/weights/ai-score/weights.keras',
save_best_only=True, save_freq = 1, save_best_only=True, save_freq = 1,
monitor='val_loss', mode='min') monitor='val_loss', mode='min')
early_stopping = EarlyStopping(monitor='val_loss', patience=70, restore_best_weights=True) early_stopping = EarlyStopping(monitor='val_loss', patience=100, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=60, min_lr=0.00001) reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=80, min_lr=0.00001)
self.model.fit(X_train, y_train, epochs=100_000, batch_size=32, self.model.fit(X_train, y_train, epochs=100_000, batch_size=32,
validation_split=0.1, callbacks=[checkpoint, early_stopping, reduce_lr]) validation_split=0.1, callbacks=[checkpoint, early_stopping, reduce_lr])
self.model.save('ml_models/weights/fundamental_weights/weights.keras') self.model.save('ml_models/weights/ai-score/weights.keras')
def evaluate_model(self, X_test, y_test): def evaluate_model(self, X_test, y_test):
# Preprocess the test data
X_test = self.preprocess_data(X_test) X_test = self.preprocess_data(X_test)
X_test = self.reshape_for_lstm(X_test) #X_test = self.reshape_for_lstm(X_test)
self.model = load_model('ml_models/weights/fundamental_weights/weights.keras') # Load the trained model
self.model = load_model('ml_models/weights/ai-score/weights.keras')
test_predictions = self.model.predict(X_test).flatten() # Get the model's predictions
test_predictions = self.model.predict(X_test)
#print(test_predictions)
test_predictions[test_predictions >= 0.5] = 1 # Extract the probabilities for class 1 (index 1 in the softmax output)
test_predictions[test_predictions < 0.5] = 0 class_1_probabilities = test_predictions[:, 1]
# Convert probabilities to binary predictions using a threshold of 0.5
binary_predictions = (class_1_probabilities >= 0.5).astype(int)
test_precision = precision_score(y_test, test_predictions) # Calculate precision and accuracy using binary predictions
test_accuracy = accuracy_score(y_test, test_predictions) test_precision = precision_score(y_test, binary_predictions)
test_accuracy = accuracy_score(y_test, binary_predictions)
print("Test Set Metrics:") print("Test Set Metrics:")
print(f"Precision: {round(test_precision * 100)}%") print(f"Precision: {round(test_precision * 100)}%")
print(f"Accuracy: {round(test_accuracy * 100)}%") print(f"Accuracy: {round(test_accuracy * 100)}%")
next_value_prediction = 1 if test_predictions[-1] >= 0.5 else 0 # Define thresholds and corresponding scores
thresholds = [0.8, 0.75, 0.7, 0.6, 0.5, 0.45, 0.4, 0.35, 0.3, 0.2]
scores = [10, 9, 8, 7, 6, 5, 4, 3, 2, 1]
# Get the last prediction value (class 1 probability) for scoring
last_prediction_prob = class_1_probabilities[-1]
# Initialize score to 0 (or any default value)
score = 0
#print(last_prediction_prob)
# Determine the score based on the last prediction probability
for threshold, value in zip(thresholds, scores):
if last_prediction_prob >= threshold:
score = value
break # Exit the loop once the score is determined
# Return the evaluation results
return {'accuracy': round(test_accuracy * 100), return {'accuracy': round(test_accuracy * 100),
'precision': round(test_precision * 100), 'precision': round(test_precision * 100),
'sentiment': 'Bullish' if next_value_prediction == 1 else 'Bearish'}, test_predictions 'score': score}
def feature_selection(self, X_train, y_train, k=100): def feature_selection(self, X_train, y_train, k=100):
print('feature selection:') print('feature selection:')