bugfixing ai model

This commit is contained in:
MuslemRahimi 2024-09-30 14:23:09 +02:00
parent 3b70c93d28
commit 5a220c85dd
3 changed files with 78 additions and 51 deletions

View File

@ -22,7 +22,7 @@ import gc
gc.enable()
async def save_json(symbol, data):
with open(f"json/ai-score/{symbol}.json", 'wb') as file:
with open(f"json/ai-score/companies/{symbol}.json", 'wb') as file:
file.write(orjson.dumps(data))
@ -317,23 +317,35 @@ async def process_symbol(ticker, con, start_date, end_date):
split_size = int(len(df) * (1-test_size))
test_data = df.iloc[split_size:]
best_features = [col for col in df.columns if col not in ['date','price','Target']]
data, prediction_list = predictor.evaluate_model(test_data[best_features], test_data['Target'])
print(data)
'''
output_list = [{'date': date, 'price': price, 'prediction': prediction, 'target': target}
for (date, price,target), prediction in zip(test_data[['date', 'price','Target']].iloc[-6:].values, prediction_list[-6:])]
'''
#print(output_list)
data = predictor.evaluate_model(test_data[best_features], test_data['Target'])
if len(data) != 0:
if data['precision'] >= 50 and data['accuracy'] >= 50:
await save_json(ticker, data)
res = {'score': data['score']}
await save_json(ticker, res)
except Exception as e:
print(e)
async def chunked_gather(tickers, con, start_date, end_date, chunk_size=10):
    """Download data for ``tickers`` in consecutive batches of ``chunk_size``.

    Each batch is handed to ``asyncio.gather`` and fully awaited before the
    next batch is created, so no more than ``chunk_size`` ``download_data``
    coroutines are pending at any time.

    Args:
        tickers: Sequence of ticker symbols; must support ``len()`` and slicing.
        con: Forwarded unchanged to ``download_data``.
        start_date: Forwarded unchanged to ``download_data``.
        end_date: Forwarded unchanged to ``download_data``.
        chunk_size: Maximum number of tickers per batch; must be positive.

    Returns:
        A list with one ``download_data`` result per ticker, in input order.

    Raises:
        ValueError: If ``chunk_size`` is zero or negative. A negative step
            would otherwise make the batching range empty and silently
            return no results at all.
    """
    if chunk_size <= 0:
        raise ValueError(
            f"chunk_size must be a positive integer, got {chunk_size!r}"
        )

    results = []
    for offset in range(0, len(tickers), chunk_size):
        batch = tickers[offset:offset + chunk_size]
        # gather() returns results in the order the awaitables were passed,
        # so extending batch by batch keeps the overall ticker order.
        batch_results = await asyncio.gather(
            *(download_data(ticker, con, start_date, end_date) for ticker in batch)
        )
        results.extend(batch_results)
    return results
#Train mode
async def train_process(tickers, con):
tickers = list(set(tickers))
@ -345,8 +357,8 @@ async def train_process(tickers, con):
df_train = pd.DataFrame()
df_test = pd.DataFrame()
tasks = [download_data(ticker, con, start_date, end_date) for ticker in tickers]
dfs = await asyncio.gather(*tasks)
dfs = await chunked_gather(tickers, con, start_date, end_date, chunk_size=10)
for df in dfs:
try:
split_size = int(len(df) * (1-test_size))
@ -373,17 +385,6 @@ async def train_process(tickers, con):
predictor.train_model(df_train[selected_features], df_train['Target'])
predictor.evaluate_model(df_test[best_features], df_test['Target'])
async def test_process(con):
    """Run ``ScorePredictor.evaluate_model`` on the trailing 20% of GME rows.

    Downloads GME data from 1995-01-01 up to today's date via
    ``download_data``, keeps the last 20% of the rows as the hold-out slice,
    and passes every column except ``price``, ``date`` and ``Target`` as
    features, with ``Target`` as the labels. The evaluation result is not
    returned.

    Args:
        con: Forwarded unchanged to ``download_data``.
    """
    holdout_fraction = 0.2
    date_format = "%Y-%m-%d"
    first_day = datetime(1995, 1, 1).strftime(date_format)
    today = datetime.today().strftime(date_format)

    scorer = ScorePredictor()
    frame = await download_data('GME', con, first_day, today)

    # Rows from the cut-off index onward form the hold-out slice.
    cutoff = int(len(frame) * (1-holdout_fraction))
    holdout = frame.iloc[cutoff:]

    non_feature_columns = ['price','date','Target']
    feature_columns = [name for name in holdout if name not in non_feature_columns]
    scorer.evaluate_model(holdout[feature_columns], holdout['Target'])
async def run():
@ -393,20 +394,21 @@ async def run():
cursor = con.cursor()
cursor.execute("PRAGMA journal_mode = wal")
cursor.execute("SELECT DISTINCT symbol FROM stocks WHERE marketCap >= 10E9 AND symbol NOT LIKE '%.%'")
stock_symbols = ['DHR','ABT','TXN','LIN','RIO','FCX','ECL','NVO','GOOGL','NFLX','SAP','UNH','JNJ','ABBV','MRK','PLD','NEE','DUK','AMT','EQIX','META','DOV','NWN','PG','PH','MMM','AWR','YYAI','PPSI','VYX','XP','BWXT','OLED','ROIC','NKE','LMT','PAYX','GME','AMD','AAPL','NVDA','PLTR'] #[row[0] for row in cursor.fetchall()]
cursor.execute("SELECT DISTINCT symbol FROM stocks WHERE marketCap >= 1E9 AND symbol NOT LIKE '%.%'")
stock_symbols = [row[0] for row in cursor.fetchall()] #['DHR','ABT','TXN','LIN','RIO','FCX','ECL','NVO','GOOGL','NFLX','SAP','UNH','JNJ','ABBV','MRK','PLD','NEE','DUK','AMT','EQIX','META','DOV','NWN','PG','PH','MMM','AWR','YYAI','PPSI','VYX','XP','BWXT','OLED','ROIC','NKE','LMT','PAYX','GME','AMD','AAPL','NVDA','PLTR']
stock_symbols = list(set(stock_symbols))
print('Number of Stocks')
print(len(stock_symbols))
#await train_process(stock_symbols, con)
await train_process(stock_symbols, con)
#Prediction Steps for all stock symbols
cursor.execute("SELECT DISTINCT symbol FROM stocks WHERE marketCap >= 1E9")
stock_symbols = [row[0] for row in cursor.fetchall()]
total_symbols = ['GME'] #stock_symbols
#cursor.execute("SELECT DISTINCT symbol FROM stocks WHERE marketCap >= 1E9")
#stock_symbols = [row[0] for row in cursor.fetchall()]
total_symbols = stock_symbols
print(f"Total tickers: {len(total_symbols)}")
start_date = datetime(1995, 1, 1).strftime("%Y-%m-%d")

View File

@ -41,12 +41,12 @@ class ScorePredictor:
inputs = Input(shape=(2139,))
# First dense layer
x = Dense(1024, activation='relu', kernel_regularizer=regularizers.l2(0.01))(inputs)
x = Dense(2048, activation='relu', kernel_regularizer=regularizers.l2(0.01))(inputs)
x = Dropout(0.3)(x)
x = BatchNormalization()(x)
# Additional dense layers
for units in [512,256, 256]:
for units in [1024,512, 256, 256]:
x = Dense(units, activation='relu', kernel_regularizer=regularizers.l2(0.01))(x)
x = Dropout(0.2)(x)
x = BatchNormalization()(x)
@ -64,17 +64,17 @@ class ScorePredictor:
# Global average pooling
x = GlobalAveragePooling1D()(x)
# Output layer
outputs = Dense(1, activation='sigmoid')(x)
# Output layer (for class probabilities)
outputs = Dense(2, activation='softmax')(x) # Two neurons for class probabilities with softmax
# Create the model
model = Model(inputs=inputs, outputs=outputs)
# Optimizer with a lower learning rate
optimizer = Adam(learning_rate=0.1, clipnorm = 1.0)
optimizer = Adam(learning_rate=0.001, clipnorm=1.0)
# Compile the model
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
return model
@ -92,38 +92,63 @@ class ScorePredictor:
X_train = self.preprocess_data(X_train)
#X_train = self.reshape_for_lstm(X_train)
checkpoint = ModelCheckpoint('ml_models/weights/fundamental_weights/weights.keras',
checkpoint = ModelCheckpoint('ml_models/weights/ai-score/weights.keras',
save_best_only=True, save_freq = 1,
monitor='val_loss', mode='min')
early_stopping = EarlyStopping(monitor='val_loss', patience=70, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=60, min_lr=0.00001)
early_stopping = EarlyStopping(monitor='val_loss', patience=100, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=80, min_lr=0.00001)
self.model.fit(X_train, y_train, epochs=100_000, batch_size=32,
validation_split=0.1, callbacks=[checkpoint, early_stopping, reduce_lr])
self.model.save('ml_models/weights/fundamental_weights/weights.keras')
self.model.save('ml_models/weights/ai-score/weights.keras')
def evaluate_model(self, X_test, y_test):
# Preprocess the test data
X_test = self.preprocess_data(X_test)
X_test = self.reshape_for_lstm(X_test)
#X_test = self.reshape_for_lstm(X_test)
self.model = load_model('ml_models/weights/fundamental_weights/weights.keras')
# Load the trained model
self.model = load_model('ml_models/weights/ai-score/weights.keras')
test_predictions = self.model.predict(X_test).flatten()
# Get the model's predictions
test_predictions = self.model.predict(X_test)
#print(test_predictions)
test_predictions[test_predictions >= 0.5] = 1
test_predictions[test_predictions < 0.5] = 0
# Extract the probabilities for class 1 (index 1 in the softmax output)
class_1_probabilities = test_predictions[:, 1]
# Convert probabilities to binary predictions using a threshold of 0.5
binary_predictions = (class_1_probabilities >= 0.5).astype(int)
test_precision = precision_score(y_test, test_predictions)
test_accuracy = accuracy_score(y_test, test_predictions)
# Calculate precision and accuracy using binary predictions
test_precision = precision_score(y_test, binary_predictions)
test_accuracy = accuracy_score(y_test, binary_predictions)
print("Test Set Metrics:")
print(f"Precision: {round(test_precision * 100)}%")
print(f"Accuracy: {round(test_accuracy * 100)}%")
next_value_prediction = 1 if test_predictions[-1] >= 0.5 else 0
# Define thresholds and corresponding scores
thresholds = [0.8, 0.75, 0.7, 0.6, 0.5, 0.45, 0.4, 0.35, 0.3, 0.2]
scores = [10, 9, 8, 7, 6, 5, 4, 3, 2, 1]
# Get the last prediction value (class 1 probability) for scoring
last_prediction_prob = class_1_probabilities[-1]
# Initialize score to 0 (or any default value)
score = 0
#print(last_prediction_prob)
# Determine the score based on the last prediction probability
for threshold, value in zip(thresholds, scores):
if last_prediction_prob >= threshold:
score = value
break # Exit the loop once the score is determined
# Return the evaluation results
return {'accuracy': round(test_accuracy * 100),
'precision': round(test_precision * 100),
'sentiment': 'Bullish' if next_value_prediction == 1 else 'Bearish'}, test_predictions
'score': score}
def feature_selection(self, X_train, y_train, k=100):
print('feature selection:')