bugfixing ai model
This commit is contained in:
parent
3b70c93d28
commit
5a220c85dd
@ -22,7 +22,7 @@ import gc
|
||||
gc.enable()
|
||||
|
||||
async def save_json(symbol, data):
|
||||
with open(f"json/ai-score/{symbol}.json", 'wb') as file:
|
||||
with open(f"json/ai-score/companies/{symbol}.json", 'wb') as file:
|
||||
file.write(orjson.dumps(data))
|
||||
|
||||
|
||||
@ -317,23 +317,35 @@ async def process_symbol(ticker, con, start_date, end_date):
|
||||
split_size = int(len(df) * (1-test_size))
|
||||
test_data = df.iloc[split_size:]
|
||||
best_features = [col for col in df.columns if col not in ['date','price','Target']]
|
||||
data, prediction_list = predictor.evaluate_model(test_data[best_features], test_data['Target'])
|
||||
|
||||
print(data)
|
||||
'''
|
||||
output_list = [{'date': date, 'price': price, 'prediction': prediction, 'target': target}
|
||||
for (date, price,target), prediction in zip(test_data[['date', 'price','Target']].iloc[-6:].values, prediction_list[-6:])]
|
||||
'''
|
||||
#print(output_list)
|
||||
data = predictor.evaluate_model(test_data[best_features], test_data['Target'])
|
||||
|
||||
if len(data) != 0:
|
||||
if data['precision'] >= 50 and data['accuracy'] >= 50:
|
||||
await save_json(ticker, data)
|
||||
res = {'score': data['score']}
|
||||
await save_json(ticker, res)
|
||||
|
||||
except Exception as e:
|
||||
print(e)
|
||||
|
||||
|
||||
async def chunked_gather(tickers, con, start_date, end_date, chunk_size=10):
    """Download data for ``tickers`` in sequential batches of ``chunk_size``.

    Each batch is awaited with ``asyncio.gather`` before the next batch is
    created, so at most ``chunk_size`` ``download_data`` coroutines are
    pending at any one time.

    Args:
        tickers: Symbols to download. Any iterable is accepted; it is
            materialised into a list so it can be sliced.
        con: Passed through unchanged to ``download_data``.
        start_date: Passed through unchanged to ``download_data``.
        end_date: Passed through unchanged to ``download_data``.
        chunk_size: Maximum number of tickers per batch; must be >= 1.

    Returns:
        A list holding one ``download_data`` result per ticker, in the same
        order as ``tickers``.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
    """
    # A negative step would make range() empty and silently skip every
    # ticker; a zero step fails with an unrelated range() message. Reject
    # both up front with a clear error.
    if chunk_size < 1:
        raise ValueError(
            f"chunk_size must be a positive integer, got {chunk_size!r}"
        )

    tickers = list(tickers)
    results = []

    for start in range(0, len(tickers), chunk_size):
        batch = tickers[start:start + chunk_size]
        # Wait for the whole batch before starting the next one.
        batch_results = await asyncio.gather(
            *(download_data(ticker, con, start_date, end_date) for ticker in batch)
        )
        results.extend(batch_results)

    return results
|
||||
|
||||
#Train mode
|
||||
async def train_process(tickers, con):
|
||||
tickers = list(set(tickers))
|
||||
@ -345,8 +357,8 @@ async def train_process(tickers, con):
|
||||
df_train = pd.DataFrame()
|
||||
df_test = pd.DataFrame()
|
||||
|
||||
tasks = [download_data(ticker, con, start_date, end_date) for ticker in tickers]
|
||||
dfs = await asyncio.gather(*tasks)
|
||||
dfs = await chunked_gather(tickers, con, start_date, end_date, chunk_size=10)
|
||||
|
||||
for df in dfs:
|
||||
try:
|
||||
split_size = int(len(df) * (1-test_size))
|
||||
@ -373,17 +385,6 @@ async def train_process(tickers, con):
|
||||
predictor.train_model(df_train[selected_features], df_train['Target'])
|
||||
predictor.evaluate_model(df_test[best_features], df_test['Target'])
|
||||
|
||||
async def test_process(con):
    """Evaluate the predictor on the trailing 20% of GME's downloaded rows.

    Downloads GME data from 1995-01-01 up to today, keeps the last 20% of
    the rows, and passes every column except ``price``, ``date`` and
    ``Target`` to ``ScorePredictor.evaluate_model`` together with the
    ``Target`` column. The evaluation result is discarded.

    Args:
        con: Passed through unchanged to ``download_data``.
    """
    holdout_fraction = 0.2
    non_feature_columns = ['price', 'date', 'Target']

    first_day = datetime(1995, 1, 1).strftime("%Y-%m-%d")
    today = datetime.today().strftime("%Y-%m-%d")

    model = ScorePredictor()
    frame = await download_data('GME', con, first_day, today)

    # Everything from this row index onwards is the hold-out slice.
    cutoff = int(len(frame) * (1 - holdout_fraction))
    holdout = frame.iloc[cutoff:]

    feature_columns = []
    for column in holdout:
        if column not in non_feature_columns:
            feature_columns.append(column)

    model.evaluate_model(holdout[feature_columns], holdout['Target'])
|
||||
|
||||
|
||||
async def run():
|
||||
|
||||
@ -393,20 +394,21 @@ async def run():
|
||||
|
||||
cursor = con.cursor()
|
||||
cursor.execute("PRAGMA journal_mode = wal")
|
||||
cursor.execute("SELECT DISTINCT symbol FROM stocks WHERE marketCap >= 10E9 AND symbol NOT LIKE '%.%'")
|
||||
stock_symbols = ['DHR','ABT','TXN','LIN','RIO','FCX','ECL','NVO','GOOGL','NFLX','SAP','UNH','JNJ','ABBV','MRK','PLD','NEE','DUK','AMT','EQIX','META','DOV','NWN','PG','PH','MMM','AWR','YYAI','PPSI','VYX','XP','BWXT','OLED','ROIC','NKE','LMT','PAYX','GME','AMD','AAPL','NVDA','PLTR'] #[row[0] for row in cursor.fetchall()]
|
||||
cursor.execute("SELECT DISTINCT symbol FROM stocks WHERE marketCap >= 1E9 AND symbol NOT LIKE '%.%'")
|
||||
stock_symbols = [row[0] for row in cursor.fetchall()] #['DHR','ABT','TXN','LIN','RIO','FCX','ECL','NVO','GOOGL','NFLX','SAP','UNH','JNJ','ABBV','MRK','PLD','NEE','DUK','AMT','EQIX','META','DOV','NWN','PG','PH','MMM','AWR','YYAI','PPSI','VYX','XP','BWXT','OLED','ROIC','NKE','LMT','PAYX','GME','AMD','AAPL','NVDA','PLTR']
|
||||
stock_symbols = list(set(stock_symbols))
|
||||
print('Number of Stocks')
|
||||
print(len(stock_symbols))
|
||||
#await train_process(stock_symbols, con)
|
||||
await train_process(stock_symbols, con)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#Prediction Steps for all stock symbols
|
||||
|
||||
cursor.execute("SELECT DISTINCT symbol FROM stocks WHERE marketCap >= 1E9")
|
||||
stock_symbols = [row[0] for row in cursor.fetchall()]
|
||||
|
||||
total_symbols = ['GME'] #stock_symbols
|
||||
#cursor.execute("SELECT DISTINCT symbol FROM stocks WHERE marketCap >= 1E9")
|
||||
#stock_symbols = [row[0] for row in cursor.fetchall()]
|
||||
total_symbols = stock_symbols
|
||||
|
||||
print(f"Total tickers: {len(total_symbols)}")
|
||||
start_date = datetime(1995, 1, 1).strftime("%Y-%m-%d")
|
||||
|
||||
Binary file not shown.
@ -41,12 +41,12 @@ class ScorePredictor:
|
||||
inputs = Input(shape=(2139,))
|
||||
|
||||
# First dense layer
|
||||
x = Dense(1024, activation='relu', kernel_regularizer=regularizers.l2(0.01))(inputs)
|
||||
x = Dense(2048, activation='relu', kernel_regularizer=regularizers.l2(0.01))(inputs)
|
||||
x = Dropout(0.3)(x)
|
||||
x = BatchNormalization()(x)
|
||||
|
||||
# Additional dense layers
|
||||
for units in [512,256, 256]:
|
||||
for units in [1024,512, 256, 256]:
|
||||
x = Dense(units, activation='relu', kernel_regularizer=regularizers.l2(0.01))(x)
|
||||
x = Dropout(0.2)(x)
|
||||
x = BatchNormalization()(x)
|
||||
@ -64,17 +64,17 @@ class ScorePredictor:
|
||||
# Global average pooling
|
||||
x = GlobalAveragePooling1D()(x)
|
||||
|
||||
# Output layer
|
||||
outputs = Dense(1, activation='sigmoid')(x)
|
||||
# Output layer (for class probabilities)
|
||||
outputs = Dense(2, activation='softmax')(x) # Two neurons for class probabilities with softmax
|
||||
|
||||
# Create the model
|
||||
model = Model(inputs=inputs, outputs=outputs)
|
||||
|
||||
# Optimizer with a lower learning rate
|
||||
optimizer = Adam(learning_rate=0.1, clipnorm = 1.0)
|
||||
optimizer = Adam(learning_rate=0.001, clipnorm=1.0)
|
||||
|
||||
# Compile the model
|
||||
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
|
||||
model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
|
||||
|
||||
return model
|
||||
|
||||
@ -92,38 +92,63 @@ class ScorePredictor:
|
||||
X_train = self.preprocess_data(X_train)
|
||||
#X_train = self.reshape_for_lstm(X_train)
|
||||
|
||||
checkpoint = ModelCheckpoint('ml_models/weights/fundamental_weights/weights.keras',
|
||||
checkpoint = ModelCheckpoint('ml_models/weights/ai-score/weights.keras',
|
||||
save_best_only=True, save_freq = 1,
|
||||
monitor='val_loss', mode='min')
|
||||
early_stopping = EarlyStopping(monitor='val_loss', patience=70, restore_best_weights=True)
|
||||
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=60, min_lr=0.00001)
|
||||
early_stopping = EarlyStopping(monitor='val_loss', patience=100, restore_best_weights=True)
|
||||
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=80, min_lr=0.00001)
|
||||
|
||||
self.model.fit(X_train, y_train, epochs=100_000, batch_size=32,
|
||||
validation_split=0.1, callbacks=[checkpoint, early_stopping, reduce_lr])
|
||||
self.model.save('ml_models/weights/fundamental_weights/weights.keras')
|
||||
self.model.save('ml_models/weights/ai-score/weights.keras')
|
||||
|
||||
def evaluate_model(self, X_test, y_test):
|
||||
# Preprocess the test data
|
||||
X_test = self.preprocess_data(X_test)
|
||||
X_test = self.reshape_for_lstm(X_test)
|
||||
#X_test = self.reshape_for_lstm(X_test)
|
||||
|
||||
self.model = load_model('ml_models/weights/fundamental_weights/weights.keras')
|
||||
# Load the trained model
|
||||
self.model = load_model('ml_models/weights/ai-score/weights.keras')
|
||||
|
||||
test_predictions = self.model.predict(X_test).flatten()
|
||||
# Get the model's predictions
|
||||
test_predictions = self.model.predict(X_test)
|
||||
#print(test_predictions)
|
||||
|
||||
test_predictions[test_predictions >= 0.5] = 1
|
||||
test_predictions[test_predictions < 0.5] = 0
|
||||
# Extract the probabilities for class 1 (index 1 in the softmax output)
|
||||
class_1_probabilities = test_predictions[:, 1]
|
||||
# Convert probabilities to binary predictions using a threshold of 0.5
|
||||
binary_predictions = (class_1_probabilities >= 0.5).astype(int)
|
||||
|
||||
test_precision = precision_score(y_test, test_predictions)
|
||||
test_accuracy = accuracy_score(y_test, test_predictions)
|
||||
# Calculate precision and accuracy using binary predictions
|
||||
test_precision = precision_score(y_test, binary_predictions)
|
||||
test_accuracy = accuracy_score(y_test, binary_predictions)
|
||||
|
||||
print("Test Set Metrics:")
|
||||
print(f"Precision: {round(test_precision * 100)}%")
|
||||
print(f"Accuracy: {round(test_accuracy * 100)}%")
|
||||
|
||||
next_value_prediction = 1 if test_predictions[-1] >= 0.5 else 0
|
||||
# Define thresholds and corresponding scores
|
||||
thresholds = [0.8, 0.75, 0.7, 0.6, 0.5, 0.45, 0.4, 0.35, 0.3, 0.2]
|
||||
scores = [10, 9, 8, 7, 6, 5, 4, 3, 2, 1]
|
||||
|
||||
# Get the last prediction value (class 1 probability) for scoring
|
||||
last_prediction_prob = class_1_probabilities[-1]
|
||||
|
||||
# Initialize score to 0 (or any default value)
|
||||
score = 0
|
||||
#print(last_prediction_prob)
|
||||
# Determine the score based on the last prediction probability
|
||||
for threshold, value in zip(thresholds, scores):
|
||||
if last_prediction_prob >= threshold:
|
||||
score = value
|
||||
break # Exit the loop once the score is determined
|
||||
|
||||
# Return the evaluation results
|
||||
return {'accuracy': round(test_accuracy * 100),
|
||||
'precision': round(test_precision * 100),
|
||||
'sentiment': 'Bullish' if next_value_prediction == 1 else 'Bearish'}, test_predictions
|
||||
'score': score}
|
||||
|
||||
|
||||
|
||||
def feature_selection(self, X_train, y_train, k=100):
|
||||
print('feature selection:')
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user