bugfixing ai model
This commit is contained in:
parent
3b70c93d28
commit
5a220c85dd
@ -22,7 +22,7 @@ import gc
|
|||||||
gc.enable()
|
gc.enable()
|
||||||
|
|
||||||
async def save_json(symbol, data):
|
async def save_json(symbol, data):
|
||||||
with open(f"json/ai-score/{symbol}.json", 'wb') as file:
|
with open(f"json/ai-score/companies/{symbol}.json", 'wb') as file:
|
||||||
file.write(orjson.dumps(data))
|
file.write(orjson.dumps(data))
|
||||||
|
|
||||||
|
|
||||||
@ -317,23 +317,35 @@ async def process_symbol(ticker, con, start_date, end_date):
|
|||||||
split_size = int(len(df) * (1-test_size))
|
split_size = int(len(df) * (1-test_size))
|
||||||
test_data = df.iloc[split_size:]
|
test_data = df.iloc[split_size:]
|
||||||
best_features = [col for col in df.columns if col not in ['date','price','Target']]
|
best_features = [col for col in df.columns if col not in ['date','price','Target']]
|
||||||
data, prediction_list = predictor.evaluate_model(test_data[best_features], test_data['Target'])
|
data = predictor.evaluate_model(test_data[best_features], test_data['Target'])
|
||||||
|
|
||||||
print(data)
|
|
||||||
'''
|
|
||||||
output_list = [{'date': date, 'price': price, 'prediction': prediction, 'target': target}
|
|
||||||
for (date, price,target), prediction in zip(test_data[['date', 'price','Target']].iloc[-6:].values, prediction_list[-6:])]
|
|
||||||
'''
|
|
||||||
#print(output_list)
|
|
||||||
|
|
||||||
if len(data) != 0:
|
if len(data) != 0:
|
||||||
if data['precision'] >= 50 and data['accuracy'] >= 50:
|
if data['precision'] >= 50 and data['accuracy'] >= 50:
|
||||||
await save_json(ticker, data)
|
res = {'score': data['score']}
|
||||||
|
await save_json(ticker, res)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(e)
|
print(e)
|
||||||
|
|
||||||
|
|
||||||
|
async def chunked_gather(tickers, con, start_date, end_date, chunk_size=10):
|
||||||
|
# Helper function to divide the tickers into chunks
|
||||||
|
def chunks(lst, size):
|
||||||
|
for i in range(0, len(lst), size):
|
||||||
|
yield lst[i:i+size]
|
||||||
|
|
||||||
|
results = []
|
||||||
|
|
||||||
|
for chunk in chunks(tickers, chunk_size):
|
||||||
|
# Create tasks for each chunk
|
||||||
|
tasks = [download_data(ticker, con, start_date, end_date) for ticker in chunk]
|
||||||
|
# Await the results for the current chunk
|
||||||
|
chunk_results = await asyncio.gather(*tasks)
|
||||||
|
# Accumulate the results
|
||||||
|
results.extend(chunk_results)
|
||||||
|
|
||||||
|
return results
|
||||||
|
|
||||||
#Train mode
|
#Train mode
|
||||||
async def train_process(tickers, con):
|
async def train_process(tickers, con):
|
||||||
tickers = list(set(tickers))
|
tickers = list(set(tickers))
|
||||||
@ -345,8 +357,8 @@ async def train_process(tickers, con):
|
|||||||
df_train = pd.DataFrame()
|
df_train = pd.DataFrame()
|
||||||
df_test = pd.DataFrame()
|
df_test = pd.DataFrame()
|
||||||
|
|
||||||
tasks = [download_data(ticker, con, start_date, end_date) for ticker in tickers]
|
dfs = await chunked_gather(tickers, con, start_date, end_date, chunk_size=10)
|
||||||
dfs = await asyncio.gather(*tasks)
|
|
||||||
for df in dfs:
|
for df in dfs:
|
||||||
try:
|
try:
|
||||||
split_size = int(len(df) * (1-test_size))
|
split_size = int(len(df) * (1-test_size))
|
||||||
@ -373,17 +385,6 @@ async def train_process(tickers, con):
|
|||||||
predictor.train_model(df_train[selected_features], df_train['Target'])
|
predictor.train_model(df_train[selected_features], df_train['Target'])
|
||||||
predictor.evaluate_model(df_test[best_features], df_test['Target'])
|
predictor.evaluate_model(df_test[best_features], df_test['Target'])
|
||||||
|
|
||||||
async def test_process(con):
|
|
||||||
test_size = 0.2
|
|
||||||
start_date = datetime(1995, 1, 1).strftime("%Y-%m-%d")
|
|
||||||
end_date = datetime.today().strftime("%Y-%m-%d")
|
|
||||||
predictor = ScorePredictor()
|
|
||||||
df = await download_data('GME', con, start_date, end_date)
|
|
||||||
split_size = int(len(df) * (1-test_size))
|
|
||||||
test_data = df.iloc[split_size:]
|
|
||||||
selected_features = [col for col in test_data if col not in ['price','date','Target']]
|
|
||||||
predictor.evaluate_model(test_data[selected_features], test_data['Target'])
|
|
||||||
|
|
||||||
|
|
||||||
async def run():
|
async def run():
|
||||||
|
|
||||||
@ -393,20 +394,21 @@ async def run():
|
|||||||
|
|
||||||
cursor = con.cursor()
|
cursor = con.cursor()
|
||||||
cursor.execute("PRAGMA journal_mode = wal")
|
cursor.execute("PRAGMA journal_mode = wal")
|
||||||
cursor.execute("SELECT DISTINCT symbol FROM stocks WHERE marketCap >= 10E9 AND symbol NOT LIKE '%.%'")
|
cursor.execute("SELECT DISTINCT symbol FROM stocks WHERE marketCap >= 1E9 AND symbol NOT LIKE '%.%'")
|
||||||
stock_symbols = ['DHR','ABT','TXN','LIN','RIO','FCX','ECL','NVO','GOOGL','NFLX','SAP','UNH','JNJ','ABBV','MRK','PLD','NEE','DUK','AMT','EQIX','META','DOV','NWN','PG','PH','MMM','AWR','YYAI','PPSI','VYX','XP','BWXT','OLED','ROIC','NKE','LMT','PAYX','GME','AMD','AAPL','NVDA','PLTR'] #[row[0] for row in cursor.fetchall()]
|
stock_symbols = [row[0] for row in cursor.fetchall()] #['DHR','ABT','TXN','LIN','RIO','FCX','ECL','NVO','GOOGL','NFLX','SAP','UNH','JNJ','ABBV','MRK','PLD','NEE','DUK','AMT','EQIX','META','DOV','NWN','PG','PH','MMM','AWR','YYAI','PPSI','VYX','XP','BWXT','OLED','ROIC','NKE','LMT','PAYX','GME','AMD','AAPL','NVDA','PLTR']
|
||||||
stock_symbols = list(set(stock_symbols))
|
stock_symbols = list(set(stock_symbols))
|
||||||
print('Number of Stocks')
|
print('Number of Stocks')
|
||||||
print(len(stock_symbols))
|
print(len(stock_symbols))
|
||||||
#await train_process(stock_symbols, con)
|
await train_process(stock_symbols, con)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#Prediction Steps for all stock symbols
|
#Prediction Steps for all stock symbols
|
||||||
|
#cursor.execute("SELECT DISTINCT symbol FROM stocks WHERE marketCap >= 1E9")
|
||||||
cursor.execute("SELECT DISTINCT symbol FROM stocks WHERE marketCap >= 1E9")
|
#stock_symbols = [row[0] for row in cursor.fetchall()]
|
||||||
stock_symbols = [row[0] for row in cursor.fetchall()]
|
total_symbols = stock_symbols
|
||||||
|
|
||||||
total_symbols = ['GME'] #stock_symbols
|
|
||||||
|
|
||||||
print(f"Total tickers: {len(total_symbols)}")
|
print(f"Total tickers: {len(total_symbols)}")
|
||||||
start_date = datetime(1995, 1, 1).strftime("%Y-%m-%d")
|
start_date = datetime(1995, 1, 1).strftime("%Y-%m-%d")
|
||||||
|
|||||||
Binary file not shown.
@ -41,12 +41,12 @@ class ScorePredictor:
|
|||||||
inputs = Input(shape=(2139,))
|
inputs = Input(shape=(2139,))
|
||||||
|
|
||||||
# First dense layer
|
# First dense layer
|
||||||
x = Dense(1024, activation='relu', kernel_regularizer=regularizers.l2(0.01))(inputs)
|
x = Dense(2048, activation='relu', kernel_regularizer=regularizers.l2(0.01))(inputs)
|
||||||
x = Dropout(0.3)(x)
|
x = Dropout(0.3)(x)
|
||||||
x = BatchNormalization()(x)
|
x = BatchNormalization()(x)
|
||||||
|
|
||||||
# Additional dense layers
|
# Additional dense layers
|
||||||
for units in [512,256, 256]:
|
for units in [1024,512, 256, 256]:
|
||||||
x = Dense(units, activation='relu', kernel_regularizer=regularizers.l2(0.01))(x)
|
x = Dense(units, activation='relu', kernel_regularizer=regularizers.l2(0.01))(x)
|
||||||
x = Dropout(0.2)(x)
|
x = Dropout(0.2)(x)
|
||||||
x = BatchNormalization()(x)
|
x = BatchNormalization()(x)
|
||||||
@ -64,17 +64,17 @@ class ScorePredictor:
|
|||||||
# Global average pooling
|
# Global average pooling
|
||||||
x = GlobalAveragePooling1D()(x)
|
x = GlobalAveragePooling1D()(x)
|
||||||
|
|
||||||
# Output layer
|
# Output layer (for class probabilities)
|
||||||
outputs = Dense(1, activation='sigmoid')(x)
|
outputs = Dense(2, activation='softmax')(x) # Two neurons for class probabilities with softmax
|
||||||
|
|
||||||
# Create the model
|
# Create the model
|
||||||
model = Model(inputs=inputs, outputs=outputs)
|
model = Model(inputs=inputs, outputs=outputs)
|
||||||
|
|
||||||
# Optimizer with a lower learning rate
|
# Optimizer with a lower learning rate
|
||||||
optimizer = Adam(learning_rate=0.1, clipnorm = 1.0)
|
optimizer = Adam(learning_rate=0.001, clipnorm=1.0)
|
||||||
|
|
||||||
# Compile the model
|
# Compile the model
|
||||||
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
|
model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
|
||||||
|
|
||||||
return model
|
return model
|
||||||
|
|
||||||
@ -92,38 +92,63 @@ class ScorePredictor:
|
|||||||
X_train = self.preprocess_data(X_train)
|
X_train = self.preprocess_data(X_train)
|
||||||
#X_train = self.reshape_for_lstm(X_train)
|
#X_train = self.reshape_for_lstm(X_train)
|
||||||
|
|
||||||
checkpoint = ModelCheckpoint('ml_models/weights/fundamental_weights/weights.keras',
|
checkpoint = ModelCheckpoint('ml_models/weights/ai-score/weights.keras',
|
||||||
save_best_only=True, save_freq = 1,
|
save_best_only=True, save_freq = 1,
|
||||||
monitor='val_loss', mode='min')
|
monitor='val_loss', mode='min')
|
||||||
early_stopping = EarlyStopping(monitor='val_loss', patience=70, restore_best_weights=True)
|
early_stopping = EarlyStopping(monitor='val_loss', patience=100, restore_best_weights=True)
|
||||||
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=60, min_lr=0.00001)
|
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=80, min_lr=0.00001)
|
||||||
|
|
||||||
self.model.fit(X_train, y_train, epochs=100_000, batch_size=32,
|
self.model.fit(X_train, y_train, epochs=100_000, batch_size=32,
|
||||||
validation_split=0.1, callbacks=[checkpoint, early_stopping, reduce_lr])
|
validation_split=0.1, callbacks=[checkpoint, early_stopping, reduce_lr])
|
||||||
self.model.save('ml_models/weights/fundamental_weights/weights.keras')
|
self.model.save('ml_models/weights/ai-score/weights.keras')
|
||||||
|
|
||||||
def evaluate_model(self, X_test, y_test):
|
def evaluate_model(self, X_test, y_test):
|
||||||
|
# Preprocess the test data
|
||||||
X_test = self.preprocess_data(X_test)
|
X_test = self.preprocess_data(X_test)
|
||||||
X_test = self.reshape_for_lstm(X_test)
|
#X_test = self.reshape_for_lstm(X_test)
|
||||||
|
|
||||||
self.model = load_model('ml_models/weights/fundamental_weights/weights.keras')
|
# Load the trained model
|
||||||
|
self.model = load_model('ml_models/weights/ai-score/weights.keras')
|
||||||
|
|
||||||
test_predictions = self.model.predict(X_test).flatten()
|
# Get the model's predictions
|
||||||
|
test_predictions = self.model.predict(X_test)
|
||||||
|
#print(test_predictions)
|
||||||
|
|
||||||
test_predictions[test_predictions >= 0.5] = 1
|
# Extract the probabilities for class 1 (index 1 in the softmax output)
|
||||||
test_predictions[test_predictions < 0.5] = 0
|
class_1_probabilities = test_predictions[:, 1]
|
||||||
|
# Convert probabilities to binary predictions using a threshold of 0.5
|
||||||
|
binary_predictions = (class_1_probabilities >= 0.5).astype(int)
|
||||||
|
|
||||||
test_precision = precision_score(y_test, test_predictions)
|
# Calculate precision and accuracy using binary predictions
|
||||||
test_accuracy = accuracy_score(y_test, test_predictions)
|
test_precision = precision_score(y_test, binary_predictions)
|
||||||
|
test_accuracy = accuracy_score(y_test, binary_predictions)
|
||||||
|
|
||||||
print("Test Set Metrics:")
|
print("Test Set Metrics:")
|
||||||
print(f"Precision: {round(test_precision * 100)}%")
|
print(f"Precision: {round(test_precision * 100)}%")
|
||||||
print(f"Accuracy: {round(test_accuracy * 100)}%")
|
print(f"Accuracy: {round(test_accuracy * 100)}%")
|
||||||
|
|
||||||
next_value_prediction = 1 if test_predictions[-1] >= 0.5 else 0
|
# Define thresholds and corresponding scores
|
||||||
|
thresholds = [0.8, 0.75, 0.7, 0.6, 0.5, 0.45, 0.4, 0.35, 0.3, 0.2]
|
||||||
|
scores = [10, 9, 8, 7, 6, 5, 4, 3, 2, 1]
|
||||||
|
|
||||||
|
# Get the last prediction value (class 1 probability) for scoring
|
||||||
|
last_prediction_prob = class_1_probabilities[-1]
|
||||||
|
|
||||||
|
# Initialize score to 0 (or any default value)
|
||||||
|
score = 0
|
||||||
|
#print(last_prediction_prob)
|
||||||
|
# Determine the score based on the last prediction probability
|
||||||
|
for threshold, value in zip(thresholds, scores):
|
||||||
|
if last_prediction_prob >= threshold:
|
||||||
|
score = value
|
||||||
|
break # Exit the loop once the score is determined
|
||||||
|
|
||||||
|
# Return the evaluation results
|
||||||
return {'accuracy': round(test_accuracy * 100),
|
return {'accuracy': round(test_accuracy * 100),
|
||||||
'precision': round(test_precision * 100),
|
'precision': round(test_precision * 100),
|
||||||
'sentiment': 'Bullish' if next_value_prediction == 1 else 'Bearish'}, test_predictions
|
'score': score}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def feature_selection(self, X_train, y_train, k=100):
|
def feature_selection(self, X_train, y_train, k=100):
|
||||||
print('feature selection:')
|
print('feature selection:')
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user