Bugfix: AI score model
This commit is contained in:
parent 5a220c85dd
commit cc60f570bd
@@ -316,7 +316,9 @@ async def process_symbol(ticker, con, start_date, end_date):
     df = await download_data(ticker, con, start_date, end_date)
     split_size = int(len(df) * (1-test_size))
     test_data = df.iloc[split_size:]
-    best_features = [col for col in df.columns if col not in ['date','price','Target']]
+    #selected_features = [col for col in df.columns if col not in ['date','price','Target']]
+    best_features = ['freeCashFlowYield', 'cci', 'daily_return', 'cashAndCashEquivalents_to_cashAndShortTermInvestments', 'longTermDebt_to_totalLiabilitiesAndStockholdersEquity', 'longTermDebt_to_totalAssets', 'totalStockholdersEquity_to_totalLiabilitiesAndStockholdersEquity', 'totalStockholdersEquity_to_totalAssets']
     print(f"For the Ticker: {ticker}")
     data = predictor.evaluate_model(test_data[best_features], test_data['Target'])

     if len(data) != 0:
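Note: the hard-coded best_features list must stay in sync with the eight features chosen at training time, since the rebuilt network below expects Input(shape=(8,)). A minimal defensive check before evaluate_model, sketched here as a suggestion (it is not part of this commit), could be:

    # Hypothetical guard: fail fast if a per-ticker frame lacks a model feature.
    missing = [col for col in best_features if col not in test_data.columns]
    if missing:
        print(f"{ticker}: skipping, missing features {missing}")
        return []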
@@ -359,16 +361,25 @@ async def train_process(tickers, con):

     dfs = await chunked_gather(tickers, con, start_date, end_date, chunk_size=10)

+    train_list = []
+    test_list = []
+
     for df in dfs:
         try:
-            split_size = int(len(df) * (1-test_size))
+            split_size = int(len(df) * (1 - test_size))
             train_data = df.iloc[:split_size]
             test_data = df.iloc[split_size:]
-            df_train = pd.concat([df_train, train_data], ignore_index=True)
-            df_test = pd.concat([df_test, test_data], ignore_index=True)
+
+            # Append to the lists
+            train_list.append(train_data)
+            test_list.append(test_data)
         except:
             pass

+    # Concatenate all at once outside the loop
+    df_train = pd.concat(train_list, ignore_index=True)
+    df_test = pd.concat(test_list, ignore_index=True)

     best_features = [col for col in df_train.columns if col not in ['date','price','Target']]
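Note: this is the core fix in train_process. pd.concat inside the loop re-copies the accumulated frame on every iteration, which is quadratic in the total number of rows; collecting the pieces in a list and concatenating once is linear. A self-contained sketch of the new pattern:

    import pandas as pd

    def split_frames(dfs, test_size=0.2):
        # One concat at the end instead of one per iteration.
        train_list, test_list = [], []
        for df in dfs:
            split_size = int(len(df) * (1 - test_size))
            train_list.append(df.iloc[:split_size])
            test_list.append(df.iloc[split_size:])
        return (pd.concat(train_list, ignore_index=True),
                pd.concat(test_list, ignore_index=True))

The bare except: pass still swallows every error silently, and pd.concat raises if all frames fail and the lists stay empty; narrowing it to except Exception with a log line would make the next bugfix easier.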
@@ -380,49 +391,48 @@ async def train_process(tickers, con):

     predictor = ScorePredictor()
-    #print(selected_features)
     selected_features = [col for col in df_train if col not in ['price','date','Target']]
-    #best_features = predictor.feature_selection(df_train[selected_features], df_train['Target'],k=5)
-    #print(best_features)
-    predictor.train_model(df_train[selected_features], df_train['Target'])
+    best_features = predictor.feature_selection(df_train[selected_features], df_train['Target'], k=8)
+    print(best_features)
+    predictor.train_model(df_train[best_features], df_train['Target'])
     predictor.evaluate_model(df_test[best_features], df_test['Target'])


 async def run():

-    #Train first model
+    train_mode = False
     con = sqlite3.connect('stocks.db')

     cursor = con.cursor()
     cursor.execute("PRAGMA journal_mode = wal")
-    cursor.execute("SELECT DISTINCT symbol FROM stocks WHERE marketCap >= 1E9 AND symbol NOT LIKE '%.%'")
-    stock_symbols = [row[0] for row in cursor.fetchall()] #['DHR','ABT','TXN','LIN','RIO','FCX','ECL','NVO','GOOGL','NFLX','SAP','UNH','JNJ','ABBV','MRK','PLD','NEE','DUK','AMT','EQIX','META','DOV','NWN','PG','PH','MMM','AWR','YYAI','PPSI','VYX','XP','BWXT','OLED','ROIC','NKE','LMT','PAYX','GME','AMD','AAPL','NVDA','PLTR']
-    stock_symbols = list(set(stock_symbols))
-    print('Number of Stocks')
-    print(len(stock_symbols))
-    await train_process(stock_symbols, con)
+    if train_mode:
+        #Train first model
+        cursor.execute("SELECT DISTINCT symbol FROM stocks WHERE marketCap >= 50E9 AND symbol NOT LIKE '%.%'")
+        stock_symbols = [row[0] for row in cursor.fetchall()]
+        print('Number of Stocks')
+        print(len(stock_symbols))
+        await train_process(stock_symbols, con)

     #Prediction Steps for all stock symbols
     #cursor.execute("SELECT DISTINCT symbol FROM stocks WHERE marketCap >= 1E9")
     #stock_symbols = [row[0] for row in cursor.fetchall()]
-    total_symbols = stock_symbols
+    if not train_mode:
+        cursor.execute("SELECT DISTINCT symbol FROM stocks WHERE marketCap >= 1E9 AND symbol NOT LIKE '%.%'")
+        stock_symbols = [row[0] for row in cursor.fetchall()]
+        total_symbols = stock_symbols

     print(f"Total tickers: {len(total_symbols)}")
     start_date = datetime(1995, 1, 1).strftime("%Y-%m-%d")
     end_date = datetime.today().strftime("%Y-%m-%d")

-    chunk_size = len(total_symbols)# // 100 # Divide the list into N chunks
+    chunk_size = len(total_symbols)// 100 # Divide the list into N chunks
     chunks = [total_symbols[i:i + chunk_size] for i in range(0, len(total_symbols), chunk_size)]
     for chunk in chunks:
         tasks = []
         for ticker in tqdm(chunk):
             tasks.append(process_symbol(ticker, con, start_date, end_date))

         await asyncio.gather(*tasks)

     con.close()

 try:
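Note: the training path now fits once on all candidate columns, asks the predictor for the top k=8 features, prints them, and retrains on just that subset (the same eight names hard-coded into process_symbol above). ScorePredictor.feature_selection is not shown in this diff; a common way such a method is implemented, sketched purely as an assumption, is scikit-learn's SelectKBest:

    from sklearn.feature_selection import SelectKBest, f_classif

    def feature_selection(X, y, k=8):
        # Score each column against the target and keep the k strongest.
        selector = SelectKBest(score_func=f_classif, k=k)
        selector.fit(X, y)
        return list(X.columns[selector.get_support()])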
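Note two things in run(). First, the new train_mode flag gates retraining behind the >= 50E9 market-cap query while predictions run over the >= 1E9 universe; as diffed, total_symbols is only assigned inside the if not train_mode branch, so the train_mode = True path is worth double-checking. Second, chunk_size = len(total_symbols) // 100 processes the tickers in roughly 100 awaited batches instead of one giant gather (and tqdm here only tracks task creation, not completion). A generic version of that batching pattern, with illustrative names not taken from this commit:

    import asyncio

    async def gather_in_chunks(make_coro, items, chunk_size):
        # Bound concurrency: only one batch of coroutines in flight at a time.
        results = []
        for i in range(0, len(items), chunk_size):
            batch = items[i:i + chunk_size]
            results += await asyncio.gather(*(make_coro(item) for item in batch))
        return results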
Binary file not shown.
@@ -38,24 +38,24 @@ class ScorePredictor:
         clear_session()

         # Input layer
-        inputs = Input(shape=(2139,))
+        inputs = Input(shape=(8,))

         # First dense layer
-        x = Dense(2048, activation='relu', kernel_regularizer=regularizers.l2(0.01))(inputs)
-        x = Dropout(0.3)(x)
+        x = Dense(512, activation='relu')(inputs)
+        x = Dropout(0.5)(x)
         x = BatchNormalization()(x)

         # Additional dense layers
-        for units in [1024, 512, 256, 256]:
-            x = Dense(units, activation='relu', kernel_regularizer=regularizers.l2(0.01))(x)
-            x = Dropout(0.2)(x)
+        for units in [256, 128]:
+            x = Dense(units, activation='relu')(x)
+            x = Dropout(0.5)(x)
             x = BatchNormalization()(x)

         # Reshape for attention mechanism
-        x = Reshape((256, 1))(x)
+        x = Reshape((128, 1))(x)

         # Attention mechanism
-        attention = Dense(256, activation='relu')(x)
+        attention = Dense(128, activation='relu')(x)
         attention = Dense(1, activation='softmax')(attention)

         # Apply attention
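Note: the network shrinks to match the 8 selected inputs: shape (2139,) becomes (8,), four L2-regularized hidden blocks become two plain ones, and regularization moves entirely to Dropout(0.5). The Reshape((128, 1)) treats each of the 128 last-layer units as a step so the attention layers can weight them. One caveat worth flagging: Dense(1, activation='softmax') normalizes across an axis of size one, which always outputs 1.0; weighting the 128 steps against each other requires a softmax over the step axis, as in this sketch (an observation about the pattern, not a change this commit makes):

    from tensorflow.keras.layers import Dense, Flatten, Multiply, Reshape, Softmax

    def attention_block(x):
        # x: (batch, 128) -> one "step" per hidden unit.
        x = Reshape((128, 1))(x)
        scores = Dense(1)(Dense(128, activation='relu')(x))  # (batch, 128, 1)
        weights = Softmax(axis=1)(scores)                    # normalize over the 128 steps
        weighted = Multiply()([x, weights])                  # apply the attention weights
        return Flatten()(weighted)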
@@ -95,8 +95,8 @@ class ScorePredictor:
         checkpoint = ModelCheckpoint('ml_models/weights/ai-score/weights.keras',
                                      save_best_only=True, save_freq = 1,
                                      monitor='val_loss', mode='min')
-        early_stopping = EarlyStopping(monitor='val_loss', patience=100, restore_best_weights=True)
-        reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=80, min_lr=0.00001)
+        early_stopping = EarlyStopping(monitor='val_loss', patience=50, restore_best_weights=True)
+        reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=30, min_lr=0.001)

         self.model.fit(X_train, y_train, epochs=100_000, batch_size=32,
                        validation_split=0.1, callbacks=[checkpoint, early_stopping, reduce_lr])
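Note: training now gives up sooner (early-stopping patience 100 -> 50) and decays the learning rate sooner (plateau patience 80 -> 30). Raising min_lr from 1e-5 to 1e-3 deserves a second look: with Adam's default starting rate of 1e-3, ReduceLROnPlateau can never actually reduce anything. Separately, save_freq=1 in Keras's ModelCheckpoint means "every batch", while val_loss only exists at epoch boundaries; save_freq='epoch' is the usual pairing with save_best_only=True. A sketch of that combination, as a suggestion rather than what this commit does:

    from tensorflow.keras.callbacks import ModelCheckpoint

    # Checks val_loss once per epoch and keeps only the best weights.
    checkpoint = ModelCheckpoint('ml_models/weights/ai-score/weights.keras',
                                 save_best_only=True, save_freq='epoch',
                                 monitor='val_loss', mode='min')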
@@ -136,7 +136,7 @@ class ScorePredictor:

         # Initialize score to 0 (or any default value)
         score = 0
-        #print(last_prediction_prob)
+        print(last_prediction_prob)
         # Determine the score based on the last prediction probability
         for threshold, value in zip(thresholds, scores):
             if last_prediction_prob >= threshold:
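Note: this loop converts the last predicted probability into a discrete score by scanning threshold/score pairs; it only behaves if the thresholds are ordered from highest to lowest (or the loop breaks on the first match). A self-contained illustration, with example thresholds and scores assumed since the diff does not show them:

    def probability_to_score(prob, thresholds=(0.9, 0.7, 0.5, 0.3), scores=(10, 8, 5, 3)):
        # Walk from the highest threshold down; the first one cleared wins.
        score = 0
        for threshold, value in zip(thresholds, scores):
            if prob >= threshold:
                score = value
                break
        return score

    print(probability_to_score(0.75))  # -> 8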