update cron job score
This commit is contained in:
parent
63c55bdf01
commit
355e67d982
65
app/cron_ai_agents.py
Normal file
65
app/cron_ai_agents.py
Normal file
@ -0,0 +1,65 @@
|
|||||||
|
from datetime import datetime
|
||||||
|
import orjson
|
||||||
|
import sqlite3
|
||||||
|
import asyncio
|
||||||
|
from tqdm import tqdm
|
||||||
|
|
||||||
|
|
||||||
|
# Load the stock screener snapshot once at import time and index it by
# ticker symbol, so per-symbol lookups do not have to scan the whole list.
with open("json/stock-screener/data.json", 'rb') as file:
    stock_screener_data = orjson.loads(file.read())

stock_screener_data_dict = {item['symbol']: item for item in stock_screener_data}
|
||||||
|
|
||||||
|
|
||||||
|
async def save_json(symbol, data):
    """Serialize ``data`` with orjson and write it to ``json/statistics/{symbol}.json``.

    The payload is encoded *before* the file is opened: opening with ``'wb'``
    truncates the target, so encoding first guarantees that a serialization
    error cannot wipe a previously written file for the same symbol.

    Args:
        symbol: Ticker symbol, used verbatim as the file name stem.
        data: Object to encode with ``orjson.dumps``.

    Raises:
        Whatever ``orjson.dumps`` or the file write raises; nothing is
        swallowed here.

    Note:
        The write is an ordinary blocking call; this coroutine contains no
        ``await`` and therefore never yields to the event loop.
    """
    payload = orjson.dumps(data)
    with open(f"json/statistics/{symbol}.json", 'wb') as file:
        file.write(payload)
|
||||||
|
|
||||||
|
|
||||||
|
async def get_data(symbol):
    """Extract the statistics columns for ``symbol`` from the screener snapshot.

    Args:
        symbol: Ticker symbol to look up in ``stock_screener_data_dict``.

    Returns:
        A dict with one entry per name in ``columns``; a column missing from
        the screener record maps to ``None``. An empty dict is returned when
        the symbol is not present in the snapshot at all.
    """
    columns = [
        'marketCap', 'sharesOutStanding', 'sharesQoQ', 'sharesYoY', 'institutionalOwnership', 'floatShares',
        'priceEarningsToGrowthRatio', 'priceEarningsRatio', 'forwardPE', 'priceToSalesRatio', 'forwardPS', 'priceToBookRatio', 'priceToFreeCashFlowsRatio',
        'sharesShort', 'shortOutStandingPercent', 'shortFloatPercent', 'shortRatio',
        'enterpriseValue', 'evEarnings', 'evSales', 'evEBITDA', 'evEBIT', 'evFCF',
        'currentRatio', 'quickRatio', 'debtRatio', 'debtEquityRatio', 'interestCoverage', 'cashFlowToDebtRatio', 'totalDebtToCapitalization',
        'returnOnEquity', 'returnOnAssets', 'returnOnCapital', 'revenuePerEmployee', 'profitPerEmployee',
        'employees', 'assetTurnover', 'inventoryTurnover', 'incomeTaxExpense', 'effectiveTaxRate', 'beta', 'returnOnInvestedCapital',
        'change1Y', 'sma50', 'sma200', 'rsi', 'avgVolume', 'revenue', 'netIncome', 'grossProfit', 'operatingIncome', 'ebitda', 'ebit', 'eps',
        'cashAndCashEquivalents', 'totalDebt', 'retainedEarnings', 'totalAssets', 'workingCapital', 'operatingCashFlow',
        'capitalExpenditure', 'freeCashFlow', 'freeCashFlowPerShare', 'grossProfitMargin', 'operatingProfitMargin', 'pretaxProfitMargin',
        'netProfitMargin', 'ebitdaMargin', 'ebitMargin', 'freeCashFlowMargin',
        'annualDividend', 'dividendYield', 'payoutRatio', 'dividendGrowth', 'earningsYield', 'freeCashFlowYield', 'altmanZScore', 'piotroskiScore',
        'lastStockSplit', 'splitType', 'splitRatio', 'analystRating', 'analystCounter', 'priceTarget', 'upside',
    ]

    if symbol not in stock_screener_data_dict:
        return {}

    # Fetch the record once instead of re-indexing the snapshot per column.
    # ``.get`` already yields None for absent columns, so no exception
    # handling is needed around it.
    record = stock_screener_data_dict[symbol]
    return {column: record.get(column) for column in columns}
|
||||||
|
|
||||||
|
|
||||||
|
async def run():
    """Main function to run the data extraction process.

    Opens ``stocks.db``, switches it to WAL journaling, reads the distinct
    symbols that contain no dot from the ``stocks`` table, then extracts and
    prints the screener statistics for the symbols being processed.
    """
    # Connect to SQLite database
    con = sqlite3.connect('stocks.db')
    try:
        cursor = con.cursor()
        cursor.execute("PRAGMA journal_mode = wal")
        cursor.execute("SELECT DISTINCT symbol FROM stocks WHERE symbol NOT LIKE '%.%'")
        total_symbols = [row[0] for row in cursor.fetchall()]
    finally:
        # Release the database handle even when one of the statements fails.
        con.close()

    # Process symbols with progress bar
    # NOTE(review): only 'TSLA' is processed and the result is just printed;
    # `total_symbols` is fetched above but never used and `save_json` is never
    # called. This looks like a leftover test mode - confirm before switching
    # the loop to `total_symbols`.
    for symbol in tqdm(['TSLA'], desc="Extracting data"):
        data = await get_data(symbol)
        print(data)
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # asyncio.run creates a fresh event loop, runs the coroutine to completion
    # and closes the loop afterwards. Calling asyncio.get_event_loop() with no
    # running loop is deprecated and left the loop unclosed.
    asyncio.run(run())
|
||||||
@ -283,8 +283,10 @@ async def warm_start_training(tickers, con, skip_downloading, save_data):
|
|||||||
|
|
||||||
predictor = ScorePredictor()
|
predictor = ScorePredictor()
|
||||||
selected_features = [col for col in df_train if col not in ['price', 'date', 'Target']]
|
selected_features = [col for col in df_train if col not in ['price', 'date', 'Target']]
|
||||||
|
|
||||||
predictor.warm_start_training(df_train[selected_features], df_train['Target'])
|
predictor.warm_start_training(df_train[selected_features], df_train['Target'])
|
||||||
predictor.evaluate_model(df_test[selected_features], df_test['Target'])
|
predictor.evaluate_model(df_test)
|
||||||
|
|
||||||
return predictor
|
return predictor
|
||||||
|
|
||||||
async def fine_tune_and_evaluate(ticker, con, start_date, end_date, skip_downloading, save_data):
|
async def fine_tune_and_evaluate(ticker, con, start_date, end_date, skip_downloading, save_data):
|
||||||
@ -299,20 +301,30 @@ async def fine_tune_and_evaluate(ticker, con, start_date, end_date, skip_downloa
|
|||||||
train_data = df.iloc[:split_size]
|
train_data = df.iloc[:split_size]
|
||||||
test_data = df.iloc[split_size:]
|
test_data = df.iloc[split_size:]
|
||||||
|
|
||||||
selected_features = [col for col in df.columns if col not in ['date','price','Target']]
|
#selected_features = [col for col in df.columns if col not in ['date','price','Target']]
|
||||||
|
|
||||||
# Fine-tune the model
|
# Fine-tune the model
|
||||||
predictor = ScorePredictor()
|
predictor = ScorePredictor()
|
||||||
#predictor.fine_tune_model(train_data[selected_features], train_data['Target'])
|
#predictor.fine_tune_model(train_data[selected_features], train_data['Target'])
|
||||||
|
|
||||||
print(f"Evaluating fine-tuned model for {ticker}")
|
print(f"Evaluating fine-tuned model for {ticker}")
|
||||||
data = predictor.evaluate_model(test_data[selected_features], test_data['Target'])
|
data = predictor.evaluate_model(test_data)
|
||||||
|
|
||||||
if (data['precision'] >= 50 and data['accuracy'] >= 50 and
|
if (data['precision'] >= 50 and data['accuracy'] >= 50 and
|
||||||
data['accuracy'] < 100 and data['precision'] < 100 and
|
data['accuracy'] < 100 and data['precision'] < 100 and
|
||||||
data['f1_score'] >= 50 and data['recall_score'] >= 50 and
|
data['f1_score'] >= 50 and data['recall_score'] >= 50 and
|
||||||
data['roc_auc_score'] >= 50):
|
data['roc_auc_score'] >= 50):
|
||||||
await save_json(ticker, data)
|
await save_json(ticker, data)
|
||||||
|
data['backtest'] = [
|
||||||
|
{'date': entry['date'], 'yTest': entry['y_test'], 'yPred': entry['y_pred'], 'score': entry['score']}
|
||||||
|
for entry in data['backtest']
|
||||||
|
]
|
||||||
|
#print(data)
|
||||||
print(f"Saved results for {ticker}")
|
print(f"Saved results for {ticker}")
|
||||||
|
else:
|
||||||
|
try:
|
||||||
|
os.remove(f"json/ai-score/companies/{ticker}.json")
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Error processing {ticker}: {e}")
|
print(f"Error processing {ticker}: {e}")
|
||||||
@ -336,7 +348,9 @@ async def run():
|
|||||||
# Warm start training
|
# Warm start training
|
||||||
stock_symbols = cursor.execute("SELECT DISTINCT symbol FROM stocks WHERE marketCap >= 500E6 AND symbol NOT LIKE '%.%'") #list(set(['CB','LOW','PFE','RTX','DIS','MS','BHP','BAC','PG','BABA','ACN','TMO','LLY','XOM','JPM','UNH','COST','HD','ASML','BRK-A','BRK-B','CAT','TT','SAP','APH','CVS','NOG','DVN','COP','OXY','MRO','MU','AVGO','INTC','LRCX','PLD','AMT','JNJ','ACN','TSM','V','ORCL','MA','BAC','BA','NFLX','ADBE','IBM','GME','NKE','ANGO','PNW','SHEL','XOM','WMT','BUD','AMZN','PEP','AMD','NVDA','AWR','TM','AAPL','GOOGL','META','MSFT','LMT','TSLA','DOV','PG','KO']))
|
stock_symbols = cursor.execute("SELECT DISTINCT symbol FROM stocks WHERE marketCap >= 500E6 AND symbol NOT LIKE '%.%'") #list(set(['CB','LOW','PFE','RTX','DIS','MS','BHP','BAC','PG','BABA','ACN','TMO','LLY','XOM','JPM','UNH','COST','HD','ASML','BRK-A','BRK-B','CAT','TT','SAP','APH','CVS','NOG','DVN','COP','OXY','MRO','MU','AVGO','INTC','LRCX','PLD','AMT','JNJ','ACN','TSM','V','ORCL','MA','BAC','BA','NFLX','ADBE','IBM','GME','NKE','ANGO','PNW','SHEL','XOM','WMT','BUD','AMZN','PEP','AMD','NVDA','AWR','TM','AAPL','GOOGL','META','MSFT','LMT','TSLA','DOV','PG','KO']))
|
||||||
stock_symbols = [row[0] for row in cursor.fetchall()]
|
stock_symbols = [row[0] for row in cursor.fetchall()]
|
||||||
print('Training for:', stock_symbols)
|
#Test Mode
|
||||||
|
#stock_symbols = ['AAPL','TSLA']
|
||||||
|
print('Training for:', len(stock_symbols))
|
||||||
predictor = await warm_start_training(stock_symbols, con, skip_downloading, save_data)
|
predictor = await warm_start_training(stock_symbols, con, skip_downloading, save_data)
|
||||||
|
|
||||||
#else:
|
#else:
|
||||||
|
|||||||
31
app/main.py
31
app/main.py
@ -4266,6 +4266,37 @@ async def get_data(data:TickerData, api_key: str = Security(get_api_key)):
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@app.post("/ai-score")
async def get_data(data:TickerData, api_key: str = Security(get_api_key)):
    """Serve the stored AI-score JSON for a ticker as a gzip-compressed response.

    The upper-cased ticker is looked up in ``redis_client`` first; on a hit the
    cached gzip bytes are streamed back unchanged. On a miss the file
    ``json/ai-score/companies/{ticker}.json`` is read, and if it cannot be read
    or parsed an empty object is served instead. The compressed payload is
    then cached and returned.

    Args:
        data: Request body carrying the ``ticker`` attribute.
        api_key: Resolved by the ``get_api_key`` security dependency.

    Returns:
        A ``StreamingResponse`` with media type ``application/json`` and a
        ``Content-Encoding: gzip`` header.
    """
    ticker = data.ticker.upper()
    cache_key = f"ai-score-{ticker}"
    cached_result = redis_client.get(cache_key)
    if cached_result:
        return StreamingResponse(
            io.BytesIO(cached_result),
            media_type="application/json",
            headers={"Content-Encoding": "gzip"}
        )

    # NOTE(review): `ticker` comes straight from the request body and is
    # interpolated into a file path without validation - confirm whether the
    # TickerData model restricts its characters.
    try:
        with open(f"json/ai-score/companies/{ticker}.json", 'rb') as file:
            res = orjson.loads(file.read())
    except Exception:
        # Best effort: a missing or unreadable score file yields an empty
        # object. `Exception` (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate.
        res = {}

    # Named `payload` so the request parameter `data` is not shadowed.
    payload = orjson.dumps(res)
    compressed_data = gzip.compress(payload)

    # Store value and TTL in a single command so the key can never be left
    # without an expiry if the process stops between two separate calls.
    # NOTE(review): 3600*3600 seconds is 150 days, and the empty fallback
    # object is cached for the same duration - confirm this TTL is intended.
    redis_client.set(cache_key, compressed_data, ex=3600*3600)

    return StreamingResponse(
        io.BytesIO(compressed_data),
        media_type="application/json",
        headers={"Content-Encoding": "gzip"}
    )
|
||||||
|
|
||||||
|
|
||||||
async def fetch_data(client, endpoint, ticker):
|
async def fetch_data(client, endpoint, ticker):
|
||||||
url = f"{API_URL}{endpoint}"
|
url = f"{API_URL}{endpoint}"
|
||||||
try:
|
try:
|
||||||
|
|||||||
@ -36,7 +36,11 @@ class ScorePredictor:
|
|||||||
X = self.scaler.fit_transform(X)
|
X = self.scaler.fit_transform(X)
|
||||||
return X #self.pca.fit_transform(X)
|
return X #self.pca.fit_transform(X)
|
||||||
|
|
||||||
def preprocess_test_data(self, X):
|
def preprocess_test_data(self, df):
|
||||||
|
selected_features = [col for col in df.columns if col not in ['date','price','Target']]
|
||||||
|
|
||||||
|
X = df[selected_features]
|
||||||
|
|
||||||
X = np.where(np.isinf(X), np.nan, X)
|
X = np.where(np.isinf(X), np.nan, X)
|
||||||
X = np.nan_to_num(X)
|
X = np.nan_to_num(X)
|
||||||
X = self.scaler.fit_transform(X)
|
X = self.scaler.fit_transform(X)
|
||||||
@ -61,8 +65,10 @@ class ScorePredictor:
|
|||||||
self.model.fit(X_train, y_train, epochs=100, batch_size=128, validation_split=0.1, callbacks=[early_stopping, reduce_lr])
|
self.model.fit(X_train, y_train, epochs=100, batch_size=128, validation_split=0.1, callbacks=[early_stopping, reduce_lr])
|
||||||
print("Model fine-tuned (not saved).")
|
print("Model fine-tuned (not saved).")
|
||||||
|
|
||||||
def evaluate_model(self, X_test, y_test):
|
def evaluate_model(self, df):
|
||||||
X_test = self.preprocess_test_data(X_test)
|
|
||||||
|
X_test = self.preprocess_test_data(df)
|
||||||
|
y_test = df['Target']
|
||||||
|
|
||||||
with open(self.warm_start_model_path, 'rb') as f:
|
with open(self.warm_start_model_path, 'rb') as f:
|
||||||
self.model = pickle.load(f)
|
self.model = pickle.load(f)
|
||||||
@ -85,8 +91,9 @@ class ScorePredictor:
|
|||||||
print(f"ROC AUC: {round(test_roc_auc_score * 100)}%")
|
print(f"ROC AUC: {round(test_roc_auc_score * 100)}%")
|
||||||
|
|
||||||
last_prediction_prob = class_1_probabilities[-1]
|
last_prediction_prob = class_1_probabilities[-1]
|
||||||
print(pd.DataFrame({'y_test': y_test, 'y_pred': binary_predictions}))
|
backtest_results = pd.DataFrame({'date': df['date'], 'y_test': y_test, 'y_pred': binary_predictions, 'score': class_1_probabilities})
|
||||||
print(f"Last prediction probability: {last_prediction_prob}")
|
|
||||||
|
print(f"Last prediction probability: {round(last_prediction_prob,2)}")
|
||||||
|
|
||||||
thresholds = [0.8, 0.75, 0.7, 0.6, 0.5, 0.45, 0.4, 0.35, 0.3, 0]
|
thresholds = [0.8, 0.75, 0.7, 0.6, 0.5, 0.45, 0.4, 0.35, 0.3, 0]
|
||||||
scores = [10, 9, 8, 7, 6, 5, 4, 3, 2, 1]
|
scores = [10, 9, 8, 7, 6, 5, 4, 3, 2, 1]
|
||||||
@ -97,13 +104,18 @@ class ScorePredictor:
|
|||||||
score = value
|
score = value
|
||||||
break
|
break
|
||||||
|
|
||||||
|
conditions = [backtest_results['score'] >= t for t in thresholds]
|
||||||
|
backtest_results['score'] = np.select(conditions, scores, default=1) # Default score if no condition matches
|
||||||
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'accuracy': round(test_accuracy * 100),
|
'accuracy': round(test_accuracy * 100),
|
||||||
'precision': round(test_precision * 100),
|
'precision': round(test_precision * 100),
|
||||||
'f1_score': round(test_f1_score * 100),
|
'f1_score': round(test_f1_score * 100),
|
||||||
'recall_score': round(test_recall_score * 100),
|
'recall_score': round(test_recall_score * 100),
|
||||||
'roc_auc_score': round(test_roc_auc_score * 100),
|
'roc_auc_score': round(test_roc_auc_score * 100),
|
||||||
'score': score
|
'score': score,
|
||||||
|
'backtest': backtest_results.to_dict(orient="records")
|
||||||
}
|
}
|
||||||
def feature_selection(self, X_train, y_train, k=100):
|
def feature_selection(self, X_train, y_train, k=100):
|
||||||
print('Feature selection:')
|
print('Feature selection:')
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user