Add AI-score backtest output, an /ai-score endpoint and a screener extraction script.

* app/cron_ai_agents.py: new script that extracts a fixed list of screener
  columns for a symbol from json/stock-screener/data.json.
* app/cron_ai_score.py: evaluate_model is called with the whole frame; the
  backtest rows are re-keyed BEFORE save_json so the renamed keys are saved,
  and json/ai-score/companies/{ticker}.json is removed when the metric
  thresholds are not met.
* app/main.py: POST /ai-score serves json/ai-score/companies/{ticker}.json
  as gzip-compressed JSON, cached in Redis.
* app/ml_models/score_model.py: evaluate_model(df) selects the feature
  columns itself and returns a per-row backtest next to the metrics.

NOTE(review): this patch was recovered from a whitespace-collapsed copy.
Indentation and the position of blank context lines are inferred from the
hunk counts and Python syntax - verify against the target tree before
applying. The index lines of the three revised files were dropped because
their blob ids no longer match the revised content.

diff --git a/app/cron_ai_agents.py b/app/cron_ai_agents.py
new file mode 100644
--- /dev/null
+++ b/app/cron_ai_agents.py
@@ -0,0 +1,70 @@
+from datetime import datetime
+import orjson
+import sqlite3
+import asyncio
+from tqdm import tqdm
+
+
+# Load stock screener data once at import time, indexed by symbol
+with open("json/stock-screener/data.json", 'rb') as file:
+    stock_screener_data = orjson.loads(file.read())
+stock_screener_data_dict = {item['symbol']: item for item in stock_screener_data}
+
+
+async def save_json(symbol, data):
+    """Save JSON data to a file."""
+    with open(f"json/statistics/{symbol}.json", 'wb') as file:
+        file.write(orjson.dumps(data))
+
+
+async def get_data(symbol):
+    """Extract specified columns data for a given symbol.
+
+    Returns a dict holding every listed column (None when the screener entry
+    lacks it), or an empty dict when the symbol is not in the screener data.
+    """
+    columns = ['marketCap','sharesOutStanding', 'sharesQoQ', 'sharesYoY','institutionalOwnership','floatShares',
+        'priceEarningsToGrowthRatio','priceEarningsRatio','forwardPE','priceToSalesRatio','forwardPS','priceToBookRatio','priceToFreeCashFlowsRatio',
+        'sharesShort','shortOutStandingPercent','shortFloatPercent','shortRatio',
+        'enterpriseValue','evEarnings','evSales','evEBITDA','evEBIT','evFCF',
+        'currentRatio','quickRatio','debtRatio','debtEquityRatio','interestCoverage','cashFlowToDebtRatio','totalDebtToCapitalization',
+        'returnOnEquity','returnOnAssets','returnOnCapital','revenuePerEmployee','profitPerEmployee',
+        'employees','assetTurnover','inventoryTurnover','incomeTaxExpense','effectiveTaxRate','beta','returnOnInvestedCapital',
+        'change1Y','sma50','sma200','rsi','avgVolume','revenue','netIncome','grossProfit','operatingIncome','ebitda','ebit','eps',
+        'cashAndCashEquivalents','totalDebt','retainedEarnings','totalAssets','workingCapital','operatingCashFlow',
+        'capitalExpenditure','freeCashFlow','freeCashFlowPerShare','grossProfitMargin','operatingProfitMargin','pretaxProfitMargin',
+        'netProfitMargin','ebitdaMargin','ebitMargin','freeCashFlowMargin',
+        'annualDividend','dividendYield','payoutRatio','dividendGrowth','earningsYield','freeCashFlowYield','altmanZScore','piotroskiScore',
+        'lastStockSplit','splitType','splitRatio','analystRating','analystCounter','priceTarget','upside'
+    ]
+
+    screener_entry = stock_screener_data_dict.get(symbol)
+    if screener_entry is None:
+        return {}
+    # dict.get never raises, so the lookup needs no try/except
+    return {column: screener_entry.get(column) for column in columns}
+
+
+async def run():
+    """Main function to run the data extraction process."""
+    # Connect to SQLite database
+    con = sqlite3.connect('stocks.db')
+    try:
+        cursor = con.cursor()
+        cursor.execute("PRAGMA journal_mode = wal")
+        cursor.execute("SELECT DISTINCT symbol FROM stocks WHERE symbol NOT LIKE '%.%'")
+        total_symbols = [row[0] for row in cursor.fetchall()]
+    finally:
+        # Close the connection even when a query fails
+        con.close()
+
+    # Process symbols with progress bar
+    # NOTE(review): only the hard-coded test symbol is processed; total_symbols
+    # is fetched but not yet used - confirm before scheduling this cron job.
+    for symbol in tqdm(['TSLA'], desc="Extracting data"):
+        data = await get_data(symbol)
+        print(data)
+
+
+if __name__ == "__main__":
+    asyncio.run(run())
diff --git a/app/cron_ai_score.py b/app/cron_ai_score.py
--- a/app/cron_ai_score.py
+++ b/app/cron_ai_score.py
@@ -283,8 +283,10 @@ async def warm_start_training(tickers, con, skip_downloading, save_data):
 
     predictor = ScorePredictor()
     selected_features = [col for col in df_train if col not in ['price', 'date', 'Target']]
+
     predictor.warm_start_training(df_train[selected_features], df_train['Target'])
-    predictor.evaluate_model(df_test[selected_features], df_test['Target'])
+    predictor.evaluate_model(df_test)
+
     return predictor
 
 async def fine_tune_and_evaluate(ticker, con, start_date, end_date, skip_downloading, save_data):
@@ -299,20 +301,32 @@ async def fine_tune_and_evaluate(ticker, con, start_date, end_date, skip_downloa
         train_data = df.iloc[:split_size]
         test_data = df.iloc[split_size:]
 
-        selected_features = [col for col in df.columns if col not in ['date','price','Target']]
+        #selected_features = [col for col in df.columns if col not in ['date','price','Target']]
+
         # Fine-tune the model
         predictor = ScorePredictor()
         #predictor.fine_tune_model(train_data[selected_features], train_data['Target'])
 
-        print(f"Evaluating fine-tuned model for {ticker}")
-        data = predictor.evaluate_model(test_data[selected_features], test_data['Target'])
+        data = predictor.evaluate_model(test_data)
 
         if (data['precision'] >= 50 and data['accuracy'] >= 50 and
             data['accuracy'] < 100 and data['precision'] < 100 and
             data['f1_score'] >= 50 and data['recall_score'] >= 50 and
             data['roc_auc_score'] >= 50):
+            # Re-key the backtest rows before saving so the renamed keys reach the file
+            data['backtest'] = [
+                {'date': entry['date'], 'yTest': entry['y_test'], 'yPred': entry['y_pred'], 'score': entry['score']}
+                for entry in data['backtest']
+            ]
             await save_json(ticker, data)
             print(f"Saved results for {ticker}")
+        else:
+            # Thresholds not met: delete the file served by /ai-score, if present
+            import os  # local import: this file's top-level imports are not part of the patch
+            try:
+                os.remove(f"json/ai-score/companies/{ticker}.json")
+            except FileNotFoundError:
+                pass
     except Exception as e:
         print(f"Error processing {ticker}: {e}")
 
@@ -336,7 +350,9 @@ async def run():
     # Warm start training
     stock_symbols = cursor.execute("SELECT DISTINCT symbol FROM stocks WHERE marketCap >= 500E6 AND symbol NOT LIKE '%.%'") #list(set(['CB','LOW','PFE','RTX','DIS','MS','BHP','BAC','PG','BABA','ACN','TMO','LLY','XOM','JPM','UNH','COST','HD','ASML','BRK-A','BRK-B','CAT','TT','SAP','APH','CVS','NOG','DVN','COP','OXY','MRO','MU','AVGO','INTC','LRCX','PLD','AMT','JNJ','ACN','TSM','V','ORCL','MA','BAC','BA','NFLX','ADBE','IBM','GME','NKE','ANGO','PNW','SHEL','XOM','WMT','BUD','AMZN','PEP','AMD','NVDA','AWR','TM','AAPL','GOOGL','META','MSFT','LMT','TSLA','DOV','PG','KO']))
     stock_symbols = [row[0] for row in cursor.fetchall()]
-    print('Training for:', stock_symbols)
+    #Test Mode
+    #stock_symbols = ['AAPL','TSLA']
+    print('Training for:', len(stock_symbols))
     predictor = await warm_start_training(stock_symbols, con, skip_downloading, save_data)
 
     #else:
diff --git a/app/main.py b/app/main.py
--- a/app/main.py
+++ b/app/main.py
@@ -4266,6 +4266,38 @@ async def get_data(data:TickerData, api_key: str = Security(get_api_key)):
     )
 
 
+@app.post("/ai-score")
+async def get_data(data:TickerData, api_key: str = Security(get_api_key)):
+    """Return the stored AI score for a ticker as gzip-compressed JSON ({} when no file can be read)."""
+    ticker = data.ticker.upper()
+    cache_key = f"ai-score-{ticker}"
+    cached_result = redis_client.get(cache_key)
+    if cached_result:
+        return StreamingResponse(
+            io.BytesIO(cached_result),
+            media_type="application/json",
+            headers={"Content-Encoding": "gzip"}
+        )
+
+    try:
+        with open(f"json/ai-score/companies/{ticker}.json", 'rb') as file:
+            res = orjson.loads(file.read())
+    except (OSError, orjson.JSONDecodeError):
+        # Missing, unreadable or malformed score file: answer with an empty object
+        res = {}
+
+    compressed_data = gzip.compress(orjson.dumps(res))
+
+    redis_client.set(cache_key, compressed_data)
+    redis_client.expire(cache_key,3600*3600)
+
+    return StreamingResponse(
+        io.BytesIO(compressed_data),
+        media_type="application/json",
+        headers={"Content-Encoding": "gzip"}
+    )
+
+
 async def fetch_data(client, endpoint, ticker):
     url = f"{API_URL}{endpoint}"
     try:
diff --git a/app/ml_models/score_model.py b/app/ml_models/score_model.py
index 691d86d..dc73ffa 100644
--- a/app/ml_models/score_model.py
+++ b/app/ml_models/score_model.py
@@ -36,7 +36,11 @@ class ScorePredictor:
         X = self.scaler.fit_transform(X)
         return X #self.pca.fit_transform(X)
 
-    def preprocess_test_data(self, X):
+    def preprocess_test_data(self, df):
+        selected_features = [col for col in df.columns if col not in ['date','price','Target']]
+
+        X = df[selected_features]
+
         X = np.where(np.isinf(X), np.nan, X)
         X = np.nan_to_num(X)
         X = self.scaler.fit_transform(X)
@@ -61,8 +65,10 @@ class ScorePredictor:
         self.model.fit(X_train, y_train, epochs=100, batch_size=128, validation_split=0.1, callbacks=[early_stopping, reduce_lr])
         print("Model fine-tuned (not saved).")
 
-    def evaluate_model(self, X_test, y_test):
-        X_test = self.preprocess_test_data(X_test)
+    def evaluate_model(self, df):
+
+        X_test = self.preprocess_test_data(df)
+        y_test = df['Target']
 
         with open(self.warm_start_model_path, 'rb') as f:
             self.model = pickle.load(f)
@@ -85,8 +91,9 @@ class ScorePredictor:
         print(f"ROC AUC: {round(test_roc_auc_score * 100)}%")
 
         last_prediction_prob = class_1_probabilities[-1]
-        print(pd.DataFrame({'y_test': y_test, 'y_pred': binary_predictions}))
-        print(f"Last prediction probability: {last_prediction_prob}")
+        backtest_results = pd.DataFrame({'date': df['date'], 'y_test': y_test, 'y_pred': binary_predictions, 'score': class_1_probabilities})
+
+        print(f"Last prediction probability: {round(last_prediction_prob,2)}")
 
         thresholds = [0.8, 0.75, 0.7, 0.6, 0.5, 0.45, 0.4, 0.35, 0.3, 0]
         scores = [10, 9, 8, 7, 6, 5, 4, 3, 2, 1]
@@ -97,13 +104,18 @@ class ScorePredictor:
                 score = value
                 break
 
+        conditions = [backtest_results['score'] >= t for t in thresholds]
+        backtest_results['score'] = np.select(conditions, scores, default=1) # Default score if no condition matches
+
+
         return {
             'accuracy': round(test_accuracy * 100),
             'precision': round(test_precision * 100),
             'f1_score': round(test_f1_score * 100),
             'recall_score': round(test_recall_score * 100),
             'roc_auc_score': round(test_roc_auc_score * 100),
-            'score': score
+            'score': score,
+            'backtest': backtest_results.to_dict(orient="records")
         }
     def feature_selection(self, X_train, y_train, k=100):
         print('Feature selection:')