From f885de8b23fbe82c0c0e22548ccbf151ab08b13b Mon Sep 17 00:00:00 2001 From: MuslemRahimi Date: Thu, 3 Oct 2024 00:12:20 +0200 Subject: [PATCH] update cron job --- app/cron_ai_score.py | 46 +++++++++--------- app/cron_congress_trading.py | 1 + .../__pycache__/score_model.cpython-310.pyc | Bin 3079 -> 3174 bytes app/ml_models/score_model.py | 12 ++++- app/restart_json.py | 1 + 5 files changed, 35 insertions(+), 25 deletions(-) diff --git a/app/cron_ai_score.py b/app/cron_ai_score.py index 7d277ef..950b422 100644 --- a/app/cron_ai_score.py +++ b/app/cron_ai_score.py @@ -46,13 +46,13 @@ async def download_data(ticker, con, start_date, end_date): statements = [ f"json/financial-statements/ratios/quarter/{ticker}.json", f"json/financial-statements/key-metrics/quarter/{ticker}.json", - #f"json/financial-statements/cash-flow-statement/quarter/{ticker}.json", - #f"json/financial-statements/income-statement/quarter/{ticker}.json", - #f"json/financial-statements/balance-sheet-statement/quarter/{ticker}.json", + f"json/financial-statements/cash-flow-statement/quarter/{ticker}.json", + f"json/financial-statements/income-statement/quarter/{ticker}.json", + f"json/financial-statements/balance-sheet-statement/quarter/{ticker}.json", f"json/financial-statements/income-statement-growth/quarter/{ticker}.json", f"json/financial-statements/balance-sheet-statement-growth/quarter/{ticker}.json", f"json/financial-statements/cash-flow-statement-growth/quarter/{ticker}.json", - #f"json/financial-statements/owner-earnings/quarter/{ticker}.json", + f"json/financial-statements/owner-earnings/quarter/{ticker}.json", ] # Helper function to load JSON data asynchronously @@ -81,34 +81,34 @@ async def download_data(ticker, con, start_date, end_date): key_metrics = await filter_data(key_metrics, ignore_keys) - #cashflow = await load_json_from_file(statements[2]) - #cashflow = await filter_data(cashflow, ignore_keys) + cashflow = await load_json_from_file(statements[2]) + cashflow = await filter_data(cashflow, ignore_keys) - #income = await load_json_from_file(statements[3]) - #income = await filter_data(income, ignore_keys) + income = await load_json_from_file(statements[3]) + income = await filter_data(income, ignore_keys) - #balance = await load_json_from_file(statements[4]) - #balance = await filter_data(balance, ignore_keys) + balance = await load_json_from_file(statements[4]) + balance = await filter_data(balance, ignore_keys) - income_growth = await load_json_from_file(statements[2]) + income_growth = await load_json_from_file(statements[5]) income_growth = await filter_data(income_growth, ignore_keys) - balance_growth = await load_json_from_file(statements[3]) + balance_growth = await load_json_from_file(statements[6]) balance_growth = await filter_data(balance_growth, ignore_keys) - cashflow_growth = await load_json_from_file(statements[4]) + cashflow_growth = await load_json_from_file(statements[7]) cashflow_growth = await filter_data(cashflow_growth, ignore_keys) - #owner_earnings = await load_json_from_file(statements[8]) - #owner_earnings = await filter_data(owner_earnings, ignore_keys) + owner_earnings = await load_json_from_file(statements[8]) + owner_earnings = await filter_data(owner_earnings, ignore_keys) # Combine all the data combined_data = defaultdict(dict) # Merge the data based on 'date' - for entries in zip(ratios, key_metrics,income_growth, balance_growth, cashflow_growth): + for entries in zip(ratios,key_metrics,income, balance, cashflow, owner_earnings, income_growth, balance_growth, cashflow_growth): for entry in entries: date = entry['date'] for key, value in entry.items(): @@ -223,7 +223,7 @@ async def download_data(ticker, con, start_date, end_date): # Convert combined data into a DataFrame df_combined = pd.DataFrame(combined_data).dropna() - ''' + key_elements = [ 'revenue', 'costOfRevenue', @@ -288,7 +288,7 @@ async def download_data(ticker, con, start_date, end_date): # Add all new columns to the original DataFrame at once df_combined = pd.concat([df_combined, pd.DataFrame(new_columns)], axis=1) - ''' + # To defragment the DataFrame, make a copy df_combined = df_combined.copy() @@ -389,7 +389,7 @@ async def fine_tune_and_evaluate(ticker, con, start_date, end_date): data = predictor.evaluate_model(test_data[selected_features], test_data['Target']) if len(data) != 0: - if data['precision'] >= 60 and data['accuracy'] >= 60 and data['accuracy'] < 100 and data['precision'] < 100: + if data['precision'] >= 50 and data['accuracy'] >= 50 and data['accuracy'] < 100 and data['precision'] < 100: res = {'score': data['score']} await save_json(ticker, res) print(f"Saved results for {ticker}") @@ -409,14 +409,14 @@ async def run(): if train_mode: # Warm start training - cursor.execute("SELECT DISTINCT symbol FROM stocks WHERE marketCap >= 300E9 AND symbol NOT LIKE '%.%' AND symbol NOT LIKE '%-%'") + cursor.execute("SELECT DISTINCT symbol FROM stocks WHERE marketCap >= 10E9 AND symbol NOT LIKE '%.%' AND symbol NOT LIKE '%-%'") warm_start_symbols = [row[0] for row in cursor.fetchall()] print('Warm Start Training for:', warm_start_symbols) predictor = await warm_start_training(warm_start_symbols, con) else: # Fine-tuning and evaluation for all stocks cursor.execute("SELECT DISTINCT symbol FROM stocks WHERE marketCap >= 1E9 AND symbol NOT LIKE '%.%'") - stock_symbols = ['GME'] #[row[0] for row in cursor.fetchall()] + stock_symbols = [row[0] for row in cursor.fetchall()] print(f"Total tickers for fine-tuning: {len(stock_symbols)}") start_date = datetime(1995, 1, 1).strftime("%Y-%m-%d") @@ -424,9 +424,7 @@ async def run(): tasks = [] for ticker in tqdm(stock_symbols): await fine_tune_and_evaluate(ticker, con, start_date, end_date) - - #await asyncio.gather(*tasks) - + con.close() if __name__ == "__main__": diff --git a/app/cron_congress_trading.py b/app/cron_congress_trading.py index 906cabe..8c40503 100755 --- a/app/cron_congress_trading.py +++ b/app/cron_congress_trading.py @@ -117,6 +117,7 @@ def replace_representative(office): 'James E Hon Banks': 'Jim Banks', 'Michael F. Q. San Nicolas': 'Michael San Nicolas', 'Barbara J Honorable Comstock': 'Barbara Comstock', + 'Darin McKay LaHood': 'Darin LaHood', 'Mr ': '', 'Mr. ': '', 'Dr ': '', diff --git a/app/ml_models/__pycache__/score_model.cpython-310.pyc b/app/ml_models/__pycache__/score_model.cpython-310.pyc index 170f05e04154dfac118e03c1b8ac01b23fe4eec2..2d30ef18b901dbc280b01b7d264eea092cbe927f 100644 GIT binary patch delta 801 zcmZva&1=*^6u@_qY&OYux3zv14{LX=#Vw`9trZn&1TXbaDHNfC5@tKI+r@nOG6e;p zhzCItggNQ|(1UpL?8So@K|JIn2-W=yL`3kt>_MfzVSagepW(ecp4y+PC$*YaLRq`{ ztG8NvRridQD!yLDn=K$z=~3xHDPV#j4P^>Ln8FfeJT{(+aD@BJMV}=~8>P0V*No-e zcbGV|5&f4hNZ$BgN6OG2GV;w4sBhcN$KpU{Y7lXiK$g1^zZVFZs_ncMN)GWL?gzk? z%qO;zFym1g%Ak8+ft0zM?Q}<$+y&VWc$jYU+yfW&kVjp?^;=_uwe)dwy>SA&Im~9p zf6mx0*1fsS9O|r60XyJ2&Q)C%WL8g#w$t(*5SG=WtYp*j; zLDS?nu}*HMgRMKE%q_7KrMZ=)GKQ1nEvGPuRojH7Zj_Hz|EjJ1@)SFxH_PvD7sbv} zMaqFiO0>d+i=qS2dZ2@7Tg6Ah5*f}>6zd)M+}xcFRGhkAGs>~Zh zp)F^&1&1)Xj~C^e-|rQ&@AsDq`QpPRz3wb796|TcB{+l%0wzc+6D)#BFc8>a&snuc zbJ0JYIo8p$ZnIuY)!qyg?jBb34fhGVra!y?j)HlI*#&*n+r2VFrHUfUq+KN3BK(}|x8 delta 738 zcmZXS&ubJh6vrp?t26tnwnABLZ55Wqsq0o;SMlO1z4YKgSSpJk!_Z{L!TCj!RS$xQ z6hRS$ob>2jMD*sr;L+1umE!yng6~ZQp?#1~lJ}BtUS4w0ecA2g?RG;0yS@7-w3?qg z4Sl=-(`{&?50Kgm?WyJyN`SgFs7?)PQVVFmvA}0iZK`<-YF8iMqOhStxynLp0z9j1 zWj+%!P6L^9QMzgH#HXx~56Y!9&U_K1MZ$zHiX@h5*_e_w6&iOMHf(l?O}a$=Fy7wi z!nACMvGgSmG7;uH^>n_9UNYlr&>!LqdKL@L3ZCyVA$%IhpsPNZA1~u|_-TwApi|mK zy!S9+rAg;$QJQ(dGJXo9R>9*;dIn!r&#jf-5$%(Y1D^Uq2E0xs&f+XuSKqAf_iOzN zcnHdH54FrB4`_WTKab7R&RFvCjYi diff --git a/app/ml_models/score_model.py b/app/ml_models/score_model.py index 3318a4b..2edf1b7 100644 --- a/app/ml_models/score_model.py +++ b/app/ml_models/score_model.py @@ -19,7 +19,17 @@ class ScorePredictor: def __init__(self): self.scaler = MinMaxScaler() self.warm_start_model_path = 'ml_models/weights/ai-score/warm_start_weights.pkl' - self.model = XGBClassifier(n_estimators=100, max_depth = 10, min_samples_split=5, random_state=42, n_jobs=10) + self.model = XGBClassifier( + n_estimators=200, # Increased from 100 due to problem complexity + max_depth=6, # Reduced to prevent overfitting with many features + learning_rate=0.1, # Added to control the learning process + colsample_bytree=0.8, # Added to randomly sample columns for each tree + subsample=0.8, # Added to randomly sample training data + reg_alpha=1, # L1 regularization to handle many features + reg_lambda=1, # L2 regularization to handle many features + random_state=42, + n_jobs=10 + ) def preprocess_data(self, X): X = np.where(np.isinf(X), np.nan, X) diff --git a/app/restart_json.py b/app/restart_json.py index a7846f1..4e0c556 100755 --- a/app/restart_json.py +++ b/app/restart_json.py @@ -1275,6 +1275,7 @@ def replace_representative(office): 'James E Hon Banks': 'Jim Banks', 'Michael F. Q. San Nicolas': 'Michael San Nicolas', 'Barbara J Honorable Comstock': 'Barbara Comstock', + 'Darin McKay LaHood': 'Darin LaHood', 'Mr ': '', 'Mr. ': '', 'Dr ': '',