From 99e00425ac6d1411cbb7691a5f2f7fd5bae3a38c Mon Sep 17 00:00:00 2001
From: MuslemRahimi
Date: Sat, 5 Oct 2024 11:23:48 +0200
Subject: [PATCH] Fix bugs in options cron jobs

---
 app/cron_ai_score.py                          | 40 +++++++-------
 app/cron_historical_price.py                  |  3 +-
 app/cron_options_bubble.py                    |  8 +--
 app/cron_options_gex.py                       | 13 +++--
 .../__pycache__/score_model.cpython-310.pyc   | Bin 3028 -> 3758 bytes
 app/ml_models/score_model.py                  | 49 ++++++++++++++----
 6 files changed, 70 insertions(+), 43 deletions(-)

diff --git a/app/cron_ai_score.py b/app/cron_ai_score.py
index 510359f..fb747ef 100644
--- a/app/cron_ai_score.py
+++ b/app/cron_ai_score.py
@@ -77,14 +77,14 @@ def top_uncorrelated_features(df, target_col='Target', top_n=10, threshold=0.75)
         selected_features.append(feature)
     return selected_features
 
-async def download_data(ticker, con, start_date, end_date):
+async def download_data(ticker, con, start_date, end_date, skip_downloading):
     file_path = f"ml_models/training_data/ai-score/{ticker}.json"
     if os.path.exists(file_path):
         with open(file_path, 'rb') as file:
             return pd.DataFrame(orjson.loads(file.read()))
-    else:
+    elif not skip_downloading:
         try:
             # Define paths to the statement files
@@ -213,7 +213,7 @@ async def download_data(ticker, con, start_date, end_date):
         combined_data = sorted(combined_data, key=lambda x: x['date'])
         # Convert combined data into a DataFrame
         df_combined = pd.DataFrame(combined_data).dropna()
-        '''
+
         fundamental_columns = [
             'revenue',
             'costOfRevenue',
@@ -262,7 +262,7 @@ async def download_data(ticker, con, start_date, end_date):
         # Compute ratios for all combinations of key elements
         new_columns = {}
         # Loop over combinations of column pairs
-        for columns in [fundamental_columns]:
+        for columns in [fundamental_columns, stats_columns, ta_columns]:
             for num, denom in combinations(columns, 2):
                 # Compute ratio and reverse ratio
                 ratio = df_combined[num] / df_combined[denom]
@@ -278,7 +278,7 @@ async def download_data(ticker, con, start_date, end_date):
 
         # Add all new columns to the original DataFrame at once
         df_combined = pd.concat([df_combined, pd.DataFrame(new_columns)], axis=1)
-        '''
+
         # To defragment the DataFrame, make a copy
         df_combined = df_combined.copy()
         df_combined = df_combined.dropna()
@@ -301,7 +301,7 @@ async def download_data(ticker, con, start_date, end_date):
         pass
 
 
-async def chunked_gather(tickers, con, start_date, end_date, chunk_size=10):
+async def chunked_gather(tickers, con, start_date, end_date, skip_downloading, chunk_size=10):
     # Helper function to divide the tickers into chunks
     def chunks(lst, size):
         for i in range(0, len(lst), size):
@@ -309,9 +309,9 @@ async def chunked_gather(tickers, con, start_date, end_date, chunk_size=10):
 
     results = []
 
-    for chunk in chunks(tickers, chunk_size):
+    for chunk in tqdm(chunks(tickers, chunk_size)):
         # Create tasks for each chunk
-        tasks = [download_data(ticker, con, start_date, end_date) for ticker in chunk]
+        tasks = [download_data(ticker, con, start_date, end_date, skip_downloading) for ticker in chunk]
         # Await the results for the current chunk
         chunk_results = await asyncio.gather(*tasks)
         # Accumulate the results
@@ -319,14 +319,14 @@ async def chunked_gather(tickers, con, start_date, end_date, chunk_size=10):
 
     return results
 
-async def warm_start_training(tickers, con):
+async def warm_start_training(tickers, con, skip_downloading):
     start_date = datetime(1995, 1, 1).strftime("%Y-%m-%d")
     end_date = datetime.today().strftime("%Y-%m-%d")
     df_train = pd.DataFrame()
     df_test = pd.DataFrame()
     test_size = 0.2
-    dfs = await chunked_gather(tickers, con, start_date, end_date, chunk_size=10)
+    dfs = await chunked_gather(tickers, con, start_date, end_date, skip_downloading, chunk_size=10)
 
     train_list = []
     test_list = []
@@ -359,9 +359,9 @@ async def warm_start_training(tickers, con):
 
     return predictor
 
-async def fine_tune_and_evaluate(ticker, con, start_date, end_date):
+async def fine_tune_and_evaluate(ticker, con, start_date, end_date, skip_downloading):
     try:
-        df = await download_data(ticker,con, start_date, end_date)
+        df = await download_data(ticker, con, start_date, end_date, skip_downloading)
         if df is None or len(df) == 0:
             print(f"No data available for {ticker}")
             return
@@ -371,7 +371,7 @@ async def fine_tune_and_evaluate(ticker, con, start_date, end_date):
         train_data = df.iloc[:split_size]
         test_data = df.iloc[split_size:]
 
-        selected_features = top_uncorrelated_features(train_data,top_n=50) #[col for col in train_data if col not in ['price', 'date', 'Target']] #top_uncorrelated_features(train_data,top_n=20)
+        selected_features = [col for col in train_data if col not in ['price', 'date', 'Target']]  # top_uncorrelated_features(train_data, top_n=20)
         # Fine-tune the model
         predictor = ScorePredictor()
         predictor.fine_tune_model(train_data[selected_features], train_data['Target'])
@@ -380,9 +380,8 @@ async def fine_tune_and_evaluate(ticker, con, start_date, end_date):
         data = predictor.evaluate_model(test_data[selected_features], test_data['Target'])
 
         if len(data) != 0:
-            if data['precision'] >= 50 and data['accuracy'] >= 50 and data['accuracy'] < 100 and data['precision'] < 100:
-                res = {'score': data['score']}
-                await save_json(ticker, res)
+            if 50 <= data['precision'] < 100 and 50 <= data['accuracy'] < 100 and data['f1_score'] > 50 and data['recall_score'] > 50 and data['roc_auc_score'] > 50:
+                await save_json(ticker, data)
                 print(f"Saved results for {ticker}")
         gc.collect()
     except Exception as e:
@@ -394,16 +393,17 @@ async def fine_tune_and_evaluate(ticker, con, start_date, end_date):
 
 async def run():
     train_mode = True  # Set this to False for fine-tuning and evaluation
+    skip_downloading = False
     con = sqlite3.connect('stocks.db')
     cursor = con.cursor()
     cursor.execute("PRAGMA journal_mode = wal")
 
     if train_mode:
         # Warm start training
-        cursor.execute("SELECT DISTINCT symbol FROM stocks WHERE marketCap >= 1E9 AND symbol NOT LIKE '%.%' AND symbol NOT LIKE '%-%'")
+        cursor.execute("SELECT DISTINCT symbol FROM stocks WHERE marketCap >= 500E6 AND symbol NOT LIKE '%.%' AND symbol NOT LIKE '%-%'")
         warm_start_symbols = [row[0] for row in cursor.fetchall()]
-        print('Warm Start Training for:', warm_start_symbols)
-        predictor = await warm_start_training(warm_start_symbols, con)
+        print('Warm Start Training')
+        predictor = await warm_start_training(warm_start_symbols, con, skip_downloading)
     else:
         # Fine-tuning and evaluation for all stocks
         cursor.execute("SELECT DISTINCT symbol FROM stocks WHERE marketCap >= 1E9 AND symbol NOT LIKE '%.%'")
@@ -416,7 +416,7 @@ async def run():
 
     tasks = []
     for ticker in tqdm(stock_symbols):
-        await fine_tune_and_evaluate(ticker, con, start_date, end_date)
+        await fine_tune_and_evaluate(ticker, con, start_date, end_date, skip_downloading)
 
     con.close()
diff --git a/app/cron_historical_price.py b/app/cron_historical_price.py
index c5244d1..1f1c9ac 100755
--- a/app/cron_historical_price.py
+++ b/app/cron_historical_price.py
@@ -84,6 +84,7 @@ async def get_historical_data(ticker, query_con, session):
 async def run():
     total_symbols = []
+    chunk_size = 400
     try:
         cursor = con.cursor()
         cursor.execute("PRAGMA journal_mode = wal")
@@ -130,8 +131,6 @@ try:
     start_date_max = datetime(1970, 1, 1).strftime("%Y-%m-%d")
     end_date = end_date.strftime("%Y-%m-%d")
 
-
-    chunk_size = 400
 asyncio.run(run())
 con.close()
 etf_con.close()
diff --git a/app/cron_options_bubble.py b/app/cron_options_bubble.py
index 6093c4c..10bd09c 100755
--- a/app/cron_options_bubble.py
+++ b/app/cron_options_bubble.py
@@ -42,7 +42,7 @@ def options_bubble_data(chunk):
     start_date_str = start_date.strftime('%Y-%m-%d')
 
     res_list = []
-    for page in range(0, 500):
+    for page in range(0, 5000):
         try:
             data = fin.options_activity(company_tickers=company_tickers, page=page, pagesize=1000, date_from=start_date_str, date_to=end_date_str)
             data = ujson.loads(fin.output(data))['option_activity']
@@ -129,11 +129,11 @@ async def main():
 
     print(len(total_symbols))
 
-    chunk_size = len(total_symbols) // 1000 # Divide the list into N chunks
+    chunk_size = max(1, len(total_symbols) // 2000)  # Divide the list into N chunks; max(1, ...) avoids a zero slice step when there are fewer than 2000 symbols
     chunks = [total_symbols[i:i + chunk_size] for i in range(0, len(total_symbols), chunk_size)]
-
+    print(chunks)
     loop = asyncio.get_running_loop()
-    with ThreadPoolExecutor(max_workers=2) as executor:
+    with ThreadPoolExecutor(max_workers=4) as executor:
         tasks = [loop.run_in_executor(executor, options_bubble_data, chunk) for chunk in chunks]
         for f in tqdm(asyncio.as_completed(tasks), total=len(tasks)):
             await f
diff --git a/app/cron_options_gex.py b/app/cron_options_gex.py
index a56b6d0..a5fbf6b 100644
--- a/app/cron_options_gex.py
+++ b/app/cron_options_gex.py
@@ -335,16 +335,14 @@ def get_options_chain(option_data_list):
 
 def get_data(ticker):
     res_list = []
-    page = 0
-    while True:
+
+    for page in range(0, 5000):
         try:
             data = fin.options_activity(date_from=start_date_str, date_to=end_date_str, company_tickers=ticker, page=page, pagesize=1000)
             data = ujson.loads(fin.output(data))['option_activity']
             filtered_data = [{key: value for key, value in item.items() if key not in ['description_extended', 'updated']} for item in data]
             res_list += filtered_data
-            page += 1
-        except Exception as e:
-            print(f"Error retrieving data for {ticker}: {e}")
+        except Exception:
             break
 
     return res_list
@@ -369,7 +367,7 @@ etf_cursor.execute("PRAGMA journal_mode = wal")
 etf_cursor.execute("SELECT DISTINCT symbol FROM etfs")
 etf_symbols = [row[0] for row in etf_cursor.fetchall()]
 
-total_symbols = stock_symbols + etf_symbols
+total_symbols = ['SPY']  # stock_symbols + etf_symbols
 
 query_template = """
     SELECT date, close, change_percent
@@ -385,7 +383,8 @@ for ticker in total_symbols:
     df_price = df_price.rename(columns={"change_percent": "changesPercentage"})
 
     volatility = calculate_volatility(df_price)
-
+    print(df_price)
+    print(volatility)
     ticker_data = get_data(ticker)
     # Group ticker_data by 'date' and collect all items for each date
     grouped_history = defaultdict(list)
diff --git a/app/ml_models/__pycache__/score_model.cpython-310.pyc b/app/ml_models/__pycache__/score_model.cpython-310.pyc
index 6b754b6d96b669d46787cfcd7dc46adb7a8c825d..0974412fbd2d2519905c40c315c69ffcf09a0e68 100644
GIT binary patch
[base85-encoded binary sections (literal 3758, delta 1395) omitted: compiled bytecode for score_model.cpython-310.pyc, not human-readable]
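
Reviewer note (not part of the patch): the chunked_gather change in app/cron_ai_score.py threads the new skip_downloading flag through each batch and awaits one chunk of download_data coroutines at a time, so at most chunk_size downloads are in flight. Below is a minimal, self-contained sketch of that batching pattern; fetch() is a hypothetical stand-in for the repo's download_data, not its actual implementation.

import asyncio

async def fetch(ticker, skip_downloading):
    # Hypothetical stand-in for download_data: honor the skip_downloading early-out.
    if skip_downloading:
        return None
    await asyncio.sleep(0)  # placeholder for the real network/disk I/O
    return {"ticker": ticker}

async def chunked_gather(tickers, skip_downloading, chunk_size=10):
    results = []
    for i in range(0, len(tickers), chunk_size):
        chunk = tickers[i:i + chunk_size]
        # One gather per chunk bounds concurrency at chunk_size tasks.
        results.extend(await asyncio.gather(*(fetch(t, skip_downloading) for t in chunk)))
    return results

if __name__ == "__main__":
    print(asyncio.run(chunked_gather(["AAPL", "MSFT", "NVDA"], skip_downloading=False)))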
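
Reviewer note (not part of the patch): both options scripts now page through fin.options_activity with pagesize=1000 until the API stops returning data. A generic sketch of that fetch-until-exhausted loop follows, with a hypothetical fetch_page callable standing in for the Benzinga client.

def fetch_all(fetch_page, max_pages=5000):
    """Accumulate pages until an empty page or an error ends the stream."""
    results = []
    for page in range(max_pages):
        try:
            batch = fetch_page(page)
        except Exception:
            break  # mirror the scripts: any API error ends pagination
        if not batch:
            break
        results += batch
    return results

# Example with a fake three-page source:
pages = [[1, 2], [3], []]
print(fetch_all(lambda p: pages[p]))  # -> [1, 2, 3]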