From 01e802047a62d0e9fe6cfe9a88cf67f0ad6551d6 Mon Sep 17 00:00:00 2001 From: MuslemRahimi Date: Mon, 17 Feb 2025 14:48:22 +0100 Subject: [PATCH] update price prediction model --- app/cron_price_analysis.py | 40 ++--------- app/main.py | 31 +++++++++ app/ml_models/prophet_model.py | 124 +++++++++++++++++++++------------ 3 files changed, 117 insertions(+), 78 deletions(-) diff --git a/app/cron_price_analysis.py b/app/cron_price_analysis.py index 7ed4385..6e8f3b0 100755 --- a/app/cron_price_analysis.py +++ b/app/cron_price_analysis.py @@ -11,15 +11,6 @@ import concurrent.futures def convert_symbols(symbol_list): - """ - Converts the symbols in the given list from 'BTCUSD' and 'USDTUSD' format to 'BTC-USD' and 'USDT-USD' format. - - Args: - symbol_list (list): A list of strings representing the symbols to be converted. - - Returns: - list: A new list with the symbols converted to the desired format. - """ converted_symbols = [] for symbol in symbol_list: # Determine the base and quote currencies @@ -53,13 +44,10 @@ async def download_data(ticker, start_date, end_date): except Exception as e: print(e) -async def process_symbol(ticker, start_date, end_date, crypto_symbols): +async def process_symbol(ticker, start_date, end_date): try: df = await download_data(ticker, start_date, end_date) data = PricePredictor().run(df) - - if ticker in crypto_symbols: - ticker = ticker.replace('-','') #convert back from BTC-USD to BTCUSD await save_json(ticker, data) except Exception as e: @@ -68,34 +56,16 @@ async def process_symbol(ticker, start_date, end_date, crypto_symbols): async def run(): con = sqlite3.connect('stocks.db') - etf_con = sqlite3.connect('etf.db') - crypto_con = sqlite3.connect('crypto.db') - + cursor = con.cursor() cursor.execute("PRAGMA journal_mode = wal") - #cursor.execute("SELECT DISTINCT symbol FROM stocks") - #cursor.execute("SELECT DISTINCT symbol FROM stocks WHERE marketCap > 10E9 AND symbol NOT LIKE '%.%'") cursor.execute("SELECT DISTINCT symbol FROM stocks WHERE marketCap > 1E9") stock_symbols = [row[0] for row in cursor.fetchall()] - - etf_cursor = etf_con.cursor() - etf_cursor.execute("PRAGMA journal_mode = wal") - etf_cursor.execute("SELECT DISTINCT symbol FROM etfs WHERE totalAssets > 5E9") - etf_symbols = [row[0] for row in etf_cursor.fetchall()] - - crypto_cursor = crypto_con.cursor() - crypto_cursor.execute("PRAGMA journal_mode = wal") - crypto_cursor.execute("SELECT DISTINCT symbol FROM cryptos") - crypto_symbols = [row[0] for row in crypto_cursor.fetchall()] - crypto_symbols = convert_symbols(crypto_symbols) #Convert BTCUSD to BTC-USD for yfinance - con.close() - etf_con.close() - crypto_con.close() - total_symbols = stock_symbols + etf_symbols + crypto_symbols + total_symbols = stock_symbols print(f"Total tickers: {len(total_symbols)}") - start_date = datetime(2000, 1, 1).strftime("%Y-%m-%d") + start_date = datetime(2015, 1, 1).strftime("%Y-%m-%d") end_date = datetime.today().strftime("%Y-%m-%d") chunk_size = len(total_symbols) // 70 # Divide the list into N chunks @@ -103,7 +73,7 @@ async def run(): for chunk in chunks: tasks = [] for ticker in tqdm(chunk): - tasks.append(process_symbol(ticker, start_date, end_date, crypto_symbols)) + tasks.append(process_symbol(ticker, start_date, end_date)) await asyncio.gather(*tasks) diff --git a/app/main.py b/app/main.py index 1018b2a..0a95bc9 100755 --- a/app/main.py +++ b/app/main.py @@ -4177,6 +4177,37 @@ async def get_data(api_key: str = Security(get_api_key)): ) +@app.post("/price-analysis") +async def get_data(data:TickerData, api_key: str = Security(get_api_key)): + ticker = data.ticker.upper() + cache_key = f"price-analysis-{ticker}" + cached_result = redis_client.get(cache_key) + if cached_result: + return StreamingResponse( + io.BytesIO(cached_result), + media_type="application/json", + headers={"Content-Encoding": "gzip"} + ) + + try: + with open(f"json/price-analysis/{ticker}.json", 'rb') as file: + res = orjson.loads(file.read()) + except: + res = {} + + data = orjson.dumps(res) + compressed_data = gzip.compress(data) + + redis_client.set(cache_key, compressed_data) + redis_client.expire(cache_key,3600*3600) + + return StreamingResponse( + io.BytesIO(compressed_data), + media_type="application/json", + headers={"Content-Encoding": "gzip"} + ) + + @app.get("/newsletter") async def get_newsletter(): try: diff --git a/app/ml_models/prophet_model.py b/app/ml_models/prophet_model.py index d423f34..4a11a1e 100755 --- a/app/ml_models/prophet_model.py +++ b/app/ml_models/prophet_model.py @@ -6,7 +6,32 @@ from datetime import datetime import yfinance as yf import asyncio from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score -#import matplotlib.pyplot as plt +# import matplotlib.pyplot as plt + + +def get_monthly_historical_data(df): + # Ensure the date column is in datetime format + df['ds'] = pd.to_datetime(df['ds']) + + # Get the last available date in your data + last_date = df['ds'].max() + # Calculate the date one year ago + one_year_ago = last_date - pd.DateOffset(years=1) + + # Filter data for the last year + last_year_df = df[df['ds'] > one_year_ago] + + # Group by year-month and select the last record in each month + monthly_df = last_year_df.groupby(last_year_df['ds'].dt.to_period('M')).apply(lambda x: x.iloc[-1]).reset_index(drop=True) + + # Rename columns to the desired output format + monthly_df = monthly_df[['ds', 'y']].rename(columns={'ds': 'date', 'y': 'close'}) + + # Format the date as a string in YYYY-MM-DD format + monthly_df['date'] = monthly_df['date'].dt.strftime('%Y-%m-%d') + + # Convert to list of dictionaries + return monthly_df.to_dict(orient='records') async def download_data(ticker, start_date, end_date): @@ -15,72 +40,85 @@ async def download_data(ticker, start_date, end_date): df = df.reset_index() df = df[['Date', 'Adj Close']] df = df.rename(columns={"Date": "ds", "Adj Close": "y"}) - if len(df) > 252*2: #At least 2 years of history is necessary - #df['y'] = df['y'].rolling(window=200).mean() - #df = df.dropna() + if len(df) > 252 * 2: # At least 2 years of history is necessary + # df['y'] = df['y'].rolling(window=200).mean() + # df = df.dropna() return df except Exception as e: - print(e) + print(e) class PricePredictor: def __init__(self, predict_ndays=365): - self.predict_ndays = predict_ndays - self.model = Prophet( - interval_width = 0.8, + self.predict_ndays = predict_ndays + self.model = Prophet( + interval_width=0.8, daily_seasonality=True, - yearly_seasonality = True, - ) + yearly_seasonality=True, + ) def run(self, df): - self.model.fit(df) - future = self.model.make_future_dataframe(periods=self.predict_ndays) - forecast = self.model.predict(future) + self.model.fit(df) + future = self.model.make_future_dataframe(periods=self.predict_ndays) + forecast = self.model.predict(future) - # Apply rolling average to smooth the upper bound - rolling_window = 200 - forecast['smoothed_upper'] = round(forecast['yhat_upper'].rolling(window=rolling_window, min_periods=1).mean(),2) - forecast['smoothed_lower'] = round(forecast['yhat_lower'].rolling(window=rolling_window, min_periods=1).mean(),2) - forecast['smoothed_mean'] = round(forecast['yhat'].rolling(window=rolling_window, min_periods=1).mean(),2) + # Apply rolling average to smooth the forecast intervals + rolling_window = 200 + forecast['smoothed_upper'] = forecast['yhat_upper'].rolling(window=rolling_window, min_periods=1).mean().round(2) + forecast['smoothed_lower'] = forecast['yhat_lower'].rolling(window=rolling_window, min_periods=1).mean().round(2) + forecast['smoothed_mean'] = forecast['yhat'].rolling(window=rolling_window, min_periods=1).mean().round(2) - actual_values = df['y'].values - predicted_values = forecast['yhat'].values[:-self.predict_ndays] + # Actual and predicted values for evaluation (optional) + actual_values = df['y'].values + predicted_values = forecast['yhat'].values[:-self.predict_ndays] - rmse = round(np.sqrt(mean_squared_error(actual_values, predicted_values)),2) - mape = round(np.mean(np.abs((actual_values - predicted_values) / actual_values)) * 100) - r2 = round(r2_score(actual_values, predicted_values)*100) + # Extract forecast values for plotting or analysis (if needed) + pred_date_list = forecast['ds'][-1200 - self.predict_ndays:].dt.strftime('%Y-%m-%d').tolist() + upper_list = forecast['smoothed_upper'][-1200 - self.predict_ndays:].tolist() + lower_list = forecast['smoothed_lower'][-1200 - self.predict_ndays:].tolist() + mean_list = forecast['smoothed_mean'][-1200 - self.predict_ndays:].tolist() - print("RMSE:", rmse) - print("MAPE:", mape) - print("R2 Score:", r2) - pred_date_list = forecast['ds'][-1200-self.predict_ndays:].dt.strftime('%Y-%m-%d').tolist() - upper_list = forecast['smoothed_upper'][-1200-self.predict_ndays:].tolist() - lower_list = forecast['smoothed_lower'][-1200-self.predict_ndays:].tolist() - mean_list = forecast['smoothed_mean'][-1200-self.predict_ndays:].tolist() + historical_date_list = df['ds'][-1200:].dt.strftime('%Y-%m-%d').tolist() + historical_price_list = df['y'][-1200:].round(2).tolist() - historical_date_list = df['ds'][-1200:].dt.strftime('%Y-%m-%d').tolist() - historical_price_list = round(df['y'][-1200:],2).tolist() + # Get monthly historical data and round the close value + monthly_historical_data = get_monthly_historical_data(df) + monthly_historical_data = [{**item, 'close': round(item['close'], 2)} for item in monthly_historical_data] - return {'rmse': rmse,'mape': mape,'r2Score':r2, 'historicalPrice': historical_price_list, 'predictionDate': pred_date_list, 'upperBand': upper_list, 'lowerBand': lower_list, 'meanResult': mean_list} + + future_forecast = forecast[forecast['ds'] > df['ds'].max()]['smoothed_mean'] + if not future_forecast.empty: + median_price = round(np.median(future_forecast), 2) + else: + median_price = round(forecast['smoothed_mean'].iloc[-1], 2) + + # Latest actual price from the dataset + latest_price = round(df['y'].iloc[-1], 2) + + return { + 'pastPriceList': monthly_historical_data, + 'avgPriceTarget': mean_list[-1], + 'highPriceTarget': upper_list[-1], + 'lowPriceTarget': lower_list[-1], + 'medianPriceTarget': median_price, + 'latestPrice': latest_price + } -#Test Mode +# Test Mode async def main(): for ticker in ['NVDA']: - start_date = datetime(2000, 1, 1).strftime("%Y-%m-%d") + start_date = datetime(2015, 1, 1).strftime("%Y-%m-%d") end_date = datetime.today().strftime("%Y-%m-%d") df = await download_data(ticker, start_date, end_date) - data = PricePredictor().run(df) - print(data) + if df is not None: + data = PricePredictor().run(df) # Run the main function -#asyncio.run(main()) +# asyncio.run(main()) - - - -# Plotting +# Plotting (optional) ''' fig, ax = plt.subplots(figsize=(10, 6)) @@ -93,4 +131,4 @@ ax.set_title('Forecasted Prices for {}'.format(ticker)) ax.legend() ax.grid(True) plt.show() -''' \ No newline at end of file +'''