update price prediction model

2025-02-17 14:48:22 +01:00 · 2025-02-17 14:48:22 +01:00 · 01e802047a
commit 01e802047a
parent 3282315d7e
3 changed files with 117 additions and 78 deletions
--- a/app/cron_price_analysis.py
+++ b/app/cron_price_analysis.py
@ -11,15 +11,6 @@ import concurrent.futures


 def convert_symbols(symbol_list):
-    """
-    Converts the symbols in the given list from 'BTCUSD' and 'USDTUSD' format to 'BTC-USD' and 'USDT-USD' format.
-    
-    Args:
-        symbol_list (list): A list of strings representing the symbols to be converted.
-    
-    Returns:
-        list: A new list with the symbols converted to the desired format.
-    """
    converted_symbols = []
    for symbol in symbol_list:
        # Determine the base and quote currencies
@ -53,13 +44,10 @@ async def download_data(ticker, start_date, end_date):
    except Exception as e:
        print(e)

-async def process_symbol(ticker, start_date, end_date, crypto_symbols):
+async def process_symbol(ticker, start_date, end_date):
    try:
        df = await download_data(ticker, start_date, end_date)
        data = PricePredictor().run(df)
-
-        if ticker in crypto_symbols:
-            ticker = ticker.replace('-','') #convert back from BTC-USD to BTCUSD
        await save_json(ticker, data)
    
    except Exception as e:
@ -68,34 +56,16 @@ async def process_symbol(ticker, start_date, end_date, crypto_symbols):

 async def run():
    con = sqlite3.connect('stocks.db')
-    etf_con = sqlite3.connect('etf.db')
-    crypto_con = sqlite3.connect('crypto.db')
-
+    
    cursor = con.cursor()
    cursor.execute("PRAGMA journal_mode = wal")
-    #cursor.execute("SELECT DISTINCT symbol FROM stocks")
-    #cursor.execute("SELECT DISTINCT symbol FROM stocks WHERE marketCap > 10E9 AND symbol NOT LIKE '%.%'")
    cursor.execute("SELECT DISTINCT symbol FROM stocks WHERE marketCap > 1E9")
    stock_symbols = [row[0] for row in cursor.fetchall()]
-
-    etf_cursor = etf_con.cursor()
-    etf_cursor.execute("PRAGMA journal_mode = wal")
-    etf_cursor.execute("SELECT DISTINCT symbol FROM etfs WHERE totalAssets > 5E9")
-    etf_symbols = [row[0] for row in etf_cursor.fetchall()]
-
-    crypto_cursor = crypto_con.cursor()
-    crypto_cursor.execute("PRAGMA journal_mode = wal")
-    crypto_cursor.execute("SELECT DISTINCT symbol FROM cryptos")
-    crypto_symbols = [row[0] for row in crypto_cursor.fetchall()]
-    crypto_symbols = convert_symbols(crypto_symbols) #Convert BTCUSD to BTC-USD for yfinance
-
    con.close()
-    etf_con.close()
-    crypto_con.close()

-    total_symbols = stock_symbols + etf_symbols + crypto_symbols
+    total_symbols = stock_symbols
    print(f"Total tickers: {len(total_symbols)}")
-    start_date = datetime(2000, 1, 1).strftime("%Y-%m-%d")
+    start_date = datetime(2015, 1, 1).strftime("%Y-%m-%d")
    end_date = datetime.today().strftime("%Y-%m-%d")

    chunk_size = len(total_symbols) // 70  # Divide the list into N chunks
@ -103,7 +73,7 @@ async def run():
    for chunk in chunks:
        tasks = []
        for ticker in tqdm(chunk):
-            tasks.append(process_symbol(ticker, start_date, end_date, crypto_symbols))
+            tasks.append(process_symbol(ticker, start_date, end_date))

        await asyncio.gather(*tasks)

--- a/app/main.py
+++ b/app/main.py
@ -4177,6 +4177,37 @@ async def get_data(api_key: str = Security(get_api_key)):
    )


+@app.post("/price-analysis")
+async def get_data(data:TickerData, api_key: str = Security(get_api_key)):
+    ticker = data.ticker.upper()
+    cache_key = f"price-analysis-{ticker}"
+    cached_result = redis_client.get(cache_key)
+    if cached_result:
+        return StreamingResponse(
+            io.BytesIO(cached_result),
+            media_type="application/json",
+            headers={"Content-Encoding": "gzip"}
+        )
+
+    try:
+        with open(f"json/price-analysis/{ticker}.json", 'rb') as file:
+            res = orjson.loads(file.read())
+    except:
+        res = {}
+        
+    data = orjson.dumps(res)
+    compressed_data = gzip.compress(data)
+
+    redis_client.set(cache_key, compressed_data)
+    redis_client.expire(cache_key,3600*3600)
+
+    return StreamingResponse(
+        io.BytesIO(compressed_data),
+        media_type="application/json",
+        headers={"Content-Encoding": "gzip"}
+    )
+
+
@app.get("/newsletter")
 async def get_newsletter():
    try:
--- a/app/ml_models/prophet_model.py
+++ b/app/ml_models/prophet_model.py
@ -6,7 +6,32 @@ from datetime import datetime
 import yfinance as yf
 import asyncio
 from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
-#import matplotlib.pyplot as plt
+# import matplotlib.pyplot as plt
+
+
+def get_monthly_historical_data(df):
+    # Ensure the date column is in datetime format
+    df['ds'] = pd.to_datetime(df['ds'])
+    
+    # Get the last available date in your data
+    last_date = df['ds'].max()
+    # Calculate the date one year ago
+    one_year_ago = last_date - pd.DateOffset(years=1)
+    
+    # Filter data for the last year
+    last_year_df = df[df['ds'] > one_year_ago]
+    
+    # Group by year-month and select the last record in each month
+    monthly_df = last_year_df.groupby(last_year_df['ds'].dt.to_period('M')).apply(lambda x: x.iloc[-1]).reset_index(drop=True)
+    
+    # Rename columns to the desired output format
+    monthly_df = monthly_df[['ds', 'y']].rename(columns={'ds': 'date', 'y': 'close'})
+    
+    # Format the date as a string in YYYY-MM-DD format
+    monthly_df['date'] = monthly_df['date'].dt.strftime('%Y-%m-%d')
+    
+    # Convert to list of dictionaries
+    return monthly_df.to_dict(orient='records')


 async def download_data(ticker, start_date, end_date):
@ -15,72 +40,85 @@ async def download_data(ticker, start_date, end_date):
        df = df.reset_index()
        df = df[['Date', 'Adj Close']]
        df = df.rename(columns={"Date": "ds", "Adj Close": "y"})
-        if len(df) > 252*2: #At least 2 years of history is necessary
-            #df['y'] = df['y'].rolling(window=200).mean()
-            #df = df.dropna()
+        if len(df) > 252 * 2:  # At least 2 years of history is necessary
+            # df['y'] = df['y'].rolling(window=200).mean()
+            # df = df.dropna()
            return df
    except Exception as e:
-    	print(e)
+        print(e)


 class PricePredictor:
    def __init__(self, predict_ndays=365):
-    	self.predict_ndays = predict_ndays
-    	self.model = Prophet(
-            interval_width = 0.8,
+        self.predict_ndays = predict_ndays
+        self.model = Prophet(
+            interval_width=0.8,
            daily_seasonality=True,
-            yearly_seasonality = True,
-      		)
+            yearly_seasonality=True,
+        )

    def run(self, df):
-    	self.model.fit(df)
-    	future = self.model.make_future_dataframe(periods=self.predict_ndays)
-    	forecast = self.model.predict(future)
+        self.model.fit(df)
+        future = self.model.make_future_dataframe(periods=self.predict_ndays)
+        forecast = self.model.predict(future)

-    	# Apply rolling average to smooth the upper bound
-    	rolling_window = 200
-    	forecast['smoothed_upper'] = round(forecast['yhat_upper'].rolling(window=rolling_window, min_periods=1).mean(),2)
-    	forecast['smoothed_lower'] = round(forecast['yhat_lower'].rolling(window=rolling_window, min_periods=1).mean(),2)
-    	forecast['smoothed_mean'] = round(forecast['yhat'].rolling(window=rolling_window, min_periods=1).mean(),2)
+        # Apply rolling average to smooth the forecast intervals
+        rolling_window = 200
+        forecast['smoothed_upper'] = forecast['yhat_upper'].rolling(window=rolling_window, min_periods=1).mean().round(2)
+        forecast['smoothed_lower'] = forecast['yhat_lower'].rolling(window=rolling_window, min_periods=1).mean().round(2)
+        forecast['smoothed_mean']  = forecast['yhat'].rolling(window=rolling_window, min_periods=1).mean().round(2)

-    	actual_values = df['y'].values
-    	predicted_values = forecast['yhat'].values[:-self.predict_ndays]
+        # Actual and predicted values for evaluation (optional)
+        actual_values = df['y'].values
+        predicted_values = forecast['yhat'].values[:-self.predict_ndays]

-    	rmse = round(np.sqrt(mean_squared_error(actual_values, predicted_values)),2)
-    	mape = round(np.mean(np.abs((actual_values - predicted_values) / actual_values)) * 100)
-    	r2 = round(r2_score(actual_values, predicted_values)*100)
+        # Extract forecast values for plotting or analysis (if needed)
+        pred_date_list = forecast['ds'][-1200 - self.predict_ndays:].dt.strftime('%Y-%m-%d').tolist()
+        upper_list    = forecast['smoothed_upper'][-1200 - self.predict_ndays:].tolist()
+        lower_list    = forecast['smoothed_lower'][-1200 - self.predict_ndays:].tolist()
+        mean_list     = forecast['smoothed_mean'][-1200 - self.predict_ndays:].tolist()

-    	print("RMSE:", rmse)
-    	print("MAPE:", mape)
-    	print("R2 Score:", r2)
-    	pred_date_list = forecast['ds'][-1200-self.predict_ndays:].dt.strftime('%Y-%m-%d').tolist()
-    	upper_list = forecast['smoothed_upper'][-1200-self.predict_ndays:].tolist()
-    	lower_list = forecast['smoothed_lower'][-1200-self.predict_ndays:].tolist()
-    	mean_list = forecast['smoothed_mean'][-1200-self.predict_ndays:].tolist()
+        historical_date_list = df['ds'][-1200:].dt.strftime('%Y-%m-%d').tolist()
+        historical_price_list = df['y'][-1200:].round(2).tolist()

-    	historical_date_list = df['ds'][-1200:].dt.strftime('%Y-%m-%d').tolist()
-    	historical_price_list = round(df['y'][-1200:],2).tolist()
+        # Get monthly historical data and round the close value
+        monthly_historical_data = get_monthly_historical_data(df)
+        monthly_historical_data = [{**item, 'close': round(item['close'], 2)} for item in monthly_historical_data]

-    	return {'rmse': rmse,'mape': mape,'r2Score':r2, 'historicalPrice': historical_price_list, 'predictionDate': pred_date_list, 'upperBand': upper_list, 'lowerBand': lower_list, 'meanResult': mean_list}
+       
+        future_forecast = forecast[forecast['ds'] > df['ds'].max()]['smoothed_mean']
+        if not future_forecast.empty:
+            median_price = round(np.median(future_forecast), 2)
+        else:
+            median_price = round(forecast['smoothed_mean'].iloc[-1], 2)
+
+        # Latest actual price from the dataset
+        latest_price = round(df['y'].iloc[-1], 2)
+
+        return {
+            'pastPriceList': monthly_historical_data,
+            'avgPriceTarget': mean_list[-1],
+            'highPriceTarget': upper_list[-1],
+            'lowPriceTarget': lower_list[-1],
+            'medianPriceTarget': median_price,
+            'latestPrice': latest_price
+        }



-#Test Mode
+# Test Mode
 async def main():
    for ticker in ['NVDA']:
-        start_date = datetime(2000, 1, 1).strftime("%Y-%m-%d")
+        start_date = datetime(2015, 1, 1).strftime("%Y-%m-%d")
        end_date = datetime.today().strftime("%Y-%m-%d")
        df = await download_data(ticker, start_date, end_date)
-        data = PricePredictor().run(df)
-        print(data)
+        if df is not None:
+            data = PricePredictor().run(df)

 # Run the main function
-#asyncio.run(main())
+# asyncio.run(main())

-
-
-
-# Plotting
+# Plotting (optional)
 '''
 fig, ax = plt.subplots(figsize=(10, 6))

@ -93,4 +131,4 @@ ax.set_title('Forecasted Prices for {}'.format(ticker))
 ax.legend()
 ax.grid(True)
 plt.show()
-'''
+'''