update price prediction model

commit 01e802047a (parent 3282315d7e)
@@ -11,15 +11,6 @@ import concurrent.futures
 
 
 def convert_symbols(symbol_list):
-    """
-    Converts the symbols in the given list from 'BTCUSD' and 'USDTUSD' format to 'BTC-USD' and 'USDT-USD' format.
-
-    Args:
-        symbol_list (list): A list of strings representing the symbols to be converted.
-
-    Returns:
-        list: A new list with the symbols converted to the desired format.
-    """
     converted_symbols = []
    for symbol in symbol_list:
         # Determine the base and quote currencies
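Note: the rest of convert_symbols falls outside this hunk's context. A minimal sketch of the conversion the removed docstring describes, assuming the quote currency is always a trailing 'USD' (an illustration, not the repository's actual body):

def convert_symbols(symbol_list):
    # Sketch only: 'BTCUSD' -> 'BTC-USD', 'USDTUSD' -> 'USDT-USD'
    converted_symbols = []
    for symbol in symbol_list:
        if symbol.endswith('USD') and len(symbol) > 3:
            converted_symbols.append(f"{symbol[:-3]}-USD")
        else:
            converted_symbols.append(symbol)
    return converted_symbols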
@@ -53,13 +44,10 @@ async def download_data(ticker, start_date, end_date):
     except Exception as e:
         print(e)
 
-async def process_symbol(ticker, start_date, end_date, crypto_symbols):
+async def process_symbol(ticker, start_date, end_date):
     try:
         df = await download_data(ticker, start_date, end_date)
         data = PricePredictor().run(df)
-
-        if ticker in crypto_symbols:
-            ticker = ticker.replace('-','') #convert back from BTC-USD to BTCUSD
         await save_json(ticker, data)
 
     except Exception as e:
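Note: save_json is called here but not defined in this diff. Since app/main.py (below) reads json/price-analysis/{ticker}.json, it presumably writes the prediction payload to that directory; a hypothetical sketch (path and signature are assumptions, not the repository's helper):

import orjson

async def save_json(symbol, data):
    # Hypothetical helper: persist the prediction payload where the API expects to find it.
    with open(f"json/price-analysis/{symbol}.json", 'wb') as file:
        file.write(orjson.dumps(data))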
@@ -68,34 +56,16 @@ async def process_symbol(ticker, start_date, end_date, crypto_symbols):
 
 async def run():
     con = sqlite3.connect('stocks.db')
-    etf_con = sqlite3.connect('etf.db')
-    crypto_con = sqlite3.connect('crypto.db')
-
     cursor = con.cursor()
     cursor.execute("PRAGMA journal_mode = wal")
-    #cursor.execute("SELECT DISTINCT symbol FROM stocks")
-    #cursor.execute("SELECT DISTINCT symbol FROM stocks WHERE marketCap > 10E9 AND symbol NOT LIKE '%.%'")
     cursor.execute("SELECT DISTINCT symbol FROM stocks WHERE marketCap > 1E9")
     stock_symbols = [row[0] for row in cursor.fetchall()]
-
-    etf_cursor = etf_con.cursor()
-    etf_cursor.execute("PRAGMA journal_mode = wal")
-    etf_cursor.execute("SELECT DISTINCT symbol FROM etfs WHERE totalAssets > 5E9")
-    etf_symbols = [row[0] for row in etf_cursor.fetchall()]
-
-    crypto_cursor = crypto_con.cursor()
-    crypto_cursor.execute("PRAGMA journal_mode = wal")
-    crypto_cursor.execute("SELECT DISTINCT symbol FROM cryptos")
-    crypto_symbols = [row[0] for row in crypto_cursor.fetchall()]
-    crypto_symbols = convert_symbols(crypto_symbols) #Convert BTCUSD to BTC-USD for yfinance
 
     con.close()
-    etf_con.close()
-    crypto_con.close()
 
-    total_symbols = stock_symbols + etf_symbols + crypto_symbols
+    total_symbols = stock_symbols
     print(f"Total tickers: {len(total_symbols)}")
-    start_date = datetime(2000, 1, 1).strftime("%Y-%m-%d")
+    start_date = datetime(2015, 1, 1).strftime("%Y-%m-%d")
     end_date = datetime.today().strftime("%Y-%m-%d")
 
     chunk_size = len(total_symbols) // 70 # Divide the list into N chunks
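Note: the line that builds chunks from chunk_size sits outside this hunk's context. A typical slicing pattern consistent with the loop in the next hunk (an assumption, shown here with a toy symbol list and a guard against a zero chunk size):

total_symbols = [f"SYM{i}" for i in range(150)]   # stand-in for the symbols pulled from stocks.db
chunk_size = max(1, len(total_symbols) // 70)     # guard against a zero step when few symbols are selected
chunks = [total_symbols[i:i + chunk_size] for i in range(0, len(total_symbols), chunk_size)]
print(len(chunks), len(chunks[0]))                # number of chunks and the size of the first one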
@@ -103,7 +73,7 @@ async def run():
     for chunk in chunks:
         tasks = []
         for ticker in tqdm(chunk):
-            tasks.append(process_symbol(ticker, start_date, end_date, crypto_symbols))
+            tasks.append(process_symbol(ticker, start_date, end_date))
 
         await asyncio.gather(*tasks)
 
app/main.py (+31 lines)
@@ -4177,6 +4177,37 @@ async def get_data(api_key: str = Security(get_api_key)):
         )
 
 
+@app.post("/price-analysis")
+async def get_data(data:TickerData, api_key: str = Security(get_api_key)):
+    ticker = data.ticker.upper()
+    cache_key = f"price-analysis-{ticker}"
+    cached_result = redis_client.get(cache_key)
+    if cached_result:
+        return StreamingResponse(
+            io.BytesIO(cached_result),
+            media_type="application/json",
+            headers={"Content-Encoding": "gzip"}
+        )
+
+    try:
+        with open(f"json/price-analysis/{ticker}.json", 'rb') as file:
+            res = orjson.loads(file.read())
+    except:
+        res = {}
+
+    data = orjson.dumps(res)
+    compressed_data = gzip.compress(data)
+
+    redis_client.set(cache_key, compressed_data)
+    redis_client.expire(cache_key,3600*3600)
+
+    return StreamingResponse(
+        io.BytesIO(compressed_data),
+        media_type="application/json",
+        headers={"Content-Encoding": "gzip"}
+    )
+
+
 @app.get("/newsletter")
 async def get_newsletter():
     try:
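Note: the endpoint relies on a TickerData request model defined elsewhere in app/main.py. Based on the data.ticker usage above, it is presumably a minimal Pydantic model along these lines (a sketch, not the repository's actual definition):

from pydantic import BaseModel

class TickerData(BaseModel):
    ticker: str  # e.g. "nvda"; the handler upper-cases it before building the cache key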
@@ -6,7 +6,32 @@ from datetime import datetime
 import yfinance as yf
 import asyncio
 from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
-#import matplotlib.pyplot as plt
+# import matplotlib.pyplot as plt
 
 
+def get_monthly_historical_data(df):
+    # Ensure the date column is in datetime format
+    df['ds'] = pd.to_datetime(df['ds'])
+
+    # Get the last available date in your data
+    last_date = df['ds'].max()
+    # Calculate the date one year ago
+    one_year_ago = last_date - pd.DateOffset(years=1)
+
+    # Filter data for the last year
+    last_year_df = df[df['ds'] > one_year_ago]
+
+    # Group by year-month and select the last record in each month
+    monthly_df = last_year_df.groupby(last_year_df['ds'].dt.to_period('M')).apply(lambda x: x.iloc[-1]).reset_index(drop=True)
+
+    # Rename columns to the desired output format
+    monthly_df = monthly_df[['ds', 'y']].rename(columns={'ds': 'date', 'y': 'close'})
+
+    # Format the date as a string in YYYY-MM-DD format
+    monthly_df['date'] = monthly_df['date'].dt.strftime('%Y-%m-%d')
+
+    # Convert to list of dictionaries
+    return monthly_df.to_dict(orient='records')
+
+
 async def download_data(ticker, start_date, end_date):
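Note: a quick way to sanity-check the new get_monthly_historical_data helper with synthetic data (run inside the module that defines it; the frame below is illustrative, the real input comes from download_data):

import pandas as pd

df = pd.DataFrame({
    'ds': pd.date_range('2023-01-02', periods=400, freq='B'),  # ~19 months of business days
    'y': [100 + 0.1 * i for i in range(400)],                   # synthetic closing prices
})
records = get_monthly_historical_data(df)
print(records[0])  # one record per month over the trailing year, e.g. {'date': '2023-07-31', 'close': ...}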
@@ -15,9 +40,9 @@ async def download_data(ticker, start_date, end_date):
         df = df.reset_index()
         df = df[['Date', 'Adj Close']]
         df = df.rename(columns={"Date": "ds", "Adj Close": "y"})
-        if len(df) > 252*2: #At least 2 years of history is necessary
-            #df['y'] = df['y'].rolling(window=200).mean()
-            #df = df.dropna()
+        if len(df) > 252 * 2:  # At least 2 years of history is necessary
+            # df['y'] = df['y'].rolling(window=200).mean()
+            # df = df.dropna()
             return df
     except Exception as e:
         print(e)
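Note: the download call itself sits above this hunk's context; the lines shown here only reshape its output. The surrounding code presumably does something along these lines (an assumption, not part of the diff):

df = yf.download(ticker, start=start_date, end=end_date, progress=False)

The subsequent lines then assume the downloaded frame exposes 'Date' and 'Adj Close' columns after reset_index(), which they rename to Prophet's expected ds/y schema.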
@@ -27,9 +52,9 @@ class PricePredictor:
     def __init__(self, predict_ndays=365):
         self.predict_ndays = predict_ndays
         self.model = Prophet(
-            interval_width = 0.8,
+            interval_width=0.8,
             daily_seasonality=True,
-            yearly_seasonality = True,
+            yearly_seasonality=True,
         )
 
     def run(self, df):
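Note: with interval_width=0.8, Prophet's yhat_lower/yhat_upper columns bound an 80% uncertainty interval, which run() below smooths into the low/high price targets. A self-contained round trip outside the repository (the import name may be prophet or fbprophet depending on the installed package):

import pandas as pd
from prophet import Prophet

history = pd.DataFrame({
    'ds': pd.date_range('2020-01-01', periods=730),
    'y': [50 + 0.05 * i for i in range(730)],  # synthetic price series
})
m = Prophet(interval_width=0.8, daily_seasonality=True, yearly_seasonality=True)
m.fit(history)
future = m.make_future_dataframe(periods=365)
forecast = m.predict(future)
print(forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail(1))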
@@ -37,50 +62,63 @@ class PricePredictor:
         future = self.model.make_future_dataframe(periods=self.predict_ndays)
         forecast = self.model.predict(future)
 
-        # Apply rolling average to smooth the upper bound
+        # Apply rolling average to smooth the forecast intervals
         rolling_window = 200
-        forecast['smoothed_upper'] = round(forecast['yhat_upper'].rolling(window=rolling_window, min_periods=1).mean(),2)
-        forecast['smoothed_lower'] = round(forecast['yhat_lower'].rolling(window=rolling_window, min_periods=1).mean(),2)
-        forecast['smoothed_mean'] = round(forecast['yhat'].rolling(window=rolling_window, min_periods=1).mean(),2)
+        forecast['smoothed_upper'] = forecast['yhat_upper'].rolling(window=rolling_window, min_periods=1).mean().round(2)
+        forecast['smoothed_lower'] = forecast['yhat_lower'].rolling(window=rolling_window, min_periods=1).mean().round(2)
+        forecast['smoothed_mean'] = forecast['yhat'].rolling(window=rolling_window, min_periods=1).mean().round(2)
 
+        # Actual and predicted values for evaluation (optional)
         actual_values = df['y'].values
         predicted_values = forecast['yhat'].values[:-self.predict_ndays]
 
-        rmse = round(np.sqrt(mean_squared_error(actual_values, predicted_values)),2)
-        mape = round(np.mean(np.abs((actual_values - predicted_values) / actual_values)) * 100)
-        r2 = round(r2_score(actual_values, predicted_values)*100)
-
-        print("RMSE:", rmse)
-        print("MAPE:", mape)
-        print("R2 Score:", r2)
-        pred_date_list = forecast['ds'][-1200-self.predict_ndays:].dt.strftime('%Y-%m-%d').tolist()
-        upper_list = forecast['smoothed_upper'][-1200-self.predict_ndays:].tolist()
-        lower_list = forecast['smoothed_lower'][-1200-self.predict_ndays:].tolist()
-        mean_list = forecast['smoothed_mean'][-1200-self.predict_ndays:].tolist()
+        # Extract forecast values for plotting or analysis (if needed)
+        pred_date_list = forecast['ds'][-1200 - self.predict_ndays:].dt.strftime('%Y-%m-%d').tolist()
+        upper_list = forecast['smoothed_upper'][-1200 - self.predict_ndays:].tolist()
+        lower_list = forecast['smoothed_lower'][-1200 - self.predict_ndays:].tolist()
+        mean_list = forecast['smoothed_mean'][-1200 - self.predict_ndays:].tolist()
 
         historical_date_list = df['ds'][-1200:].dt.strftime('%Y-%m-%d').tolist()
-        historical_price_list = round(df['y'][-1200:],2).tolist()
+        historical_price_list = df['y'][-1200:].round(2).tolist()
 
-        return {'rmse': rmse,'mape': mape,'r2Score':r2, 'historicalPrice': historical_price_list, 'predictionDate': pred_date_list, 'upperBand': upper_list, 'lowerBand': lower_list, 'meanResult': mean_list}
+        # Get monthly historical data and round the close value
+        monthly_historical_data = get_monthly_historical_data(df)
+        monthly_historical_data = [{**item, 'close': round(item['close'], 2)} for item in monthly_historical_data]
+
+        future_forecast = forecast[forecast['ds'] > df['ds'].max()]['smoothed_mean']
+        if not future_forecast.empty:
+            median_price = round(np.median(future_forecast), 2)
+        else:
+            median_price = round(forecast['smoothed_mean'].iloc[-1], 2)
+
+        # Latest actual price from the dataset
+        latest_price = round(df['y'].iloc[-1], 2)
+
+        return {
+            'pastPriceList': monthly_historical_data,
+            'avgPriceTarget': mean_list[-1],
+            'highPriceTarget': upper_list[-1],
+            'lowPriceTarget': lower_list[-1],
+            'medianPriceTarget': median_price,
+            'latestPrice': latest_price
+        }
 
 
 
-#Test Mode
+# Test Mode
 async def main():
     for ticker in ['NVDA']:
-        start_date = datetime(2000, 1, 1).strftime("%Y-%m-%d")
+        start_date = datetime(2015, 1, 1).strftime("%Y-%m-%d")
         end_date = datetime.today().strftime("%Y-%m-%d")
         df = await download_data(ticker, start_date, end_date)
-        data = PricePredictor().run(df)
-        print(data)
+        if df is not None:
+            data = PricePredictor().run(df)
 
 # Run the main function
-#asyncio.run(main())
+# asyncio.run(main())
 
+# Plotting (optional)
 
 
-# Plotting
 '''
 fig, ax = plt.subplots(figsize=(10, 6))
 
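Note: after this change, the payload built per ticker (and later served by the /price-analysis endpoint) has the following shape; the numbers below are purely illustrative:

{
    'pastPriceList': [{'date': '2024-06-28', 'close': 121.79}],  # last close of each month over the trailing year
    'avgPriceTarget': 150.12,      # smoothed mean forecast at the 365-day horizon
    'highPriceTarget': 181.40,     # smoothed 80% upper band at the horizon
    'lowPriceTarget': 118.73,      # smoothed 80% lower band at the horizon
    'medianPriceTarget': 149.90,   # median of the smoothed mean over the future window
    'latestPrice': 135.67          # most recent actual close in the input frame
}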