backend/app/mc.py
2024-05-26 22:28:08 +02:00

209 lines
6.7 KiB
Python
Executable File

import numpy as np
import pandas as pd
from scipy.stats import norm
from datetime import datetime, date, timedelta
import time
import sqlite3
import concurrent.futures
import json
from tqdm import tqdm
import argparse
#source https://medium.com/analytics-vidhya/monte-carlo-simulations-for-predicting-stock-prices-python-a64f53585662
def parse_args(argv=None):
    """Parse the command-line options that select the database and table.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse reads ``sys.argv[1:]``, which is what
            the script did before this parameter existed.

    Returns:
        argparse.Namespace with two attributes:
        ``db`` (``'stocks'`` or ``'etf'``) and
        ``table`` (``'stocks'`` or ``'etfs'``).

    Raises:
        SystemExit: raised by argparse when a required option is missing
            or a value is not one of the allowed choices.
    """
    parser = argparse.ArgumentParser(description='Process stock or ETF data.')
    parser.add_argument(
        '--db',
        choices=['stocks', 'etf'],
        required=True,
        help='Database name (stocks or etf)',
    )
    parser.add_argument(
        '--table',
        choices=['stocks', 'etfs'],
        required=True,
        help='Table name (stocks or etfs)',
    )
    return parser.parse_args(argv)
class GeometricBrownianMotion:
    """Monte Carlo price forecast using geometric Brownian motion (GBM).

    The drift and volatility are estimated from the log returns between
    consecutive rows of ``data['close']``; ``num_sim`` independent paths are
    then advanced ``pred_ndays`` steps from the last close, and percentiles
    of the simulated end prices are reported.

    Attributes set by ``__init__``:
        data: DataFrame holding the price history; only the ``'close'``
            column is read, in row order (last row = most recent price).
        days: number of steps to simulate ahead (``pred_ndays``).
        num_sim: number of simulated paths (1000).
        percentile: tail probability, as a fraction, used for the band
            (0.01 -> 1st and 99th percentiles).
    """

    def __init__(self, data, pred_ndays):
        self.data = data
        self.days = pred_ndays
        self.num_sim = 1000
        self.percentile = 0.01
        # Private fixed-seed generator: results stay reproducible without
        # reseeding NumPy's global RNG as a side effect of construction.
        self._rng = np.random.RandomState(42)

    def run(self):
        """Simulate the paths and summarize the price after ``days`` steps.

        Returns:
            dict with plain-float values (JSON serializable):
            ``'min'`` is the lower percentile (``100 * percentile``),
            ``'mean'`` is the 50th percentile (i.e. the median; the key name
            is kept for compatibility), and ``'max'`` is the upper
            percentile (``100 - 100 * percentile``).

        Raises:
            ValueError: if ``days`` is smaller than 1, or the price history
                is too short or too dirty to yield a finite drift,
                volatility and last close.
        """
        days = self.days
        if days < 1:
            raise ValueError("pred_ndays must be at least 1")

        close = self.data['close']
        # Two returns (three prices) are the minimum for a sample std.
        if len(close) < 3:
            raise ValueError("need at least 3 closing prices to estimate volatility")

        log_returns = np.log(1 + close.pct_change())
        mu = log_returns.mean()
        sigma = log_returns.std()
        last_close = close.iloc[-1]
        if not np.all(np.isfinite([mu, sigma, last_close])):
            raise ValueError("price history does not yield finite drift/volatility")

        # mu and sigma are per-row (per-period) statistics, so one simulated
        # step corresponds to exactly one period.
        dt = 1.0
        drift = mu - sigma ** 2.0 / 2.0

        # One standard-normal shock per (step, path).
        shocks = self._rng.standard_normal((days, self.num_sim))
        log_steps = drift * dt + sigma * np.sqrt(dt) * shocks

        # Only the end-of-horizon distribution is reported, so the per-path
        # log increments are summed instead of materializing every day.
        terminal_prices = last_close * np.exp(log_steps.sum(axis=0))

        lower_pct = 100.0 * self.percentile
        low, median, high = np.percentile(
            terminal_prices, [lower_pct, 50.0, 100.0 - lower_pct]
        )
        return {'min': float(low), 'mean': float(median), 'max': float(high)}
def create_column(con):
    """
    Ensure the table named by the module-level ``table_name`` has a
    'pricePrediction' TEXT column, adding (and committing) it when absent.

    ``con`` is the open SQLite connection used for both the check and the
    ALTER TABLE statement.
    """
    table_info = con.execute(f"PRAGMA table_info({table_name})").fetchall()
    # Index 1 of each PRAGMA table_info row is the column name.
    existing_columns = [row[1] for row in table_info]
    if 'pricePrediction' in existing_columns:
        return

    print('yellow')
    con.execute(f"ALTER TABLE {table_name} ADD COLUMN pricePrediction TEXT")
    con.commit()
def update_database(pred_dict, symbol, con):
    """
    Write ``pred_dict`` as a JSON string into the 'pricePrediction' column
    of the row(s) whose ``symbol`` matches, then commit.

    The target table is the module-level ``table_name``; ``con`` is the
    open SQLite connection to write through.
    """
    statement = f"UPDATE {table_name} SET pricePrediction = ? WHERE symbol = ?"
    # The prediction is stored as text, so serialize it before binding.
    payload = json.dumps(pred_dict)
    con.execute(statement, (payload, symbol))
    con.commit()
def process_symbol(ticker):
    """Compute and store the price predictions for one ticker.

    Reads ``date``/``close`` rows from the table named after ``ticker``
    (restricted to the module-level ``start_date``..``end_date`` window),
    runs ``GeometricBrownianMotion`` for four horizons and writes the result
    through ``update_database`` using the module-level connection ``con``.

    The stored dictionary has the keys ``'1W'``, ``'1M'``, ``'3M'`` and
    ``'6M'`` (7, 30, 90 and 180 simulated days). If the simulation fails for
    any horizon, every horizon is stored as ``{'min': 0, 'mean': 0,
    'max': 0}``.

    Args:
        ticker: symbol whose table is read and whose row is updated.

    Returns:
        None. Failures of the query or the database write are reported on
        stdout instead of being raised, so one bad ticker does not stop the
        batch.
    """
    horizons = {'1W': 7, '1M': 30, '3M': 90, '6M': 180}
    try:
        # The ticker is used as a quoted identifier; double any embedded
        # quote so an unusual symbol cannot break out of the identifier.
        quoted_ticker = ticker.replace('"', '""')
        # ORDER BY makes the row order explicit: the model takes returns
        # between consecutive rows and starts from the last one.
        query = f'''
            SELECT
                date, close
            FROM
                "{quoted_ticker}"
            WHERE
                date BETWEEN ? AND ?
            ORDER BY
                date
        '''
        df = pd.read_sql_query(query, con, params=(start_date, end_date))

        try:
            pred_dict = {
                label: GeometricBrownianMotion(df, n_days).run()
                for label, n_days in horizons.items()
            }
        except Exception:
            # Best-effort fallback kept from the original: a history that
            # cannot be simulated is stored as all zeros.
            pred_dict = {
                label: {'min': 0, 'mean': 0, 'max': 0} for label in horizons
            }

        create_column(con)
        update_database(pred_dict, ticker, con)
    except Exception as exc:
        # Exception (not a bare except) so Ctrl-C / SystemExit still work.
        print(f"Failed create price prediction for {ticker}: {exc}")
def _init_globals(db_name, table):
    """Set the module-level state that the worker functions read.

    Populates ``table_name``, ``con``, ``start_date`` and ``end_date``.
    It is called once in the parent process and once in every pool worker,
    so each process talks to SQLite through a connection it opened itself
    rather than one inherited from the parent.
    """
    global con, table_name, start_date, end_date
    table_name = table
    con = sqlite3.connect(f'backup_db/{db_name}.db')
    start_date = datetime(1970, 1, 1)
    end_date = datetime.today()


def main():
    """Compute price predictions for every symbol of the selected table."""
    args = parse_args()

    # Parent-side setup: list the symbols and make sure the target column
    # exists once, before any worker starts, so workers never race on
    # ALTER TABLE.
    _init_globals(args.db, args.table)
    try:
        symbol_query = f"SELECT DISTINCT symbol FROM {table_name}"
        symbols = [row[0] for row in con.execute(symbol_query).fetchall()]
        create_column(con)
    finally:
        # Closed before the pool is created so no open connection is
        # carried over into the worker processes.
        con.close()

    # Number of concurrent workers
    num_processes = 4  # You can adjust this based on your system's capabilities

    with concurrent.futures.ProcessPoolExecutor(
        max_workers=num_processes,
        initializer=_init_globals,
        initargs=(args.db, args.table),
    ) as executor:
        futures = [executor.submit(process_symbol, symbol) for symbol in symbols]
        # Use tqdm to wrap around the futures for progress tracking
        for _ in tqdm(
            concurrent.futures.as_completed(futures),
            total=len(symbols),
            desc="Processing",
        ):
            pass


# The guard keeps the batch from running when the module is imported, which
# is what worker processes do under the "spawn" start method.
if __name__ == "__main__":
    main()