# backend/app/mc.py

import numpy as np
import pandas as pd
from scipy.stats import norm
from datetime import datetime, date, timedelta
import time
import sqlite3
import concurrent.futures
import json
from tqdm import tqdm
import argparse

#source https://medium.com/analytics-vidhya/monte-carlo-simulations-for-predicting-stock-prices-python-a64f53585662

def parse_args():
    """Parse the command-line options that select the database and table.

    Both ``--db`` (``stocks`` or ``etf``) and ``--table`` (``stocks`` or
    ``etfs``) are mandatory; argparse exits with a usage error otherwise.

    Returns:
        argparse.Namespace: namespace with ``db`` and ``table`` attributes.
    """
    option_specs = (
        ('--db', ['stocks', 'etf'], 'Database name (stocks or etf)'),
        ('--table', ['stocks', 'etfs'], 'Table name (stocks or etfs)'),
    )
    cli = argparse.ArgumentParser(description='Process stock or ETF data.')
    for flag, allowed, help_text in option_specs:
        cli.add_argument(flag, choices=allowed, required=True, help=help_text)
    return cli.parse_args()

class GeometricBrownianMotion:
    """Monte Carlo price forecast based on geometric Brownian motion.

    The log returns between consecutive historical closes provide a drift
    and volatility estimate; ``num_sim`` random paths are simulated from the
    last close and the distribution of their final prices is summarised.

    Attributes (all set in ``__init__``):
        data: DataFrame with the price history; only ``close`` is read.
        days: number of rows of each simulated path. Row 0 is the last
            observed close, so the reported price lies ``days - 1`` steps
            after it.
        num_sim: number of simulated paths (1000).
        percentile: tail probability of the reported band as a fraction
            (0.01 gives the 1st and 99th percentiles).
    """

    def __init__(self, data, pred_ndays):
        self.data = data
        self.days = pred_ndays
        self.num_sim = 1000
        self.percentile = 0.01
        # Private, fixed-seed generator: results stay reproducible without
        # reseeding NumPy's global random state as a side effect.
        self._rng = np.random.default_rng(42)

    def run(self):
        """Simulate the price paths and summarise the final prices.

        The input DataFrame is not modified.

        Returns:
            dict: ``{'min': ..., 'mean': ..., 'max': ...}`` as plain floats.
            ``'min'`` and ``'max'`` are the lower and upper tail percentiles
            of the final simulated prices; ``'mean'`` holds their median
            (the key name is kept for backward compatibility).

        Raises:
            ValueError: if ``days`` is smaller than 1, the history is empty,
                or the history does not yield finite simulated prices
                (e.g. too few rows to estimate a volatility).
        """
        if self.days < 1:
            raise ValueError("pred_ndays must be at least 1")
        closes = self.data['close']
        if closes.empty:
            raise ValueError("price history is empty")

        log_returns = np.log(1 + closes.pct_change())
        mu = log_returns.mean()
        sigma = log_returns.std()
        drift = mu - sigma ** 2 / 2.0

        # mu and sigma are statistics per observation interval, so every
        # simulated step spans exactly one such interval. The step size must
        # not depend on the number of simulations.
        dt = 1.0
        shocks = self._rng.standard_normal((self.days - 1, self.num_sim))
        log_steps = drift * dt + sigma * np.sqrt(dt) * shocks

        # Only the last row of each path is reported, so summing the log
        # increments replaces building every intermediate price.
        final_prices = closes.iloc[-1] * np.exp(log_steps.sum(axis=0))
        if not np.all(np.isfinite(final_prices)):
            raise ValueError("price history does not yield finite simulated prices")

        lower_q = 100.0 * self.percentile
        upper_q = 100.0 * (1.0 - self.percentile)
        low, median, high = np.percentile(final_prices, [lower_q, 50.0, upper_q])
        return {'min': float(low), 'mean': float(median), 'max': float(high)}


def create_column(con, table=None):
    """Ensure the ``pricePrediction`` TEXT column exists on the target table.

    Safe to call repeatedly: when the column is already present nothing is
    changed. A "duplicate column name" error from SQLite is tolerated so that
    several processes performing the same check-then-alter sequence do not
    fail when another one wins the race.

    Args:
        con: open ``sqlite3`` connection.
        table: name of the table to alter. Defaults to the module-level
            ``table_name``.

    Raises:
        sqlite3.OperationalError: for any failure other than the column
            already existing.
    """
    if table is None:
        table = table_name
    quoted = '"' + table.replace('"', '""') + '"'

    # Column 1 of each PRAGMA table_info row is the column name.
    existing = {row[1] for row in con.execute(f"PRAGMA table_info({quoted})")}
    if 'pricePrediction' in existing:
        return

    try:
        con.execute(f"ALTER TABLE {quoted} ADD COLUMN pricePrediction TEXT")
    except sqlite3.OperationalError as exc:
        # Another connection may have added the column between the check
        # above and this statement; that outcome is what we wanted anyway.
        if 'duplicate column name' not in str(exc).lower():
            raise
    con.commit()

def update_database(pred_dict, symbol, con, table=None):
    """Store a prediction dictionary as JSON text for one symbol.

    The dictionary is serialised with ``json.dumps`` and written to the
    ``pricePrediction`` column of every row whose ``symbol`` matches.

    Args:
        pred_dict: JSON-serialisable prediction data.
        symbol: value of the ``symbol`` column identifying the row(s).
        con: open ``sqlite3`` connection.
        table: name of the table to update. Defaults to the module-level
            ``table_name``.
    """
    if table is None:
        table = table_name
    quoted = '"' + table.replace('"', '""') + '"'
    payload = json.dumps(pred_dict)

    # The connection context manager commits on success and rolls the
    # transaction back if the statement raises.
    with con:
        con.execute(
            f"UPDATE {quoted} SET pricePrediction = ? WHERE symbol = ?",
            (payload, symbol),
        )


def process_symbol(ticker):
    """Compute and store the 1W/1M/3M/6M price predictions for one ticker.

    Reads the ``date`` and ``close`` columns from the table named after
    ``ticker`` (restricted to the module-level ``start_date``..``end_date``
    range, through the module-level connection ``con``), runs
    ``GeometricBrownianMotion`` for 7, 30, 90 and 180 days and stores the
    result via ``create_column`` and ``update_database``.

    Best effort: if the simulation fails, all-zero predictions are stored
    instead; if reading or writing fails, the error is printed and the ticker
    is skipped. Ordinary exceptions never propagate to the caller, but
    ``KeyboardInterrupt`` and ``SystemExit`` do.

    Args:
        ticker: symbol, which is also the name of its price-history table.
    """
    horizons = {'1W': 7, '1M': 30, '3M': 90, '6M': 180}
    try:
        # Table names cannot be bound as parameters, so quote the identifier
        # and escape any embedded double quotes.
        quoted_ticker = '"' + ticker.replace('"', '""') + '"'
        query = f"""
            SELECT
                date, close
            FROM
                {quoted_ticker}
            WHERE
                date BETWEEN ? AND ?
        """
        df = pd.read_sql_query(query, con, params=(start_date, end_date))

        try:
            pred_dict = {
                label: GeometricBrownianMotion(df, ndays).run()
                for label, ndays in horizons.items()
            }
        except Exception:
            # Unusable price history: store zeros for every horizon.
            pred_dict = {label: {'min': 0, 'mean': 0, 'max': 0} for label in horizons}

        create_column(con)
        update_database(pred_dict, ticker, con)

    except Exception as exc:
        print(f"Failed create price prediction for {ticker}: {exc}")


def _init_worker(db, table, start, end):
    """Set up the module-level state ``process_symbol`` reads in a worker.

    Runs once in every worker process. Each worker opens its own SQLite
    connection instead of using one created in the parent process, and the
    settings are passed explicitly so they are also available when workers
    are started by re-importing this module rather than by forking.
    """
    global con, table_name, start_date, end_date
    con = sqlite3.connect(f'backup_db/{db}.db')
    table_name = table
    start_date = start
    end_date = end


# The guard keeps worker processes that import this module from re-running
# the script (argument parsing, pool creation) themselves.
if __name__ == "__main__":
    args = parse_args()
    db_name = args.db
    table_name = args.table

    start_date = datetime(1970, 1, 1)
    end_date = datetime.today()

    con = sqlite3.connect(f'backup_db/{db_name}.db')
    try:
        symbol_query = f"SELECT DISTINCT symbol FROM {table_name}"
        symbols = [row[0] for row in con.execute(symbol_query)]
        # Add the column once up front so workers do not race on ALTER TABLE.
        create_column(con)
    finally:
        # Closed before the pool starts: workers use their own connections.
        con.close()

    # Number of concurrent workers
    num_processes = 4  # You can adjust this based on your system's capabilities

    with concurrent.futures.ProcessPoolExecutor(
        max_workers=num_processes,
        initializer=_init_worker,
        initargs=(db_name, table_name, start_date, end_date),
    ) as executor:
        futures = [executor.submit(process_symbol, symbol) for symbol in symbols]

        # Use tqdm to wrap around the futures for progress tracking
        for future in tqdm(concurrent.futures.as_completed(futures), total=len(symbols), desc="Processing"):
            # process_symbol handles its own errors; anything surfacing here
            # means the worker itself failed (e.g. a broken process pool).
            error = future.exception()
            if error is not None:
                print(f"Worker failed: {error}")


'''
query_template = """
    SELECT
        date, close
    FROM
        {ticker}
    WHERE
        date BETWEEN ? AND ?
"""
ticker = 'AMD'
start_date = datetime(2020,1,1)
end_date = datetime.today()
con = sqlite3.connect('stocks.db')
query = query_template.format(ticker=ticker)
df = pd.read_sql_query(query, con, params=(start_date, end_date))
#Compute the logarithmic returns
GeometricBrownianMotion(df).run()
'''