209 lines
6.7 KiB
Python
Executable File
209 lines
6.7 KiB
Python
Executable File
import numpy as np
|
|
import pandas as pd
|
|
from scipy.stats import norm
|
|
from datetime import datetime, date, timedelta
|
|
import time
|
|
import sqlite3
|
|
import concurrent.futures
|
|
import json
|
|
from tqdm import tqdm
|
|
import argparse
|
|
|
|
# Source: https://medium.com/analytics-vidhya/monte-carlo-simulations-for-predicting-stock-prices-python-a64f53585662
|
|
|
|
def parse_args():
    """Parse the command-line options that select the database and table.

    Both ``--db`` ('stocks' or 'etf') and ``--table`` ('stocks' or 'etfs')
    are mandatory.

    Returns:
        The ``argparse.Namespace`` with the ``db`` and ``table`` attributes.
    """
    cli = argparse.ArgumentParser(description='Process stock or ETF data.')
    # (flag, allowed values, help text) for every required option.
    required_options = (
        ('--db', ['stocks', 'etf'], 'Database name (stocks or etf)'),
        ('--table', ['stocks', 'etfs'], 'Table name (stocks or etfs)'),
    )
    for flag, allowed, help_text in required_options:
        cli.add_argument(flag, choices=allowed, required=True, help=help_text)
    return cli.parse_args()
|
|
|
|
class GeometricBrownianMotion:
    """Monte Carlo price forecast using a geometric Brownian motion model.

    Drift and volatility are estimated from the log returns of the ``close``
    column of ``data``. ``num_sim`` random price paths of ``days`` rows are
    simulated (row 0 is the last observed close) and the prices of the last
    row are summarised by three percentiles.
    """

    def __init__(self, data, pred_ndays):
        """Store the price history and the simulation settings.

        Args:
            data: DataFrame with a ``close`` column; the last row is used as
                the starting price (presumably rows are ordered oldest to
                newest -- verify against the caller's query).
            pred_ndays: Number of rows in every simulated path.
        """
        self.data = data
        self.days = pred_ndays
        self.num_sim = 1000
        # Tail probability of the reported band, as a fraction (0.01 == 1 %).
        self.percentile = 0.01
        # Private generator rather than np.random.seed(42): it produces the
        # same random stream without resetting the process-wide NumPy RNG
        # every time a forecaster is constructed.
        self._rng = np.random.RandomState(42)

    def run(self):
        """Simulate the price paths and summarise the final simulated day.

        The input DataFrame is not modified.

        Returns:
            dict with float values under the keys ``'min'`` (lower tail
            percentile), ``'mean'`` (50th percentile, i.e. the median) and
            ``'max'`` (upper tail percentile) of the final simulated prices.
            With the default ``percentile`` of 0.01 these are the 1st, 50th
            and 99th percentiles.

        Raises:
            ValueError: if the simulated prices are not finite, e.g. because
                the history is too short to estimate a standard deviation.
            IndexError: if ``data`` has no rows or ``days`` is 0.
        """
        closes = self.data['close']

        # NOTE(review): dt divides the horizon by the number of simulations,
        # which mixes two unrelated quantities; with per-row log returns a
        # step of 1 looks like the intended value. Left unchanged because
        # altering it would shift every stored forecast -- please confirm.
        dt = self.days / self.num_sim

        log_returns = np.log(1 + closes.pct_change())
        mean_return = log_returns.mean()
        sigma = log_returns.std()
        drift = mean_return - sigma ** 2.0 / 2.0

        # One standard-normal shock per (row, simulation).
        shocks = norm.ppf(self._rng.rand(self.days, self.num_sim))
        step_factors = np.exp(drift * dt + sigma * shocks * np.sqrt(dt))

        # Row 0 of every path is the last observed close; each later row
        # multiplies the previous price by that row's factor. Only the final
        # row is reported, so the intermediate rows are not kept.
        last_close = closes.iloc[-1]
        step_factors[0] = last_close
        final_prices = np.full(
            self.num_sim, last_close, dtype=step_factors.dtype
        )
        for factors in step_factors[1:]:
            final_prices = final_prices * factors

        if not np.all(np.isfinite(final_prices)):
            # Fail loudly instead of returning NaN, which json.dumps would
            # emit as the non-standard token NaN.
            raise ValueError(
                "simulated prices are not finite; check the 'close' history"
            )

        # self.percentile is a fraction while np.percentile expects percent.
        tail_pct = self.percentile * 100
        low, median, high = np.percentile(
            final_prices, [tail_pct, 50, 100 - tail_pct]
        )
        return {'min': float(low), 'mean': float(median), 'max': float(high)}
|
|
|
|
|
|
|
|
def create_column(con):
    """
    Add a TEXT column named 'pricePrediction' to the table given by the
    module-level ``table_name`` unless the table already has one.
    """
    table_info = con.execute(f"PRAGMA table_info({table_name})").fetchall()

    # Element 1 of every PRAGMA table_info row is read as the column name.
    if any(row[1] == 'pricePrediction' for row in table_info):
        return

    print('yellow')
    con.execute(f"ALTER TABLE {table_name} ADD COLUMN pricePrediction TEXT")
    con.commit()
|
|
|
|
def update_database(pred_dict, symbol, con):
    """
    Store ``pred_dict`` as a JSON string in the 'pricePrediction' column of
    the row whose ``symbol`` matches, in the table given by the module-level
    ``table_name``, and commit the change.
    """
    statement = f"UPDATE {table_name} SET pricePrediction = ? WHERE symbol = ?"
    # The prediction dictionary is persisted as JSON text.
    parameters = (json.dumps(pred_dict), symbol)
    con.execute(statement, parameters)
    con.commit()
|
|
|
|
|
|
|
|
|
|
def process_symbol(ticker):
    """Forecast prices for one symbol and store the result in the database.

    Reads the ``date`` and ``close`` columns of the table named after
    ``ticker`` between the module-level ``start_date`` and ``end_date``,
    runs a ``GeometricBrownianMotion`` forecast for each horizon and writes
    the combined dictionary through ``update_database``. Uses the
    module-level connection ``con``.

    Args:
        ticker: Symbol whose per-symbol table is read and whose row is
            updated.

    Returns:
        None. Failures are reported on stdout instead of being raised so
        that one bad symbol does not abort the whole batch.
    """
    # Output label -> number of simulated rows for that horizon.
    horizons = {'1W': 7, '1M': 30, '3M': 90, '6M': 180}

    query = f"""
        SELECT
            date, close
        FROM
            "{ticker}"
        WHERE
            date BETWEEN ? AND ?
    """

    # ``except Exception`` rather than a bare ``except`` so that
    # KeyboardInterrupt and SystemExit still stop the worker.
    try:
        df = pd.read_sql_query(query, con, params=(start_date, end_date))

        try:
            pred_dict = {
                label: GeometricBrownianMotion(df, n_days).run()
                for label, n_days in horizons.items()
            }
        except Exception:
            # Best effort: when any horizon cannot be forecast, store zeros
            # for all of them.
            pred_dict = {
                label: {'min': 0, 'mean': 0, 'max': 0} for label in horizons
            }

        create_column(con)
        update_database(pred_dict, ticker, con)
    except Exception as exc:
        # Include the cause so a failed symbol can be diagnosed.
        print(f"Failed create price prediction for {ticker}: {exc}")
|
|
|
|
|
|
# Command-line selection of the SQLite file and of the symbol table in it.
args = parse_args()
db_name = args.db
table_name = args.table

# Module-level connection; the functions above read it as a global.
con = sqlite3.connect(f'backup_db/{db_name}.db')

# Every distinct symbol listed in the selected table.
symbols = [
    row[0]
    for row in con.execute(f"SELECT DISTINCT symbol FROM {table_name}").fetchall()
]

# Date window of the price history handed to the forecaster.
start_date = datetime(1970, 1, 1)
end_date = datetime.today()

# Number of concurrent workers
num_processes = 4  # You can adjust this based on your system's capabilities

# NOTE(review): the progress loop is assumed to run inside the executor
# context, and the connection to be closed after it -- confirm.
with concurrent.futures.ProcessPoolExecutor(max_workers=num_processes) as executor:
    futures = [executor.submit(process_symbol, symbol) for symbol in symbols]

    # Drain the futures through tqdm purely to display progress.
    for _ in tqdm(concurrent.futures.as_completed(futures), total=len(symbols), desc="Processing"):
        pass

con.close()
|
|
|
|
|
|
'''
|
|
query_template = """
|
|
SELECT
|
|
date, close
|
|
FROM
|
|
{ticker}
|
|
WHERE
|
|
date BETWEEN ? AND ?
|
|
"""
|
|
ticker = 'AMD'
|
|
start_date = datetime(2020,1,1)
|
|
end_date = datetime.today()
|
|
con = sqlite3.connect('stocks.db')
|
|
query = query_template.format(ticker=ticker)
|
|
df = pd.read_sql_query(query, con, params=(start_date, end_date))
|
|
#Compute the logarithmic returns
|
|
GeometricBrownianMotion(df).run()
|
|
''' |