bugfixing stocks screener
commit 8872b5d1c7 (parent 63cddb5caf)
@@ -38,11 +38,24 @@ def calculate_fdi(high, low, close, window=30):
    return (2 - n1) * 100


def hurst_exponent(ts, max_lag=100):
    lags = range(2, max_lag)
    tau = [np.sqrt(np.std(np.subtract(ts[lag:], ts[:-lag]))) for lag in lags]
    poly = np.polyfit(np.log(lags), np.log(tau), 1)
    return poly[0] * 2.0


def find_top_correlated_features(df, target_column, exclude_columns, top_n=10):
    # Ensure the target column is not in the exclude list
    exclude_columns = [col for col in exclude_columns if col != target_column]

    # Select columns to consider for correlation
    columns_to_consider = [col for col in df.columns if col not in exclude_columns + [target_column]]

    # Calculate the correlation matrix
    correlation_matrix = df[columns_to_consider + [target_column]].corr()

    # Get correlations with the target column, excluding the target column itself
    target_correlations = correlation_matrix[target_column].drop(target_column)

    # Sort by absolute correlation value and select top N
    top_correlated = target_correlations.abs().sort_values(ascending=False).head(top_n)

    return top_correlated


async def download_data(ticker, con, start_date, end_date):
    try:
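Note (not part of the commit): a minimal usage sketch of the helpers above, with made-up data and column names; it assumes numpy and pandas are imported as in the rest of the script and that both functions are in scope.

    import numpy as np
    import pandas as pd

    rng = np.random.default_rng(0)
    df = pd.DataFrame({
        'date': pd.date_range('2024-01-01', periods=200, freq='D'),
        'price': rng.normal(100, 5, 200).cumsum(),
        'feat_a': rng.normal(size=200),
        'feat_b': rng.normal(size=200),
        'Target': rng.integers(0, 2, 200),   # hypothetical binary label
    })

    # Hurst exponent of the price series: >0.5 suggests trending, <0.5 mean reversion, ~0.5 a random walk.
    h = hurst_exponent(df['price'].values, max_lag=50)

    # Features ranked by absolute Pearson correlation with 'Target'; 'date' and 'price' never enter .corr().
    top = find_top_correlated_features(df, 'Target', ['date', 'price'], top_n=5)
    print(h)
    print(top)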
@@ -53,9 +66,9 @@ async def download_data(ticker, con, start_date, end_date):
            #f"json/financial-statements/cash-flow-statement/quarter/{ticker}.json",
            #f"json/financial-statements/income-statement/quarter/{ticker}.json",
            #f"json/financial-statements/balance-sheet-statement/quarter/{ticker}.json",
            #f"json/financial-statements/income-statement-growth/quarter/{ticker}.json",
            #f"json/financial-statements/balance-sheet-statement-growth/quarter/{ticker}.json",
            #f"json/financial-statements/cash-flow-statement-growth/quarter/{ticker}.json",
            f"json/financial-statements/income-statement-growth/quarter/{ticker}.json",
            f"json/financial-statements/balance-sheet-statement-growth/quarter/{ticker}.json",
            f"json/financial-statements/cash-flow-statement-growth/quarter/{ticker}.json",
            #f"json/financial-statements/key-metrics/quarter/{ticker}.json",
            #f"json/financial-statements/owner-earnings/quarter/{ticker}.json",
        ]
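Note (not part of the commit): with the three growth endpoints uncommented above, the growth files appear to sit at consecutive low indices of statements, which is what the index changes in the next hunk (statements[4]-[6] becoming statements[2]-[4]) rely on. The mapping below is inferred, not shown in this hunk:

    # statements[0], statements[1] -> ratios and key-metrics files (assumed, defined above this hunk)
    # statements[2] -> income-statement-growth
    # statements[3] -> balance-sheet-statement-growth
    # statements[4] -> cash-flow-statement-growth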
@@ -90,26 +103,23 @@ async def download_data(ticker, con, start_date, end_date):

        balance = await load_json_from_file(statements[3])
        balance = await filter_data(balance, ignore_keys)

        income_growth = await load_json_from_file(statements[4])
        '''
        income_growth = await load_json_from_file(statements[2])
        income_growth = await filter_data(income_growth, ignore_keys)

        balance_growth = await load_json_from_file(statements[5])
        balance_growth = await load_json_from_file(statements[3])
        balance_growth = await filter_data(balance_growth, ignore_keys)

        cashflow_growth = await load_json_from_file(statements[6])
        cashflow_growth = await load_json_from_file(statements[4])
        cashflow_growth = await filter_data(cashflow_growth, ignore_keys)

        owner_earnings = await load_json_from_file(statements[7])
        owner_earnings = await filter_data(owner_earnings, ignore_keys)
        '''

        # Combine all the data
        combined_data = defaultdict(dict)

        # Merge the data based on 'date'
        for entries in zip(ratios, key_metrics):
        for entries in zip(ratios, key_metrics, income_growth, balance_growth, cashflow_growth):
            for entry in entries:
                date = entry['date']
                for key, value in entry.items():
@@ -117,8 +127,6 @@ async def download_data(ticker, con, start_date, end_date):
                    combined_data[date][key] = value

        combined_data = list(combined_data.values())
        #Generate more features
        #combined_data = calculate_combinations(combined_data)

        # Download historical stock data using yfinance
        df = yf.download(ticker, start=start_date, end=end_date, interval="1d").reset_index()
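Note (not part of the commit): a minimal sketch of the date-keyed merge performed above, with tiny made-up records. The real code zips the lists loaded from statements[...] and folds every field into one dict per quarter-end date; zip silently truncates to the shortest list, so the inputs are assumed to be aligned.

    from collections import defaultdict

    # Toy stand-ins for the lists the script builds.
    ratios = [{'date': '2024-03-31', 'currentRatio': 1.1}]
    key_metrics = [{'date': '2024-03-31', 'marketCap': 3.1e12}]
    income_growth = [{'date': '2024-03-31', 'growthRevenue': 0.05}]

    combined_data = defaultdict(dict)
    for entries in zip(ratios, key_metrics, income_growth):
        for entry in entries:
            date = entry['date']
            for key, value in entry.items():
                combined_data[date][key] = value

    combined_data = list(combined_data.values())
    # -> [{'date': '2024-03-31', 'currentRatio': 1.1, 'marketCap': 3.1e12, 'growthRevenue': 0.05}]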
@@ -386,11 +394,13 @@ async def train_process(tickers, con):
    df_train = pd.concat(train_list, ignore_index=True)
    df_test = pd.concat(test_list, ignore_index=True)

    best_features = [col for col in df_train.columns if col not in ['date','price','Target']]

    df_train = df_train.sample(frac=1).reset_index(drop=True) #df_train.reset_index(drop=True)
    print(df_train)
    top_correlated = find_top_correlated_features(df_train, 'Target', ['date', 'price'])
    print(top_correlated)
    #print(df_train)
    print('======Train Set Datapoints======')
    print(len(df_train))
@@ -405,7 +415,7 @@ async def train_process(tickers, con):


async def run():

    train_mode = False
    train_mode = True
    con = sqlite3.connect('stocks.db')
    cursor = con.cursor()
    cursor.execute("PRAGMA journal_mode = wal")
@@ -413,7 +423,7 @@ async def run():
    if train_mode:
        #Train first model
        cursor.execute("SELECT DISTINCT symbol FROM stocks WHERE marketCap >= 300E9 AND symbol NOT LIKE '%.%'")
        stock_symbols = [row[0] for row in cursor.fetchall()]
        stock_symbols = ['AAPL','AWR','TSLA','MSFT'] #[row[0] for row in cursor.fetchall()]
        print('Number of Stocks')
        print(len(stock_symbols))
        await train_process(stock_symbols, con)
Binary file not shown.
@@ -16,7 +16,7 @@ from sklearn.feature_selection import SelectKBest, f_classif
from tensorflow.keras.backend import clear_session
from keras import regularizers
from keras.layers import Layer

from tensorflow.keras import backend as K

from tqdm import tqdm
from collections import defaultdict
@@ -26,7 +26,31 @@ import aiofiles
import pickle
import time

# Based on the paper: https://arxiv.org/pdf/1603.00751
class SelfAttention(Layer):
    def __init__(self, **kwargs):
        super(SelfAttention, self).__init__(**kwargs)

    def build(self, input_shape):
        self.W = self.add_weight(name='attention_weight', shape=(input_shape[-1], 1),
                                 initializer='random_normal', trainable=True)
        super(SelfAttention, self).build(input_shape)

    def call(self, x):
        # Alignment scores. Pass them through tanh function
        e = K.tanh(K.dot(x, self.W))
        # Remove dimension of size 1
        e = K.squeeze(e, axis=-1)
        # Compute the weights
        alpha = K.softmax(e)
        # Reshape to tensor of same shape as x for multiplication
        alpha = K.expand_dims(alpha, axis=-1)
        # Compute the context vector
        context = x * alpha
        context = K.sum(context, axis=1)
        return context, alpha

    def compute_output_shape(self, input_shape):
        return (input_shape[0], input_shape[-1]), (input_shape[0], input_shape[1])


class ScorePredictor:
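Note (not part of the commit): a minimal sketch of calling the SelfAttention layer above on a dummy batch to show the shapes it produces; the batch and feature sizes are made up, and the class is assumed to be in scope.

    import tensorflow as tf

    x = tf.random.normal((4, 32, 1))        # 4 samples, 32 "timesteps", 1 feature, as after Reshape((32, 1))
    context, alpha = SelfAttention()(x)

    print(context.shape)  # (4, 1)     -> one attention-weighted sum per sample
    print(alpha.shape)    # (4, 32, 1) -> softmax weights over the 32 steps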
@@ -41,28 +65,30 @@ class ScorePredictor:
        inputs = Input(shape=(139,))

        # First dense layer
        x = Dense(128, activation='leaky_relu')(inputs)
        x = Dropout(0.5)(x)
        x = Dense(128, activation='elu')(inputs)
        x = Dropout(0.2)(x)
        x = BatchNormalization()(x)

        # Additional dense layers
        for units in [64,32]:
            x = Dense(units, activation='leaky_relu')(x)
            x = Dropout(0.3)(x)
            x = Dense(units, activation='elu')(x)
            x = Dropout(0.2)(x)
            x = BatchNormalization()(x)

        # Reshape for attention mechanism
        x = Reshape((32, 1))(x)

        # Attention mechanism
        attention = Dense(32, activation='leaky_relu')(x)
        attention = Dense(1, activation='softmax')(attention)
        #attention = Dense(32, activation='elu')(x)
        #attention = Dense(1, activation='softmax')(attention)

        # Apply attention
        x = Multiply()([x, attention])
        #x = Multiply()([x, attention])

        x, _ = SelfAttention()(x)

        # Global average pooling
        x = GlobalAveragePooling1D()(x)
        #x = GlobalAveragePooling1D()(x)

        # Output layer (for class probabilities)
        outputs = Dense(2, activation='softmax')(x) # Two neurons for class probabilities with softmax
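Note (not part of the commit): a self-contained sketch of how the block above appears to read after the change (elu activations, Dropout(0.2), Reshape((32, 1)), then SelfAttention feeding the softmax head). The imports and the build_model wrapper are assumptions for illustration; only the layer calls are taken from the hunk.

    from tensorflow.keras import Model
    from tensorflow.keras.layers import Input, Dense, Dropout, BatchNormalization, Reshape

    def build_model():
        inputs = Input(shape=(139,))

        x = Dense(128, activation='elu')(inputs)
        x = Dropout(0.2)(x)
        x = BatchNormalization()(x)

        for units in [64, 32]:
            x = Dense(units, activation='elu')(x)
            x = Dropout(0.2)(x)
            x = BatchNormalization()(x)

        # 32 features become 32 "timesteps" of size 1 so attention can weight them.
        x = Reshape((32, 1))(x)

        # SelfAttention (defined earlier in the file) returns (context, weights); only the context is kept.
        x, _ = SelfAttention()(x)

        outputs = Dense(2, activation='softmax')(x)
        return Model(inputs, outputs)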
@@ -15,6 +15,7 @@ import re
import hashlib
import glob
from tqdm import tqdm
from utils.country_list import country_list

from dotenv import load_dotenv
import os
app/utils/__init__.py (empty file)
app/utils/__pycache__/__init__.cpython-310.pyc (binary file not shown)