update

commit 6462cfa259
parent 212ff2772a
@@ -464,5 +464,5 @@ def run(symbol):
         ujson.dump(final_dataset, file)
 
 if __name__ == "__main__":
-    for symbol in ['ORCL']: #['GOOGL','AMD','SAVE','BA','ADBE','NFLX','PLTR','MSFT','META','TSLA','NVDA','AAPL','GME']:
+    for symbol in ['ORCL']: #['ORCL','GOOGL','AMD','SAVE','BA','ADBE','NFLX','PLTR','MSFT','META','TSLA','NVDA','AAPL','GME']:
         run(symbol)
@@ -7,12 +7,13 @@ from tqdm import tqdm
 import os
 from dotenv import load_dotenv
 from aiohttp import TCPConnector
+import gc
 
 load_dotenv()
 api_key = os.getenv('FMP_API_KEY')
 
 # Rate limiting
-MAX_REQUESTS_PER_MINUTE = 100
+MAX_REQUESTS_PER_MINUTE = 500
 request_semaphore = asyncio.Semaphore(MAX_REQUESTS_PER_MINUTE)
 
 async def fetch_data(session, url):
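Note that an asyncio.Semaphore caps concurrent in-flight requests rather than requests per minute, so raising MAX_REQUESTS_PER_MINUTE to 500 widens the concurrency window; the time-based pacing comes from the sleep calls in run() further down. A minimal sketch of how such a semaphore typically wraps the fetch (the body of fetch_data is assumed here, not copied from the repo):

    import asyncio
    import aiohttp

    MAX_REQUESTS_PER_MINUTE = 500
    request_semaphore = asyncio.Semaphore(MAX_REQUESTS_PER_MINUTE)

    async def fetch_data(session, url):
        # At most 500 coroutines pass this point at once; the rest queue here.
        async with request_semaphore:
            async with session.get(url) as response:
                return await response.json()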
@@ -62,13 +63,12 @@ async def get_data(session, symbol, time_period):
 
 async def fetch_all_data(session, symbol, time_period):
     end_date = datetime.utcnow()
-    start_date = end_date - timedelta(days=365*20)
+    start_date = end_date - timedelta(days=180)
 
     step = timedelta(days=5) # Step of 5 days
     current_start_date = start_date
 
     all_data = [] # To accumulate all the data
 
     while current_start_date < end_date:
         current_end_date = min(current_start_date + step, end_date)
 
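Shrinking the lookback from 365*20 days to 180 cuts the number of 5-day request windows from roughly 1460 to 36 per symbol and interval. The window arithmetic, reproduced standalone:

    from datetime import datetime, timedelta

    # Standalone reproduction of the windowing loop above.
    end_date = datetime.utcnow()
    start_date = end_date - timedelta(days=180)  # was 365*20 before this commit
    step = timedelta(days=5)

    windows = []
    current_start_date = start_date
    while current_start_date < end_date:
        current_end_date = min(current_start_date + step, end_date)
        windows.append((current_start_date, current_end_date))
        current_start_date = current_end_date

    print(len(windows))  # 36 five-day windows; ~1460 under the old 20-year span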
@@ -87,6 +87,7 @@ async def fetch_all_data(session, symbol, time_period):
     # Sort the data by date before saving
     all_data.sort(key=lambda x: x['date'])
     await save_json(symbol, all_data, time_period)
+    gc.collect()
 
 
 async def save_json(symbol, data, interval):
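The gc.collect() after save_json forces an immediate collection pass, but it only reclaims the accumulated data once nothing references it anymore; here the list goes out of scope when fetch_all_data returns. A small illustration of that ordering (the names are illustrative, not from the repo):

    import gc

    # Dropping the reference is what releases the memory;
    # gc.collect() then reclaims cyclic garbage promptly instead of lazily.
    all_data = [{'date': f'2024-01-{d:02d}', 'close': 0.0} for d in range(1, 31)]
    all_data = None   # release the only reference to the accumulated list
    gc.collect()      # force an immediate collection pass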
@@ -97,13 +98,14 @@ async def save_json(symbol, data, interval):
 
 async def process_symbol(session, symbol):
     await get_data(session, symbol, '1hour')
+    await get_data(session, symbol, '30min')
 
 async def run():
     # Load symbols from databases
     con = sqlite3.connect('stocks.db')
     cursor = con.cursor()
     cursor.execute("PRAGMA journal_mode = wal")
-    cursor.execute("SELECT DISTINCT symbol FROM stocks")
+    cursor.execute("SELECT DISTINCT symbol FROM stocks WHERE symbol NOT LIKE '%.%'")
     stock_symbols = [row[0] for row in cursor.fetchall()]
 
     etf_con = sqlite3.connect('etf.db')
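The widened SELECT now skips any symbol containing a dot, which would exclude share-class style tickers such as 'BRK.A' (the exact motivation is an assumption here). A quick in-memory check of the filter:

    import sqlite3

    # Illustrative check: dotted symbols are excluded from the fetch list.
    con = sqlite3.connect(':memory:')
    con.execute("CREATE TABLE stocks (symbol TEXT)")
    con.executemany("INSERT INTO stocks VALUES (?)",
                    [('AAPL',), ('BRK.A',), ('ORCL',)])
    rows = con.execute(
        "SELECT DISTINCT symbol FROM stocks WHERE symbol NOT LIKE '%.%'"
    ).fetchall()
    print([r[0] for r in rows])  # ['AAPL', 'ORCL']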
@@ -115,19 +117,27 @@ async def run():
     etf_con.close()
 
     # List of total symbols to process
-    total_symbols = ['GOOGL'] # Use stock_symbols + etf_symbols if needed
+    total_symbols = stock_symbols # Use stock_symbols + etf_symbols if needed
 
-    # Setting up aiohttp connector with rate limiting
-    connector = TCPConnector(limit=MAX_REQUESTS_PER_MINUTE)
-    async with aiohttp.ClientSession(connector=connector) as session:
-        tasks = [process_symbol(session, symbol) for symbol in total_symbols]
-
-        # Use tqdm to track progress of tasks
-        for i, task in enumerate(tqdm(asyncio.as_completed(tasks), total=len(tasks)), 1):
-            await task # Ensure all tasks are awaited properly
-            if i % MAX_REQUESTS_PER_MINUTE == 0:
-                print(f'Processed {i} symbols, sleeping to respect rate limits...')
-                await asyncio.sleep(60) # Pause for 60 seconds to avoid hitting rate limits
+    chunk_size = max(1, len(total_symbols) // 500) # Divide the list into ~500 chunks; max() guards against a zero chunk size for short lists
+    chunks = [total_symbols[i:i + chunk_size] for i in range(0, len(total_symbols), chunk_size)]
+
+    for chunk in tqdm(chunks):
+        print(len(chunk))
+        connector = TCPConnector(limit=MAX_REQUESTS_PER_MINUTE)
+        async with aiohttp.ClientSession(connector=connector) as session:
+            tasks = [process_symbol(session, symbol) for symbol in chunk]
+
+            # Use tqdm to track progress of tasks
+            for i, task in enumerate(tqdm(asyncio.as_completed(tasks), total=len(tasks)), 1):
+                await task # Ensure all tasks are awaited properly
+                if i % MAX_REQUESTS_PER_MINUTE == 0:
+                    print(f'Processed {i} symbols, sleeping to respect rate limits...')
+                    gc.collect()
+                    await asyncio.sleep(30) # Pause for 30 seconds to avoid hitting rate limits
+
+        gc.collect()
+        await asyncio.sleep(30)
 
 
 if __name__ == "__main__":
     asyncio.run(run())
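Taken together, the loop above splits the symbol list into roughly 500 chunks, opens a fresh ClientSession per chunk, and sleeps 30 seconds between chunks so the API's per-minute budget can refill. A minimal self-contained sketch of the pattern, with a dummy workload standing in for process_symbol:

    import asyncio

    async def process_symbol(symbol):
        await asyncio.sleep(0.01)  # stand-in for the real fetch-and-save work

    async def run(symbols, target_chunks=500, pause=0):
        # Guard against a zero chunk size when the list is shorter than target_chunks.
        chunk_size = max(1, len(symbols) // target_chunks)
        chunks = [symbols[i:i + chunk_size] for i in range(0, len(symbols), chunk_size)]
        for chunk in chunks:
            tasks = [process_symbol(s) for s in chunk]
            for task in asyncio.as_completed(tasks):
                await task  # completions arrive in finish order, not submit order
            await asyncio.sleep(pause)  # 30s in the real script

    asyncio.run(run([f'SYM{i}' for i in range(10)]))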
@@ -421,7 +421,7 @@ async def get_stock(data: HistoricalPrice, api_key: str = Security(get_api_key))
         headers={"Content-Encoding": "gzip"}
     )
 
-    if time_period == '1day':
+    if time_period == 'max':
         try:
             with open(f"json/historical-price/max/{ticker}.json", 'rb') as file:
                 res = orjson.loads(file.read())
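On the API side, the branch key now matches the directory it reads from: requests for the 'max' time period (previously mislabeled '1day') are served from the precomputed json/historical-price/max/ files. A hypothetical helper, not in the repo, mirroring that read path:

    import orjson

    def load_max_history(ticker):
        # Serve the precomputed full-history file for this ticker.
        try:
            with open(f"json/historical-price/max/{ticker}.json", "rb") as file:
                return orjson.loads(file.read())
        except FileNotFoundError:
            return []  # fall through to whatever the endpoint does on a miss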