backend/app/cron_financial_statements.py
2025-01-04 14:07:31 +01:00

153 lines
7.0 KiB
Python

import os
import ujson
import orjson
import asyncio
import aiohttp
import sqlite3
from tqdm import tqdm
from dotenv import load_dotenv
# Load variables from a local .env file (if any) into the environment
# before the API key is read below.
load_dotenv()
# API key interpolated into every financialmodelingprep.com URL built in
# this file; os.getenv returns None when FMP_API_KEY is not set.
api_key = os.getenv('FMP_API_KEY')
# Configurations
# NOTE(review): include_current_quarter is not referenced anywhere in the
# visible code — confirm whether it is still needed.
include_current_quarter = False
max_concurrent_requests = 100 # Size of the asyncio.Semaphore created in run(); caps how many symbols are processed at once
async def fetch_data(session, url, symbol, attempt=0):
    """Issue a GET for *url* and return the decoded JSON body.

    Args:
        session: Object exposing ``get(url)`` as an async context manager
            (an ``aiohttp.ClientSession`` in this script).
        url: Fully-built request URL.
        symbol: Ticker, used only in the diagnostic messages.
        attempt: Accepted for interface compatibility; not used.

    Returns:
        The parsed JSON payload on HTTP 200, otherwise ``None``. Any
        exception raised while requesting or decoding is printed and also
        results in ``None``.
    """
    try:
        async with session.get(url) as response:
            # Guard clause: anything but 200 is reported and treated as "no data".
            if response.status != 200:
                print(f"Error fetching data for {symbol}: HTTP {response.status}")
                return None
            return await response.json()
    except Exception as e:
        # Best-effort fetch: one failed request must not stop the batch.
        print(f"Exception during fetching data for {symbol}: {e}")
        return None
async def save_json(symbol, period, data_type, data):
    """Persist *data* as JSON under the financial-statements tree.

    The file is written to
    ``json/financial-statements/<data_type>/<period>/<symbol>.json``; the
    directory is created when it does not exist yet.

    Args:
        symbol: Ticker used as the file name.
        period: Sub-directory name (e.g. ``'annual'``, ``'quarter'``, ``'ttm'``).
        data_type: Statement category directory (e.g. ``'ratios'``).
        data: JSON-serialisable payload.
    """
    directory = f"json/financial-statements/{data_type}/{period}"
    os.makedirs(directory, exist_ok=True)
    with open(f"{directory}/{symbol}.json", 'w') as file:
        # ujson.dump streams to the file handle. The previous ujson.dumps call
        # only built a string that was thrown away, leaving the file empty.
        ujson.dump(data, file)
# Ratio fields delivered as fractions that are rescaled to percentages.
_RATIO_MARGIN_KEYS = (
    'grossProfitMargin',
    'operatingProfitMargin',
    'pretaxProfitMargin',
    'netProfitMargin',
)


def _load_statement(path):
    """Return the parsed JSON stored at *path*, or None if it is missing or unreadable."""
    try:
        with open(path, "rb") as file:
            return orjson.loads(file.read())
    except FileNotFoundError:
        return None
    except orjson.JSONDecodeError as e:
        print(f"Skipping unreadable JSON file {path}: {e}")
        return None


def _to_percent(value):
    """Scale a fractional value to a percentage rounded to 2 decimals; None stays None."""
    return None if value is None else round(value * 100, 2)


async def calculate_margins(symbol):
    """Add percentage margins to the stored ratios of *symbol*.

    For both the ``annual`` and ``quarter`` periods the income statement,
    cash-flow statement and ratios files are loaded from
    ``json/financial-statements/``. Each ratios row then receives
    ``freeCashFlowMargin`` and ``ebitdaMargin`` (computed against revenue)
    and has its four profit-margin fields rescaled from fractions to
    percentages. Rows whose revenue is missing or zero get ``None`` for all
    six fields. The updated ratios are written back to the ratios file.

    A period is skipped (nothing is written) when any of the three files is
    missing, unreadable, empty or not a JSON list.

    Args:
        symbol: Ticker whose files should be processed.
    """
    root = "json/financial-statements"
    for period in ('annual', 'quarter'):
        ratios_path = f"{root}/ratios/{period}/{symbol}.json"
        income_data = _load_statement(f"{root}/income-statement/{period}/{symbol}.json")
        cash_flow_data = _load_statement(f"{root}/cash-flow-statement/{period}/{symbol}.json")
        ratio_data = _load_statement(ratios_path)

        datasets = (ratio_data, income_data, cash_flow_data)
        if not all(isinstance(rows, list) and rows for rows in datasets):
            continue

        # Rows are paired positionally.
        # NOTE(review): assumes the three files list the same reporting
        # periods in the same order — confirm against the API.
        for ratio_item, income_item, cash_flow_item in zip(*datasets):
            revenue = income_item.get('revenue', 0)
            if revenue:  # None and 0 both mean "no usable revenue"
                free_cash_flow = cash_flow_item.get('freeCashFlow', 0)
                ebitda = income_item.get('ebitda', 0)
                ratio_item['freeCashFlowMargin'] = _to_percent(
                    None if free_cash_flow is None else free_cash_flow / revenue
                )
                ratio_item['ebitdaMargin'] = _to_percent(
                    None if ebitda is None else ebitda / revenue
                )
                # NOTE(review): this rescaling is not idempotent. If the
                # ratios file was not refreshed since the last run, the
                # values are multiplied by 100 again.
                for key in _RATIO_MARGIN_KEYS:
                    ratio_item[key] = _to_percent(ratio_item.get(key))
            else:
                for key in ('freeCashFlowMargin', 'ebitdaMargin') + _RATIO_MARGIN_KEYS:
                    ratio_item[key] = None

        # orjson.dumps already returns bytes, which is what a "wb" handle
        # expects. Serialise ratio_data: the old code referenced an undefined
        # name and decoded the bytes to str before writing.
        with open(ratios_path, "wb") as file:
            file.write(orjson.dumps(ratio_data, option=orjson.OPT_SERIALIZE_NUMPY))
async def _throttle(request_counter, limit=500, pause_seconds=60):
    """Count one request and pause once the shared counter reaches *limit*."""
    request_counter[0] += 1
    if request_counter[0] >= limit:
        await asyncio.sleep(pause_seconds)
        request_counter[0] = 0  # Start a fresh window after the pause


async def _fetch_and_save(session, url, symbol, period, data_type, request_counter):
    """Fetch *url*, store a non-empty payload via save_json, and count the request."""
    data = await fetch_data(session, url, symbol)
    if data:
        await save_json(symbol, period, data_type, data)
    await _throttle(request_counter)


async def get_financial_statements(session, symbol, semaphore, request_counter):
    """Download and store all financial-statement datasets for *symbol*.

    While holding *semaphore*, the function requests, for the ``quarter``
    and ``annual`` periods, the key metrics, income statement, balance
    sheet, cash-flow statement and ratios plus the three growth datasets.
    It then requests the TTM key metrics and the owner-earnings data. Every
    non-empty response is written with ``save_json``. Afterwards the
    derived margins are computed with ``calculate_margins``.

    Args:
        session: HTTP session passed through to ``fetch_data``.
        symbol: Ticker to download.
        semaphore: Async context manager limiting concurrent symbols.
        request_counter: One-element list shared by all tasks. It is
            incremented per request and triggers a 60 s pause at 500.
    """
    base_url = "https://financialmodelingprep.com/api/v3"
    periods = ('quarter', 'annual')
    # Regular statements first, then the growth variants, as one ordered list.
    data_types = (
        'key-metrics',
        'income-statement',
        'balance-sheet-statement',
        'cash-flow-statement',
        'ratios',
        'income-statement-growth',
        'balance-sheet-statement-growth',
        'cash-flow-statement-growth',
    )

    async with semaphore:
        for period in periods:
            for data_type in data_types:
                url = f"{base_url}/{data_type}/{symbol}?period={period}&apikey={api_key}"
                await _fetch_and_save(session, url, symbol, period, data_type, request_counter)

        # Trailing-twelve-month key metrics. This request is now counted
        # toward the rate limit like every other one.
        ttm_url = f"{base_url}/key-metrics-ttm/{symbol}?apikey={api_key}"
        await _fetch_and_save(session, ttm_url, symbol, 'ttm', 'key-metrics', request_counter)

        # Owner earnings live under the v4 API, hence the separate base URL.
        owner_earnings_url = f"https://financialmodelingprep.com/api/v4/owner_earnings?symbol={symbol}&apikey={api_key}"
        await _fetch_and_save(session, owner_earnings_url, symbol, 'quarter', 'owner-earnings', request_counter)

    try:
        await calculate_margins(symbol)
    except Exception as e:
        # A post-processing failure for one symbol must not abort the
        # gather() that drives every other symbol.
        print(f"Error calculating margins for {symbol}: {e}")
async def run():
    """Download financial statements for every symbol listed in stocks.db.

    Reads the distinct symbols that contain no ``.`` from the ``stocks``
    table, then schedules one ``get_financial_statements`` task per symbol
    on a shared ``aiohttp.ClientSession``. Concurrency is bounded by a
    semaphore of ``max_concurrent_requests`` and a shared request counter
    is handed to every task. A progress bar advances as tasks finish.
    """
    con = sqlite3.connect('stocks.db')
    try:
        cursor = con.cursor()
        cursor.execute("PRAGMA journal_mode = wal")
        cursor.execute("SELECT DISTINCT symbol FROM stocks WHERE symbol NOT LIKE '%.%'")
        symbols = [row[0] for row in cursor.fetchall()]
    finally:
        # Release the database handle even if a query fails.
        con.close()

    semaphore = asyncio.Semaphore(max_concurrent_requests)
    request_counter = [0]  # Using a list to keep a mutable counter across async tasks

    async with aiohttp.ClientSession() as session:
        # Drive the bar from task completion. Wrapping the scheduling loop in
        # tqdm would reach 100% as soon as the tasks are created.
        with tqdm(total=len(symbols)) as progress:
            tasks = []
            for symbol in symbols:
                task = asyncio.create_task(
                    get_financial_statements(session, symbol, semaphore, request_counter)
                )
                task.add_done_callback(lambda _finished: progress.update(1))
                tasks.append(task)
            await asyncio.gather(*tasks)
# Script entry point: create an event loop and execute the download job.
if __name__ == "__main__":
    asyncio.run(run())