diff --git a/app/create_crypto_db.py b/app/create_crypto_db.py
index 8d3620c..27ce5f5 100755
--- a/app/create_crypto_db.py
+++ b/app/create_crypto_db.py
@@ -209,7 +209,6 @@ class CryptoDatabase:
         urls = [
             f"https://financialmodelingprep.com/api/v3/quote/{symbol}?apikey={API_KEY}",
-            f"https://financialmodelingprep.com/api/v4/crypto_news?tickers={symbol}&limit=50&apikey={API_KEY}",
             f"https://api.coingecko.com/api/v3/coins/markets?vs_currency=usd&ids={crypto_id}"
         ]
@@ -231,9 +230,7 @@ class CryptoDatabase:
                     'previousClose': parsed_data[0]['previousClose'],
                 }
                 fundamental_data.update(data_dict)
-
-            elif isinstance(parsed_data, list) and "crypto_news" in url:
-                fundamental_data['crypto_news'] = ujson.dumps(parsed_data)
+
             elif "coingecko" in url:
                 headers = {
                     "accept": "application/json",
diff --git a/app/create_etf_db.py b/app/create_etf_db.py
index 8c2eb50..f6adf75 100755
--- a/app/create_etf_db.py
+++ b/app/create_etf_db.py
@@ -188,7 +188,6 @@ class ETFDatabase:
             f"https://financialmodelingprep.com/api/v3/etf-country-weightings/{symbol}?apikey={api_key}",
             f"https://financialmodelingprep.com/api/v3/quote/{symbol}?apikey={api_key}",
             f"https://financialmodelingprep.com/api/v3/historical-price-full/stock_dividend/{symbol}?apikey={api_key}",
-            f"https://financialmodelingprep.com/api/v3/stock_news?tickers={symbol}&limit=50&apikey={api_key}",
             f"https://financialmodelingprep.com/api/v4/institutional-ownership/institutional-holders/symbol-ownership-percent?date=2023-09-30&symbol={symbol}&page=0&apikey={api_key}",
         ]
@@ -228,8 +227,7 @@ class ETFDatabase:
                 }
                 fundamental_data.update(data_dict)

-            elif isinstance(parsed_data, list) and "stock_news" in url:
-                fundamental_data['etf_news'] = ujson.dumps(parsed_data)
+
             elif isinstance(parsed_data, list) and "etf-holder" in url:
                 fundamental_data['holding'] = ujson.dumps(parsed_data)
                 data_dict = {'numberOfHoldings': len(json.loads(fundamental_data['holding']))}
diff --git a/app/create_stock_db.py b/app/create_stock_db.py
index 3483386..aaa6857 100755
--- a/app/create_stock_db.py
+++ b/app/create_stock_db.py
@@ -102,7 +102,6 @@ class StockDatabase:
             f"https://financialmodelingprep.com/api/v3/quote/{symbol}?apikey={api_key}",
             f"https://financialmodelingprep.com/api/v3/income-statement/{symbol}?period=quarter&apikey={api_key}",
             f"https://financialmodelingprep.com/api/v3/income-statement-growth/{symbol}?period=quarter&apikey={api_key}",
-            f"https://financialmodelingprep.com/api/v3/stock_news?tickers={symbol}&limit=50&apikey={api_key}",
             f"https://financialmodelingprep.com/api/v4/esg-environmental-social-governance-data-ratings?symbol={symbol}&apikey={api_key}",
             f"https://financialmodelingprep.com/api/v4/esg-environmental-social-governance-data?symbol={symbol}&apikey={api_key}",
             f"https://financialmodelingprep.com/api/v3/historical-price-full/stock_dividend/{symbol}?limit=400&apikey={api_key}",
@@ -285,9 +284,6 @@ class StockDatabase:
             elif isinstance(parsed_data, list) and "cash-flow-statement-growth/" in url:
                 # Handle list response, save as JSON object
                 fundamental_data['cashflow_growth'] = ujson.dumps(parsed_data)
-            elif isinstance(parsed_data, list) and "stock_news" in url:
-                # Handle list response, save as JSON object
-                fundamental_data['stock_news'] = ujson.dumps(parsed_data)
             elif isinstance(parsed_data, list) and "esg-environmental-social-governance-data?" in url:
                 # Handle list response, save as JSON object
                 fundamental_data['esg_data'] = ujson.dumps(parsed_data[0])
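Note: with the removals above, the `crypto_news`, `etf_news`, and `stock_news` columns are no longer written by the create scripts; per-symbol news now lives under `json/market-news/companies/` (see `cron_company_news.py` below). If the now-stale columns should also be dropped from the databases, a one-off cleanup could look like the sketch below. This is not part of the diff: it assumes the database/table/column names shown in this changeset and SQLite >= 3.35 (the first release supporting `ALTER TABLE ... DROP COLUMN`).

```python
import sqlite3

# Hypothetical one-off cleanup (not part of this diff): drop the news
# columns that the create scripts above no longer populate.
for db_name, table, column in [
    ('stocks.db', 'stocks', 'stock_news'),
    ('etf.db', 'etfs', 'etf_news'),
    ('crypto.db', 'cryptos', 'crypto_news'),
]:
    with sqlite3.connect(db_name) as con:
        try:
            con.execute(f"ALTER TABLE {table} DROP COLUMN {column}")
        except sqlite3.OperationalError:
            pass  # column already dropped, or SQLite < 3.35
```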
diff --git a/app/cron_company_news.py b/app/cron_company_news.py
new file mode 100644
index 0000000..55bf5d3
--- /dev/null
+++ b/app/cron_company_news.py
@@ -0,0 +1,86 @@
+import ujson
+import asyncio
+import aiohttp
+import sqlite3
+from tqdm import tqdm
+from dotenv import load_dotenv
+import os
+
+load_dotenv()
+api_key = os.getenv('FMP_API_KEY')
+
+async def filter_and_deduplicate(data, excluded_domains=None, deduplicate_key='title'):
+    """
+    Filter out items with specified domains in their URL and remove duplicates based on a specified key.
+
+    Args:
+        data (list): List of dictionaries containing item data.
+        excluded_domains (list): List of domain strings to exclude. Defaults to ['prnewswire.com', 'globenewswire.com', 'accesswire.com'].
+        deduplicate_key (str): The key to use for deduplication. Defaults to 'title'.
+
+    Returns:
+        list: Filtered and deduplicated list of items.
+    """
+    if excluded_domains is None:
+        excluded_domains = ['prnewswire.com', 'globenewswire.com', 'accesswire.com']
+
+    seen_keys = set()
+    filtered_data = []
+
+    for item in data:
+        if not any(domain in item['url'] for domain in excluded_domains):
+            key = item.get(deduplicate_key)
+            if key and key not in seen_keys:
+                filtered_data.append(item)
+                seen_keys.add(key)
+
+    return filtered_data
+
+async def fetch_news(session, url):
+    async with session.get(url) as response:
+        return await response.json()
+
+async def save_news(data, symbol):
+    os.makedirs("json/market-news/companies", exist_ok=True)  # ensure the output directory exists
+    with open(f"json/market-news/companies/{symbol}.json", 'w') as file:
+        ujson.dump(data, file)
+
+async def process_symbols(symbols):
+    limit = 200
+    chunk_size = 50  # Adjust this value based on API limitations
+
+    async with aiohttp.ClientSession() as session:
+        for i in tqdm(range(0, len(symbols), chunk_size)):
+            chunk = symbols[i:i+chunk_size]
+            company_tickers = ','.join(chunk)
+            url = f'https://financialmodelingprep.com/api/v3/stock_news?tickers={company_tickers}&limit={limit}&apikey={api_key}'
+
+            data = await fetch_news(session, url)
+
+            custom_domains = ['prnewswire.com', 'globenewswire.com', 'accesswire.com']
+            data = await filter_and_deduplicate(data, excluded_domains=custom_domains)
+
+            # The batched response mixes all tickers in the chunk, so keep only each symbol's own articles.
+            tasks = [save_news([item for item in data if item.get('symbol') == symbol], symbol) for symbol in chunk]
+            await asyncio.gather(*tasks)
+
+def get_symbols(db_name, table_name):
+    with sqlite3.connect(db_name) as con:
+        cursor = con.cursor()
+        cursor.execute("PRAGMA journal_mode = wal")
+        cursor.execute(f"SELECT DISTINCT symbol FROM {table_name} WHERE symbol NOT LIKE '%.%'")
+        return [row[0] for row in cursor.fetchall()]
+
+async def main():
+    stock_symbols = get_symbols('stocks.db', 'stocks')
+    etf_symbols = get_symbols('etf.db', 'etfs')
+    crypto_symbols = get_symbols('crypto.db', 'cryptos')
+    total_symbols = stock_symbols + etf_symbols + crypto_symbols
+
+    await process_symbols(total_symbols)
+
+if __name__ == "__main__":
+    try:
+        asyncio.run(main())
+    except Exception as e:
+        print(f"An error occurred: {e}")
\ No newline at end of file
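Note: `filter_and_deduplicate` is declared `async` even though it never awaits, so it still has to be driven by an event loop. A quick sanity check of its behavior; the item shape (`symbol`/`title`/`url`) mirrors FMP's `stock_news` payload, the sample values are invented, and the import assumes the snippet runs from the `app/` directory:

```python
import asyncio
from cron_company_news import filter_and_deduplicate

sample = [
    {"symbol": "AAPL", "title": "Apple hits record high", "url": "https://example.com/a"},
    {"symbol": "AAPL", "title": "Apple hits record high", "url": "https://other.site/b"},  # duplicate title, dropped
    {"symbol": "AAPL", "title": "Press release", "url": "https://www.prnewswire.com/x"},   # excluded domain, dropped
]

result = asyncio.run(filter_and_deduplicate(sample))
print([item["url"] for item in result])  # ['https://example.com/a']
```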
diff --git a/app/main.py b/app/main.py
index 95d2210..bdd5abf 100755
--- a/app/main.py
+++ b/app/main.py
@@ -579,31 +579,29 @@ async def stock_news(data: TickerData, api_key: str = Security(get_api_key)):
     cached_result = redis_client.get(cache_key)
     if cached_result:
-        return orjson.loads(cached_result)
+        return StreamingResponse(
+            io.BytesIO(cached_result),
+            media_type="application/json",
+            headers={"Content-Encoding": "gzip"})

-    if ticker in etf_symbols:
-        table_name = 'etfs'
-        column_name = 'etf_news'
-        query_con = etf_con
-    elif ticker in crypto_symbols:
-        table_name = 'cryptos'
-        column_name = 'crypto_news'
-        query_con = crypto_con
-    else:
-        table_name = 'stocks'
-        column_name = 'stock_news'
-        query_con = con
-
-    query_template = f"SELECT {column_name} FROM {table_name} WHERE symbol = ?"
-    df = pd.read_sql_query(query_template, query_con, params=(ticker,))
     try:
-        res = orjson.loads(df[column_name].iloc[0])
+        with open(f"json/market-news/companies/{ticker}.json", 'rb') as file:
+            res = orjson.loads(file.read())
     except:
         res = []

-    redis_client.set(cache_key, orjson.dumps(res), 3600*3600) # Set cache expiration time to 1 hour
-    return res
+    data = orjson.dumps(res)
+    compressed_data = gzip.compress(data)
+    redis_client.set(cache_key, compressed_data)
+    redis_client.expire(cache_key, 60*5)
+
+    return StreamingResponse(
+        io.BytesIO(compressed_data),
+        media_type="application/json",
+        headers={"Content-Encoding": "gzip"}
+    )
+

 @app.post("/stock-dividend")
diff --git a/app/primary_cron_job.py b/app/primary_cron_job.py
index 82b4e05..cdb3d83 100755
--- a/app/primary_cron_job.py
+++ b/app/primary_cron_job.py
@@ -153,6 +153,7 @@ def run_cron_market_movers():

 def run_cron_market_news():
     run_command(["python3", "cron_market_news.py"])
+    run_command(["python3", "cron_company_news.py"])
     command = [
         "sudo", "rsync", "-avz", "-e", "ssh",
         "/root/backend/app/json/market-news",
@@ -457,6 +458,7 @@ def run_cramer_tracker():
     ]
     run_command(command)

+
 # Create functions to run each schedule in a separate thread
 def run_threaded(job_func):
     job_thread = threading.Thread(target=job_func)
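Note: the rewritten handler returns a pre-compressed body with an explicit `Content-Encoding: gzip` header, so most HTTP clients decompress it transparently. A minimal client-side sketch; the `/stock-news` route and the API-key header name are assumptions, since the diff shows neither the route decorator nor the auth scheme behind `get_api_key`:

```python
import requests

# requests (via urllib3) honors Content-Encoding: gzip and decompresses
# the body automatically, so .json() sees plain JSON.
r = requests.post(
    "http://localhost:8000/stock-news",   # assumed route for the stock_news handler
    json={"ticker": "AAPL"},
    headers={"X-API-KEY": "..."},         # assumed auth header name
)
news = r.json()
print(len(news), "articles")
```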