update company news in realtime

2024-08-09 11:15:43 +02:00 · 2024-08-09 11:15:43 +02:00 · eba8c45cfe
commit eba8c45cfe
parent 03d069ea0e
6 changed files with 106 additions and 30 deletions
--- a/app/create_crypto_db.py
+++ b/app/create_crypto_db.py
@ -209,7 +209,6 @@ class CryptoDatabase:

            urls = [
                f"https://financialmodelingprep.com/api/v3/quote/{symbol}?apikey={API_KEY}",
-                f"https://financialmodelingprep.com/api/v4/crypto_news?tickers={symbol}&limit=50&apikey={API_KEY}",
                f"https://api.coingecko.com/api/v3/coins/markets?vs_currency=usd&ids={crypto_id}"
            ]

@ -231,9 +230,7 @@ class CryptoDatabase:
                                        'previousClose': parsed_data[0]['previousClose'],
                                        }
                            fundamental_data.update(data_dict)
-
-                        elif isinstance(parsed_data, list) and "crypto_news" in url:
-                            fundamental_data['crypto_news'] = ujson.dumps(parsed_data)
+                            
                        elif "coingecko" in url:
                            headers = {
                                "accept": "application/json",
--- a/app/create_etf_db.py
+++ b/app/create_etf_db.py
@ -188,7 +188,6 @@ class ETFDatabase:
                f"https://financialmodelingprep.com/api/v3/etf-country-weightings/{symbol}?apikey={api_key}",
                f"https://financialmodelingprep.com/api/v3/quote/{symbol}?apikey={api_key}",
                f"https://financialmodelingprep.com/api/v3/historical-price-full/stock_dividend/{symbol}?apikey={api_key}",
-                f"https://financialmodelingprep.com/api/v3/stock_news?tickers={symbol}&limit=50&apikey={api_key}",
                f"https://financialmodelingprep.com/api/v4/institutional-ownership/institutional-holders/symbol-ownership-percent?date=2023-09-30&symbol={symbol}&page=0&apikey={api_key}",
            ]

@ -228,8 +227,7 @@ class ETFDatabase:
                                        }
                            fundamental_data.update(data_dict)

-                        elif isinstance(parsed_data, list) and "stock_news" in url:
-                            fundamental_data['etf_news'] = ujson.dumps(parsed_data)
+
                        elif isinstance(parsed_data, list) and "etf-holder" in url:
                            fundamental_data['holding'] = ujson.dumps(parsed_data)
                            data_dict = {'numberOfHoldings': len(json.loads(fundamental_data['holding']))}
--- a/app/create_stock_db.py
+++ b/app/create_stock_db.py
@ -102,7 +102,6 @@ class StockDatabase:
                f"https://financialmodelingprep.com/api/v3/quote/{symbol}?apikey={api_key}",
                f"https://financialmodelingprep.com/api/v3/income-statement/{symbol}?period=quarter&apikey={api_key}",
                f"https://financialmodelingprep.com/api/v3/income-statement-growth/{symbol}?period=quarter&apikey={api_key}",
-                f"https://financialmodelingprep.com/api/v3/stock_news?tickers={symbol}&limit=50&apikey={api_key}",
                f"https://financialmodelingprep.com/api/v4/esg-environmental-social-governance-data-ratings?symbol={symbol}&apikey={api_key}",
                f"https://financialmodelingprep.com/api/v4/esg-environmental-social-governance-data?symbol={symbol}&apikey={api_key}",
                f"https://financialmodelingprep.com/api/v3/historical-price-full/stock_dividend/{symbol}?limit=400&apikey={api_key}",
@ -285,9 +284,6 @@ class StockDatabase:
                        elif isinstance(parsed_data, list) and "cash-flow-statement-growth/" in url:
                            # Handle list response, save as JSON object
                            fundamental_data['cashflow_growth'] = ujson.dumps(parsed_data)
-                        elif isinstance(parsed_data, list) and "stock_news" in url:
-                            # Handle list response, save as JSON object
-                            fundamental_data['stock_news'] = ujson.dumps(parsed_data)
                        elif isinstance(parsed_data, list) and "esg-environmental-social-governance-data?" in url:
                            # Handle list response, save as JSON object
                            fundamental_data['esg_data'] = ujson.dumps(parsed_data[0])
--- a/app/cron_company_news.py
+++ b/app/cron_company_news.py
@ -0,0 +1,85 @@
+import ujson
+import asyncio
+import aiohttp
+import sqlite3
+from tqdm import tqdm
+from dotenv import load_dotenv
+import os
+
+load_dotenv()
+api_key = os.getenv('FMP_API_KEY')
+
+async def filter_and_deduplicate(data, excluded_domains=None, deduplicate_key='title'):
+    """
+    Filter out items with specified domains in their URL and remove duplicates based on a specified key.
+    
+    Args:
+    data (list): List of dictionaries containing item data.
+    excluded_domains (list): List of domain strings to exclude. Defaults to ['prnewswire.com', 'globenewswire.com', 'accesswire.com'].
+    deduplicate_key (str): The key to use for deduplication. Defaults to 'title'.
+    
+    Returns:
+    list: Filtered and deduplicated list of items.
+    """
+    if excluded_domains is None:
+        excluded_domains = ['prnewswire.com', 'globenewswire.com', 'accesswire.com']
+    
+    seen_keys = set()
+    filtered_data = []
+    
+    for item in data:
+        if not any(domain in item['url'] for domain in excluded_domains):
+            key = item.get(deduplicate_key)
+            if key and key not in seen_keys:
+                filtered_data.append(item)
+                seen_keys.add(key)
+    
+    return filtered_data
+
+async def fetch_news(session, url):
+    async with session.get(url) as response:
+        return await response.json()
+
+async def save_news(data, symbol):
+    #os.makedirs("json/market-news/companies", exist_ok=True)
+    with open(f"json/market-news/companies/{symbol}.json", 'w') as file:
+        ujson.dump(data, file)
+
+async def process_symbols(symbols):
+    limit = 200
+    chunk_size = 50  # Adjust this value based on API limitations
+    
+    async with aiohttp.ClientSession() as session:
+        for i in tqdm(range(0, len(symbols), chunk_size)):
+            chunk = symbols[i:i+chunk_size]
+            company_tickers = ','.join(chunk)
+            url = f'https://financialmodelingprep.com/api/v3/stock_news?tickers={company_tickers}&limit={limit}&apikey={api_key}'
+            
+            data = await fetch_news(session, url)
+
+            custom_domains = ['prnewswire.com', 'globenewswire.com', 'accesswire.com']
+            data = await filter_and_deduplicate(data, excluded_domains=custom_domains)
+            
+            tasks = [save_news(data, symbol) for symbol in chunk]
+            await asyncio.gather(*tasks)
+
+def get_symbols(db_name, table_name):
+    with sqlite3.connect(db_name) as con:
+        cursor = con.cursor()
+        cursor.execute("PRAGMA journal_mode = wal")
+        cursor.execute(f"SELECT DISTINCT symbol FROM {table_name} WHERE symbol NOT LIKE '%.%'")
+        return [row[0] for row in cursor.fetchall()]
+
+async def main():
+    stock_symbols = get_symbols('stocks.db', 'stocks')
+    etf_symbols = get_symbols('etf.db', 'etfs')
+    crypto_symbols = get_symbols('crypto.db', 'cryptos')
+    total_symbols = stock_symbols + etf_symbols + crypto_symbols
+
+    await process_symbols(total_symbols)
+
+if __name__ == "__main__":
+    try:
+        asyncio.run(main())
+    except Exception as e:
+        print(f"An error occurred: {e}")
--- a/app/main.py
+++ b/app/main.py
@ -579,31 +579,29 @@ async def stock_news(data: TickerData, api_key: str = Security(get_api_key)):

    cached_result = redis_client.get(cache_key)
    if cached_result:
-        return orjson.loads(cached_result)
+        return StreamingResponse(
+        io.BytesIO(cached_result),
+        media_type="application/json",
+        headers={"Content-Encoding": "gzip"})

-    if ticker in etf_symbols:
-        table_name = 'etfs'
-        column_name = 'etf_news'
-        query_con = etf_con
-    elif ticker in crypto_symbols:
-        table_name = 'cryptos'
-        column_name = 'crypto_news'
-        query_con = crypto_con
-    else:
-        table_name = 'stocks'
-        column_name = 'stock_news'
-        query_con = con
-
-    query_template = f"SELECT {column_name} FROM {table_name} WHERE symbol = ?"
-    df = pd.read_sql_query(query_template, query_con, params=(ticker,))

    try:
-        res = orjson.loads(df[column_name].iloc[0])
+        with open(f"json/market-news/companies/{ticker}.json", 'rb') as file:
+            res = orjson.loads(file.read())
    except:
        res = []

-    redis_client.set(cache_key, orjson.dumps(res), 3600*3600)  # Set cache expiration time to 1 hour
-    return res
+    data = orjson.dumps(res)
+    compressed_data = gzip.compress(data)
+    redis_client.set(cache_key, compressed_data)
+    redis_client.expire(cache_key, 60*5)
+
+    return StreamingResponse(
+        io.BytesIO(compressed_data),
+        media_type="application/json",
+        headers={"Content-Encoding": "gzip"}
+    )
+


@app.post("/stock-dividend")
--- a/app/primary_cron_job.py
+++ b/app/primary_cron_job.py
@ -153,6 +153,7 @@ def run_cron_market_movers():
    
 def run_cron_market_news():
    run_command(["python3", "cron_market_news.py"])
+    run_command(["python3", "cron_company_news.py"])
    command = [
            "sudo", "rsync", "-avz", "-e", "ssh",
            "/root/backend/app/json/market-news",
@ -457,6 +458,7 @@ def run_cramer_tracker():
    ]
    run_command(command)

+
 # Create functions to run each schedule in a separate thread
 def run_threaded(job_func):
    job_thread = threading.Thread(target=job_func)