update news cron job

2024-12-15 15:47:10 +01:00 · 2024-12-15 15:47:10 +01:00 · 38418c895f
commit 38418c895f
parent 0f1010a26d
1 changed file with 28 additions and 62 deletions
--- a/app/cron_market_news.py
+++ b/app/cron_market_news.py
@@ -1,31 +1,18 @@
 import ujson
 import asyncio
 import aiohttp
 import finnhub
 import sqlite3
 from dotenv import load_dotenv
 import os
 load_dotenv()
 api_key = os.getenv('FMP_API_KEY')
 finnhub_api_key = os.getenv('FINNHUB_API_KEY')
 finnhub_client = finnhub.Client(api_key=finnhub_api_key)
 headers = {"accept": "application/json"}
 def filter_and_deduplicate(data, excluded_domains=None, deduplicate_key='title'):
-    """
+ 
    Filter out items with specified domains in their URL and remove duplicates based on a specified key.
    Args:
    data (list): List of dictionaries containing item data.
    excluded_domains (list): List of domain strings to exclude. Defaults to ['prnewswire.com', 'globenewswire.com', 'accesswire.com'].
    deduplicate_key (str): The key to use for deduplication. Defaults to 'title'.
    Returns:
    list: Filtered and deduplicated list of items.
    """
    if excluded_domains is None:
        excluded_domains = ['prnewswire.com', 'globenewswire.com', 'accesswire.com']
@@ -41,31 +28,6 @@ def filter_and_deduplicate(data, excluded_domains=None, deduplicate_key='title')
    return filtered_data
 '''
 async def run():
    limit = 200
    urls = [
    f'https://financialmodelingprep.com/api/v3/stock_news?limit={limit}&apikey={api_key}',
    f"https://financialmodelingprep.com/api/v4/general_news?limit={limit}&apikey={api_key}",
    f"https://financialmodelingprep.com/api/v4/crypto_news?limit={limit}&apikey={api_key}",
    ]
    for url in urls:
        res_list = []
        async with aiohttp.ClientSession() as session:
            async with session.get(url) as response:
                data = await response.json()
        if "stock_news" in url:
            data_name = 'stock-news'
        elif "general_news" in url:
            data_name = 'general-news'
        elif "crypto_news" in url:
            data_name = 'crypto-news'
        with open(f"json/market-news/{data_name}.json", 'w') as file:
            ujson.dump(data, file)
 '''
 async def run():
@@ -74,36 +36,40 @@ async def run():
    cursor.execute("PRAGMA journal_mode = wal")
    cursor.execute("SELECT DISTINCT symbol FROM stocks WHERE marketCap >= 10E9 AND symbol NOT LIKE '%.%'")
    stock_symbols = [row[0] for row in cursor.fetchall()]
    print(len(stock_symbols))
    con.close()
-    limit = 100
+    limit = 200
    company_tickers = ','.join(stock_symbols)
    urls = [
-        f'https://financialmodelingprep.com/api/v3/stock_news?tickers={company_tickers}&limit={limit}&apikey={api_key}',
+        f'https://financialmodelingprep.com/stable/news/stock-latest?limit={limit}&apikey={api_key}',
        f'https://financialmodelingprep.com/stable/news/general-latest?limit={limit}&apikey={api_key}',
        f"https://financialmodelingprep.com/stable/news/press-releases-latest?limit={limit}&apikey={api_key}"
    ]
    for url in urls:
-        async with aiohttp.ClientSession() as session:
+        try:
-            async with session.get(url) as response:
+            async with aiohttp.ClientSession() as session:
-                data = await response.json()
+                async with session.get(url) as response:
-
+                    data = await response.json()
-        if "stock_news" in url:
+                    
-            custom_domains = ['prnewswire.com', 'globenewswire.com', 'accesswire.com']
+                    if "stock-latest" in url or "press-releases-latest" in url:
-            data = filter_and_deduplicate(data, excluded_domains=custom_domains)
+                        data = [item for item in data if item['symbol'] in stock_symbols]
            data_name = 'stock-news'
            if "stock-latest" in url:
                custom_domains = ['prnewswire.com', 'globenewswire.com', 'accesswire.com']
                data = filter_and_deduplicate(data, excluded_domains=custom_domains)
                data_name = 'stock-news'
-        #elif "press-releases" in url:
+            if "general-latest" in url:
-        #    data_name = 'press-releases'
+                custom_domains = ['prnewswire.com', 'globenewswire.com', 'accesswire.com']
                data = filter_and_deduplicate(data, excluded_domains=custom_domains)
                data_name = 'general-news'
            if "press-releases-latest" in url:
                data_name = 'press-news'
-        with open(f"json/market-news/{data_name}.json", 'w') as file:
+            if len(data) > 0:
-            ujson.dump(data, file)
+                with open(f"json/market-news/{data_name}.json", 'w') as file:
-
+                    ujson.dump(data, file)
-
+        except:
-
+            pass
    general_news = finnhub_client.general_news('general')
    general_news = [item for item in general_news if item["source"] != "" and item["image"] != ""]
    with open(f"json/market-news/general-news.json", 'w') as file:
            ujson.dump(general_news, file)
 try:
    asyncio.run(run())