diff --git a/app/cron_market_news.py b/app/cron_market_news.py index 03671da..bdf8a9a 100755 --- a/app/cron_market_news.py +++ b/app/cron_market_news.py @@ -1,31 +1,18 @@ import ujson import asyncio import aiohttp -import finnhub import sqlite3 from dotenv import load_dotenv import os load_dotenv() api_key = os.getenv('FMP_API_KEY') -finnhub_api_key = os.getenv('FINNHUB_API_KEY') -finnhub_client = finnhub.Client(api_key=finnhub_api_key) headers = {"accept": "application/json"} def filter_and_deduplicate(data, excluded_domains=None, deduplicate_key='title'): - """ - Filter out items with specified domains in their URL and remove duplicates based on a specified key. - - Args: - data (list): List of dictionaries containing item data. - excluded_domains (list): List of domain strings to exclude. Defaults to ['prnewswire.com', 'globenewswire.com', 'accesswire.com']. - deduplicate_key (str): The key to use for deduplication. Defaults to 'title'. - - Returns: - list: Filtered and deduplicated list of items. - """ + if excluded_domains is None: excluded_domains = ['prnewswire.com', 'globenewswire.com', 'accesswire.com'] @@ -41,31 +28,6 @@ def filter_and_deduplicate(data, excluded_domains=None, deduplicate_key='title') return filtered_data -''' -async def run(): - limit = 200 - urls = [ - f'https://financialmodelingprep.com/api/v3/stock_news?limit={limit}&apikey={api_key}', - f"https://financialmodelingprep.com/api/v4/general_news?limit={limit}&apikey={api_key}", - f"https://financialmodelingprep.com/api/v4/crypto_news?limit={limit}&apikey={api_key}", - ] - for url in urls: - res_list = [] - - async with aiohttp.ClientSession() as session: - async with session.get(url) as response: - data = await response.json() - if "stock_news" in url: - data_name = 'stock-news' - elif "general_news" in url: - data_name = 'general-news' - elif "crypto_news" in url: - data_name = 'crypto-news' - - with open(f"json/market-news/{data_name}.json", 'w') as file: - ujson.dump(data, file) -''' - async def run(): @@ -74,36 +36,40 @@ async def run(): cursor.execute("PRAGMA journal_mode = wal") cursor.execute("SELECT DISTINCT symbol FROM stocks WHERE marketCap >= 10E9 AND symbol NOT LIKE '%.%'") stock_symbols = [row[0] for row in cursor.fetchall()] - print(len(stock_symbols)) con.close() - limit = 100 - company_tickers = ','.join(stock_symbols) + limit = 200 urls = [ - f'https://financialmodelingprep.com/api/v3/stock_news?tickers={company_tickers}&limit={limit}&apikey={api_key}', + f'https://financialmodelingprep.com/stable/news/stock-latest?limit={limit}&apikey={api_key}', + f'https://financialmodelingprep.com/stable/news/general-latest?limit={limit}&apikey={api_key}', + f"https://financialmodelingprep.com/stable/news/press-releases-latest?limit={limit}&apikey={api_key}" ] for url in urls: - async with aiohttp.ClientSession() as session: - async with session.get(url) as response: - data = await response.json() - - if "stock_news" in url: - custom_domains = ['prnewswire.com', 'globenewswire.com', 'accesswire.com'] - data = filter_and_deduplicate(data, excluded_domains=custom_domains) - data_name = 'stock-news' + try: + async with aiohttp.ClientSession() as session: + async with session.get(url) as response: + data = await response.json() + + if "stock-latest" in url or "press-releases-latest" in url: + data = [item for item in data if item['symbol'] in stock_symbols] + if "stock-latest" in url: + custom_domains = ['prnewswire.com', 'globenewswire.com', 'accesswire.com'] + data = filter_and_deduplicate(data, excluded_domains=custom_domains) + data_name = 'stock-news' - #elif "press-releases" in url: - # data_name = 'press-releases' + if "general-latest" in url: + custom_domains = ['prnewswire.com', 'globenewswire.com', 'accesswire.com'] + data = filter_and_deduplicate(data, excluded_domains=custom_domains) + data_name = 'general-news' + + if "press-releases-latest" in url: + data_name = 'press-news' - with open(f"json/market-news/{data_name}.json", 'w') as file: - ujson.dump(data, file) - - - - general_news = finnhub_client.general_news('general') - general_news = [item for item in general_news if item["source"] != "" and item["image"] != ""] - with open(f"json/market-news/general-news.json", 'w') as file: - ujson.dump(general_news, file) + if len(data) > 0: + with open(f"json/market-news/{data_name}.json", 'w') as file: + ujson.dump(data, file) + except: + pass try: asyncio.run(run())