diff --git a/app/cron_market_news.py b/app/cron_market_news.py
index b8aefba..0c21293 100755
--- a/app/cron_market_news.py
+++ b/app/cron_market_news.py
@@ -2,6 +2,7 @@ import ujson
 import asyncio
 import aiohttp
 import finnhub
+import sqlite3
 from dotenv import load_dotenv
 import os
 load_dotenv()
@@ -13,6 +14,33 @@ finnhub_client = finnhub.Client(api_key=finnhub_api_key)
 
 headers = {"accept": "application/json"}
 
+def filter_and_deduplicate(data, excluded_domains=None, deduplicate_key='title'):
+    """
+    Filter out items with specified domains in their URL and remove duplicates based on a specified key.
+
+    Args:
+        data (list): List of dictionaries containing item data.
+        excluded_domains (list): List of domain strings to exclude. Defaults to ['prnewswire.com', 'globenewswire.com', 'accesswire.com'].
+        deduplicate_key (str): The key to use for deduplication. Defaults to 'title'.
+
+    Returns:
+        list: Filtered and deduplicated list of items.
+    """
+    if excluded_domains is None:
+        excluded_domains = ['prnewswire.com', 'globenewswire.com', 'accesswire.com']
+
+    seen_keys = set()
+    filtered_data = []
+
+    for item in data:
+        if not any(domain in item['url'] for domain in excluded_domains):
+            key = item.get(deduplicate_key)
+            if key and key not in seen_keys:
+                filtered_data.append(item)
+                seen_keys.add(key)
+
+    return filtered_data
+
 '''
 async def run():
     limit = 200
@@ -41,23 +69,31 @@ async def run():
 '''
 
 
 async def run():
+    con = sqlite3.connect('stocks.db')
+    cursor = con.cursor()
+    cursor.execute("PRAGMA journal_mode = wal")
+    cursor.execute("SELECT DISTINCT symbol FROM stocks WHERE marketCap >= 10E9 AND symbol NOT LIKE '%.%'")
+    stock_symbols = [row[0] for row in cursor.fetchall()]
+    print(len(stock_symbols))
+    con.close()
     limit = 200
+    company_tickers = ','.join(stock_symbols)
     urls = [
-        f'https://financialmodelingprep.com/api/v3/stock_news?limit={limit}&apikey={api_key}',
-        f'https://financialmodelingprep.com/api/v4/crypto_news?limit={limit}&apikey={api_key}',
+        f'https://financialmodelingprep.com/api/v3/stock_news?tickers={company_tickers}&limit={limit}&apikey={api_key}',
     ]
 
     for url in urls:
-        res_list = []
-
         async with aiohttp.ClientSession() as session:
             async with session.get(url) as response:
                 data = await response.json()
+                if "stock_news" in url:
+                    custom_domains = ['prnewswire.com', 'globenewswire.com', 'accesswire.com']
+                    data = filter_and_deduplicate(data, excluded_domains=custom_domains)
                     data_name = 'stock-news'
-                elif "crypto_news" in url:
-                    data_name = 'crypto-news'
-        with open(f"json/market-news/{data_name}.json", 'w') as file:
-            ujson.dump(data, file)
+                #elif "press-releases" in url:
+                #    data_name = 'press-releases'
+                #with open(f"json/market-news/{data_name}.json", 'w') as file:
+                #    ujson.dump(data, file)
diff --git a/app/main.py b/app/main.py
index 8d23112..503b098 100755
--- a/app/main.py
+++ b/app/main.py
@@ -189,6 +189,9 @@ async def openapi(username: str = Depends(get_current_username)):
 class TickerData(BaseModel):
     ticker: str
 
+class MarketNews(BaseModel):
+    newsType: str
+
 class OptionsFlowData(BaseModel):
     ticker: str = ''
     start_date: str = ''
@@ -539,9 +542,11 @@ async def get_market_movers(api_key: str = Security(get_api_key)):
 
 
-@app.get("/market-news")
-async def get_market_news(api_key: str = Security(get_api_key)):
-    cache_key = f"get-market-news"
+@app.post("/market-news")
+async def get_market_news(data: MarketNews, api_key: str = Security(get_api_key)):
+    news_type = data.newsType
+
+    cache_key = f"market-news-{news_type}"
     cached_result = redis_client.get(cache_key)
     if cached_result:
         return StreamingResponse(
@@ -550,7 +555,7 @@ async def get_market_news(api_key: str = Security(get_api_key)):
             headers={"Content-Encoding": "gzip"})
 
     try:
-        with open(f"json/market-news/stock-news.json", 'rb') as file:
+        with open(f"json/market-news/{news_type}.json", 'rb') as file:
             res = orjson.loads(file.read())
     except:
         res = []
@@ -558,61 +563,7 @@ async def get_market_news(api_key: str = Security(get_api_key)):
     data = orjson.dumps(res)
     compressed_data = gzip.compress(data)
     redis_client.set(cache_key, compressed_data)
-    redis_client.expire(cache_key, 60*15) # Set cache expiration time to 15 min
-
-    return StreamingResponse(
-        io.BytesIO(compressed_data),
-        media_type="application/json",
-        headers={"Content-Encoding": "gzip"}
-    )
-
-@app.get("/general-news")
-async def get_general_news(api_key: str = Security(get_api_key)):
-    cache_key = f"get-general-news"
-    cached_result = redis_client.get(cache_key)
-    if cached_result:
-        return StreamingResponse(
-            io.BytesIO(cached_result),
-            media_type="application/json",
-            headers={"Content-Encoding": "gzip"})
-
-    try:
-        with open(f"json/market-news/general-news.json", 'rb') as file:
-            res = orjson.loads(file.read())
-    except:
-        res = []
-
-    data = orjson.dumps(res)
-    compressed_data = gzip.compress(data)
-    redis_client.set(cache_key, compressed_data)
-    redis_client.expire(cache_key, 60*15) # Set cache expiration time to 15 min
-
-    return StreamingResponse(
-        io.BytesIO(compressed_data),
-        media_type="application/json",
-        headers={"Content-Encoding": "gzip"}
-    )
-
-@app.get("/crypto-news")
-async def get_crypto_news(api_key: str = Security(get_api_key)):
-    cache_key = f"get-crypto-news"
-    cached_result = redis_client.get(cache_key)
-    if cached_result:
-        return StreamingResponse(
-            io.BytesIO(cached_result),
-            media_type="application/json",
-            headers={"Content-Encoding": "gzip"})
-
-    try:
-        with open(f"json/market-news/crypto-news.json", 'rb') as file:
-            res = orjson.loads(file.read())
-    except:
-        res = []
-
-    data = orjson.dumps(res)
-    compressed_data = gzip.compress(data)
-    redis_client.set(cache_key, compressed_data)
-    redis_client.expire(cache_key, 60*15) # Set cache expiration time to 15 min
+    redis_client.expire(cache_key, 60*5) # Set cache expiration time to 5 min
 
     return StreamingResponse(
         io.BytesIO(compressed_data),
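
Reviewer note: a quick sanity check of the new filter_and_deduplicate helper added in app/cron_market_news.py. The sample items and the import path below are illustrative assumptions, not part of this patch:

# Illustrative sketch; assumes the helper is importable from app/cron_market_news.py.
from cron_market_news import filter_and_deduplicate

sample = [
    {"title": "AAPL beats estimates", "url": "https://example.com/a"},
    {"title": "AAPL beats estimates", "url": "https://example.com/b"},  # duplicate title, dropped
    {"title": "Wire piece", "url": "https://www.prnewswire.com/x"},     # excluded domain, dropped
]

print(filter_and_deduplicate(sample))
# [{'title': 'AAPL beats estimates', 'url': 'https://example.com/a'}]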
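
The /market-news route changes from GET to POST and now selects the JSON file via the newsType field, replacing the separate /general-news and /crypto-news routes. A minimal client sketch, assuming a local dev server and an X-API-KEY header (the actual base URL and auth header depend on how get_api_key is configured):

import requests

BASE_URL = "http://localhost:8000"       # assumption: local dev server
headers = {"X-API-KEY": "your-api-key"}  # assumption: header name expected by get_api_key

# 'stock-news' matches the file the updated cron job writes; any other
# newsType resolves to a missing JSON file and the endpoint returns [].
resp = requests.post(f"{BASE_URL}/market-news",
                     json={"newsType": "stock-news"},
                     headers=headers)
print(resp.json())  # requests transparently decompresses the gzip response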