update cron job market news

This commit is contained in:
MuslemRahimi 2024-08-06 00:46:45 +02:00
parent 531d5f4f62
commit eef7a645cf
2 changed files with 54 additions and 67 deletions

View File

@ -2,6 +2,7 @@ import ujson
import asyncio
import aiohttp
import finnhub
import sqlite3
from dotenv import load_dotenv
import os
load_dotenv()
@ -13,6 +14,33 @@ finnhub_client = finnhub.Client(api_key=finnhub_api_key)
headers = {"accept": "application/json"}
def filter_and_deduplicate(data, excluded_domains=None, deduplicate_key='title'):
"""
Filter out items with specified domains in their URL and remove duplicates based on a specified key.
Args:
data (list): List of dictionaries containing item data.
excluded_domains (list): List of domain strings to exclude. Defaults to ['prnewswire.com', 'globenewswire.com', 'accesswire.com'].
deduplicate_key (str): The key to use for deduplication. Defaults to 'title'.
Returns:
list: Filtered and deduplicated list of items.
"""
if excluded_domains is None:
excluded_domains = ['prnewswire.com', 'globenewswire.com', 'accesswire.com']
seen_keys = set()
filtered_data = []
for item in data:
if not any(domain in item['url'] for domain in excluded_domains):
key = item.get(deduplicate_key)
if key and key not in seen_keys:
filtered_data.append(item)
seen_keys.add(key)
return filtered_data
'''
async def run():
limit = 200
@ -41,23 +69,31 @@ async def run():
async def run():
con = sqlite3.connect('stocks.db')
cursor = con.cursor()
cursor.execute("PRAGMA journal_mode = wal")
cursor.execute("SELECT DISTINCT symbol FROM stocks WHERE marketCap >= 10E9 AND symbol NOT LIKE '%.%'")
stock_symbols = [row[0] for row in cursor.fetchall()]
print(len(stock_symbols))
con.close()
limit = 200
company_tickers = ','.join(stock_symbols)
urls = [
f'https://financialmodelingprep.com/api/v3/stock_news?limit={limit}&apikey={api_key}',
f'https://financialmodelingprep.com/api/v4/crypto_news?limit={limit}&apikey={api_key}',
f'https://financialmodelingprep.com/api/v3/stock_news?tickers={company_tickers}&limit={limit}&apikey={api_key}',
]
for url in urls:
res_list = []
async with aiohttp.ClientSession() as session:
async with session.get(url) as response:
data = await response.json()
if "stock_news" in url:
custom_domains = ['prnewswire.com', 'globenewswire.com', 'accesswire.com']
data = filter_and_deduplicate(data, excluded_domains=custom_domains)
data_name = 'stock-news'
elif "crypto_news" in url:
data_name = 'crypto-news'
with open(f"json/market-news/{data_name}.json", 'w') as file:
ujson.dump(data, file)
#elif "press-releases" in url:
# data_name = 'press-releases'
#with open(f"json/market-news/{data_name}.json", 'w') as file:
# ujson.dump(data, file)

View File

@ -189,6 +189,9 @@ async def openapi(username: str = Depends(get_current_username)):
class TickerData(BaseModel):
ticker: str
class MarketNews(BaseModel):
newsType: str
class OptionsFlowData(BaseModel):
ticker: str = ''
start_date: str = ''
@ -539,9 +542,11 @@ async def get_market_movers(api_key: str = Security(get_api_key)):
@app.get("/market-news")
async def get_market_news(api_key: str = Security(get_api_key)):
cache_key = f"get-market-news"
@app.post("/market-news")
async def get_market_news(data: MarketNews, api_key: str = Security(get_api_key)):
news_type = data.newsType
cache_key = f"market-news-{news_type}"
cached_result = redis_client.get(cache_key)
if cached_result:
return StreamingResponse(
@ -550,7 +555,7 @@ async def get_market_news(api_key: str = Security(get_api_key)):
headers={"Content-Encoding": "gzip"})
try:
with open(f"json/market-news/stock-news.json", 'rb') as file:
with open(f"json/market-news/{news_type}.json", 'rb') as file:
res = orjson.loads(file.read())
except:
res = []
@ -558,61 +563,7 @@ async def get_market_news(api_key: str = Security(get_api_key)):
data = orjson.dumps(res)
compressed_data = gzip.compress(data)
redis_client.set(cache_key, compressed_data)
redis_client.expire(cache_key, 60*15) # Set cache expiration time to 15 min
return StreamingResponse(
io.BytesIO(compressed_data),
media_type="application/json",
headers={"Content-Encoding": "gzip"}
)
@app.get("/general-news")
async def get_general_news(api_key: str = Security(get_api_key)):
cache_key = f"get-general-news"
cached_result = redis_client.get(cache_key)
if cached_result:
return StreamingResponse(
io.BytesIO(cached_result),
media_type="application/json",
headers={"Content-Encoding": "gzip"})
try:
with open(f"json/market-news/general-news.json", 'rb') as file:
res = orjson.loads(file.read())
except:
res = []
data = orjson.dumps(res)
compressed_data = gzip.compress(data)
redis_client.set(cache_key, compressed_data)
redis_client.expire(cache_key, 60*15) # Set cache expiration time to 15 min
return StreamingResponse(
io.BytesIO(compressed_data),
media_type="application/json",
headers={"Content-Encoding": "gzip"}
)
@app.get("/crypto-news")
async def get_crypto_news(api_key: str = Security(get_api_key)):
cache_key = f"get-crypto-news"
cached_result = redis_client.get(cache_key)
if cached_result:
return StreamingResponse(
io.BytesIO(cached_result),
media_type="application/json",
headers={"Content-Encoding": "gzip"})
try:
with open(f"json/market-news/crypto-news.json", 'rb') as file:
res = orjson.loads(file.read())
except:
res = []
data = orjson.dumps(res)
compressed_data = gzip.compress(data)
redis_client.set(cache_key, compressed_data)
redis_client.expire(cache_key, 60*15) # Set cache expiration time to 15 min
redis_client.expire(cache_key, 60*5) # Set cache expiration time to 15 min
return StreamingResponse(
io.BytesIO(compressed_data),