update news cron job

This commit is contained in:
MuslemRahimi 2024-12-15 15:47:10 +01:00
parent 0f1010a26d
commit 38418c895f

View File

@ -1,31 +1,18 @@
import ujson import ujson
import asyncio import asyncio
import aiohttp import aiohttp
import finnhub
import sqlite3 import sqlite3
from dotenv import load_dotenv from dotenv import load_dotenv
import os import os
load_dotenv() load_dotenv()
api_key = os.getenv('FMP_API_KEY') api_key = os.getenv('FMP_API_KEY')
finnhub_api_key = os.getenv('FINNHUB_API_KEY')
finnhub_client = finnhub.Client(api_key=finnhub_api_key)
headers = {"accept": "application/json"} headers = {"accept": "application/json"}
def filter_and_deduplicate(data, excluded_domains=None, deduplicate_key='title'): def filter_and_deduplicate(data, excluded_domains=None, deduplicate_key='title'):
"""
Filter out items with specified domains in their URL and remove duplicates based on a specified key.
Args:
data (list): List of dictionaries containing item data.
excluded_domains (list): List of domain strings to exclude. Defaults to ['prnewswire.com', 'globenewswire.com', 'accesswire.com'].
deduplicate_key (str): The key to use for deduplication. Defaults to 'title'.
Returns:
list: Filtered and deduplicated list of items.
"""
if excluded_domains is None: if excluded_domains is None:
excluded_domains = ['prnewswire.com', 'globenewswire.com', 'accesswire.com'] excluded_domains = ['prnewswire.com', 'globenewswire.com', 'accesswire.com']
@ -41,31 +28,6 @@ def filter_and_deduplicate(data, excluded_domains=None, deduplicate_key='title')
return filtered_data return filtered_data
'''
async def run():
limit = 200
urls = [
f'https://financialmodelingprep.com/api/v3/stock_news?limit={limit}&apikey={api_key}',
f"https://financialmodelingprep.com/api/v4/general_news?limit={limit}&apikey={api_key}",
f"https://financialmodelingprep.com/api/v4/crypto_news?limit={limit}&apikey={api_key}",
]
for url in urls:
res_list = []
async with aiohttp.ClientSession() as session:
async with session.get(url) as response:
data = await response.json()
if "stock_news" in url:
data_name = 'stock-news'
elif "general_news" in url:
data_name = 'general-news'
elif "crypto_news" in url:
data_name = 'crypto-news'
with open(f"json/market-news/{data_name}.json", 'w') as file:
ujson.dump(data, file)
'''
async def run(): async def run():
@ -74,36 +36,40 @@ async def run():
cursor.execute("PRAGMA journal_mode = wal") cursor.execute("PRAGMA journal_mode = wal")
cursor.execute("SELECT DISTINCT symbol FROM stocks WHERE marketCap >= 10E9 AND symbol NOT LIKE '%.%'") cursor.execute("SELECT DISTINCT symbol FROM stocks WHERE marketCap >= 10E9 AND symbol NOT LIKE '%.%'")
stock_symbols = [row[0] for row in cursor.fetchall()] stock_symbols = [row[0] for row in cursor.fetchall()]
print(len(stock_symbols))
con.close() con.close()
limit = 100 limit = 200
company_tickers = ','.join(stock_symbols)
urls = [ urls = [
f'https://financialmodelingprep.com/api/v3/stock_news?tickers={company_tickers}&limit={limit}&apikey={api_key}', f'https://financialmodelingprep.com/stable/news/stock-latest?limit={limit}&apikey={api_key}',
f'https://financialmodelingprep.com/stable/news/general-latest?limit={limit}&apikey={api_key}',
f"https://financialmodelingprep.com/stable/news/press-releases-latest?limit={limit}&apikey={api_key}"
] ]
for url in urls: for url in urls:
async with aiohttp.ClientSession() as session: try:
async with session.get(url) as response: async with aiohttp.ClientSession() as session:
data = await response.json() async with session.get(url) as response:
data = await response.json()
if "stock_news" in url:
custom_domains = ['prnewswire.com', 'globenewswire.com', 'accesswire.com'] if "stock-latest" in url or "press-releases-latest" in url:
data = filter_and_deduplicate(data, excluded_domains=custom_domains) data = [item for item in data if item['symbol'] in stock_symbols]
data_name = 'stock-news'
if "stock-latest" in url:
custom_domains = ['prnewswire.com', 'globenewswire.com', 'accesswire.com']
data = filter_and_deduplicate(data, excluded_domains=custom_domains)
data_name = 'stock-news'
#elif "press-releases" in url: if "general-latest" in url:
# data_name = 'press-releases' custom_domains = ['prnewswire.com', 'globenewswire.com', 'accesswire.com']
data = filter_and_deduplicate(data, excluded_domains=custom_domains)
data_name = 'general-news'
if "press-releases-latest" in url:
data_name = 'press-news'
with open(f"json/market-news/{data_name}.json", 'w') as file: if len(data) > 0:
ujson.dump(data, file) with open(f"json/market-news/{data_name}.json", 'w') as file:
ujson.dump(data, file)
except:
pass
general_news = finnhub_client.general_news('general')
general_news = [item for item in general_news if item["source"] != "" and item["image"] != ""]
with open(f"json/market-news/general-news.json", 'w') as file:
ujson.dump(general_news, file)
try: try:
asyncio.run(run()) asyncio.run(run())