remove old news from wiim

MuslemRahimi 2024-12-06 23:32:55 +01:00
parent a50ae8e5f3
commit 0500b1ca64
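
The diff below adds a two-week retention window for WIIM news: items returned by the Benzinga API are skipped once they fall outside the window, and previously written per-symbol JSON files are pruned (or deleted outright) when all of their entries have gone stale. A minimal sketch of that pruning step, assuming the same json/wiim/company/&lt;symbol&gt;.json layout and "%Y-%m-%d %H:%M:%S" date strings used in the script; prune_old_entries is an illustrative name and the standard json module stands in for ujson:

import os
import json
from datetime import datetime, timedelta
import pytz

CUTOFF = datetime.now(pytz.UTC) - timedelta(weeks=2)

def prune_old_entries(path):
    # Keep only entries newer than the two-week cutoff; drop the file if none remain.
    if not os.path.exists(path):
        return
    with open(path, 'r') as f:
        entries = json.load(f)
    kept = []
    for item in entries:
        date_obj = datetime.strptime(item['date'], "%Y-%m-%d %H:%M:%S").replace(tzinfo=pytz.UTC)
        if date_obj >= CUTOFF:
            kept.append(item)
    if kept:
        with open(path, 'w') as f:
            json.dump(kept, f)
    else:
        os.remove(path)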

@@ -10,6 +10,8 @@ import os
 from dotenv import load_dotenv
 from datetime import datetime, timedelta
 from tqdm import tqdm
+import pytz
 date_format = "%a, %d %b %Y %H:%M:%S %z"
@@ -18,6 +20,8 @@ api_key = os.getenv('BENZINGA_API_KEY')
 headers = {"accept": "application/json"}
+N_weeks_ago = datetime.now(pytz.UTC) - timedelta(weeks=2)
 query_template = """
 SELECT
     close
@@ -64,26 +68,73 @@ def correct_weekday(selected_date):
 REQUEST_LIMIT = 500
 PAUSE_TIME = 10
+def check_existing_file(symbol):
+    file_path = f"json/wiim/company/{symbol}.json"
+    if os.path.exists(file_path):
+        try:
+            with open(file_path, 'r') as file:
+                existing_data = ujson.load(file)
+            # Filter out elements older than two weeks
+            updated_data = []
+            for item in existing_data:
+                try:
+                    # Parse the date
+                    date_obj = datetime.strptime(item['date'], "%Y-%m-%d %H:%M:%S")
+                    if date_obj.tzinfo is None:
+                        date_obj = date_obj.replace(tzinfo=pytz.UTC)
+                    if date_obj >= N_weeks_ago:
+                        updated_data.append(item)
+                except Exception as e:
+                    print(f"Error processing existing item: {e}")
+            # Write back the filtered data
+            if updated_data:
+                with open(file_path, 'w') as file:
+                    ujson.dump(updated_data, file)
+                print(f"Updated existing file for {symbol}, removed old entries.")
+            else:
+                os.remove(file_path)
+                print(f"Deleted file for {symbol} as all entries were older than two weeks.")
+        except Exception as e:
+            print(f"Error processing existing file for {symbol}: {e}")
 async def get_endpoint(session, symbol, con, semaphore):
     async with semaphore:
         url = "https://api.benzinga.com/api/v2/news"
-        querystring = {"token": api_key, "tickers": symbol, "channels":"WIIM", "pageSize":"20", "displayOutput":"full"}
+        querystring = {
+            "token": api_key,
+            "tickers": symbol,
+            "channels": "WIIM",
+            "pageSize": "20",
+            "displayOutput": "full"
+        }
         try:
             async with session.get(url, params=querystring, headers=headers) as response:
                 res_list = []
                 res = ujson.loads(await response.text())
+                # Create a timezone-aware datetime for two weeks ago in UTC
                 for item in res:
                     try:
+                        # Parse the date and ensure timezone-awareness
                         date_obj = datetime.strptime(item['created'], date_format)
-                        new_date_obj_utc = date_obj
+                        if date_obj.tzinfo is None:
+                            date_obj = date_obj.replace(tzinfo=pytz.UTC)
+                        # Skip items older than two weeks
+                        if date_obj < N_weeks_ago:
+                            continue
                         start_date_obj_utc = correct_weekday(date_obj)
                         start_date = start_date_obj_utc.strftime("%Y-%m-%d")
-                        end_date = new_date_obj_utc.strftime("%Y-%m-%d")
-                        new_date_str = new_date_obj_utc.strftime("%Y-%m-%d %H:%M:%S")
+                        end_date = date_obj.strftime("%Y-%m-%d")
+                        new_date_str = date_obj.strftime("%Y-%m-%d %H:%M:%S")
                         query = query_template.format(symbol=symbol)
                         try:
@@ -93,15 +144,24 @@ async def get_endpoint(session, symbol, con, semaphore):
                             else:
                                 change_percent = '-'
                         except Exception as e:
+                            print(f"Error fetching stock data for {symbol}: {e}")
                             change_percent = '-'
-                        res_list.append({'date': new_date_str, 'text': item['title'], 'changesPercentage': change_percent, 'url': item['url']})
+                        res_list.append({
+                            'date': new_date_str,
+                            'text': item['title'],
+                            'changesPercentage': change_percent,
+                            'url': item['url']
+                        })
                     except Exception as e:
                         print(f"Error processing item for {symbol}: {e}")
-                if len(res_list) > 0:
-                    print("Done", symbol)
+                if res_list:
+                    print(f"Done processing {symbol}")
                     with open(f"json/wiim/company/{symbol}.json", 'w') as file:
                         ujson.dump(res_list, file)
+                else:
+                    check_existing_file(symbol)
         except Exception as e:
             print(f"Error fetching data for {symbol}: {e}")
@@ -137,7 +197,7 @@ async def run():
     cursor.execute("PRAGMA journal_mode = wal")
     cursor.execute("SELECT DISTINCT symbol FROM stocks WHERE symbol NOT LIKE '%.%'")
     stock_symbols = [row[0] for row in cursor.fetchall()]
-    #stock_symbols = ['GME']
+    #stock_symbols = ['AMD']
     etf_con = sqlite3.connect('etf.db')
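
For reference, the skip logic added inside get_endpoint reduces to a timezone-aware cutoff comparison on Benzinga's created timestamp. A minimal sketch under the same assumptions as the script (the "%a, %d %b %Y %H:%M:%S %z" date format and a UTC fallback for naive timestamps); is_recent is an illustrative helper, not part of the diff:

from datetime import datetime, timedelta
import pytz

date_format = "%a, %d %b %Y %H:%M:%S %z"
cutoff = datetime.now(pytz.UTC) - timedelta(weeks=2)

def is_recent(created):
    # True when a 'created' timestamp falls inside the two-week window.
    date_obj = datetime.strptime(created, date_format)
    if date_obj.tzinfo is None:
        # Mirror the diff's fallback: assume UTC when no offset is present.
        date_obj = date_obj.replace(tzinfo=pytz.UTC)
    return date_obj >= cutoff

# A fresh timestamp passes, a three-week-old one is skipped.
print(is_recent(datetime.now(pytz.UTC).strftime(date_format)))                          # True
print(is_recent((datetime.now(pytz.UTC) - timedelta(weeks=3)).strftime(date_format)))   # False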