update cron job

This commit is contained in:
MuslemRahimi 2025-01-18 14:32:03 +01:00
parent bd9e48db03
commit daa768487b
2 changed files with 18 additions and 14 deletions

View File

@@ -1,17 +1,17 @@
import os import os
import pandas as pd import pandas as pd
import ujson import sqlite3
import orjson
from selenium import webdriver from selenium import webdriver
from selenium.webdriver.common.by import By from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.options import Options from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
from dotenv import load_dotenv from dotenv import load_dotenv
import sqlite3
from datetime import datetime
def clean_link(url): def clean_link(url):
"""
Clean the article link to extract the actual URL if it's wrapped in a redirect.
"""
if 'url=' in url: if 'url=' in url:
return url.split('url=')[-1] return url.split('url=')[-1]
return url return url
@@ -25,7 +25,7 @@ def main():
con.close() con.close()
load_dotenv() load_dotenv()
url = os.getenv('IPO_NEWS') # IPO news URL url = os.getenv('IPO_NEWS') # IPO news URL
# Set up the WebDriver options # Set up the WebDriver options
options = Options() options = Options()
@@ -42,8 +42,9 @@ def main():
try: try:
# Fetch the website # Fetch the website
driver.get(url) driver.get(url)
# Wait for the page to load (if needed, adjust the time) # Wait for the page to load
driver.implicitly_wait(5) driver.implicitly_wait(5)
# Find all the news containers # Find all the news containers
news_items = driver.find_elements(By.CSS_SELECTOR, ".gap-4.border-gray-300.bg-white.p-4.shadow.last\\:pb-1") news_items = driver.find_elements(By.CSS_SELECTOR, ".gap-4.border-gray-300.bg-white.p-4.shadow.last\\:pb-1")
@@ -51,28 +52,32 @@ def main():
news_data = [] news_data = []
for item in news_items: for item in news_items:
try: try:
# Extract relevant elements
title_element = item.find_element(By.CSS_SELECTOR, "h3 a") title_element = item.find_element(By.CSS_SELECTOR, "h3 a")
description_element = item.find_element(By.CSS_SELECTOR, "p") description_element = item.find_element(By.CSS_SELECTOR, "p")
timestamp_element = item.find_element(By.CSS_SELECTOR, ".text-sm.text-faded") timestamp_element = item.find_element(By.CSS_SELECTOR, ".text-sm.text-faded")
stocks_element = item.find_elements(By.CSS_SELECTOR, ".ticker") stocks_element = item.find_elements(By.CSS_SELECTOR, ".ticker")
img_element = item.find_element(By.CSS_SELECTOR, "img.h-full.w-full.rounded.object-cover")
# Get element data
title = title_element.text title = title_element.text
description = description_element.text description = description_element.text
timestamp = timestamp_element.text timestamp = timestamp_element.text
link = title_element.get_attribute("href") link = title_element.get_attribute("href")
stocks = [stock.text for stock in stocks_element] stocks = [stock.text for stock in stocks_element]
img_link = img_element.get_attribute("src")
stock_list = [] # Filter stocks that exist in the database
for symbol in stocks: stock_list = [symbol for symbol in stocks if symbol in stock_symbols]
if symbol in stock_symbols:
stock_list.append(symbol)
# Add to news data
news_data.append({ news_data.append({
"title": title, "title": title,
"description": description, "description": description,
"timestamp": timestamp, "timestamp": timestamp,
"link": clean_link(link), "link": clean_link(link),
"stocks": stock_list "stocks": stock_list,
"img": img_link
}) })
except Exception as e: except Exception as e:
@@ -80,12 +85,10 @@ def main():
# Convert the data into a DataFrame # Convert the data into a DataFrame
df = pd.DataFrame(news_data) df = pd.DataFrame(news_data)
print(df)
# Save the DataFrame to a JSON file # Save the DataFrame to a JSON file
df.to_json(json_file_path, orient='records', indent=2) df.to_json(json_file_path, orient='records', indent=2)
finally: finally:
# Ensure the WebDriver is closed # Ensure the WebDriver is closed
driver.quit() driver.quit()

View File

@@ -180,6 +180,7 @@ def run_cron_market_news():
week = datetime.today().weekday() week = datetime.today().weekday()
if week <= 4: if week <= 4:
run_command(["python3", "cron_market_news.py"]) run_command(["python3", "cron_market_news.py"])
run_command(["python3", "cron_ipo_news.py"])
def run_company_news(): def run_company_news():
week = datetime.today().weekday() week = datetime.today().weekday()