update cron job
This commit is contained in:
parent
bd9e48db03
commit
daa768487b
@ -1,17 +1,17 @@
|
||||
import os
|
||||
import pandas as pd
|
||||
import ujson
|
||||
import orjson
|
||||
import sqlite3
|
||||
from selenium import webdriver
|
||||
from selenium.webdriver.common.by import By
|
||||
from selenium.webdriver.chrome.service import Service
|
||||
from webdriver_manager.chrome import ChromeDriverManager
|
||||
from selenium.webdriver.chrome.options import Options
|
||||
from webdriver_manager.chrome import ChromeDriverManager
|
||||
from dotenv import load_dotenv
|
||||
import sqlite3
|
||||
from datetime import datetime
|
||||
|
||||
def clean_link(url):
    """Return the real destination of *url*, unwrapping a redirect if present.

    Some article links come wrapped as a redirect with a ``url=`` query
    fragment; in that case everything after the last ``url=`` is the actual
    target. Links without the marker are returned untouched.
    """
    marker = 'url='
    if marker not in url:
        return url
    # rpartition keeps only the text after the LAST occurrence of the marker,
    # matching split(marker)[-1] on wrapped links.
    return url.rpartition(marker)[2]
|
||||
@ -25,7 +25,7 @@ def main():
|
||||
con.close()
|
||||
|
||||
load_dotenv()
|
||||
url = os.getenv('IPO_NEWS') # IPO news URL
|
||||
url = os.getenv('IPO_NEWS') # IPO news URL
|
||||
|
||||
# Set up the WebDriver options
|
||||
options = Options()
|
||||
@ -42,8 +42,9 @@ def main():
|
||||
try:
|
||||
# Fetch the website
|
||||
driver.get(url)
|
||||
# Wait for the page to load (if needed, adjust the time)
|
||||
# Wait for the page to load
|
||||
driver.implicitly_wait(5)
|
||||
|
||||
# Find all the news containers
|
||||
news_items = driver.find_elements(By.CSS_SELECTOR, ".gap-4.border-gray-300.bg-white.p-4.shadow.last\\:pb-1")
|
||||
|
||||
@ -51,28 +52,32 @@ def main():
|
||||
news_data = []
|
||||
for item in news_items:
|
||||
try:
|
||||
# Extract relevant elements
|
||||
title_element = item.find_element(By.CSS_SELECTOR, "h3 a")
|
||||
description_element = item.find_element(By.CSS_SELECTOR, "p")
|
||||
timestamp_element = item.find_element(By.CSS_SELECTOR, ".text-sm.text-faded")
|
||||
stocks_element = item.find_elements(By.CSS_SELECTOR, ".ticker")
|
||||
img_element = item.find_element(By.CSS_SELECTOR, "img.h-full.w-full.rounded.object-cover")
|
||||
|
||||
# Get element data
|
||||
title = title_element.text
|
||||
description = description_element.text
|
||||
timestamp = timestamp_element.text
|
||||
link = title_element.get_attribute("href")
|
||||
stocks = [stock.text for stock in stocks_element]
|
||||
img_link = img_element.get_attribute("src")
|
||||
|
||||
stock_list = []
|
||||
for symbol in stocks:
|
||||
if symbol in stock_symbols:
|
||||
stock_list.append(symbol)
|
||||
# Filter stocks that exist in the database
|
||||
stock_list = [symbol for symbol in stocks if symbol in stock_symbols]
|
||||
|
||||
# Add to news data
|
||||
news_data.append({
|
||||
"title": title,
|
||||
"description": description,
|
||||
"timestamp": timestamp,
|
||||
"link": clean_link(link),
|
||||
"stocks": stock_list
|
||||
"stocks": stock_list,
|
||||
"img": img_link
|
||||
})
|
||||
|
||||
except Exception as e:
|
||||
@ -80,12 +85,10 @@ def main():
|
||||
|
||||
# Convert the data into a DataFrame
|
||||
df = pd.DataFrame(news_data)
|
||||
print(df)
|
||||
|
||||
# Save the DataFrame to a JSON file
|
||||
df.to_json(json_file_path, orient='records', indent=2)
|
||||
|
||||
|
||||
finally:
|
||||
# Ensure the WebDriver is closed
|
||||
driver.quit()
|
||||
|
||||
@ -180,6 +180,7 @@ def run_cron_market_news():
|
||||
week = datetime.today().weekday()
|
||||
if week <= 4:
|
||||
run_command(["python3", "cron_market_news.py"])
|
||||
run_command(["python3", "cron_ipo_news.py"])
|
||||
|
||||
def run_company_news():
|
||||
week = datetime.today().weekday()
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user