update cron job
This commit is contained in:
parent
bd9e48db03
commit
daa768487b
@ -1,17 +1,17 @@
|
|||||||
import os
|
import os
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import ujson
|
import sqlite3
|
||||||
import orjson
|
|
||||||
from selenium import webdriver
|
from selenium import webdriver
|
||||||
from selenium.webdriver.common.by import By
|
from selenium.webdriver.common.by import By
|
||||||
from selenium.webdriver.chrome.service import Service
|
from selenium.webdriver.chrome.service import Service
|
||||||
from webdriver_manager.chrome import ChromeDriverManager
|
|
||||||
from selenium.webdriver.chrome.options import Options
|
from selenium.webdriver.chrome.options import Options
|
||||||
|
from webdriver_manager.chrome import ChromeDriverManager
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
import sqlite3
|
|
||||||
from datetime import datetime
|
|
||||||
|
|
||||||
def clean_link(url):
|
def clean_link(url):
|
||||||
|
"""
|
||||||
|
Clean the article link to extract the actual URL if it's wrapped in a redirect.
|
||||||
|
"""
|
||||||
if 'url=' in url:
|
if 'url=' in url:
|
||||||
return url.split('url=')[-1]
|
return url.split('url=')[-1]
|
||||||
return url
|
return url
|
||||||
@ -25,7 +25,7 @@ def main():
|
|||||||
con.close()
|
con.close()
|
||||||
|
|
||||||
load_dotenv()
|
load_dotenv()
|
||||||
url = os.getenv('IPO_NEWS') # IPO news URL
|
url = os.getenv('IPO_NEWS') # IPO news URL
|
||||||
|
|
||||||
# Set up the WebDriver options
|
# Set up the WebDriver options
|
||||||
options = Options()
|
options = Options()
|
||||||
@ -42,8 +42,9 @@ def main():
|
|||||||
try:
|
try:
|
||||||
# Fetch the website
|
# Fetch the website
|
||||||
driver.get(url)
|
driver.get(url)
|
||||||
# Wait for the page to load (if needed, adjust the time)
|
# Wait for the page to load
|
||||||
driver.implicitly_wait(5)
|
driver.implicitly_wait(5)
|
||||||
|
|
||||||
# Find all the news containers
|
# Find all the news containers
|
||||||
news_items = driver.find_elements(By.CSS_SELECTOR, ".gap-4.border-gray-300.bg-white.p-4.shadow.last\\:pb-1")
|
news_items = driver.find_elements(By.CSS_SELECTOR, ".gap-4.border-gray-300.bg-white.p-4.shadow.last\\:pb-1")
|
||||||
|
|
||||||
@ -51,28 +52,32 @@ def main():
|
|||||||
news_data = []
|
news_data = []
|
||||||
for item in news_items:
|
for item in news_items:
|
||||||
try:
|
try:
|
||||||
|
# Extract relevant elements
|
||||||
title_element = item.find_element(By.CSS_SELECTOR, "h3 a")
|
title_element = item.find_element(By.CSS_SELECTOR, "h3 a")
|
||||||
description_element = item.find_element(By.CSS_SELECTOR, "p")
|
description_element = item.find_element(By.CSS_SELECTOR, "p")
|
||||||
timestamp_element = item.find_element(By.CSS_SELECTOR, ".text-sm.text-faded")
|
timestamp_element = item.find_element(By.CSS_SELECTOR, ".text-sm.text-faded")
|
||||||
stocks_element = item.find_elements(By.CSS_SELECTOR, ".ticker")
|
stocks_element = item.find_elements(By.CSS_SELECTOR, ".ticker")
|
||||||
|
img_element = item.find_element(By.CSS_SELECTOR, "img.h-full.w-full.rounded.object-cover")
|
||||||
|
|
||||||
|
# Get element data
|
||||||
title = title_element.text
|
title = title_element.text
|
||||||
description = description_element.text
|
description = description_element.text
|
||||||
timestamp = timestamp_element.text
|
timestamp = timestamp_element.text
|
||||||
link = title_element.get_attribute("href")
|
link = title_element.get_attribute("href")
|
||||||
stocks = [stock.text for stock in stocks_element]
|
stocks = [stock.text for stock in stocks_element]
|
||||||
|
img_link = img_element.get_attribute("src")
|
||||||
|
|
||||||
stock_list = []
|
# Filter stocks that exist in the database
|
||||||
for symbol in stocks:
|
stock_list = [symbol for symbol in stocks if symbol in stock_symbols]
|
||||||
if symbol in stock_symbols:
|
|
||||||
stock_list.append(symbol)
|
|
||||||
|
|
||||||
|
# Add to news data
|
||||||
news_data.append({
|
news_data.append({
|
||||||
"title": title,
|
"title": title,
|
||||||
"description": description,
|
"description": description,
|
||||||
"timestamp": timestamp,
|
"timestamp": timestamp,
|
||||||
"link": clean_link(link),
|
"link": clean_link(link),
|
||||||
"stocks": stock_list
|
"stocks": stock_list,
|
||||||
|
"img": img_link
|
||||||
})
|
})
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
@ -80,12 +85,10 @@ def main():
|
|||||||
|
|
||||||
# Convert the data into a DataFrame
|
# Convert the data into a DataFrame
|
||||||
df = pd.DataFrame(news_data)
|
df = pd.DataFrame(news_data)
|
||||||
print(df)
|
|
||||||
|
|
||||||
# Save the DataFrame to a JSON file
|
# Save the DataFrame to a JSON file
|
||||||
df.to_json(json_file_path, orient='records', indent=2)
|
df.to_json(json_file_path, orient='records', indent=2)
|
||||||
|
|
||||||
|
|
||||||
finally:
|
finally:
|
||||||
# Ensure the WebDriver is closed
|
# Ensure the WebDriver is closed
|
||||||
driver.quit()
|
driver.quit()
|
||||||
|
|||||||
@ -180,6 +180,7 @@ def run_cron_market_news():
|
|||||||
week = datetime.today().weekday()
|
week = datetime.today().weekday()
|
||||||
if week <= 4:
|
if week <= 4:
|
||||||
run_command(["python3", "cron_market_news.py"])
|
run_command(["python3", "cron_market_news.py"])
|
||||||
|
run_command(["python3", "cron_ipo_news.py"])
|
||||||
|
|
||||||
def run_company_news():
|
def run_company_news():
|
||||||
week = datetime.today().weekday()
|
week = datetime.today().weekday()
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user