# Source listing: 94 lines, 2.8 KiB, Python
import os
|
|
import pandas as pd
|
|
import ujson
|
|
from selenium import webdriver
|
|
from selenium.webdriver.common.by import By
|
|
from selenium.webdriver.chrome.service import Service
|
|
from webdriver_manager.chrome import ChromeDriverManager
|
|
from selenium.webdriver.chrome.options import Options
|
|
from dotenv import load_dotenv
|
|
import sqlite3
|
|
|
|
def save_json(data, file_path):
    """Serialize ``data`` as JSON and write it to ``file_path``.

    Missing parent directories are created first, so writing into an output
    folder that does not exist yet no longer fails with
    ``FileNotFoundError``.

    Args:
        data: Any object that ``ujson.dump`` can serialize.
        file_path: Destination path; an existing file is overwritten.
    """
    parent_dir = os.path.dirname(file_path)
    # dirname is '' for a bare filename, and os.makedirs('') raises.
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    with open(file_path, 'w', encoding='utf-8') as file:
        ujson.dump(data, file)
|
|
|
|
# Parameterized lookup of a company's name and sector by ticker symbol.
# The single ``?`` placeholder is bound by the caller (sqlite3 qmark style),
# so the symbol is never interpolated into the SQL text.
query_template = """
    SELECT
        name, sector
    FROM
        stocks
    WHERE
        symbol = ?
"""
|
|
|
|
def _scrape_lobbying_rows(driver):
    """Extract the rows of the ``myTable`` element from the loaded page.

    Args:
        driver: A Selenium WebDriver that has already loaded the page.

    Returns:
        A list of dicts with the keys ``ticker``, ``company``, ``amount``
        and ``date``; rows that do not have exactly three cells are skipped.
    """
    table = driver.find_element(By.ID, 'myTable')

    records = []
    # The first <tr> is the header row, so it is skipped.
    for row in table.find_elements(By.TAG_NAME, 'tr')[1:]:
        columns = row.find_elements(By.TAG_NAME, 'td')
        if len(columns) != 3:
            continue

        # The first cell holds the ticker in <strong> and the company in <span>.
        records.append({
            'ticker': columns[0].find_element(By.TAG_NAME, 'strong').text,
            'company': columns[0].find_element(By.TAG_NAME, 'span').text,
            'amount': columns[1].text.strip(),
            'date': columns[2].text.strip(),
        })
    return records


def _add_company_info(con, records):
    """Add ``name`` and ``sector`` from the ``stocks`` table to each record.

    Args:
        con: Open SQLite connection used to run ``query_template``.
        records: Dicts that each contain a ``ticker`` key.

    Returns:
        A new list with one entry per input record. Records whose ticker is
        not found, or whose lookup fails, are passed through unchanged.
    """
    enriched = []
    for item in records:
        symbol = item['ticker']
        try:
            db_data = pd.read_sql_query(query_template, con, params=(symbol,))
        except Exception as e:
            # Best effort: a failed lookup must not drop the scraped row.
            print(f"Error processing {symbol}: {e}")
            enriched.append(item)
            continue

        if db_data.empty:
            enriched.append(item)
        else:
            enriched.append({
                **item,
                'name': db_data['name'].iloc[0],
                'sector': db_data['sector'].iloc[0],
            })
    return enriched


def main():
    """Scrape the corporate-lobbying table, enrich it and save it as JSON.

    The page URL is read from the ``CORPORATE_LOBBYING`` environment variable
    (after loading ``.env``). Scraped rows are enriched from ``stocks.db`` and
    written to ``json/corporate-lobbying/tracker/data.json`` when at least one
    row was collected.

    Raises:
        RuntimeError: If ``CORPORATE_LOBBYING`` is unset or empty.
    """
    # Load environment variables
    load_dotenv()
    url = os.getenv('CORPORATE_LOBBYING')
    if not url:
        # Fail before launching a browser rather than inside driver.get().
        raise RuntimeError("CORPORATE_LOBBYING environment variable is not set")

    con = sqlite3.connect('stocks.db')
    try:
        # Set up the WebDriver options
        options = Options()
        options.add_argument("--headless")
        options.add_argument("--no-sandbox")
        options.add_argument("--disable-dev-shm-usage")

        # Initialize the WebDriver; the service must be passed in, otherwise
        # the driver binary installed by ChromeDriverManager is never used.
        service = Service(ChromeDriverManager().install())
        driver = webdriver.Chrome(service=service, options=options)
        try:
            # Fetch the website and extract data from the table
            driver.get(url)
            data = _scrape_lobbying_rows(driver)

            # Fetch additional data from the database
            res = _add_company_info(con, data)

            # Save the JSON data
            if res:
                save_json(res, 'json/corporate-lobbying/tracker/data.json')
        finally:
            # Ensure the WebDriver is closed
            driver.quit()
    finally:
        # Closed even when driver installation or start-up fails.
        con.close()
|
|
|
|
# Run the scraper only when executed as a script, not when imported.
if __name__ == '__main__':
    main()