diff --git a/app/cron_lobbying_tracker.py b/app/cron_lobbying_tracker.py
new file mode 100644
index 0000000..808ed04
--- /dev/null
+++ b/app/cron_lobbying_tracker.py
@@ -0,0 +1,94 @@
+import os
+import pandas as pd
+import ujson
+from selenium import webdriver
+from selenium.webdriver.common.by import By
+from selenium.webdriver.chrome.service import Service
+from webdriver_manager.chrome import ChromeDriverManager
+from selenium.webdriver.chrome.options import Options
+from dotenv import load_dotenv
+import sqlite3
+
+def save_json(data, file_path):
+    with open(file_path, 'w') as file:
+        ujson.dump(data, file)
+
+query_template = """
+    SELECT
+        name, sector
+    FROM
+        stocks
+    WHERE
+        symbol = ?
+"""
+
+def main():
+    # Load environment variables
+    con = sqlite3.connect('stocks.db')
+    load_dotenv()
+    url = os.getenv('CORPORATE_LOBBYING')
+
+    # Set up the WebDriver options
+    options = Options()
+    options.add_argument("--headless")
+    options.add_argument("--no-sandbox")
+    options.add_argument("--disable-dev-shm-usage")
+
+    # Initialize the WebDriver
+    service = Service(ChromeDriverManager().install())
+    driver = webdriver.Chrome(service=service, options=options)
+
+    try:
+        # Fetch the website
+        driver.get(url)
+
+        # Find the table element
+        table = driver.find_element(By.ID, 'myTable')
+
+        # Extract data from the table
+        data = []
+        rows = table.find_elements(By.TAG_NAME, 'tr')[1:]  # Skip the header row
+        for row in rows:
+            columns = row.find_elements(By.TAG_NAME, 'td')
+            if len(columns) == 3:
+                ticker = columns[0].find_element(By.TAG_NAME, 'strong').text
+                company = columns[0].find_element(By.TAG_NAME, 'span').text
+                amount = columns[1].text.strip()
+                date = columns[2].text.strip()
+
+                data.append({
+                    'ticker': ticker,
+                    'company': company,
+                    'amount': amount,
+                    'date': date
+                })
+
+        # Fetch additional data from the database
+        res = []
+        for item in data:
+            symbol = item['ticker']
+            try:
+                db_data = pd.read_sql_query(query_template, con, params=(symbol,))
+                if not db_data.empty:
+                    res.append({
+                        **item,
+                        'name': db_data['name'].iloc[0],
+                        'sector': db_data['sector'].iloc[0]
+                    })
+                else:
+                    res.append(item)
+            except Exception as e:
+                print(f"Error processing {symbol}: {e}")
+                res.append(item)
+
+        # Save the JSON data
+        if len(res) > 0:
+            save_json(res, 'json/corporate-lobbying/tracker/data.json')
+
+    finally:
+        # Ensure the WebDriver is closed
+        driver.quit()
+        con.close()
+
+if __name__ == '__main__':
+    main()
\ No newline at end of file
diff --git a/app/main.py b/app/main.py
index bdd5abf..04fb914 100755
--- a/app/main.py
+++ b/app/main.py
@@ -3232,6 +3232,35 @@ async def get_cramer_tracker(api_key: str = Security(get_api_key)):
         headers={"Content-Encoding": "gzip"}
     )
 
+@app.get("/lobbying-tracker")
+async def get_lobbying_tracker(api_key: str = Security(get_api_key)):
+    cache_key = f"corporate-lobbying-tracker"
+    cached_result = redis_client.get(cache_key)
+    if cached_result:
+        return StreamingResponse(
+            io.BytesIO(cached_result),
+            media_type="application/json",
+            headers={"Content-Encoding": "gzip"}
+        )
+    try:
+        with open(f"json/corporate-lobbying/tracker/data.json", 'rb') as file:
+            res = orjson.loads(file.read())
+    except:
+        res = []
+
+    data = orjson.dumps(res)
+    compressed_data = gzip.compress(data)
+
+    redis_client.set(cache_key, compressed_data)
+    redis_client.expire(cache_key, 60*15)
+
+    return StreamingResponse(
+        io.BytesIO(compressed_data),
+        media_type="application/json",
+        headers={"Content-Encoding": "gzip"}
+    )
+
+
 @app.get("/reddit-tracker")
 async def get_reddit_tracker(api_key: str = Security(get_api_key)):
     cache_key = f"reddit-tracker"
diff --git a/app/primary_cron_job.py b/app/primary_cron_job.py
index cdb3d83..8ca9beb 100755
--- a/app/primary_cron_job.py
+++ b/app/primary_cron_job.py
@@ -439,24 +439,28 @@ def run_dashboard():
     ]
     run_command(command)
 
-def run_reddit_tracker():
-    run_command(["python3", "cron_reddit_tracker.py"])
-    run_command(["python3", "cron_reddit_statistics.py"])
-    command = [
-        "sudo", "rsync", "-avz", "-e", "ssh",
-        "/root/backend/app/json/reddit-tracker",
-        f"root@{useast_ip_address}:/root/backend/app/json"
+def run_tracker():
+    # Run Python scripts
+    scripts = [
+        "cron_reddit_tracker.py",
+        "cron_reddit_statistics.py",
+        "cron_cramer_tracker.py",
+        "cron_lobbying_tracker.py"
     ]
-    run_command(command)
+    for script in scripts:
+        run_command(["python3", script])
 
-def run_cramer_tracker():
-    run_command(["python3", "cron_cramer_tracker.py"])
-    command = [
-        "sudo", "rsync", "-avz", "-e", "ssh",
-        "/root/backend/app/json/cramer-tracker",
-        f"root@{useast_ip_address}:/root/backend/app/json"
+    # Rsync commands
+    rsync_commands = [
+        ("/root/backend/app/json/reddit-tracker", "/root/backend/app/json"),
+        ("/root/backend/app/json/cramer-tracker", "/root/backend/app/json"),
+        ("/root/backend/app/json/corporate-lobbying/tracker", "/root/backend/app/json/corporate-lobbying")
    ]
-    run_command(command)
+
+    base_command = ["sudo", "rsync", "-avz", "-e", "ssh"]
+    for source, dest in rsync_commands:
+        command = base_command + [source, f"root@{useast_ip_address}:{dest}"]
+        run_command(command)
 
 
 # Create functions to run each schedule in a separate thread
@@ -509,8 +513,7 @@ schedule.every(15).minutes.do(run_threaded, run_cron_market_news).tag('market_ne
 schedule.every(10).minutes.do(run_threaded, run_one_day_price).tag('one_day_price_job')
 schedule.every(15).minutes.do(run_threaded, run_cron_heatmap).tag('heatmap_job')
 
-schedule.every(10).minutes.do(run_threaded, run_reddit_tracker).tag('reddit_tracker_job')
-schedule.every(10).minutes.do(run_threaded, run_cramer_tracker).tag('cramer_tracker_job')
+schedule.every(10).minutes.do(run_threaded, run_tracker).tag('tracker_job')
 
 schedule.every(1).minutes.do(run_threaded, run_cron_quote).tag('quote_job')
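
The new `/lobbying-tracker` endpoint mirrors the existing tracker endpoints: it serves the cron job's `json/corporate-lobbying/tracker/data.json`, gzip-compressed and cached in Redis for 15 minutes. Below is a minimal client sketch for exercising it; the base URL and the `X-API-KEY` header name are assumptions, not taken from this diff, and should be adjusted to however `get_api_key` is actually wired up.

```python
# Minimal client sketch. BASE_URL and the X-API-KEY header name are assumptions;
# adjust them to the real deployment.
import orjson
import requests

BASE_URL = "http://localhost:8000"           # assumption: local dev server
headers = {"X-API-KEY": "your-api-key"}      # assumption: header-based API key

resp = requests.get(f"{BASE_URL}/lobbying-tracker", headers=headers)
resp.raise_for_status()

# The endpoint sets Content-Encoding: gzip, which requests decompresses
# automatically, so resp.content is already the raw JSON payload.
entries = orjson.loads(resp.content)
for entry in entries[:5]:
    print(entry.get("ticker"), entry.get("amount"), entry.get("date"))
```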