backend/app/cron_executive.py
2024-05-26 22:28:08 +02:00

105 lines
3.1 KiB
Python
Executable File

from datetime import datetime, timedelta
import ujson
import time
import sqlite3
import pandas as pd
import numpy as np
from collections import defaultdict
import time
import asyncio
import aiohttp
from faker import Faker
from tqdm import tqdm
from dotenv import load_dotenv
import os
# Load variables from the dotenv file into the process environment, then read
# the API key that is appended to every financialmodelingprep.com request URL.
load_dotenv()
api_key = os.environ.get('FMP_API_KEY')
def generate_female_names(num_names):
    """Return a list of ``num_names`` female first names produced by Faker.

    Each name is drawn with a separate ``first_name_female()`` call and no
    de-duplication is performed, so the result always has ``num_names`` items.
    """
    name_source = Faker()
    return [name_source.first_name_female() for _ in range(num_names)]
def generate_male_names(num_names):
    """Return a list of ``num_names`` male first names produced by Faker.

    Each name is drawn with a separate ``first_name_male()`` call and no
    de-duplication is performed, so the result always has ``num_names`` items.
    """
    name_source = Faker()
    return [name_source.first_name_male() for _ in range(num_names)]
# Number of random draws requested from Faker for each gender. Draws can
# repeat, so the de-duplicated lists below may be shorter than this.
number_of_names = 20_000
# Only membership in these lists matters to the gender lookup, so repeated
# names are dropped (and the result sorted for a stable order) to keep every
# scan over the lists as short as possible.
female_names_list = sorted(set(generate_female_names(number_of_names)))
male_names_list = sorted(set(generate_male_names(number_of_names)))
def custom_sort(entry):
    """Sort key that places Chief Executive Officers first, then orders by name.

    Args:
        entry: Mapping describing one executive; its ``'title'`` and ``'name'``
            values are read.

    Returns:
        A ``(rank, name)`` tuple where ``rank`` is 0 when the title contains
        "Chief Executive Officer" and 1 otherwise.

    A missing or ``None`` title/name is treated as an empty string so that the
    resulting keys are always comparable with each other during sorting.
    """
    title = entry.get('title') or ''
    name = entry.get('name') or ''
    rank = 0 if "Chief Executive Officer" in title else 1
    return (rank, name)
# Lazily-built lowercase first-name sets, keyed by 'female' / 'male'.
_GENDER_NAME_CACHE = {}


def _gender_name_sets():
    """Return the (female, male) lowercase first-name sets, built on first use."""
    if not _GENDER_NAME_CACHE:
        _GENDER_NAME_CACHE['female'] = frozenset(n.lower() for n in female_names_list)
        _GENDER_NAME_CACHE['male'] = frozenset(n.lower() for n in male_names_list)
    return _GENDER_NAME_CACHE['female'], _GENDER_NAME_CACHE['male']


def _infer_gender(full_name):
    """Return 'female' or 'male' for the first word of full_name that is a known first name, else None."""
    female, male = _gender_name_sets()
    for word in full_name.lower().replace('-', ' ').split():
        # Drop punctuation stuck to the word, e.g. the dot in an initial.
        word = word.strip(".,'\"()")
        # Whole-word comparison: a substring test would classify "Julian" as
        # female because it contains "Julia". Female is checked first for names
        # present in both sets, mirroring the original lookup order.
        if word in female:
            return 'female'
        if word in male:
            return 'male'
    return None


async def save_executives(session, symbol):
    """Fetch the key executives of ``symbol`` and store them as JSON.

    The response is sorted with ``custom_sort``, reduced to the first entry per
    distinct name, and every entry whose ``'gender'`` is empty or missing gets a
    gender guessed from its name when a known first name is found. A non-empty
    result is written to ``json/executives/{symbol}.json``.

    Args:
        session: Open ``aiohttp.ClientSession`` used for the request.
        symbol: Ticker symbol inserted into the request URL and file name.

    Returns:
        None. A failed request or a payload that is not a list is reported with
        ``print`` and skipped, so one bad symbol does not abort the whole batch.
    """
    url = f"https://financialmodelingprep.com/api/v3/key-executives/{symbol}?apikey={api_key}"
    try:
        async with session.get(url) as response:
            data = await response.json()
    except (aiohttp.ClientError, asyncio.TimeoutError, ValueError) as err:
        print(f"Skipping {symbol}: request failed ({err})")
        return

    # Anything other than a list cannot be a list of executives (for instance
    # an error object); sorting it would fail inside custom_sort.
    if not isinstance(data, list):
        print(f"Skipping {symbol}: unexpected response payload")
        return

    unique_names = set()
    filtered_data = []
    for entry in sorted(data, key=custom_sort):
        name = entry['name']
        if name not in unique_names:
            unique_names.add(name)
            filtered_data.append(entry)

    for entry in filtered_data:
        # Covers '', None and an absent key alike.
        if not entry.get('gender'):
            name = entry.get('name')
            guess = _infer_gender(name) if name else None
            if guess is not None:
                entry['gender'] = guess

    if filtered_data:
        with open(f"json/executives/{symbol}.json", 'w') as file:
            ujson.dump(filtered_data, file)
async def run(batch_size=800, pause_seconds=60):
    """Download executive data for every symbol stored in ``stocks.db``.

    Symbols are read from the ``stocks`` table (those containing a dot are
    excluded by the query) and ``save_executives`` is scheduled for each one.
    Requests are awaited in batches, with a pause between batches.

    Args:
        batch_size: Number of symbols gathered concurrently before pausing.
        pause_seconds: Seconds to sleep between two consecutive batches.
    """
    con = sqlite3.connect('stocks.db')
    try:
        cursor = con.cursor()
        cursor.execute("PRAGMA journal_mode = wal")
        cursor.execute("SELECT DISTINCT symbol FROM stocks WHERE symbol NOT LIKE '%.%'")
        symbols = [row[0] for row in cursor.fetchall()]
    finally:
        # Release the database handle even when one of the queries raises.
        con.close()

    async with aiohttp.ClientSession() as session:
        tasks = []
        for count, symbol in enumerate(tqdm(symbols), start=1):
            tasks.append(save_executives(session, symbol))
            if count % batch_size == 0:
                await asyncio.gather(*tasks)
                tasks = []
                # No need to wait once the final symbol has been processed.
                if count < len(symbols):
                    print('sleeping mode: ', count)
                    await asyncio.sleep(pause_seconds)
        # Flush the last, partially filled batch.
        if tasks:
            await asyncio.gather(*tasks)
# Script entry point. asyncio.run() creates a fresh event loop, drives run() to
# completion and closes the loop, replacing the deprecated pattern of calling
# asyncio.get_event_loop() when no loop is running.
asyncio.run(run())