update dashboard
This commit is contained in:
parent
fdd1c20c1e
commit
c3656edbc9
@ -349,27 +349,28 @@ def save_json(symbol, data):
|
|||||||
json.dump(data, file)
|
json.dump(data, file)
|
||||||
|
|
||||||
|
|
||||||
def process_stock(stock, csv_files, reports_folder, threshold):
|
def process_stocks_batch(stocks, csv_files, reports_folder, threshold):
|
||||||
print(stock['name'])
|
all_df = pd.concat([pd.read_csv(os.path.join(reports_folder, csv_file), usecols=['ClientName', 'AmountReported', 'FilingYear']) for csv_file in csv_files])
|
||||||
year_totals = defaultdict(float)
|
all_df['ClientName_lower'] = all_df['ClientName'].str.lower()
|
||||||
stock_name_lower = stock['name'].lower()
|
|
||||||
|
|
||||||
for csv_file in csv_files:
|
results = {}
|
||||||
print(csv_file)
|
for stock in stocks:
|
||||||
df = pd.read_csv(os.path.join(reports_folder, csv_file), usecols=['ClientName', 'AmountReported', 'FilingYear'])
|
print(stock['name'])
|
||||||
|
stock_name_lower = stock['name'].lower()
|
||||||
|
|
||||||
df['ClientName_lower'] = df['ClientName'].str.lower()
|
all_df['score'] = all_df['ClientName_lower'].apply(lambda x: process.extractOne(stock_name_lower, [x])[1])
|
||||||
df['score'] = df['ClientName_lower'].apply(lambda x: process.extractOne(stock_name_lower, [x])[1])
|
matched_df = all_df[all_df['score'] >= threshold]
|
||||||
|
|
||||||
matched_df = df[df['score'] >= threshold]
|
year_totals = matched_df.groupby('FilingYear')['AmountReported'].sum().to_dict()
|
||||||
|
all_res_list = [{'year': year, 'amount': amount} for year, amount in year_totals.items()]
|
||||||
|
|
||||||
year_totals.update(matched_df.groupby('FilingYear')['AmountReported'].sum().to_dict())
|
if all_res_list:
|
||||||
|
save_json(stock['symbol'], all_res_list)
|
||||||
|
print(f"Saved data for {stock['symbol']} ({len(all_res_list)} matches)")
|
||||||
|
|
||||||
|
results[stock['symbol']] = all_res_list
|
||||||
|
|
||||||
all_res_list = [{'year': year, 'amount': amount} for year, amount in year_totals.items()]
|
return results
|
||||||
|
|
||||||
if all_res_list:
|
|
||||||
save_json(stock['symbol'], all_res_list)
|
|
||||||
print(f"Saved data for {stock['symbol']} ({len(all_res_list)} matches)")
|
|
||||||
|
|
||||||
def create_dataset():
|
def create_dataset():
|
||||||
reports_folder = "json/corporate-lobbying/reports"
|
reports_folder = "json/corporate-lobbying/reports"
|
||||||
@ -381,11 +382,19 @@ def create_dataset():
|
|||||||
cursor.execute("PRAGMA journal_mode = wal")
|
cursor.execute("PRAGMA journal_mode = wal")
|
||||||
cursor.execute("SELECT DISTINCT symbol, name FROM stocks WHERE marketCap >= 10E9 AND symbol NOT LIKE '%.%' AND symbol NOT LIKE '%-%'")
|
cursor.execute("SELECT DISTINCT symbol, name FROM stocks WHERE marketCap >= 10E9 AND symbol NOT LIKE '%.%' AND symbol NOT LIKE '%-%'")
|
||||||
stock_data = [{'symbol': row[0], 'name': row[1]} for row in cursor.fetchall()]
|
stock_data = [{'symbol': row[0], 'name': row[1]} for row in cursor.fetchall()]
|
||||||
print(len(stock_data))
|
print(f"Total stocks: {len(stock_data)}")
|
||||||
con.close()
|
con.close()
|
||||||
|
|
||||||
with concurrent.futures.ThreadPoolExecutor(max_workers=os.cpu_count()) as executor:
|
batch_size = 10
|
||||||
executor.map(lambda stock: process_stock(stock, csv_files, reports_folder, threshold), stock_data)
|
stock_batches = [stock_data[i:i+batch_size] for i in range(0, len(stock_data), batch_size)]
|
||||||
|
|
||||||
|
with concurrent.futures.ProcessPoolExecutor(max_workers=os.cpu_count()) as executor:
|
||||||
|
futures = [executor.submit(process_stocks_batch, batch, csv_files, reports_folder, threshold) for batch in stock_batches]
|
||||||
|
|
||||||
|
for future in concurrent.futures.as_completed(futures):
|
||||||
|
results = future.result()
|
||||||
|
print(f"Processed batch with {len(results)} stocks")
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if '__main__' == __name__:
|
if '__main__' == __name__:
|
||||||
|
|||||||
@ -123,6 +123,7 @@ async def get_recent_earnings(session):
|
|||||||
try:
|
try:
|
||||||
symbol = item['ticker']
|
symbol = item['ticker']
|
||||||
name = item['name']
|
name = item['name']
|
||||||
|
time = item['time']
|
||||||
eps_prior = float(item['eps_prior']) if item['eps_prior'] != '' else 0
|
eps_prior = float(item['eps_prior']) if item['eps_prior'] != '' else 0
|
||||||
eps_surprise = float(item['eps_surprise']) if item['eps_surprise'] != '' else 0
|
eps_surprise = float(item['eps_surprise']) if item['eps_surprise'] != '' else 0
|
||||||
eps = float(item['eps']) if item['eps'] != '' else 0
|
eps = float(item['eps']) if item['eps'] != '' else 0
|
||||||
@ -135,6 +136,7 @@ async def get_recent_earnings(session):
|
|||||||
res_list.append({
|
res_list.append({
|
||||||
'symbol': symbol,
|
'symbol': symbol,
|
||||||
'name': name,
|
'name': name,
|
||||||
|
'time': time,
|
||||||
'marketCap': market_cap,
|
'marketCap': market_cap,
|
||||||
'epsPrior':eps_prior,
|
'epsPrior':eps_prior,
|
||||||
'epsSurprise': eps_surprise,
|
'epsSurprise': eps_surprise,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user