update business metrics
parent ef5945715c
commit f3efc9f435
@@ -46,52 +46,48 @@ def convert_to_dict(data):
     return result
 
-def prepare_expense_dataset(symbol):
-    # Define the list of key elements you want to track
-    expense_keys = [
-        'researchAndDevelopmentExpenses',
-        'generalAndAdministrativeExpenses',
-        'sellingAndMarketingExpenses',
-        'operatingExpenses',
-        'costOfRevenue'
-    ]
+def prepare_expense_dataset(data):
+    data = convert_to_dict(data)
+    res_list = {}
+    operating_name_list = []
+    operating_history_list = []
+    index = 0
+    for date, info in data.items():
+        value_list = []
+        for name, val in info.items():
+            if index == 0:
+                operating_name_list.append(name)
+            if name in operating_name_list:
+                value_list.append(val)
+        if len(value_list) > 0:
+            operating_history_list.append({'date': date, 'value': value_list})
+        index += 1
 
-    # Open the financial statement data for the symbol
-    with open(f"json/financial-statements/income-statement/quarter/{symbol}.json", 'rb') as file:
-        data = orjson.loads(file.read())
-    # Convert the data into a dictionary
+    operating_history_list = sorted(operating_history_list, key=lambda x: datetime.strptime(x['date'], '%Y-%m-%d'))
 
-    # Initialize a dictionary to hold the history and growth for each key
-    expense_data = {}
+    # Initialize 'valueGrowth' as None for all entries
+    for item in operating_history_list:
+        item['valueGrowth'] = [None] * len(item['value'])
 
-    for key in expense_keys:
-        expense_data[key] = []
+    # Calculate valueGrowth for each item based on the previous date value
+    for i in range(1, len(operating_history_list)):  # Start from the second item
+        current_item = operating_history_list[i]
+        prev_item = operating_history_list[i - 1]
 
-        # Prepare the data for the current key
-        for entry in data:
-            date = entry.get('date')
-            value = entry.get(key, 0)  # Default to 0 if the key is missing
-            expense_data[key].append({'date': date, 'value': value})
 
-        # Sort the list by date
-        expense_data[key] = sorted(expense_data[key], key=lambda x: datetime.strptime(x['date'], '%Y-%m-%d'))
 
-        # Initialize 'valueGrowth' as None for all entries
-        for item in expense_data[key]:
-            item['valueGrowth'] = None
 
-        # Calculate valueGrowth for each item based on the previous date value
-        for i in range(1, len(expense_data[key])):
+        value_growth = []
+        for cur_value, prev_value in zip(current_item['value'], prev_item['value']):
             try:
-                current_item = expense_data[key][i]
-                prev_item = expense_data[key][i - 1]
-                growth = round(((current_item['value'] - prev_item['value']) / prev_item['value']) * 100, 2) if prev_item['value'] != 0 else None
-                current_item['valueGrowth'] = growth
+                growth = round(((cur_value - prev_value) / prev_value) * 100, 2)
             except:
-                current_item['valueGrowth'] = None
+                growth = None
+            value_growth.append(growth)
 
-    # Return the results as a dictionary with all keys
-    return expense_data
+        current_item['valueGrowth'] = value_growth
+
+    operating_history_list = sorted(operating_history_list, key=lambda x: datetime.strptime(x['date'], '%Y-%m-%d'), reverse=True)
+
+    res_list = {'operatingExpenses': {'names': operating_name_list, 'history': operating_history_list}}
+    return res_list
 
 def prepare_geo_dataset(data):
     data = convert_to_dict(data)
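For context: the rewritten function takes already-loaded income data instead of reading the symbol's JSON from disk, records every metric name seen on the first date, and computes per-column quarter-over-quarter growth. A minimal sketch of the resulting shape, assuming the new prepare_expense_dataset above is in scope; the convert_to_dict stub and the sample rows are assumptions for illustration (the real convert_to_dict lives outside this hunk):

```python
from datetime import datetime  # used by prepare_expense_dataset

# Hypothetical stand-in: assumed to flatten [{date: {name: value}}] rows
# into a single {date: {name: value}} mapping.
def convert_to_dict(rows):
    return {date: values for row in rows for date, values in row.items()}

sample = [
    {'2024-03-31': {'Research and Development': 100.0,
                    'Selling, General, and Administrative': 50.0}},
    {'2024-06-30': {'Research and Development': 120.0,
                    'Selling, General, and Administrative': 40.0}},
]
out = prepare_expense_dataset(sample)
print(out['operatingExpenses']['names'])
# ['Research and Development', 'Selling, General, and Administrative']
print(out['operatingExpenses']['history'][0])  # newest first after the reverse sort
# {'date': '2024-06-30', 'value': [120.0, 40.0], 'valueGrowth': [20.0, -20.0]}
```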
@@ -137,7 +133,7 @@ def prepare_geo_dataset(data):
 
     return res_list
 
-def prepare_dataset(data, geo_data, symbol):
+def prepare_dataset(data, geo_data, income_data, symbol):
     data = convert_to_dict(data)
     res_list = {}
     revenue_name_list = []
@@ -180,11 +176,11 @@ def prepare_dataset(data, geo_data, symbol):
     res_list = {'revenue': {'names': revenue_name_list, 'history': revenue_history_list}}
 
     geo_data = prepare_geo_dataset(geo_data)
-    #operating_expense_data = prepare_expense_dataset(symbol)
+    operating_expense_data = prepare_expense_dataset(income_data)
 
 
-    #res_list = {**res_list, **geo_data, 'expense': operating_expense_data}
-    res_list = {**res_list, **geo_data}
+    res_list = {**res_list, **geo_data, **operating_expense_data}
     return res_list
 
 async def get_data(session, total_symbols):
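The new merge relies on the three sources using disjoint top-level keys. A small sketch of the combined payload; the 'geography' key is an assumption for illustration, since prepare_geo_dataset's output key is outside this diff:

```python
# Hypothetical shapes; only 'revenue' and 'operatingExpenses' are confirmed by this diff.
res_list = {'revenue': {'names': ['iPhone'], 'history': []}}
geo_data = {'geography': {'names': ['Americas'], 'history': []}}  # assumed key
operating_expense_data = {'operatingExpenses': {'names': [], 'history': []}}

merged = {**res_list, **geo_data, **operating_expense_data}
# Later unpacks win on key collisions; these three are disjoint, so the result
# simply carries all three sections side by side.
print(sorted(merged))  # ['geography', 'operatingExpenses', 'revenue']
```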
@@ -192,6 +188,37 @@ async def get_data(session, total_symbols):
     for i in tqdm(range(0, len(total_symbols), batch_size)):
         batch = total_symbols[i:i+batch_size]
         for symbol in batch:
+            try:
+                with open(f"json/financial-statements/income-statement/quarter/{symbol}.json", 'r') as file:
+                    income_data = orjson.loads(file.read())
+
+                include_selling_and_marketing = income_data[0].get('sellingAndMarketingExpenses', 0) > 0 if income_data else False
+                # Process the income_data
+                income_data = [
+                    {
+                        'date': entry['date'],
+                        'Selling, General, and Administrative': entry.get('sellingGeneralAndAdministrativeExpenses', 0),
+                        'Research and Development': entry.get('researchAndDevelopmentExpenses', 0),
+                        **({'Sales and Marketing': entry.get('sellingAndMarketingExpenses', 0)} if include_selling_and_marketing else {})
+                    }
+                    for entry in income_data
+                    if datetime.strptime(entry['date'], '%Y-%m-%d') > datetime(2015, 1, 1)
+                ]
+
+                income_data = [
+                    {
+                        entry['date']: {
+                            key: value
+                            for key, value in entry.items()
+                            if key != 'date'
+                        }
+                    }
+                    for entry in income_data
+                ]
+            except:
+                income_data = []
+
+
             product_data = []
             geo_data = []
 
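To make the two reshaping passes concrete, here is one invented row traced through both comprehensions (the values are made up; the field names match the diff):

```python
# One invented income-statement row, post-2015 so it survives the date filter.
row = {
    'date': '2023-12-31',
    'sellingGeneralAndAdministrativeExpenses': 500,
    'researchAndDevelopmentExpenses': 300,
    'sellingAndMarketingExpenses': 0,  # zero -> include_selling_and_marketing is False
}

# After pass 1 (rename keys, optionally add 'Sales and Marketing', filter by date):
# {'date': '2023-12-31',
#  'Selling, General, and Administrative': 500,
#  'Research and Development': 300}

# After pass 2 (re-key each row by its date, dropping the 'date' field):
# {'2023-12-31': {'Selling, General, and Administrative': 500,
#                 'Research and Development': 300}}
```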
@@ -213,7 +240,7 @@ async def get_data(session, total_symbols):
                 pass
 
             if len(product_data) > 0 and len(geo_data) > 0:
-                data = prepare_dataset(product_data, geo_data, symbol)
+                data = prepare_dataset(product_data, geo_data, income_data, symbol)
                 await save_json(data, symbol)
 
             # Wait 60 seconds after processing each batch of 300 symbols
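Worth noting at this call site: when the earlier try block fails, income_data is []. Assuming convert_to_dict maps an empty list to an empty dict (its body is outside this diff), the expense pipeline then degrades to an empty-but-well-formed payload rather than raising:

```python
# All loops in prepare_expense_dataset simply never run on empty input.
prepare_expense_dataset([])
# -> {'operatingExpenses': {'names': [], 'history': []}}
```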
@@ -230,6 +257,7 @@ async def run():
     #total_symbols = ['TSLA'] # For testing purposes
     con.close()
 
+
     async with aiohttp.ClientSession() as session:
         await get_data(session, total_symbols)
 