update metrics

MuslemRahimi 2024-10-22 22:35:10 +02:00
parent 0a2698509a
commit d03b7a58ee
2 changed files with 29 additions and 27 deletions

View File

@@ -52,10 +52,10 @@ def extract_sector(ticker, con):
         symbol = ?
     """
     try:
         df = pd.read_sql_query(query_template, con, params=(ticker,))
         sector = df['sector'].iloc[0]
     except:
         sector = None
     return sector

View File

@@ -6,6 +6,7 @@ from datetime import datetime
 from collections import defaultdict
 import re
 #Tell the SEC who you are
 set_identity("Max Mustermann max.mustermann@indigo.com")
@@ -370,12 +371,11 @@ def generate_revenue_dataset(dataset):
     return final_result
-def run(symbol):
+def process_filings(filings, symbol):
     revenue_sources = []
     geography_sources = []
-    filings = Company(symbol).get_filings(form=["10-Q","10-K"]).latest(20)
-    #print(filings[0].xbrl())
     for i in range(0,17):
         try:
@@ -383,7 +383,6 @@ def run(symbol):
             facts = filing_xbrl.facts.data
             latest_rows = facts.groupby('dimensions').head(1)
             for index, row in latest_rows.iterrows():
                 dimensions_str = row.get("dimensions", "{}")
                 try:
@@ -391,21 +390,19 @@ def run(symbol):
                 except (ValueError, SyntaxError):
                     dimensions_dict = {}
                 for column_name in [
                     "srt:StatementGeographicalAxis",
                     "us-gaap:StatementBusinessSegmentsAxis",
                     "srt:ProductOrServiceAxis",
                 ]:
                     product_dimension = dimensions_dict.get(column_name) if isinstance(dimensions_dict, dict) else None
-                    # Check if the namespace is 'us-gaap' and product_dimension is valid
                     if row["namespace"] == "us-gaap" and product_dimension is not None and (
                         product_dimension.startswith(symbol.lower() + ":") or
                         product_dimension.startswith("country" + ":") or
                         product_dimension.startswith("us-gaap"+":") or
                         product_dimension.startswith("srt"+":")
                     ):
                         replacements = {
                             "Member": "",
                             "VideoGameAccessories": "HardwareAndAccessories",
@@ -416,39 +413,44 @@ def run(symbol):
                             "srt:": "",
                             "SegmentMember": "",
                         }
                         name = product_dimension
                         for old, new in replacements.items():
                             name = name.replace(old, new)
-                        # Determine the target list and the name transformation logic
                         if symbol in ['SAVE','BA','NFLX','LLY','MSFT','META','NVDA','AAPL','GME']:
                             column_list = ["srt:ProductOrServiceAxis"]
                         else:
                             column_list = ["srt:ProductOrServiceAxis", "us-gaap:StatementBusinessSegmentsAxis"]
                         if column_name in column_list:
                             revenue_sources.append({"name": name, "value": row["value"], "date": row["end_date"]})
                         else:
                             geography_sources.append({"name": name, "value": row["value"], "date": row["end_date"]})
         except Exception as e:
             print(e)
+    return revenue_sources, geography_sources
+def run(symbol):
+    # First try with 10-Q filings only
+    filings = Company(symbol).get_filings(form=["10-Q"]).latest(20)
+    revenue_sources, geography_sources = process_filings(filings, symbol)
+    # If no geography sources found, try with 10-K filings
+    if not geography_sources:
+        print(f"No geography sources found in 10-Q for {symbol}, checking 10-K filings...")
+        filings_10k = Company(symbol).get_filings(form=["10-K"]).latest(20)
+        _, geography_sources = process_filings(filings_10k, symbol)
     print(geography_sources)
     revenue_dataset = generate_revenue_dataset(revenue_sources)
     geographic_dataset = generate_geography_dataset(geography_sources)
     final_dataset = {'revenue': revenue_dataset, 'geographic': geographic_dataset}
     with open(f"json/business-metrics/{symbol}.json", "w") as file:
         ujson.dump(final_dataset, file)
 if __name__ == "__main__":
-    for symbol in ['AMD','SAVE','BA','ADBE','NFLX','PLTR','MSFT','META','TSLA','NVDA','AAPL','GME']:
+    for symbol in []: #['SAVE','BA','ADBE','NFLX','PLTR','MSFT','META','TSLA','NVDA','AAPL','GME']:
+        #for AMD, SAVE we need 10-K form to get geography revenue
         run(symbol)
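
Note for reviewers: the extraction loop above keys everything off the dimensions column of the XBRL facts table. Below is a minimal, self-contained sketch of that dimension-parsing and member-name cleanup step, not part of this commit. The (ValueError, SyntaxError) guard in the diff suggests ast.literal_eval is used to parse the dimensions string, but that call is not visible here, so treat it as an assumption; the clean_member_name helper and the sample dimensions string are hypothetical, and REPLACEMENTS is only the subset of the real replacements dict visible in this diff.

import ast

# Subset of the replacements mapping shown in the diff; the full dict has more entries.
REPLACEMENTS = {
    "Member": "",
    "srt:": "",
    "SegmentMember": "",
}

def clean_member_name(product_dimension):
    # Hypothetical helper mirroring the sequential str.replace calls in the diff.
    name = product_dimension
    for old, new in REPLACEMENTS.items():
        name = name.replace(old, new)
    return name

# Hypothetical dimensions cell as it might appear in filing_xbrl.facts.data:
# a dict serialized to text, hence literal_eval and the (ValueError, SyntaxError) guard.
dimensions_str = "{'srt:StatementGeographicalAxis': 'srt:AmericasMember'}"
try:
    dimensions_dict = ast.literal_eval(dimensions_str)
except (ValueError, SyntaxError):
    dimensions_dict = {}

member = dimensions_dict.get("srt:StatementGeographicalAxis")
if member:
    print(clean_member_name(member))  # prints "Americas"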