From d03b7a58ee7344a45bc436184dd7c102a65006e9 Mon Sep 17 00:00:00 2001 From: MuslemRahimi Date: Tue, 22 Oct 2024 22:35:10 +0200 Subject: [PATCH] update metrics --- app/cron_analyst_db.py | 8 +++--- app/cron_business_metrics.py | 48 +++++++++++++++++++----------------- 2 files changed, 29 insertions(+), 27 deletions(-) diff --git a/app/cron_analyst_db.py b/app/cron_analyst_db.py index efebaf4..ecfc656 100755 --- a/app/cron_analyst_db.py +++ b/app/cron_analyst_db.py @@ -42,7 +42,7 @@ def remove_duplicates(data, key): return new_data def extract_sector(ticker, con): - + query_template = f""" SELECT sector @@ -52,10 +52,10 @@ def extract_sector(ticker, con): symbol = ? """ try: - df = pd.read_sql_query(query_template, con, params=(ticker,)) - sector = df['sector'].iloc[0] + df = pd.read_sql_query(query_template, con, params=(ticker,)) + sector = df['sector'].iloc[0] except: - sector = None + sector = None return sector diff --git a/app/cron_business_metrics.py b/app/cron_business_metrics.py index e30d95a..90681f9 100644 --- a/app/cron_business_metrics.py +++ b/app/cron_business_metrics.py @@ -6,6 +6,7 @@ from datetime import datetime from collections import defaultdict import re + #Tell the SEC who you are set_identity("Max Mustermann max.mustermann@indigo.com") @@ -370,42 +371,38 @@ def generate_revenue_dataset(dataset): return final_result -def run(symbol): + +def process_filings(filings, symbol): revenue_sources = [] geography_sources = [] - filings = Company(symbol).get_filings(form=["10-Q","10-K"]).latest(20) - #print(filings[0].xbrl()) - + for i in range(0,17): try: filing_xbrl = filings[i].xbrl() facts = filing_xbrl.facts.data latest_rows = facts.groupby('dimensions').head(1) - - + for index, row in latest_rows.iterrows(): dimensions_str = row.get("dimensions", "{}") try: dimensions_dict = ast.literal_eval(dimensions_str) if isinstance(dimensions_str, str) else dimensions_str except (ValueError, SyntaxError): dimensions_dict = {} - - + for column_name in [ "srt:StatementGeographicalAxis", "us-gaap:StatementBusinessSegmentsAxis", "srt:ProductOrServiceAxis", ]: product_dimension = dimensions_dict.get(column_name) if isinstance(dimensions_dict, dict) else None - # Check if the namespace is 'us-gaap' and product_dimension is valid + if row["namespace"] == "us-gaap" and product_dimension is not None and ( product_dimension.startswith(symbol.lower() + ":") or product_dimension.startswith("country" + ":") or product_dimension.startswith("us-gaap"+":") or product_dimension.startswith("srt"+":") ): - replacements = { "Member": "", "VideoGameAccessories": "HardwareAndAccessories", @@ -416,39 +413,44 @@ def run(symbol): "srt:": "", "SegmentMember": "", } - - name = product_dimension - for old, new in replacements.items(): name = name.replace(old, new) - # Determine the target list and the name transformation logic + if symbol in ['SAVE','BA','NFLX','LLY','MSFT','META','NVDA','AAPL','GME']: column_list = ["srt:ProductOrServiceAxis"] else: column_list = ["srt:ProductOrServiceAxis", "us-gaap:StatementBusinessSegmentsAxis"] - - if column_name in column_list: + if column_name in column_list: revenue_sources.append({"name": name, "value": row["value"], "date": row["end_date"]}) - else: geography_sources.append({"name": name, "value": row["value"], "date": row["end_date"]}) - - - + except Exception as e: print(e) + + return revenue_sources, geography_sources +def run(symbol): + # First try with 10-Q filings only + filings = Company(symbol).get_filings(form=["10-Q"]).latest(20) + revenue_sources, geography_sources = process_filings(filings, symbol) + + # If no geography sources found, try with 10-K filings + if not geography_sources: + print(f"No geography sources found in 10-Q for {symbol}, checking 10-K filings...") + filings_10k = Company(symbol).get_filings(form=["10-K"]).latest(20) + _, geography_sources = process_filings(filings_10k, symbol) + print(geography_sources) revenue_dataset = generate_revenue_dataset(revenue_sources) geographic_dataset = generate_geography_dataset(geography_sources) final_dataset = {'revenue': revenue_dataset, 'geographic': geographic_dataset} + with open(f"json/business-metrics/{symbol}.json", "w") as file: ujson.dump(final_dataset, file) if __name__ == "__main__": - - for symbol in []: #['SAVE','BA','ADBE','NFLX','PLTR','MSFT','META','TSLA','NVDA','AAPL','GME']: - #for AMD, SAVE we need 10-K form to get geography revenue + for symbol in ['AMD','SAVE','BA','ADBE','NFLX','PLTR','MSFT','META','TSLA','NVDA','AAPL','GME']: run(symbol) \ No newline at end of file