From e17316e8d6d0d25f26433c2e106817a943061767 Mon Sep 17 00:00:00 2001 From: MuslemRahimi Date: Fri, 21 Feb 2025 23:36:14 +0100 Subject: [PATCH] update consensus rating --- app/cron_analyst_ticker.py | 200 +++++++++++++++---------------------- 1 file changed, 78 insertions(+), 122 deletions(-) diff --git a/app/cron_analyst_ticker.py b/app/cron_analyst_ticker.py index 1da24a4..dd13798 100755 --- a/app/cron_analyst_ticker.py +++ b/app/cron_analyst_ticker.py @@ -67,35 +67,32 @@ def get_all_analyst_summary(res_list): # Get the latest summary of ratings from the last 12 months end_date = date.today() - # Filter the data for the last 12 months and consider the last N ratings - #Furthermore consider only the last rating of the analyst if he provided multiple in the last 12 months - #filtered data is needed for the recommendation list - filtered_data = [item for item in res_list if start_date_12m <= datetime.strptime(item['date'], '%Y-%m-%d').date() <= end_date] - #unique list is needed for analyst summary rating + # Filter data to include only ratings within the last 12 months + filtered_data = [ + item for item in res_list + if start_date_12m <= datetime.strptime(item['date'], '%Y-%m-%d').date() <= end_date + ] + # Use only the latest rating per analyst and limit to 60 entries unique_filtered_data = filter_latest_entries(filtered_data)[:60] - # Initialize dictionary to store the latest price target for each analyst + # Collect the latest price target for each analyst latest_pt_current = defaultdict(list) - - # Iterate through the filtered data to collect pt_current for each analyst for item in unique_filtered_data: if 'adjusted_pt_current' in item and item['adjusted_pt_current']: analyst_name = item['analyst_name'] try: pt_current_value = float(item['adjusted_pt_current']) - # Collect all pt_current values for each analyst latest_pt_current[analyst_name].append(pt_current_value) except (ValueError, TypeError): print(f"Invalid pt_current value for analyst '{analyst_name}': {item['adjusted_pt_current']}") # Compute statistics for price targets pt_current_values = [val for sublist in latest_pt_current.values() for val in sublist] - #remove outliers to keep high and low price target reasonable + # Remove outliers using the IQR method q1, q3 = np.percentile(pt_current_values, [25, 75]) iqr = q3 - q1 pt_current_values = [x for x in pt_current_values if (q1 - 1.5 * iqr) <= x <= (q3 + 1.5 * iqr)] - # Compute different price target metrics if there are values, otherwise set to 0 if pt_current_values: median_pt_current = statistics.median(pt_current_values) avg_pt_current = statistics.mean(pt_current_values) @@ -104,80 +101,60 @@ def get_all_analyst_summary(res_list): else: median_pt_current = avg_pt_current = low_pt_current = high_pt_current = 0 - # Initialize recommendation tracking + # Define rating hierarchy for conversion rating_hierarchy = {'Strong Sell': 0, 'Sell': 1, 'Hold': 2, 'Buy': 3, 'Strong Buy': 4} - # Track monthly recommendations + # Track monthly recommendations for visualization monthly_recommendations = {} - - # Iterate through the filtered data to track monthly recommendations for item in filtered_data: - # Extract month from the date item_date = datetime.strptime(item['date'], '%Y-%m-%d') month_key = item_date.strftime('%Y-%m-01') - - # Initialize month's recommendation counts if not exists if month_key not in monthly_recommendations: - monthly_recommendations[month_key] = { - 'Strong Sell': 0, - 'Sell': 0, - 'Hold': 0, - 'Buy': 0, - 'Strong Buy': 0 - } - - # Check and increment recommendation count for the month + monthly_recommendations[month_key] = {key: 0 for key in rating_hierarchy.keys()} if 'rating_current' in item and item['rating_current'] in rating_hierarchy: monthly_recommendations[month_key][item['rating_current']] += 1 - # Convert monthly recommendations to a sorted list recommendation_list = [] for month in sorted(monthly_recommendations.keys()): month_data = monthly_recommendations[month] recommendation_list.append({ 'date': month, - 'Strong Sell': month_data['Strong Sell'], - 'Sell': month_data['Sell'], - 'Hold': month_data['Hold'], - 'Buy': month_data['Buy'], - 'Strong Buy': month_data['Strong Buy'] + **month_data }) - # Compute consensus ratings (similar to previous implementation) - consensus_ratings = defaultdict(str) + # Build a dictionary with the latest rating per analyst + consensus_ratings = {} for item in unique_filtered_data: - if 'rating_current' in item and item['rating_current'] and 'analyst_name' in item and item['analyst_name']: - try: - analyst_name = item['analyst_name'] - current_rating = item['rating_current'] - if current_rating in rating_hierarchy: - consensus_ratings[analyst_name] = current_rating - except: - pass + if item.get('rating_current') and item.get('analyst_name'): + current_rating = item['rating_current'] + if current_rating in rating_hierarchy: + consensus_ratings[item['analyst_name']] = current_rating + + # --- New Robust Consensus Rating Calculation --- + # Convert each valid rating into its numeric value + rating_values = [rating_hierarchy[r] for r in consensus_ratings.values() if r in rating_hierarchy] + if rating_values: + # Compute the median and round it to the nearest integer + consensus_numeric = round(statistics.median(rating_values)) + # Map the numeric consensus back to its corresponding rating string + inverse_rating_hierarchy = {v: k for k, v in rating_hierarchy.items()} + consensus_rating = inverse_rating_hierarchy.get(consensus_numeric, 'Hold') + else: + consensus_rating = 'Hold' + # ------------------------------------------------- - # Compute the consensus rating based on the most frequent rating among analysts - consensus_rating_counts = defaultdict(int) - for rating in consensus_ratings.values(): - consensus_rating_counts[rating] += 1 - consensus_rating = max(consensus_rating_counts, key=consensus_rating_counts.get) - - # Sum up all Buy, Sell, Hold for the progress bar in sveltekit - data_dict = dict(consensus_rating_counts) + # Build aggregated counts for Buy, Sell, and Hold (for the progress bar) + data_dict = {key: 0 for key in rating_hierarchy.keys()} + for r in consensus_ratings.values(): + data_dict[r] += 1 buy_total = data_dict.get('Strong Buy', 0) + data_dict.get('Buy', 0) sell_total = data_dict.get('Strong Sell', 0) + data_dict.get('Sell', 0) hold_total = data_dict.get('Hold', 0) # Count unique analysts - unique_analyst_names = set() numOfAnalyst = len(unique_filtered_data) - ''' - for item in filtered_data: - if item['analyst_name'] not in unique_analyst_names: - unique_analyst_names.add(item['analyst_name']) - numOfAnalyst += 1 - ''' - # Update stats dictionary with new keys including recommendationList + # Update stats dictionary with computed metrics and the recommendation list stats = { 'numOfAnalyst': numOfAnalyst, 'consensusRating': consensus_rating, @@ -193,40 +170,36 @@ def get_all_analyst_summary(res_list): res = {**stats, **categorical_ratings} return res + + def get_top_analyst_summary(res_list): # Get the latest summary of ratings from the last 12 months end_date = date.today() res_list = [item for item in res_list if item['analystScore'] >= 4] - # Filter the data for the last 12 months and consider the last N ratings - #Furthermore consider only the last rating of the analyst if he provided multiple in the last 12 months - #filtered data is needed for the recommendation list + + # Filter data to only include ratings from the last 12 months filtered_data = [item for item in res_list if start_date_12m <= datetime.strptime(item['date'], '%Y-%m-%d').date() <= end_date] - #unique list is needed for analyst summary rating + # Ensure only the latest rating per analyst is used unique_filtered_data = filter_latest_entries(filtered_data) - print(unique_filtered_data) - # Initialize dictionary to store the latest price target for each analyst + # Collect the latest price target for each analyst latest_pt_current = defaultdict(list) - - # Iterate through the filtered data to collect pt_current for each analyst for item in unique_filtered_data: if 'adjusted_pt_current' in item and item['adjusted_pt_current']: analyst_name = item['analyst_name'] try: pt_current_value = float(item['adjusted_pt_current']) - # Collect all pt_current values for each analyst latest_pt_current[analyst_name].append(pt_current_value) except (ValueError, TypeError): print(f"Invalid pt_current value for analyst '{analyst_name}': {item['adjusted_pt_current']}") - # Compute statistics for price targets + # Compute statistics for price targets (removing outliers) pt_current_values = [val for sublist in latest_pt_current.values() for val in sublist] - #remove outliers to keep high and low price target reasonable - q1, q3 = np.percentile(pt_current_values, [25, 75]) - iqr = q3 - q1 - pt_current_values = [x for x in pt_current_values if (q1 - 1.5 * iqr) <= x <= (q3 + 1.5 * iqr)] - - # Compute different price target metrics if there are values, otherwise set to 0 + if pt_current_values: + q1, q3 = np.percentile(pt_current_values, [25, 75]) + iqr = q3 - q1 + pt_current_values = [x for x in pt_current_values if (q1 - 1.5 * iqr) <= x <= (q3 + 1.5 * iqr)] + if pt_current_values: median_pt_current = statistics.median(pt_current_values) avg_pt_current = statistics.mean(pt_current_values) @@ -235,75 +208,60 @@ def get_top_analyst_summary(res_list): else: median_pt_current = avg_pt_current = low_pt_current = high_pt_current = 0 - # Initialize recommendation tracking + # Define the rating hierarchy rating_hierarchy = {'Strong Sell': 0, 'Sell': 1, 'Hold': 2, 'Buy': 3, 'Strong Buy': 4} - # Track monthly recommendations + # Track monthly recommendations for visualization monthly_recommendations = {} - - # Iterate through the filtered data to track monthly recommendations for item in filtered_data: - # Extract month from the date item_date = datetime.strptime(item['date'], '%Y-%m-%d') month_key = item_date.strftime('%Y-%m-01') - - # Initialize month's recommendation counts if not exists if month_key not in monthly_recommendations: - monthly_recommendations[month_key] = { - 'Strong Sell': 0, - 'Sell': 0, - 'Hold': 0, - 'Buy': 0, - 'Strong Buy': 0 - } + monthly_recommendations[month_key] = {key: 0 for key in rating_hierarchy.keys()} - # Check and increment recommendation count for the month if 'rating_current' in item and item['rating_current'] in rating_hierarchy: monthly_recommendations[month_key][item['rating_current']] += 1 - - # Convert monthly recommendations to a sorted list + recommendation_list = [] for month in sorted(monthly_recommendations.keys()): month_data = monthly_recommendations[month] recommendation_list.append({ 'date': month, - 'Strong Sell': month_data['Strong Sell'], - 'Sell': month_data['Sell'], - 'Hold': month_data['Hold'], - 'Buy': month_data['Buy'], - 'Strong Buy': month_data['Strong Buy'] + **month_data }) - # Compute consensus ratings (similar to previous implementation) - consensus_ratings = defaultdict(str) + # Build a dictionary with the latest rating per analyst + consensus_ratings = {} for item in unique_filtered_data: - if 'rating_current' in item and item['rating_current'] and 'analyst_name' in item and item['analyst_name']: - try: - analyst_name = item['analyst_name'] - current_rating = item['rating_current'] - if current_rating in rating_hierarchy: - consensus_ratings[analyst_name] = current_rating - except: - pass + if item.get('rating_current') and item.get('analyst_name'): + current_rating = item['rating_current'] + if current_rating in rating_hierarchy: + consensus_ratings[item['analyst_name']] = current_rating + + # --- New Robust Consensus Rating Calculation --- + # Convert each valid rating into its numeric score and compute the median + rating_values = [rating_hierarchy[r] for r in consensus_ratings.values() if r in rating_hierarchy] + if rating_values: + consensus_numeric = round(statistics.median(rating_values)) + # Map the numeric consensus back to its corresponding rating string + inverse_rating_hierarchy = {v: k for k, v in rating_hierarchy.items()} + consensus_rating = inverse_rating_hierarchy.get(consensus_numeric, 'Hold') + else: + consensus_rating = 'Hold' + # ------------------------------------------------- - # Compute the consensus rating based on the most frequent rating among analysts - consensus_rating_counts = defaultdict(int) - for rating in consensus_ratings.values(): - consensus_rating_counts[rating] += 1 - consensus_rating = max(consensus_rating_counts, key=consensus_rating_counts.get) - - # Sum up all Buy, Sell, Hold for the progress bar in sveltekit - data_dict = dict(consensus_rating_counts) + # Sum up the recommendation counts for Buy, Sell, and Hold for progress bar purposes + data_dict = {key: 0 for key in rating_hierarchy.keys()} + for r in consensus_ratings.values(): + data_dict[r] += 1 buy_total = data_dict.get('Strong Buy', 0) + data_dict.get('Buy', 0) sell_total = data_dict.get('Strong Sell', 0) + data_dict.get('Sell', 0) hold_total = data_dict.get('Hold', 0) - # Count unique analysts - unique_analyst_names = set() + # Count the unique analysts used in the unique filtered data numOfAnalyst = len(unique_filtered_data) - - # Update stats dictionary with new keys including recommendationList + # Prepare the stats dictionary with all the computed values stats = { 'numOfAnalyst': numOfAnalyst, 'consensusRating': consensus_rating, @@ -315,7 +273,6 @@ def get_top_analyst_summary(res_list): } categorical_ratings = {'Buy': buy_total, 'Sell': sell_total, 'Hold': hold_total} - res = {**stats, **categorical_ratings} return res @@ -465,7 +422,6 @@ def run(chunk, analyst_list, con): print(e) - try: con = sqlite3.connect('stocks.db') stock_cursor = con.cursor() @@ -479,7 +435,7 @@ try: chunk_size = len(stock_symbols) // 300 # Divide the list into N chunks chunks = [stock_symbols[i:i + chunk_size] for i in range(0, len(stock_symbols), chunk_size)] - chunks = [['AAPL']] + #chunks = [['AAPL']] for chunk in chunks: run(chunk, analyst_stats_list, con)