From 6577085c0b0be1d3c6c9fdd7b520b659806cdd93 Mon Sep 17 00:00:00 2001
From: MuslemRahimi
Date: Sun, 28 Jul 2024 16:45:32 +0200
Subject: [PATCH] update cron job

---
 app/cron_reddit_statistics.py | 70 ++++++++++++++++++++++++++---------
 1 file changed, 53 insertions(+), 17 deletions(-)

diff --git a/app/cron_reddit_statistics.py b/app/cron_reddit_statistics.py
index a67e4c7..0747b11 100644
--- a/app/cron_reddit_statistics.py
+++ b/app/cron_reddit_statistics.py
@@ -7,6 +7,14 @@ from collections import defaultdict
 import os
 from dotenv import load_dotenv
 import sqlite3
+import nltk
+from nltk.sentiment import SentimentIntensityAnalyzer
+
+# Download required NLTK data
+nltk.download('vader_lexicon', quiet=True)
+
+# Initialize the NLTK sentiment analyzer
+sia = SentimentIntensityAnalyzer()
 
 con = sqlite3.connect('stocks.db')
 
@@ -24,7 +32,6 @@ etf_symbols = [row[0] for row in etf_cursor.fetchall()]
 con.close()
 etf_con.close()
 
-
 load_dotenv()
 client_key = os.getenv('REDDIT_API_KEY')
 client_secret = os.getenv('REDDIT_API_SECRET')
@@ -37,12 +44,6 @@ reddit = praw.Reddit(
     user_agent=user_agent
 )
 
-# Get subscriber count and active user count
-#subreddit = reddit.subreddit("wallstreetbets")
-#subscriber_count = subreddit.subscribers
-#active_user_count = subreddit.active_user_count
-
-
 # Function to save data
 def save_data(data, filename):
     with open(f'json/reddit-tracker/wallstreetbets/{filename}', 'w', encoding='utf-8') as f:
@@ -57,12 +58,14 @@ def compute_daily_statistics(file_path):
     daily_stats = defaultdict(lambda: {
         'post_count': 0,
         'total_comments': 0,
-        'ticker_mentions': defaultdict(int),
+        'ticker_mentions': defaultdict(lambda: {'total': 0, 'PUT': 0, 'CALL': 0, 'sentiment': []}),
         'unique_tickers': set()
     })
 
-    # Compile regex pattern for finding tickers
+    # Compile regex patterns for finding tickers, PUT, and CALL
     ticker_pattern = re.compile(r'\$([A-Z]+)')
+    put_pattern = re.compile(r'\b(PUT|PUTS)\b', re.IGNORECASE)
+    call_pattern = re.compile(r'\b(CALL|CALLS)\b', re.IGNORECASE)
 
     # Process each post
     for post in data:
@@ -77,9 +80,23 @@ def compute_daily_statistics(file_path):
             text_to_search = post['title'] + ' ' + post['selftext']
             tickers = ticker_pattern.findall(text_to_search)
 
+            # Check for PUT and CALL mentions
+            put_mentions = len(put_pattern.findall(text_to_search))
+            call_mentions = len(call_pattern.findall(text_to_search))
+
+            # Perform sentiment analysis
+            sentiment_scores = sia.polarity_scores(text_to_search)
+
             for ticker in tickers:
-                daily_stats[post_date]['ticker_mentions'][ticker] += 1
+                daily_stats[post_date]['ticker_mentions'][ticker]['total'] += 1
                 daily_stats[post_date]['unique_tickers'].add(ticker)
+
+                # Add PUT and CALL counts
+                daily_stats[post_date]['ticker_mentions'][ticker]['PUT'] += put_mentions
+                daily_stats[post_date]['ticker_mentions'][ticker]['CALL'] += call_mentions
+
+                # Add sentiment score
+                daily_stats[post_date]['ticker_mentions'][ticker]['sentiment'].append(sentiment_scores['compound'])
 
     # Calculate averages and format the results
     formatted_stats = []
@@ -88,26 +105,45 @@ def compute_daily_statistics(file_path):
             'date': date.isoformat(),
             'totalPosts': stats['post_count'],
             'totalComments': stats['total_comments'],
-            'totalMentions': sum(stats['ticker_mentions'].values()),
+            'totalMentions': sum(mentions['total'] for mentions in stats['ticker_mentions'].values()),
             'companySpread': len(stats['unique_tickers']),
-            'tickerMentions': dict(stats['ticker_mentions'])  # Optional: include detailed ticker mentions
+            'tickerMentions': [
+                {
+                    'symbol': ticker,
+                    'count': mentions['total'],
+                    'put': mentions['PUT'],
+                    'call': mentions['CALL']
+                }
+                for ticker, mentions in stats['ticker_mentions'].items()
+            ]
         })
 
     return formatted_stats, daily_stats
 
-# Function to compute trending tickers
 def compute_trending_tickers(daily_stats):
     today = datetime.now().date()
     seven_days_ago = today - timedelta(days=14)
-    trending = defaultdict(int)
+    trending = defaultdict(lambda: {'total': 0, 'PUT': 0, 'CALL': 0, 'sentiment': []})
 
     for date, stats in daily_stats.items():
         if seven_days_ago <= date <= today:
-            for ticker, count in stats['ticker_mentions'].items():
-                trending[ticker] += count
+            for ticker, counts in stats['ticker_mentions'].items():
+                trending[ticker]['total'] += counts['total']
+                trending[ticker]['PUT'] += counts['PUT']
+                trending[ticker]['CALL'] += counts['CALL']
+                trending[ticker]['sentiment'].extend(counts['sentiment'])
 
-    trending_list = [{'symbol': symbol, 'count': count} for symbol, count in trending.items()]
+    trending_list = [
+        {
+            'symbol': symbol,
+            'count': counts['total'],
+            'put': counts['PUT'],
+            'call': counts['CALL'],
+            'avgSentiment': sum(counts['sentiment']) / len(counts['sentiment']) if counts['sentiment'] else 0
+        }
+        for symbol, counts in trending.items()
+    ]
 
     trending_list.sort(key=lambda x: x['count'], reverse=True)
 
     for item in trending_list:
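
A quick note on the sentiment path this patch wires in: VADER's polarity_scores() returns 'neg', 'neu', 'pos', and a normalized 'compound' score in [-1, 1]; compute_daily_statistics() appends one compound score per post to each mentioned ticker, and compute_trending_tickers() averages those into 'avgSentiment'. Below is a minimal, self-contained sketch of that flow; the sample post texts are invented for illustration and are not from the job's data.

    import nltk
    from nltk.sentiment import SentimentIntensityAnalyzer

    nltk.download('vader_lexicon', quiet=True)
    sia = SentimentIntensityAnalyzer()

    # Hypothetical post bodies that all mention the same ticker
    posts = [
        "$NVDA calls printing, this rally looks unstoppable",
        "Loaded up on $NVDA puts, earnings will disappoint",
    ]

    # One compound score per post, as appended in compute_daily_statistics()
    scores = [sia.polarity_scores(text)['compound'] for text in posts]

    # Averaged the same way compute_trending_tickers() builds 'avgSentiment'
    avg_sentiment = sum(scores) / len(scores) if scores else 0
    print(scores, round(avg_sentiment, 4))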