70 lines
2.6 KiB
Python
70 lines
2.6 KiB
Python
import json
import re
from collections import Counter, defaultdict
from datetime import datetime, timezone

import requests
|
|
|
|
def get_subscriber_count(subreddit="wallstreetbets", timeout=10):
    """Return the subscriber count reported by a subreddit's newest listing.

    The count is read from the ``subreddit_subscribers`` field of the first
    post in ``/r/<subreddit>/new.json``.

    Args:
        subreddit: Name of the subreddit to query.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The subscriber count, or ``None`` when the request fails, the
        response status is not 200, or the payload does not contain the
        expected field (for example, when the listing is empty).
    """
    url = f"https://www.reddit.com/r/{subreddit}/new.json"
    # A browser-like User-agent is sent with the request.
    headers = {'User-agent': 'Mozilla/5.0'}

    try:
        # Without a timeout, requests may block forever on a stalled socket.
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException:
        return None

    if response.status_code != 200:
        return None

    try:
        payload = response.json()
        return payload['data']['children'][0]['data']['subreddit_subscribers']
    except (ValueError, LookupError, TypeError):
        # ValueError: body is not JSON; LookupError/TypeError: listing is
        # empty or shaped differently than expected.
        return None
|
|
|
|
def compute_daily_statistics(file_path, subscriber_count=None):
    """Aggregate per-day post, comment and ticker statistics from a JSON dump.

    The file at ``file_path`` must contain a JSON array of post objects. Each
    post needs a ``created_utc`` POSIX timestamp; ``num_comments``, ``title``
    and ``selftext`` are treated as 0 / empty when missing or null. Tickers
    are upper-case words prefixed with ``$`` (e.g. ``$GME``) found in the
    title or the selftext.

    Args:
        file_path: Path to the UTF-8 encoded JSON file of posts.
        subscriber_count: Value to report as ``subscribersCount`` for every
            day. When ``None`` (the default) it is fetched once via
            ``get_subscriber_count()``, which may itself return ``None``.

    Returns:
        A list of dicts, one per UTC calendar day, newest day first, with the
        keys ``date``, ``totalPosts``, ``totalComments``,
        ``subscribersCount``, ``totalMentions``, ``companySpread`` and
        ``tickerMentions``.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        posts = json.load(f)

    if subscriber_count is None:
        subscriber_count = get_subscriber_count()

    ticker_pattern = re.compile(r'\$([A-Z]+)')

    # One Counter per day holds both the mention totals and, through its
    # keys, the set of distinct tickers.
    daily_stats = defaultdict(lambda: {
        'post_count': 0,
        'total_comments': 0,
        'ticker_mentions': Counter(),
    })

    for post in posts:
        # Timezone-aware conversion; datetime.utcfromtimestamp() is
        # deprecated since Python 3.12.
        post_date = datetime.fromtimestamp(
            post['created_utc'], tz=timezone.utc
        ).date()

        stats = daily_stats[post_date]
        stats['post_count'] += 1
        stats['total_comments'] += post.get('num_comments') or 0

        # `or ''` guards against posts whose title/selftext is null or absent.
        text_to_search = f"{post.get('title') or ''} {post.get('selftext') or ''}"
        stats['ticker_mentions'].update(ticker_pattern.findall(text_to_search))

    formatted_stats = []
    for date in sorted(daily_stats, reverse=True):
        mentions = daily_stats[date]['ticker_mentions']
        formatted_stats.append({
            'date': date.isoformat(),
            'totalPosts': daily_stats[date]['post_count'],
            'totalComments': daily_stats[date]['total_comments'],
            'subscribersCount': subscriber_count,
            'totalMentions': sum(mentions.values()),
            'companySpread': len(mentions),
            'tickerMentions': dict(mentions),
        })

    return formatted_stats
|
|
|
|
# Usage: compute the per-day statistics for the stored r/wallstreetbets
# dump and print them to stdout as indented JSON.
file_path = 'json/reddit-tracker/wallstreetbets/data.json'
daily_statistics = compute_daily_statistics(file_path)
print(json.dumps(daily_statistics, indent=2))