import json
import os
import re
import sqlite3
from collections import defaultdict
from datetime import datetime, timedelta, timezone

import nltk
import praw
from dotenv import load_dotenv
from nltk.sentiment import SentimentIntensityAnalyzer

# Download required NLTK data (the VADER lexicon)
nltk.download('vader_lexicon', quiet=True)

# Initialize the NLTK sentiment analyzer
sia = SentimentIntensityAnalyzer()
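
# Illustrative note: polarity_scores returns a dict with 'neg', 'neu', 'pos',
# and 'compound' keys; only the normalized 'compound' score in [-1, 1] is
# aggregated per ticker below, e.g.
#   sia.polarity_scores("I love $GME")['compound']  # positive (example input, not real data)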

# Load all known stock symbols from the local SQLite database
con = sqlite3.connect('stocks.db')
cursor = con.cursor()
cursor.execute("PRAGMA journal_mode = wal")
cursor.execute("SELECT DISTINCT symbol FROM stocks")
stock_symbols = {row[0] for row in cursor.fetchall()}

# Load all known ETF symbols from a separate database
etf_con = sqlite3.connect('etf.db')
etf_cursor = etf_con.cursor()
etf_cursor.execute("PRAGMA journal_mode = wal")
etf_cursor.execute("SELECT DISTINCT symbol FROM etfs")
etf_symbols = {row[0] for row in etf_cursor.fetchall()}

# Union of both symbol sets; sets make the membership checks below O(1)
total_symbols = stock_symbols | etf_symbols
con.close()
etf_con.close()

# Load Reddit API credentials from a local .env file
load_dotenv()
client_key = os.getenv('REDDIT_API_KEY')
client_secret = os.getenv('REDDIT_API_SECRET')
user_agent = os.getenv('REDDIT_USER_AGENT')
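
# The .env file is assumed to define these three variables, e.g.:
#   REDDIT_API_KEY=<your client id>
#   REDDIT_API_SECRET=<your client secret>
#   REDDIT_USER_AGENT=<descriptive user agent string>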

# Initialize Reddit instance (not used below; this script only analyzes posts
# already saved to data.json, presumably by a separate scraping step)
reddit = praw.Reddit(
    client_id=client_key,
    client_secret=client_secret,
    user_agent=user_agent
)

# Save results as pretty-printed JSON under the wallstreetbets tracker directory
def save_data(data, filename):
    with open(f'json/reddit-tracker/wallstreetbets/{filename}', 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False, indent=2)

def compute_daily_statistics(file_path):
    # Load the scraped posts from the JSON file
    with open(file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)
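
    # Each post record is assumed to carry at least the fields used below
    # ('created_utc', 'num_comments', 'title', 'selftext'), i.e. the standard
    # Reddit submission attributes saved by the scraper.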

    # Initialize a defaultdict to store daily statistics
    daily_stats = defaultdict(lambda: {
        'post_count': 0,
        'total_comments': 0,
        'ticker_mentions': defaultdict(lambda: {'total': 0, 'PUT': 0, 'CALL': 0, 'sentiment': []}),
        'unique_tickers': set()
    })

    # Compile regex patterns for cashtag tickers and PUT/CALL keywords
    ticker_pattern = re.compile(r'\$([A-Z]+)')
    put_pattern = re.compile(r'\b(PUT|PUTS)\b', re.IGNORECASE)
    call_pattern = re.compile(r'\b(CALL|CALLS)\b', re.IGNORECASE)
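
    # Illustrative behavior of the patterns (assumed sample text):
    #   ticker_pattern.findall("YOLO $GME and $AMC calls")  -> ['GME', 'AMC']
    #   call_pattern.findall("buying calls")                -> ['calls']
    # Note the cashtag requirement: a bare "GME" without "$" is not counted.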

    # Process each post
    for post in data:
        # Convert the UTC timestamp to a calendar date to bucket posts by day
        post_date = datetime.fromtimestamp(post['created_utc'], tz=timezone.utc).date()

        # Update statistics for this day
        daily_stats[post_date]['post_count'] += 1
        daily_stats[post_date]['total_comments'] += post['num_comments']

        # Find ticker mentions in title and selftext
        text_to_search = post['title'] + ' ' + post['selftext']
        tickers = ticker_pattern.findall(text_to_search)

        # Count PUT and CALL mentions in the same text
        put_mentions = len(put_pattern.findall(text_to_search))
        call_mentions = len(call_pattern.findall(text_to_search))

        # Score the whole post once; the compound score is reused for each ticker
        sentiment_scores = sia.polarity_scores(text_to_search)

        for ticker in tickers:
            daily_stats[post_date]['ticker_mentions'][ticker]['total'] += 1
            daily_stats[post_date]['unique_tickers'].add(ticker)

            # Attribute the post's PUT/CALL counts to every ticker it mentions
            daily_stats[post_date]['ticker_mentions'][ticker]['PUT'] += put_mentions
            daily_stats[post_date]['ticker_mentions'][ticker]['CALL'] += call_mentions

            # Attach the post-level compound sentiment to this ticker
            daily_stats[post_date]['ticker_mentions'][ticker]['sentiment'].append(sentiment_scores['compound'])

    # Format the results, newest day first
    formatted_stats = []
    for date, stats in sorted(daily_stats.items(), reverse=True):
        formatted_stats.append({
            'date': date.isoformat(),
            'totalPosts': stats['post_count'],
            'totalComments': stats['total_comments'],
            'totalMentions': sum(mentions['total'] for mentions in stats['ticker_mentions'].values()),
            'companySpread': len(stats['unique_tickers']),
            'tickerMentions': [
                {
                    'symbol': ticker,
                    'count': mentions['total'],
                    'put': mentions['PUT'],
                    'call': mentions['CALL']
                }
                for ticker, mentions in stats['ticker_mentions'].items()
            ]
        })

    return formatted_stats, daily_stats
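
# For reference (illustrative values, not real output), each formatted_stats
# entry has the shape:
#   {"date": "2024-01-02", "totalPosts": 120, "totalComments": 4300,
#    "totalMentions": 310, "companySpread": 45, "tickerMentions": [...]}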

def compute_trending_tickers(daily_stats):
    # Aggregate ticker activity over the trailing 14-day window
    today = datetime.now().date()
    window_start = today - timedelta(days=14)

    trending = defaultdict(lambda: {'total': 0, 'PUT': 0, 'CALL': 0, 'sentiment': []})

    for date, stats in daily_stats.items():
        if window_start <= date <= today:
            for ticker, counts in stats['ticker_mentions'].items():
                trending[ticker]['total'] += counts['total']
                trending[ticker]['PUT'] += counts['PUT']
                trending[ticker]['CALL'] += counts['CALL']
                trending[ticker]['sentiment'].extend(counts['sentiment'])

    # Build the trending list, keeping only known stock/ETF symbols
    trending_list = [
        {
            'symbol': symbol,
            'count': counts['total'],
            'put': counts['PUT'],
            'call': counts['CALL'],
            'avgSentiment': round(sum(counts['sentiment']) / len(counts['sentiment']), 2) if counts['sentiment'] else 0
        }
        for symbol, counts in trending.items() if symbol in total_symbols
    ]
    trending_list.sort(key=lambda x: x['count'], reverse=True)

    # Enrich each trending symbol with name/price data from cached quote files
    for item in trending_list:
        symbol = item['symbol']
        try:
            with open(f'json/quote/{symbol}.json') as f:
                data = json.load(f)
            name = data['name']
            price = round(data['price'], 2)
            changes_percentage = round(data['changesPercentage'], 2)
        except Exception as e:
            print(e)
            name = None
            price = None
            changes_percentage = None
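
        # Each cached quote file is expected to look like (illustrative values):
        #   json/quote/GME.json -> {"name": "GameStop Corp.", "price": 25.10,
        #                           "changesPercentage": -1.23}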

        # Classify the symbol; the comprehension above guarantees it is in
        # stock_symbols or etf_symbols, so the empty fallback should not occur
        if symbol in stock_symbols:
            item['assetType'] = 'stocks'
        elif symbol in etf_symbols:
            item['assetType'] = 'etf'
        else:
            item['assetType'] = ''

        if item['assetType']:
            item['name'] = name
            item['price'] = price
            item['changesPercentage'] = changes_percentage

    return trending_list
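
# For reference (illustrative values, not real output), each trending entry
# ends up shaped like:
#   {"symbol": "GME", "count": 42, "put": 5, "call": 17, "avgSentiment": 0.31,
#    "assetType": "stocks", "name": "GameStop Corp.", "price": 25.10,
#    "changesPercentage": -1.23}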

# Usage: compute and save the daily statistics
file_path = 'json/reddit-tracker/wallstreetbets/data.json'
daily_statistics, daily_stats_dict = compute_daily_statistics(file_path)
save_data(daily_statistics, 'stats.json')

# Compute and save trending tickers
trending_tickers = compute_trending_tickers(daily_stats_dict)
save_data(trending_tickers, 'trending.json')