update reddit cron job

parent 343797b70c
commit ba912469b3
@@ -1,26 +1,37 @@
 import json
 import re
 import requests
+import praw
 from datetime import datetime
 from collections import defaultdict
+
+import os
+from dotenv import load_dotenv
 
-def get_subscriber_count():
-    url = "https://www.reddit.com/r/wallstreetbets/new.json"
-    headers = {'User-agent': 'Mozilla/5.0'}
-    response = requests.get(url, headers=headers)
-    if response.status_code == 200:
-        data = response.json()
-        return data['data']['children'][0]['data']['subreddit_subscribers']
-    return None
+load_dotenv()
+client_key = os.getenv('REDDIT_API_KEY')
+client_secret = os.getenv('REDDIT_API_SECRET')
+user_agent = os.getenv('REDDIT_USER_AGENT')
+
+# Initialize Reddit instance
+reddit = praw.Reddit(
+    client_id=client_key,
+    client_secret=client_secret,
+    user_agent=user_agent
+)
+
+# Function to save data
+def save_data(data):
+    with open('json/reddit-tracker/wallstreetbets/stats.json', 'w', encoding='utf-8') as f:
+        json.dump(data, f, ensure_ascii=False, indent=2)
 
 
 def compute_daily_statistics(file_path):
     # Load the data from the JSON file
     with open(file_path, 'r', encoding='utf-8') as f:
         data = json.load(f)
 
-    # Get current subscriber count
-    subscriber_count = get_subscriber_count()
-
     # Initialize a defaultdict to store daily statistics
     daily_stats = defaultdict(lambda: {
         'post_count': 0,
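Both scripts touched by this commit now pull their Reddit credentials from the environment via python-dotenv instead of hitting the public JSON endpoints anonymously. For the cron job to authenticate, a .env file next to the scripts has to define the three variables read above; a minimal example (placeholder values, reusing the user-agent string from the old code):

    # .env -- values are placeholders
    REDDIT_API_KEY=your_client_id
    REDDIT_API_SECRET=your_client_secret
    REDDIT_USER_AGENT=python:myapp:v1.0 (by /u/realstocknear)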
@@ -56,7 +67,6 @@ def compute_daily_statistics(file_path):
         'date': date.isoformat(),
         'totalPosts': stats['post_count'],
         'totalComments': stats['total_comments'],
-        'subscribersCount': subscriber_count,
         'totalMentions': sum(stats['ticker_mentions'].values()),
         'companySpread': len(stats['unique_tickers']),
         'tickerMentions': dict(stats['ticker_mentions'])  # Optional: include detailed ticker mentions
@@ -67,4 +77,4 @@ def compute_daily_statistics(file_path):
 # Usage
 file_path = 'json/reddit-tracker/wallstreetbets/data.json'
 daily_statistics = compute_daily_statistics(file_path)
-print(json.dumps(daily_statistics, indent=2))
+save_data(daily_statistics)
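With the statistics no longer printed but written to stats.json by the new save_data, each per-day record has the shape implied by the keys in the hunk above (subscribersCount is gone along with get_subscriber_count). Values here are illustrative only:

    {
        "date": "2024-07-01",
        "totalPosts": 120,
        "totalComments": 4321,
        "totalMentions": 850,
        "companySpread": 40,
        "tickerMentions": {"GME": 300, "NVDA": 150}
    }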
@@ -1,18 +1,18 @@
-import requests
+import praw
 import json
 from datetime import datetime
 import os
+from dotenv import load_dotenv
+import time
 
-# URL of the Reddit API endpoint
-url = "https://www.reddit.com/r/wallstreetbets/new.json"
+load_dotenv()
+client_key = os.getenv('REDDIT_API_KEY')
+client_secret = os.getenv('REDDIT_API_SECRET')
+user_agent = os.getenv('REDDIT_USER_AGENT')
+
 # File path for the JSON data
 file_path = 'json/reddit-tracker/wallstreetbets/data.json'
 
-headers = {
-    'User-Agent': 'python:myapp:v1.0 (by /u/realstocknear)'
-}
-
 
 # Ensure the directory exists
 os.makedirs(os.path.dirname(file_path), exist_ok=True)
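This collector script references load_existing_data and save_data, which sit between these hunks and are unchanged by the commit. For context, a loader consistent with how existing_data is used below would look roughly like this (a sketch, not the committed code):

    def load_existing_data():
        # Return the saved post list, or an empty list on the first run
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                return json.load(f)
        except (FileNotFoundError, json.JSONDecodeError):
            return []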
@@ -28,14 +28,12 @@ def save_data(data):
     with open(file_path, 'w', encoding='utf-8') as f:
         json.dump(data, f, ensure_ascii=False, indent=4)
 
-# Function to get updated post data
-def get_updated_post_data(permalink):
-    post_url = f"https://www.reddit.com{permalink}.json"
-    response = requests.get(post_url, headers=headers)
-    if response.status_code == 200:
-        post_data = response.json()[0]['data']['children'][0]['data']
-        return post_data
-    return None
+# Initialize Reddit instance
+reddit = praw.Reddit(
+    client_id=client_key,
+    client_secret=client_secret,
+    user_agent=user_agent
+)
 
 # Load existing data
 existing_data = load_existing_data()
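The per-permalink refresh helper is dropped because the authenticated PRAW client can fetch a single submission directly whenever a one-off refresh is needed. Not part of this commit, but the PRAW equivalent of the removed helper would be:

    # "abc123" is a placeholder submission id
    submission = reddit.submission(id="abc123")
    print(submission.upvote_ratio, submission.num_comments)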
@@ -43,52 +41,41 @@ existing_data = load_existing_data()
 # Create a dictionary of existing posts for faster lookup and update
 existing_posts = {post['id']: post for post in existing_data}
 
-# Send a GET request to the API
-response = requests.get(url, headers=headers)
-
-counter = 0
-# Check if the request was successful
-if response.status_code == 200:
-    # Parse the JSON data
-    data = response.json()
-
-    # Flag to check if any data was added or updated
-    data_changed = False
-
-    # Iterate through each post in the 'children' list
-    for post in data['data']['children']:
-        post_data = post['data']
-        post_id = post_data.get('id', '')
-
-        # Check if this post is already in our data
-        if post_id in existing_posts:
-            # Update existing post
-            if counter < 25:  # Only update the latest 25 posts to not overload the reddit server
-                updated_data = get_updated_post_data(post_data['permalink'])
-                if updated_data:
-                    existing_posts[post_id]['upvote_ratio'] = updated_data.get('upvote_ratio', existing_posts[post_id]['upvote_ratio'])
-                    existing_posts[post_id]['num_comments'] = updated_data.get('num_comments', existing_posts[post_id]['num_comments'])
-                    data_changed = True
-                counter += 1
-                print(counter)
-        else:
-            # Extract the required fields for new post
-            extracted_post = {
-                "id": post_id,
-                "permalink": post_data.get('permalink', ''),
-                "title": post_data.get('title', ''),
-                "selftext": post_data.get('selftext', ''),
-                "created_utc": post_data.get('created_utc', ''),
-                "upvote_ratio": post_data.get('upvote_ratio', ''),
-                "num_comments": post_data.get('num_comments', ''),
-                "link_flair_text": post_data.get('link_flair_text', ''),
-                "author": post_data.get('author', ''),
-            }
-
-            # Add the new post to the existing data
-            existing_posts[post_id] = extracted_post
-            data_changed = True
+# Flag to check if any data was added or updated
+data_changed = False
+
+# Get the subreddit
+subreddit = reddit.subreddit("wallstreetbets")
+
+# Iterate through new submissions
+for submission in subreddit.new(limit=1000):
+    post_id = submission.id
+    # Check if this post is already in our data
+    if post_id in existing_posts:
+        # Update existing post
+        existing_posts[post_id]['upvote_ratio'] = submission.upvote_ratio
+        existing_posts[post_id]['num_comments'] = submission.num_comments
+        data_changed = True
+    else:
+        # Extract the required fields for new post
+        extracted_post = {
+            "id": post_id,
+            "permalink": submission.permalink,
+            "title": submission.title,
+            "selftext": submission.selftext,
+            "created_utc": int(submission.created_utc),
+            "upvote_ratio": submission.upvote_ratio,
+            "num_comments": submission.num_comments,
+            "link_flair_text": submission.link_flair_text,
+            "author": str(submission.author),
+        }
+
+        # Add the new post to the existing data
+        existing_posts[post_id] = extracted_post
+        data_changed = True
+
+    time.sleep(1)  # Add a 1-second delay between processing submissions
 
 if data_changed:
     # Convert the dictionary back to a list and sort by created_utc
     updated_data = list(existing_posts.values())
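Two things worth noting about the new loop: with limit=1000 and a one-second sleep per submission, a full pass takes up to roughly 17 minutes, which bounds how often the cron job can usefully run; and every listed submission now gets its counts refreshed, not just the latest 25, since PRAW's listing already carries current upvote_ratio and num_comments without an extra request per post. The sort referenced in the final context line sits just below this hunk and is unchanged; presumably along these lines (a sketch, not the committed code):

    updated_data.sort(key=lambda p: p['created_utc'], reverse=True)
    save_data(updated_data)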
@@ -99,5 +86,3 @@ if response.status_code == 200:
     print(f"Data updated and saved to {file_path}")
 else:
     print("No new data to add or update.")
-else:
-    print(f"Failed to retrieve data. Status code: {response.status_code}")
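The removed outer else handled non-200 HTTP responses. PRAW surfaces failures as exceptions rather than status codes, so the commit drops that branch without replacement; if equivalent logging were wanted, a hypothetical guard could wrap the loop:

    try:
        for submission in subreddit.new(limit=1000):
            ...  # update/insert logic as above
    except Exception as e:  # e.g. a prawcore request/response error
        print(f"Failed to retrieve data: {e}")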
@@ -37,3 +37,4 @@ finnhub-python
 intrinio_sdk
 openai
 slowapi
+praw
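Finally, praw joins the dependency list, so a fresh deployment picks it up with the usual pip install -r requirements.txt.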