Commit 11a0619 (1 parent: 7100791)
Showing 2 changed files with 168 additions and 0 deletions.
@@ -0,0 +1,70 @@
import json
import re
import requests
from datetime import datetime
from collections import defaultdict


def get_subscriber_count():
    url = "https://www.reddit.com/r/wallstreetbets/new.json"
    headers = {'User-agent': 'Mozilla/5.0'}
    response = requests.get(url, headers=headers)
    if response.status_code == 200:
        data = response.json()
        return data['data']['children'][0]['data']['subreddit_subscribers']
    return None


def compute_daily_statistics(file_path):
    # Load the data from the JSON file
    with open(file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Get current subscriber count
    subscriber_count = get_subscriber_count()

    # Initialize a defaultdict to store daily statistics
    daily_stats = defaultdict(lambda: {
        'post_count': 0,
        'total_comments': 0,
        'ticker_mentions': defaultdict(int),
        'unique_tickers': set()
    })

    # Compile regex pattern for finding tickers (e.g. "$GME")
    ticker_pattern = re.compile(r'\$([A-Z]+)')

    # Process each post
    for post in data:
        # Convert UTC timestamp to datetime object
        post_date = datetime.utcfromtimestamp(post['created_utc']).date()

        # Update statistics for this day
        daily_stats[post_date]['post_count'] += 1
        daily_stats[post_date]['total_comments'] += post['num_comments']

        # Find ticker mentions in title and selftext
        text_to_search = post['title'] + ' ' + post['selftext']
        tickers = ticker_pattern.findall(text_to_search)

        for ticker in tickers:
            daily_stats[post_date]['ticker_mentions'][ticker] += 1
            daily_stats[post_date]['unique_tickers'].add(ticker)

    # Format the per-day results, newest day first
    formatted_stats = []
    for date, stats in sorted(daily_stats.items(), reverse=True):
        formatted_stats.append({
            'date': date.isoformat(),
            'totalPosts': stats['post_count'],
            'totalComments': stats['total_comments'],
            'subscribersCount': subscriber_count,
            'totalMentions': sum(stats['ticker_mentions'].values()),
            'companySpread': len(stats['unique_tickers']),
            'tickerMentions': dict(stats['ticker_mentions'])  # Optional: include detailed ticker mentions
        })

    return formatted_stats


# Usage
file_path = 'json/reddit-tracker/wallstreetbets/data.json'
daily_statistics = compute_daily_statistics(file_path)
print(json.dumps(daily_statistics, indent=2))
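Each entry returned by compute_daily_statistics is a plain dict, so it can be persisted directly instead of only printed. A minimal sketch, assuming the function above is importable and assuming a daily_stats.json output path (the path is a hypothetical choice, not part of this commit):

import json

# Hypothetical output path; this commit only defines the input data.json path.
output_path = 'json/reddit-tracker/wallstreetbets/daily_stats.json'

stats = compute_daily_statistics('json/reddit-tracker/wallstreetbets/data.json')
with open(output_path, 'w', encoding='utf-8') as f:
    json.dump(stats, f, indent=2)

# Each entry has this shape (values illustrative, not real data):
# {"date": "2024-01-15", "totalPosts": 42, "totalComments": 1370,
#  "subscribersCount": 15000000, "totalMentions": 58, "companySpread": 17,
#  "tickerMentions": {"GME": 21, "NVDA": 9}}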
@@ -0,0 +1,98 @@
import requests
import json
from datetime import datetime
import os

# URL of the Reddit API endpoint
url = "https://www.reddit.com/r/wallstreetbets/new.json"
# File path for the JSON data
file_path = 'json/reddit-tracker/wallstreetbets/data.json'

# Ensure the directory exists
os.makedirs(os.path.dirname(file_path), exist_ok=True)

# Function to load existing data
def load_existing_data():
    if os.path.exists(file_path):
        with open(file_path, 'r', encoding='utf-8') as f:
            return json.load(f)
    return []

# Function to save data
def save_data(data):
    with open(file_path, 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False, indent=4)

# Function to get updated post data
def get_updated_post_data(permalink):
    post_url = f"https://www.reddit.com{permalink}.json"
    response = requests.get(post_url, headers={'User-agent': 'Mozilla/5.0'})
    if response.status_code == 200:
        post_data = response.json()[0]['data']['children'][0]['data']
        return post_data
    return None

# Load existing data
existing_data = load_existing_data()

# Create a dictionary of existing posts for faster lookup and update
existing_posts = {post['id']: post for post in existing_data}

# Send a GET request to the API
response = requests.get(url, headers={'User-agent': 'Mozilla/5.0'})

counter = 0
# Check if the request was successful
if response.status_code == 200:
    # Parse the JSON data
    data = response.json()

    # Flag to check if any data was added or updated
    data_changed = False

    # Iterate through each post in the 'children' list
    for post in data['data']['children']:
        post_data = post['data']
        post_id = post_data.get('id', '')

        # Check if this post is already in our data
        if post_id in existing_posts:
            # Update existing post
            if counter < 25:  # Only update the latest 25 posts to avoid overloading the Reddit servers
                updated_data = get_updated_post_data(post_data['permalink'])
                if updated_data:
                    existing_posts[post_id]['upvote_ratio'] = updated_data.get('upvote_ratio', existing_posts[post_id]['upvote_ratio'])
                    existing_posts[post_id]['num_comments'] = updated_data.get('num_comments', existing_posts[post_id]['num_comments'])
                    data_changed = True
                counter += 1
                print(counter)
        else:
            # Extract the required fields for the new post
            extracted_post = {
                "id": post_id,
                "permalink": post_data.get('permalink', ''),
                "title": post_data.get('title', ''),
                "selftext": post_data.get('selftext', ''),
                "created_utc": post_data.get('created_utc', ''),
                "upvote_ratio": post_data.get('upvote_ratio', ''),
                "num_comments": post_data.get('num_comments', ''),
                "link_flair_text": post_data.get('link_flair_text', ''),
                "author": post_data.get('author', ''),
            }

            # Add the new post to the existing data
            existing_posts[post_id] = extracted_post
            data_changed = True

    if data_changed:
        # Convert the dictionary back to a list and sort by created_utc
        updated_data = list(existing_posts.values())
        updated_data.sort(key=lambda x: x['created_utc'], reverse=True)

        # Save the updated data
        save_data(updated_data)
        print(f"Data updated and saved to {file_path}")
    else:
        print("No new data to add or update.")
else:
    print(f"Failed to retrieve data. Status code: {response.status_code}")