Commit

first test of github action
aapatni committed Feb 7, 2024
1 parent c387fd5 commit 187a010
Showing 5 changed files with 60 additions and 10 deletions.
40 changes: 40 additions & 0 deletions .github/workflows/main.yml
@@ -0,0 +1,40 @@
name: Hourly Job

on:
  schedule:
    # Runs at the start of every hour
    - cron: '0 * * * *'

jobs:
  run-script:
    runs-on: ubuntu-latest

    steps:
      - name: Check out repository code
        uses: actions/checkout@v2

      - name: setup python
        uses: actions/setup-python@v4
        with:
          python-version: '3.11'

      - name: install python packages
        run: |
          python -m pip install --upgrade pip
          pip install -r src/data_collection/requirements.txt
      - name: Run the reddit crawler
        env:
          REDDIT_APP_ID: ${{ secrets.REDDIT_APP_ID }}
          REDDIT_APP_KEY: ${{ secrets.REDDIT_APP_KEY }}
          SUPABASE_WATCHDB_URL: ${{ secrets.SUPABASE_WATCHDB_URL }}
          SUPABASE_WATCHDB_SERVICE_ROLE_KEY: ${{ secrets.SUPABASE_WATCHDB_SERVICE_ROLE_KEY }}
        run: python src/data_collection/reddit_crawler.py

      - name: Run the watchdb populator
        env:
          REDDIT_APP_ID: ${{ secrets.REDDIT_APP_ID }}
          REDDIT_APP_KEY: ${{ secrets.REDDIT_APP_KEY }}
          SUPABASE_WATCHDB_URL: ${{ secrets.SUPABASE_WATCHDB_URL }}
          SUPABASE_WATCHDB_SERVICE_ROLE_KEY: ${{ secrets.SUPABASE_WATCHDB_SERVICE_ROLE_KEY }}
        run: python src/data_collection/watchdb_populator.py
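Both run steps hand the repository secrets to the scripts through env, so the Python code reads them from the environment at runtime. A minimal sketch of that pattern, where the fail-fast guard is an illustration rather than code from this commit:

import os

REQUIRED_VARS = (
    "REDDIT_APP_ID",
    "REDDIT_APP_KEY",
    "SUPABASE_WATCHDB_URL",
    "SUPABASE_WATCHDB_SERVICE_ROLE_KEY",
)

def load_config() -> dict:
    # Collect the secrets injected by the workflow's env blocks.
    config = {name: os.environ.get(name) for name in REQUIRED_VARS}
    missing = [name for name, value in config.items() if not value]
    if missing:
        # Fail early with a clear message instead of a None-related error later.
        raise RuntimeError("Missing environment variables: " + ", ".join(missing))
    return config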
12 changes: 7 additions & 5 deletions src/data_collection/reddit_crawler.py
@@ -7,17 +7,19 @@
from supabase import create_client, Client

def main(time_filter, post_limit, comments_limit):
    # Supabase setup
    # Supabase credentials
    url: str = os.environ.get('SUPABASE_WATCHDB_URL')
    key: str = os.environ.get('SUPABASE_WATCHDB_SERVICE_ROLE_KEY')
    supabase: Client = create_client(url, key)

    # Reddit API Credentials
    client_id = os.environ.get('REDDIT_APP_ID')
    client_secret = os.environ.get('REDDIT_APP_KEY')
    user_agent = 'User-Agent:chrono-codex-server:v1 (by /u/ChronoCrawler)'

    # Initialize PRAW with your credentials
    # Supabase setup
    supabase: Client = create_client(url, key)

    # Initialize PRAW with credentials
    user_agent = 'User-Agent:chrono-codex-server:v1 (by /u/ChronoCrawler)'
    reddit = praw.Reddit(client_id=client_id,
                         client_secret=client_secret,
                         user_agent=user_agent)
@@ -51,7 +53,7 @@ def main(time_filter, post_limit, comments_limit):
        raise

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Reddit Crawler for Subreddit Posts")
    parser = argparse.ArgumentParser(description="Reddit WatchExchange Crawler")
    parser.add_argument("--time_filter", help="Time filter for posts", default="hour")
    parser.add_argument("--post_limit", help="Limit of posts to fetch", type=int, default=10)
    parser.add_argument("--comments_limit", help="Limit of comments to fetch for each post", type=int, default=25)
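The crawler's fetch logic is collapsed in this view. Purely as an illustration of how the time_filter, post_limit, and comments_limit arguments can drive a PRAW query using the reddit instance built above (the subreddit name and output fields below are assumptions, not taken from this diff):

def fetch_posts(reddit, time_filter, post_limit, comments_limit):
    # r/Watchexchange is assumed here, based on the crawler's description string.
    subreddit = reddit.subreddit("Watchexchange")
    for submission in subreddit.top(time_filter=time_filter, limit=post_limit):
        submission.comments.replace_more(limit=0)  # drop "load more comments" placeholders
        comments = [comment.body for comment in submission.comments.list()[:comments_limit]]
        yield {
            "post_id": submission.id,
            "title": submission.title,
            "author": str(submission.author),
            "url": submission.url,
            "comments": comments,
        }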
4 changes: 4 additions & 0 deletions src/data_collection/requirements.txt
@@ -0,0 +1,4 @@
praw
supabase
openai
schema-validator
2 changes: 1 addition & 1 deletion src/data_collection/schema_validator.py
@@ -36,7 +36,7 @@ def validate_schema(data: dict):
    # filtering out incorrect data.

    # Load the schema
    with open('watch_schema.json', 'r') as file:
    with open('src/data_collection/watch_schema.json', 'r') as file:
        schema = json.load(file)

    json_data = filter_invalid_entries(data, schema)
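The hard-coded path works because the workflow invokes the scripts from the repository root. An alternative that stays independent of the working directory, sketched here as an option rather than what this commit does, resolves the schema next to the module:

import json
from pathlib import Path

# Resolve watch_schema.json relative to this file rather than the process's cwd.
SCHEMA_PATH = Path(__file__).resolve().parent / "watch_schema.json"

def load_schema() -> dict:
    with SCHEMA_PATH.open("r") as file:
        return json.load(file)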
12 changes: 8 additions & 4 deletions src/data_collection/watchdb_populator.py
@@ -6,6 +6,10 @@
from schema_validator import validate_schema

def process_queue(supabase_url, supabase_key, openai_key):
    url: str = os.environ.get('SUPABASE_WATCHDB_URL')
    key: str = os.environ.get('SUPABASE_WATCHDB_SERVICE_ROLE_KEY')
    openai_key = os.environ.get('OPENAI_API_CHRONO_KEY')

    # Supabase setup
    supabase: Client = create_client(supabase_url, supabase_key)

@@ -14,7 +18,7 @@ def process_queue(supabase_url, supabase_key, openai_key):
        api_key=openai_key
    )

    with open('query_schema.json') as f:
    with open('src/data_collection/query_schema.json') as f:
        output_schema_str = f.read()

    # Fetch data from Supabase queue
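The visible hunk stops right before the queue read. A generic supabase-py select of a batch of pending rows might look like the sketch below, where the table name and batch size are placeholders rather than values from this commit:

def fetch_queue_rows(supabase, batch_size=50):
    # Pull a batch of unprocessed rows from a hypothetical queue table.
    response = (
        supabase.table("watch_queue")
        .select("*")
        .limit(batch_size)
        .execute()
    )
    return response.data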
@@ -56,9 +60,9 @@ def process_queue(supabase_url, supabase_key, openai_key):

if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Process queue items and format them using OpenAI")
parser.add_argument("--supabase_url", required=True, help="Supabase project URL")
parser.add_argument("--supabase_key", required=True, help="Supabase service role key")
parser.add_argument("--openai_key", required=True, help="OpenAI API key")
# parser.add_argument("--supabase_url", required=True, help="Supabase project URL")
# parser.add_argument("--supabase_key", required=True, help="Supabase service role key")
# parser.add_argument("--openai_key", required=True, help="OpenAI API key")

args = parser.parse_args()

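The populator now builds its OpenAI client from OPENAI_API_CHRONO_KEY and loads query_schema.json as the target output format. A rough sketch of how such a formatting call can be made with the openai client, where the model name and prompt wording are assumptions rather than details from this commit:

import json
from openai import OpenAI

def format_listing(client: OpenAI, raw_text: str, output_schema_str: str) -> dict:
    # Ask the model to reshape a raw queue entry into the JSON schema read from query_schema.json.
    response = client.chat.completions.create(
        model="gpt-4",  # assumed model, not specified in this hunk
        messages=[
            {"role": "system", "content": "Return JSON that matches this schema:\n" + output_schema_str},
            {"role": "user", "content": raw_text},
        ],
    )
    return json.loads(response.choices[0].message.content)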