Commit

first test of github action
aapatni committed Feb 7, 2024
1 parent c387fd5 commit 187a010
Showing 5 changed files with 60 additions and 10 deletions.
40 changes: 40 additions & 0 deletions .github/workflows/main.yml
@@ -0,0 +1,40 @@
name: Hourly Job

on:
  schedule:
    # Runs at the start of every hour
    - cron: '0 * * * *'

jobs:
  run-script:
    runs-on: ubuntu-latest

    steps:
      - name: Check out repository code
        uses: actions/checkout@v2

      - name: setup python
        uses: actions/setup-python@v4
        with:
          python-version: '3.11'

      - name: install python packages
        run: |
          python -m pip install --upgrade pip
          pip install -r src/data_collection/requirements.txt
      - name: Run the reddit crawler
        env:
          REDDIT_APP_ID: ${{ secrets.REDDIT_APP_ID }}
          REDDIT_APP_KEY: ${{ secrets.REDDIT_APP_KEY }}
          SUPABASE_WATCHDB_URL: ${{ secrets.SUPABASE_WATCHDB_URL }}
          SUPABASE_WATCHDB_SERVICE_ROLE_KEY: ${{ secrets.SUPABASE_WATCHDB_SERVICE_ROLE_KEY }}
        run: python src/data_collection/reddit_crawler.py

      - name: Run the watchdb populator
        env:
          REDDIT_APP_ID: ${{ secrets.REDDIT_APP_ID }}
          REDDIT_APP_KEY: ${{ secrets.REDDIT_APP_KEY }}
          SUPABASE_WATCHDB_URL: ${{ secrets.SUPABASE_WATCHDB_URL }}
          SUPABASE_WATCHDB_SERVICE_ROLE_KEY: ${{ secrets.SUPABASE_WATCHDB_SERVICE_ROLE_KEY }}
        run: python src/data_collection/watchdb_populator.py
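Both run steps hand the repository secrets to the scripts through env, so the Python code reads them from the environment at runtime. A minimal sketch of that pattern, where the fail-fast guard is an illustration rather than code from this commit:

import os

REQUIRED_VARS = (
    "REDDIT_APP_ID",
    "REDDIT_APP_KEY",
    "SUPABASE_WATCHDB_URL",
    "SUPABASE_WATCHDB_SERVICE_ROLE_KEY",
)

def load_config() -> dict:
    # Collect the secrets injected by the workflow's env blocks.
    config = {name: os.environ.get(name) for name in REQUIRED_VARS}
    missing = [name for name, value in config.items() if not value]
    if missing:
        # Fail early with a clear message instead of a None-related error later.
        raise RuntimeError("Missing environment variables: " + ", ".join(missing))
    return config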
12 changes: 7 additions & 5 deletions src/data_collection/reddit_crawler.py
@@ -7,17 +7,19 @@
from supabase import create_client, Client

def main(time_filter, post_limit, comments_limit):
    # Supabase setup
    # Supabase credentials
    url: str = os.environ.get('SUPABASE_WATCHDB_URL')
    key: str = os.environ.get('SUPABASE_WATCHDB_SERVICE_ROLE_KEY')
    supabase: Client = create_client(url, key)

    # Reddit API Credentials
    client_id = os.environ.get('REDDIT_APP_ID')
    client_secret = os.environ.get('REDDIT_APP_KEY')
    user_agent = 'User-Agent:chrono-codex-server:v1 (by /u/ChronoCrawler)'

    # Initialize PRAW with your credentials
    # Supabase setup
    supabase: Client = create_client(url, key)

    # Initialize PRAW with credentials
    user_agent = 'User-Agent:chrono-codex-server:v1 (by /u/ChronoCrawler)'
    reddit = praw.Reddit(client_id=client_id,
                         client_secret=client_secret,
                         user_agent=user_agent)
@@ -51,7 +53,7 @@ def main(time_filter, post_limit, comments_limit):
        raise

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Reddit Crawler for Subreddit Posts")
    parser = argparse.ArgumentParser(description="Reddit WatchExchange Crawler")
    parser.add_argument("--time_filter", help="Time filter for posts", default="hour")
    parser.add_argument("--post_limit", help="Limit of posts to fetch", type=int, default=10)
    parser.add_argument("--comments_limit", help="Limit of comments to fetch for each post", type=int, default=25)
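The crawler's fetch logic is collapsed in this view. Purely as an illustration of how the time_filter, post_limit, and comments_limit arguments can drive a PRAW query using the reddit instance built above (the subreddit name and output fields below are assumptions, not taken from this diff):

def fetch_posts(reddit, time_filter, post_limit, comments_limit):
    # r/Watchexchange is assumed here, based on the crawler's description string.
    subreddit = reddit.subreddit("Watchexchange")
    for submission in subreddit.top(time_filter=time_filter, limit=post_limit):
        submission.comments.replace_more(limit=0)  # drop "load more comments" placeholders
        comments = [comment.body for comment in submission.comments.list()[:comments_limit]]
        yield {
            "post_id": submission.id,
            "title": submission.title,
            "author": str(submission.author),
            "url": submission.url,
            "comments": comments,
        }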
4 changes: 4 additions & 0 deletions src/data_collection/requirements.txt
@@ -0,0 +1,4 @@
praw
supabase
openai
schema-validator
2 changes: 1 addition & 1 deletion src/data_collection/schema_validator.py
@@ -36,7 +36,7 @@ def validate_schema(data: dict):
    # filtering out incorrect data.

    # Load the schema
    with open('watch_schema.json', 'r') as file:
    with open('src/data_collection/watch_schema.json', 'r') as file:
        schema = json.load(file)

    json_data = filter_invalid_entries(data, schema)
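The hard-coded path works because the workflow invokes the scripts from the repository root. An alternative that stays independent of the working directory, sketched here as an option rather than what this commit does, resolves the schema next to the module:

import json
from pathlib import Path

# Resolve watch_schema.json relative to this file rather than the process's cwd.
SCHEMA_PATH = Path(__file__).resolve().parent / "watch_schema.json"

def load_schema() -> dict:
    with SCHEMA_PATH.open("r") as file:
        return json.load(file)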
12 changes: 8 additions & 4 deletions src/data_collection/watchdb_populator.py
@@ -6,6 +6,10 @@
from schema_validator import validate_schema

def process_queue(supabase_url, supabase_key, openai_key):
    url: str = os.environ.get('SUPABASE_WATCHDB_URL')
    key: str = os.environ.get('SUPABASE_WATCHDB_SERVICE_ROLE_KEY')
    openai_key = os.environ.get('OPENAI_API_CHRONO_KEY')

    # Supabase setup
    supabase: Client = create_client(supabase_url, supabase_key)

@@ -14,7 +18,7 @@ def process_queue(supabase_url, supabase_key, openai_key):
        api_key=openai_key
    )

    with open('query_schema.json') as f:
    with open('src/data_collection/query_schema.json') as f:
        output_schema_str = f.read()

    # Fetch data from Supabase queue
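The visible hunk stops right before the queue read. A generic supabase-py select of a batch of pending rows might look like the sketch below, where the table name and batch size are placeholders rather than values from this commit:

def fetch_queue_rows(supabase, batch_size=50):
    # Pull a batch of unprocessed rows from a hypothetical queue table.
    response = (
        supabase.table("watch_queue")
        .select("*")
        .limit(batch_size)
        .execute()
    )
    return response.data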
@@ -56,9 +60,9 @@ def process_queue(supabase_url, supabase_key, openai_key):

if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Process queue items and format them using OpenAI")
parser.add_argument("--supabase_url", required=True, help="Supabase project URL")
parser.add_argument("--supabase_key", required=True, help="Supabase service role key")
parser.add_argument("--openai_key", required=True, help="OpenAI API key")
# parser.add_argument("--supabase_url", required=True, help="Supabase project URL")
# parser.add_argument("--supabase_key", required=True, help="Supabase service role key")
# parser.add_argument("--openai_key", required=True, help="OpenAI API key")

args = parser.parse_args()

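The populator now builds its OpenAI client from OPENAI_API_CHRONO_KEY and loads query_schema.json as the target output format. A rough sketch of how such a formatting call can be made with the openai client, where the model name and prompt wording are assumptions rather than details from this commit:

import json
from openai import OpenAI

def format_listing(client: OpenAI, raw_text: str, output_schema_str: str) -> dict:
    # Ask the model to reshape a raw queue entry into the JSON schema read from query_schema.json.
    response = client.chat.completions.create(
        model="gpt-4",  # assumed model, not specified in this hunk
        messages=[
            {"role": "system", "content": "Return JSON that matches this schema:\n" + output_schema_str},
            {"role": "user", "content": raw_text},
        ],
    )
    return json.loads(response.choices[0].message.content)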