diff --git a/app/public/manifest.json b/app/public/manifest.json deleted file mode 100644 index 080d6c7..0000000 --- a/app/public/manifest.json +++ /dev/null @@ -1,25 +0,0 @@ -{ - "short_name": "React App", - "name": "Create React App Sample", - "icons": [ - { - "src": "favicon.ico", - "sizes": "64x64 32x32 24x24 16x16", - "type": "image/x-icon" - }, - { - "src": "logo192.png", - "type": "image/png", - "sizes": "192x192" - }, - { - "src": "logo512.png", - "type": "image/png", - "sizes": "512x512" - } - ], - "start_url": ".", - "display": "standalone", - "theme_color": "#000000", - "background_color": "#ffffff" -} diff --git a/reddit_crawler.ipynb b/reddit_crawler.ipynb deleted file mode 100644 index 63fee4b..0000000 --- a/reddit_crawler.ipynb +++ /dev/null @@ -1,209 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Lad wrote a Python script to download Alexa voice recordings, he didn't expect this email.\n", - "This post has:\n", - "I redesign the Python logo to make it more modern\n", - "Automate the boring stuff with python - tinder\n", - "Just finished programming and building my own smart mirror in python, wrote all of the code myself and implemented my own voice control and facial recognition features\n" - ] - } - ], - "source": [ - "import praw\n", - "import os\n", - "import json\n", - "import time\n", - "\n", - "from supabase import create_client, Client\n", - "\n", - "# Supabase setup\n", - "url: str = os.environ.get('SUPABASE_WATCHDB_URL')\n", - "key: str = os.environ.get('SUPABASE_WATCHDB_SERVICE_ROLE_KEY')\n", - "supabase: Client = create_client(url, key)\n", - "\n", - "# Reddit API Credentials\n", - "client_id = os.environ.get('REDDIT_APP_ID')\n", - "client_secret = os.environ.get('REDDIT_APP_KEY')\n", - "user_agent = 'User-Agent:chrono-codex-server:v1 (by /u/ChronoCrawler)'\n", - "\n", - "# Initialize PRAW with your credentials\n", - "reddit = praw.Reddit(client_id=client_id,\n", - " client_secret=client_secret,\n", - " user_agent=user_agent)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2024-02-06 17:53:19,214:INFO - HTTP Request: POST https://gvvfniijngcyqnwvrbal.supabase.co/rest/v1/rqueue \"HTTP/1.1 409 Conflict\"\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Skipping insertion for post_id=1akq7lk as it already exists.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2024-02-06 17:53:19,456:INFO - HTTP Request: POST https://gvvfniijngcyqnwvrbal.supabase.co/rest/v1/rqueue \"HTTP/1.1 201 Created\"\n", - "2024-02-06 17:53:19,743:INFO - HTTP Request: POST https://gvvfniijngcyqnwvrbal.supabase.co/rest/v1/rqueue \"HTTP/1.1 201 Created\"\n", - "2024-02-06 17:53:20,017:INFO - HTTP Request: POST https://gvvfniijngcyqnwvrbal.supabase.co/rest/v1/rqueue \"HTTP/1.1 201 Created\"\n", - "2024-02-06 17:53:20,316:INFO - HTTP Request: POST https://gvvfniijngcyqnwvrbal.supabase.co/rest/v1/rqueue \"HTTP/1.1 201 Created\"\n", - "2024-02-06 17:53:20,715:INFO - HTTP Request: POST https://gvvfniijngcyqnwvrbal.supabase.co/rest/v1/rqueue \"HTTP/1.1 201 Created\"\n", - "2024-02-06 17:53:20,948:INFO - HTTP Request: POST https://gvvfniijngcyqnwvrbal.supabase.co/rest/v1/rqueue \"HTTP/1.1 201 Created\"\n", - "2024-02-06 17:53:21,310:INFO - HTTP Request: POST 
https://gvvfniijngcyqnwvrbal.supabase.co/rest/v1/rqueue \"HTTP/1.1 201 Created\"\n", - "2024-02-06 17:53:21,584:INFO - HTTP Request: POST https://gvvfniijngcyqnwvrbal.supabase.co/rest/v1/rqueue \"HTTP/1.1 201 Created\"\n", - "2024-02-06 17:53:21,863:INFO - HTTP Request: POST https://gvvfniijngcyqnwvrbal.supabase.co/rest/v1/rqueue \"HTTP/1.1 201 Created\"\n" - ] - } - ], - "source": [ - "# The subreddit you want to scrape\n", - "subreddit = reddit.subreddit('watchexchange')\n", - "\n", - "# Initialize a list to store data before saving to disk\n", - "data_list = []\n", - "\n", - "# Fetch the top posts from the subreddit\n", - "top_posts = subreddit.top(time_filter=\"hour\", limit=10) # Adjust limit as needed\n", - "\n", - "\n", - "# Push the data collected to Supabase\n", - "for post in top_posts:\n", - " post.comments.replace_more(limit=25) # Load all comments\n", - " # comments = [{'userid': comment.author.name, 'comment': comment.body} for comment in post.comments.list()]\n", - " comments = ' | '.join([f\"{comment.author.name}: {comment.body}\" for comment in post.comments.list()])\n", - "\n", - " post_data = {\n", - " 'post_id': post.id,\n", - " 'author_id': post.author.name,\n", - " 'title': post.title,\n", - " 'url': post.url,\n", - " 'comments': comments\n", - " }\n", - " \n", - " try:\n", - " # Attempt to insert post_data into your Supabase table\n", - " data_insert_response = supabase.table('rqueue').insert(post_data).execute()\n", - " except Exception as e:\n", - " if 'duplicate key value violates unique constraint \"rqueue_pkey\"' in str(e):\n", - " print(f\"Skipping insertion for post_id={post_data['post_id']} as it already exists.\")\n", - " else:\n", - " raise\n" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2024-02-06 18:18:58,543:INFO - HTTP Request: GET https://gvvfniijngcyqnwvrbal.supabase.co/rest/v1/rqueue?select=%2A&processed=eq.False&limit=1 \"HTTP/1.1 200 OK\"\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "An error occurred: \n", - "\n", - "You tried to access openai.Completion, but this is no longer supported in openai>=1.0.0 - see the README at https://github.com/openai/openai-python for the API.\n", - "\n", - "You can run `openai migrate` to automatically upgrade your codebase to use the 1.0.0 interface. \n", - "\n", - "Alternatively, you can pin your installation to the old version, e.g. `pip install openai==0.28`\n", - "\n", - "A detailed migration guide is available here: https://github.com/openai/openai-python/discussions/742\n", - "\n" - ] - } - ], - "source": [ - "import openai\n", - "import json\n", - "import jsonschema\n", - "\n", - "# Load your OpenAI API key from an environment variable or directly\n", - "openai_api_key = os.environ.get('OPENAI_API_CHRONO_KEY')\n", - "\n", - "openai.api_key = openai_api_key\n", - "\n", - "with open('query_schema.json') as f:\n", - " output_schema_str = f.read()\n", - "\n", - "# Fetch data from Supabase queue\n", - "try:\n", - " queue_data = supabase.table('rqueue').select('*').eq('processed', False).limit(1).execute()\n", - " if len(queue_data.data) > 1:\n", - " raise Exception(\"lll\")\n", - " if queue_data.data:\n", - " for item in queue_data.data:\n", - " # Convert the item to JSON string\n", - " item_json = json.dumps(item)\n", - "\n", - " prompt = f\"Given the data: {item_json}, construct a JSON object that adheres to the specified output schema. 
Here is the output schema: {output_schema_str}\"\n", - " try:\n", - " response = openai.Completion.create(\n", - " model=\"gpt-3.5-turbo-0125\",\n", - " prompt=prompt,\n", - " temperature=0.5, # Adjust as needed\n", - " max_tokens=1024, # Adjust based on your expected output size\n", - " top_p=1.0,\n", - " frequency_penalty=0.0,\n", - " presence_penalty=0.0\n", - " )\n", - " print(response.choices[0].text)\n", - " except Exception as e:\n", - " print(f\"An error occurred: {e}\")\n", - "\n", - " else:\n", - " print(\"No data found in the queue.\")\n", - "except Exception as e:\n", - " print(f\"Failed to fetch data from Supabase: {e}\")\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "base", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.5" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/app/.gitignore b/src/app/.gitignore similarity index 100% rename from app/.gitignore rename to src/app/.gitignore diff --git a/app/README.md b/src/app/README.md similarity index 100% rename from app/README.md rename to src/app/README.md diff --git a/app/package-lock.json b/src/app/package-lock.json similarity index 100% rename from app/package-lock.json rename to src/app/package-lock.json diff --git a/app/package.json b/src/app/package.json similarity index 100% rename from app/package.json rename to src/app/package.json diff --git a/app/public/index.html b/src/app/public/index.html similarity index 100% rename from app/public/index.html rename to src/app/public/index.html diff --git a/app/public/robots.txt b/src/app/public/robots.txt similarity index 100% rename from app/public/robots.txt rename to src/app/public/robots.txt diff --git a/app/src/App.css b/src/app/src/App.css similarity index 100% rename from app/src/App.css rename to src/app/src/App.css diff --git a/app/src/App.js b/src/app/src/App.js similarity index 100% rename from app/src/App.js rename to src/app/src/App.js diff --git a/app/src/App.test.js b/src/app/src/App.test.js similarity index 100% rename from app/src/App.test.js rename to src/app/src/App.test.js diff --git a/app/src/FilterInput.js b/src/app/src/FilterInput.js similarity index 100% rename from app/src/FilterInput.js rename to src/app/src/FilterInput.js diff --git a/app/src/NavBar.js b/src/app/src/NavBar.js similarity index 100% rename from app/src/NavBar.js rename to src/app/src/NavBar.js diff --git a/app/src/SharedDataContext.js b/src/app/src/SharedDataContext.js similarity index 100% rename from app/src/SharedDataContext.js rename to src/app/src/SharedDataContext.js diff --git a/app/src/WatchCard.js b/src/app/src/WatchCard.js similarity index 100% rename from app/src/WatchCard.js rename to src/app/src/WatchCard.js diff --git a/app/src/WatchDetailsModal.js b/src/app/src/WatchDetailsModal.js similarity index 100% rename from app/src/WatchDetailsModal.js rename to src/app/src/WatchDetailsModal.js diff --git a/app/src/WatchGrid.js b/src/app/src/WatchGrid.js similarity index 100% rename from app/src/WatchGrid.js rename to src/app/src/WatchGrid.js diff --git a/app/src/index.css b/src/app/src/index.css similarity index 100% rename from app/src/index.css rename to src/app/src/index.css diff --git a/app/src/index.js b/src/app/src/index.js similarity index 100% rename from app/src/index.js rename to 
src/app/src/index.js diff --git a/app/src/reportWebVitals.js b/src/app/src/reportWebVitals.js similarity index 100% rename from app/src/reportWebVitals.js rename to src/app/src/reportWebVitals.js diff --git a/app/src/setupTests.js b/src/app/src/setupTests.js similarity index 100% rename from app/src/setupTests.js rename to src/app/src/setupTests.js diff --git a/app/src/supabaseClient.js b/src/app/src/supabaseClient.js similarity index 100% rename from app/src/supabaseClient.js rename to src/app/src/supabaseClient.js diff --git a/app/src/watches.json b/src/app/src/watches.json similarity index 100% rename from app/src/watches.json rename to src/app/src/watches.json diff --git a/src/data_collection/gpt_formatter.py b/src/data_collection/gpt_formatter.py new file mode 100644 index 0000000..575db56 --- /dev/null +++ b/src/data_collection/gpt_formatter.py @@ -0,0 +1,65 @@ +import argparse +import os +import json +from openai import OpenAI +from supabase import create_client, Client +from schema_validator import validate_schema + +def process_queue(supabase_url, supabase_key, openai_key): + # Supabase setup + supabase: Client = create_client(supabase_url, supabase_key) + + # Set the API key for OpenAI + client = OpenAI( + api_key=openai_key + ) + + with open('query_schema.json') as f: + output_schema_str = f.read() + + # Fetch data from Supabase queue + try: + queue_data = supabase.table('rqueue').select('*').eq('processed', False).limit(1).execute() + if queue_data.data: # Only proceed when the queue returned an unprocessed row + for item in queue_data.data: + relevant_data = {key: item[key] for key in ["author_id", "title", "url", "comments"]} + item_json = json.dumps(relevant_data) + prompt = f"Given the data: {item_json}, construct a JSON object that adheres to the specified output schema. 
Output schema: {output_schema_str}" + try: + response = client.chat.completions.create( + model="gpt-3.5-turbo-0125", + response_format={ "type": "json_object" }, + messages=[{"role": "system", "content": "You are a helpful assistant that outputs valid JSON.>"}, + {"role": "user", "content": prompt}], + ) + try: + response_json = json.loads(response.choices[0].message.content) + except Exception as e: + print("Error in openai response: ", e) + + try: + validated_response = validate_schema(response_json) + try: + # supabase.table("watches").insert([validated_response]).execute() + supabase.table("rqueue").update({"processed": True}).eq("post_id", item["post_id"]).execute() + except Exception as e: + print(f"Failed to push to supabase (watches): {e}") + except Exception as e: + print(f"current response could not be validated: {e}") + + except Exception as e: + print(f"An OpenAI error occurred: {e}") + + + except Exception as e: + print(f"Failed to fetch data from Supabase (rqueue): {e}") + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Process queue items and format them using OpenAI") + parser.add_argument("--supabase_url", required=True, help="Supabase project URL") + parser.add_argument("--supabase_key", required=True, help="Supabase service role key") + parser.add_argument("--openai_key", required=True, help="OpenAI API key") + + args = parser.parse_args() + + process_queue(args.supabase_url, args.supabase_key, args.openai_key) diff --git a/query_schema.json b/src/data_collection/query_schema.json similarity index 100% rename from query_schema.json rename to src/data_collection/query_schema.json diff --git a/src/data_collection/reddit_crawler.ipynb b/src/data_collection/reddit_crawler.ipynb new file mode 100644 index 0000000..0c0d28f --- /dev/null +++ b/src/data_collection/reddit_crawler.ipynb @@ -0,0 +1,326 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Lad wrote a Python script to download Alexa voice recordings, he didn't expect this email.\n", + "This post has:\n", + "I redesign the Python logo to make it more modern\n", + "Automate the boring stuff with python - tinder\n", + "Just finished programming and building my own smart mirror in python, wrote all of the code myself and implemented my own voice control and facial recognition features\n" + ] + } + ], + "source": [ + "import praw\n", + "import os\n", + "import json\n", + "import time\n", + "\n", + "from supabase import create_client, Client\n", + "\n", + "# Supabase setup\n", + "url: str = os.environ.get('SUPABASE_WATCHDB_URL')\n", + "key: str = os.environ.get('SUPABASE_WATCHDB_SERVICE_ROLE_KEY')\n", + "supabase: Client = create_client(url, key)\n", + "\n", + "# Reddit API Credentials\n", + "client_id = os.environ.get('REDDIT_APP_ID')\n", + "client_secret = os.environ.get('REDDIT_APP_KEY')\n", + "user_agent = 'User-Agent:chrono-codex-server:v1 (by /u/ChronoCrawler)'\n", + "\n", + "# Initialize PRAW with your credentials\n", + "reddit = praw.Reddit(client_id=client_id,\n", + " client_secret=client_secret,\n", + " user_agent=user_agent)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-02-06 17:53:19,214:INFO - HTTP Request: POST https://gvvfniijngcyqnwvrbal.supabase.co/rest/v1/rqueue \"HTTP/1.1 409 Conflict\"\n" + ] + }, + { + "name": "stdout", + "output_type": 
"stream", + "text": [ + "Skipping insertion for post_id=1akq7lk as it already exists.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-02-06 17:53:19,456:INFO - HTTP Request: POST https://gvvfniijngcyqnwvrbal.supabase.co/rest/v1/rqueue \"HTTP/1.1 201 Created\"\n", + "2024-02-06 17:53:19,743:INFO - HTTP Request: POST https://gvvfniijngcyqnwvrbal.supabase.co/rest/v1/rqueue \"HTTP/1.1 201 Created\"\n", + "2024-02-06 17:53:20,017:INFO - HTTP Request: POST https://gvvfniijngcyqnwvrbal.supabase.co/rest/v1/rqueue \"HTTP/1.1 201 Created\"\n", + "2024-02-06 17:53:20,316:INFO - HTTP Request: POST https://gvvfniijngcyqnwvrbal.supabase.co/rest/v1/rqueue \"HTTP/1.1 201 Created\"\n", + "2024-02-06 17:53:20,715:INFO - HTTP Request: POST https://gvvfniijngcyqnwvrbal.supabase.co/rest/v1/rqueue \"HTTP/1.1 201 Created\"\n", + "2024-02-06 17:53:20,948:INFO - HTTP Request: POST https://gvvfniijngcyqnwvrbal.supabase.co/rest/v1/rqueue \"HTTP/1.1 201 Created\"\n", + "2024-02-06 17:53:21,310:INFO - HTTP Request: POST https://gvvfniijngcyqnwvrbal.supabase.co/rest/v1/rqueue \"HTTP/1.1 201 Created\"\n", + "2024-02-06 17:53:21,584:INFO - HTTP Request: POST https://gvvfniijngcyqnwvrbal.supabase.co/rest/v1/rqueue \"HTTP/1.1 201 Created\"\n", + "2024-02-06 17:53:21,863:INFO - HTTP Request: POST https://gvvfniijngcyqnwvrbal.supabase.co/rest/v1/rqueue \"HTTP/1.1 201 Created\"\n" + ] + } + ], + "source": [ + "# The subreddit you want to scrape\n", + "subreddit = reddit.subreddit('watchexchange')\n", + "\n", + "# Initialize a list to store data before saving to disk\n", + "data_list = []\n", + "\n", + "# Fetch the top posts from the subreddit\n", + "top_posts = subreddit.top(time_filter=\"hour\", limit=10) # Adjust limit as needed\n", + "\n", + "\n", + "# Push the data collected to Supabase\n", + "for post in top_posts:\n", + " post.comments.replace_more(limit=25) # Load all comments\n", + " # comments = [{'userid': comment.author.name, 'comment': comment.body} for comment in post.comments.list()]\n", + " comments = ' | '.join([f\"{comment.author.name}: {comment.body}\" for comment in post.comments.list()])\n", + "\n", + " post_data = {\n", + " 'post_id': post.id,\n", + " 'author_id': post.author.name,\n", + " 'title': post.title,\n", + " 'url': post.url,\n", + " 'comments': comments\n", + " }\n", + " \n", + " try:\n", + " # Attempt to insert post_data into your Supabase table\n", + " data_insert_response = supabase.table('rqueue').insert(post_data).execute()\n", + " except Exception as e:\n", + " if 'duplicate key value violates unique constraint \"rqueue_pkey\"' in str(e):\n", + " print(f\"Skipping insertion for post_id={post_data['post_id']} as it already exists.\")\n", + " else:\n", + " raise\n" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [], + "source": [ + "from jsonschema import validate\n", + "from jsonschema.exceptions import ValidationError\n", + "\n", + "\n", + "def filter_invalid_entries(data, schema):\n", + " \"\"\"\n", + " Removes entries from the data that do not match the schema's type requirements.\n", + " \n", + " :param data: The JSON object to filter.\n", + " :param schema: The schema defining the expected types.\n", + " :return: A new dictionary with only the valid entries according to the schema.\n", + " \"\"\"\n", + " valid_data = {}\n", + " for key, value in data.items():\n", + " expected_type = schema.get(\"properties\", {}).get(key, {}).get(\"type\", None)\n", + " if expected_type:\n", + " if expected_type == 
\"number\" and value is not None:\n", + " print(key,value)\n", + " try:\n", + " converted_value = int(value)\n", + " except ValueError:\n", + " try:\n", + " converted_value = float(value)\n", + " except ValueError:\n", + " continue\n", + " valid_data[key] = converted_value\n", + " elif expected_type == \"string\" and isinstance(value, str):\n", + " valid_data[key] = value\n", + " return valid_data\n", + "\n", + "\n", + "\n", + "def validate_schema(data: dict):\n", + " # This code validates the schema of the provided JSON data\n", + " # and drops any entries that do not match the schema, effectively\n", + " # filtering out incorrect data.\n", + "\n", + " # Load the schema\n", + " with open('watch_schema.json', 'r') as file:\n", + " schema = json.load(file)\n", + "\n", + " json_data = filter_invalid_entries(data, schema)\n", + "\n", + " # Validate the JSON, on failure throw exception\n", + " validate(instance=json_data, schema=schema)\n", + " return json_data \n", + " \n" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'Brand': 'Breitling', 'Model': 'SuperOcean II 36', 'Reference Number': 'A17312', 'Case Material': 'Not specified', 'Case Diameter': None, 'Case Thickness': None, 'Lug Width': None, 'Lug-to-Lug': None, 'Dial Color': 'Not specified', 'Crystal Type': 'Not specified', 'Water Resistance': 'Not specified', 'Movement': 'Not specified', 'Caliber': 'Not specified', 'Movement Type': 'Not specified', 'Power Reserve': 'Not specified', 'Bracelet/Strap Material': 'Not specified', 'Clasp Type': 'Not specified', 'Product Weight': 'Not specified', 'Features': 'Completely full kit with watch roll. 2016 year.', 'Price': 1699, 'Availability': 'Zelle, Venmo, or WU accepted', 'Photo URL': 'https://imgur.com/a/Quq7MB9', 'Merchant Name': 'Pristine_Courage_535', 'Product URL': 'https://www.reddit.com/r/Watchexchange/comments/1akq7lk/wts_breitling_superocean_a17312_white_full_kit/'}\n", + "Price 1699\n" + ] + }, + { + "data": { + "text/plain": [ + "{'Brand': 'Breitling',\n", + " 'Model': 'SuperOcean II 36',\n", + " 'Reference Number': 'A17312',\n", + " 'Case Material': 'Not specified',\n", + " 'Dial Color': 'Not specified',\n", + " 'Crystal Type': 'Not specified',\n", + " 'Water Resistance': 'Not specified',\n", + " 'Movement': 'Not specified',\n", + " 'Caliber': 'Not specified',\n", + " 'Movement Type': 'Not specified',\n", + " 'Power Reserve': 'Not specified',\n", + " 'Bracelet/Strap Material': 'Not specified',\n", + " 'Clasp Type': 'Not specified',\n", + " 'Product Weight': 'Not specified',\n", + " 'Features': 'Completely full kit with watch roll. 
2016 year.',\n", + " 'Price': 1699,\n", + " 'Availability': 'Zelle, Venmo, or WU accepted',\n", + " 'Photo URL': 'https://imgur.com/a/Quq7MB9',\n", + " 'Merchant Name': 'Pristine_Courage_535',\n", + " 'Product URL': 'https://www.reddit.com/r/Watchexchange/comments/1akq7lk/wts_breitling_superocean_a17312_white_full_kit/'}" + ] + }, + "execution_count": 44, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "response_json = json.loads(response.choices[0].message.content)\n", + "print(response_json)\n", + "validated_response = validate_schema(response_json)\n", + "validated_response" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-02-07 11:37:51,075:INFO - HTTP Request: GET https://gvvfniijngcyqnwvrbal.supabase.co/rest/v1/rqueue?select=%2A&processed=eq.False&limit=1 \"HTTP/1.1 200 OK\"\n", + "2024-02-07 11:37:51,152:INFO - HTTP Request: PATCH https://gvvfniijngcyqnwvrbal.supabase.co/rest/v1/rqueue?post_id=eq.1akqglv \"HTTP/1.1 200 OK\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "data=[{'created_at': '2024-02-07T01:53:19.464226+00:00', 'post_id': '1akqglv', 'author_id': 'liuserr', 'title': '[WTS] Sinn 356 on OEM Leather strap', 'url': 'https://i.redd.it/18ictee7d2hc1.jpeg', 'comments': 'AutoModerator: * u/liuserr has 149 Transactions and [this 7-day post history](https://www.reddit.com/r/Watchexchange/wiki/user_post_history/liuserr).\\n* [Click send on this message](https://www.reddit.com/message/compose/?to=WatchExBot&subject=Feedback_Check&message=u/liuserr) to get a detailed look at this user\\'s transaction history. \\n * If the above link doesn\\'t work, send a message containing the username (starting with u/) to the bot, `WatchExBot`\\n* This post: \"[[WTS] Sinn 356 on OEM Leather strap](https://www.reddit.com/r/Watchexchange/comments/1akqglv/wts_sinn_356_on_oem_leather_strap/)\" \\n\\n---\\n\\n**This post may be removed without notice if it does not follow the rules!**\\n\\n* **Pictures** are required. Do you have pictures? \\n* A **timestamp** is required. Do you have a ***handwritten timestamp*** under your item(s)? \\n* A **price** or trade value is required. Do you have a price? \\n \\nIf you make a mistake in your post, ***do not delete and repost.*** Message the mods *first* or your post may be removed and you\\'ll have to wait 7 days to repost.\\n\\n---\\n\\n[Click here](https://www.reddit.com/r/Watchexchange/comments/ihs99w/meta_new_flairbased_feedback/) to learn how to leave feedback that updates your transaction flair.\\n\\n**Avoid banned or unqualified users; do not deal with anyone unless they comment on this thread.** \\n \\nIf you believe you have been scammed, please [fill out this form](https://www.reddit.com/message/compose/?to=r/Watchexchange&subject=I%20Think%20I%20Have%20Been%20Scammed&message=*%20Username%20of%20the%20person%20scamming%20me:%20%0D%0A%0D%0A*%20Link%20to%20the%20thread%20where%20the%20transaction%20originated:%20%0D%0A%0D%0A*%20Link%20to%20screenshots%20uploaded%20to%20imgur%20of%20my%20conversations%20with%20the%20scammer:%20%0D%0A%0D%0A*%20An%20explanation%20of%20the%20situation:%20).\\n\\nThe presence of this message does **not** indicate a need to message the moderators. \\n\\n\\n*I am a bot, and this action was performed automatically. 
Please [contact the moderators of this subreddit](/message/compose/?to=/r/Watchexchange) if you have any questions or concerns.* | liuserr: For sale is a Sinn 356 that comes on an unworn leather strap. I\\'m selling it because a chronograph is a little too frail for everyday wear. The watch comes on a sandblasted case with very little signs of wear. I believe the day is slightly misaligned, but not by a terrible amount if so as I can barely tell when scrolling through the days.\\n\\nThe watch comes on its OEM black leather strap that has not been worn - there are no creases on the strap holes. It also comes with the inner and outer boxes, instruction manual, and warranty card dated: Jan 2020.\\n\\nModel: Sinn 356 Movement: SW-500 Case Size: 38.5mm Case Thickness: 15mm Lug Width: 20mm Lug to lug: 46mm Water resistance: 100 meters\\n\\nAlbum: https://imgur.com/a/Lbafeue \\nTimestamp: https://i.imgur.com/9OYeF6O.jpg\\n\\nPrice: $1700 shipped CONUS via Zelle, wire transfer, or Paypal F&F for established members. Face to face in the Bay Area is welcomed.\\n\\nNo trades please.', 'processed': False}] count=None\n", + "Case Diameter 38.5\n", + "Case Thickness 15\n", + "Lug Width 20\n", + "Lug-to-Lug 46\n", + "Price 1700\n" + ] + } + ], + "source": [ + "from openai import OpenAI\n", + "import os\n", + "import json\n", + "\n", + "# Set the API key\n", + "client = OpenAI(\n", + " api_key= os.environ.get('OPENAI_API_CHRONO_KEY')\n", + ")\n", + "\n", + "with open('query_schema.json') as f:\n", + " output_schema_str = f.read()\n", + "\n", + "# Fetch data from Supabase queue\n", + "try:\n", + " queue_data = supabase.table('rqueue').select('*').eq('processed', False).limit(1).execute()\n", + " if len(queue_data.data) < 2: # Fixed to check for non-empty data\n", + " for item in queue_data.data:\n", + " relevant_data = {key: item[key] for key in [\"author_id\", \"title\", \"url\", \"comments\"]}\n", + " item_json = json.dumps(relevant_data)\n", + " prompt = f\"Given the data: {item_json}, construct a JSON object that adheres to the specified output schema. 
Output schema: {output_schema_str}\"\n", + " try:\n", + " response = client.chat.completions.create( \n", + " model=\"gpt-3.5-turbo-0125\", \n", + " response_format={ \"type\": \"json_object\" },\n", + " messages=[{\"role\": \"system\", \"content\": \"You are a helpful assistant that outputs valid JSON.>\"}, \n", + " {\"role\": \"user\", \"content\": prompt}],\n", + " )\n", + " try:\n", + " response_json = json.loads(response.choices[0].message.content)\n", + " except Exception as e:\n", + " print(\"Error in openai response: \", e)\n", + "\n", + " try:\n", + " validated_response = validate_schema(response_json)\n", + " try:\n", + " # supabase.table(\"watches\").insert([validated_response]).execute()\n", + " supabase.table(\"rqueue\").update({\"processed\": True}).eq(\"post_id\", item[\"post_id\"]).execute()\n", + " except Exception as e:\n", + " print(f\"Failed to push to supabase (watches): {e}\")\n", + " except Exception as e:\n", + " print(f\"current response could not be validated: {e}\")\n", + " \n", + " except Exception as e:\n", + " print(f\"An OpenAI error occurred: {e}\")\n", + " \n", + "\n", + "except Exception as e:\n", + " print(f\"Failed to fetch data from Supabase (rqueue): {e}\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "base", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/src/data_collection/reddit_crawler.py b/src/data_collection/reddit_crawler.py new file mode 100644 index 0000000..297aba4 --- /dev/null +++ b/src/data_collection/reddit_crawler.py @@ -0,0 +1,62 @@ +import praw +import os +import json +import time +import argparse + +from supabase import create_client, Client + +def main(time_filter, post_limit, comments_limit): + # Supabase setup + url: str = os.environ.get('SUPABASE_WATCHDB_URL') + key: str = os.environ.get('SUPABASE_WATCHDB_SERVICE_ROLE_KEY') + supabase: Client = create_client(url, key) + + # Reddit API Credentials + client_id = os.environ.get('REDDIT_APP_ID') + client_secret = os.environ.get('REDDIT_APP_KEY') + user_agent = 'User-Agent:chrono-codex-server:v1 (by /u/ChronoCrawler)' + + # Initialize PRAW with your credentials + reddit = praw.Reddit(client_id=client_id, + client_secret=client_secret, + user_agent=user_agent) + + # The subreddit you want to scrape + subreddit = reddit.subreddit('watchexchange') + + # Fetch the top posts from the subreddit + top_posts = subreddit.top(time_filter=time_filter, limit=post_limit) + + # Push the data collected to Supabase + for post in top_posts: + post.comments.replace_more(limit=comments_limit) # Load all comments + comments = ' | '.join([f"{comment.author.name}: {comment.body}" for comment in post.comments.list()]) + + post_data = { + 'post_id': post.id, + 'author_id': post.author.name, + 'title': post.title, + 'url': post.url, + 'comments': comments + } + + try: + # Attempt to insert post_data into your Supabase table + data_insert_response = supabase.table('rqueue').insert(post_data).execute() + except Exception as e: + if 'duplicate key value violates unique constraint "rqueue_pkey"' in str(e): + print(f"Skipping insertion for post_id={post_data['post_id']} as 
it already exists.") + else: + raise + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Reddit Crawler for Subreddit Posts") + parser.add_argument("--time_filter", help="Time filter for posts", default="hour") + parser.add_argument("--post_limit", help="Limit of posts to fetch", type=int, default=10) + parser.add_argument("--comments_limit", help="Limit of comments to fetch for each post", type=int, default=25) + + args = parser.parse_args() + + main(args.time_filter, args.post_limit, args.comments_limit) + diff --git a/src/data_collection/schema_validator.py b/src/data_collection/schema_validator.py new file mode 100644 index 0000000..49b44f3 --- /dev/null +++ b/src/data_collection/schema_validator.py @@ -0,0 +1,47 @@ +from jsonschema import validate +from jsonschema.exceptions import ValidationError + + +def filter_invalid_entries(data, schema): + """ + Removes entries from the data that do not match the schema's type requirements. + + :param data: The JSON object to filter. + :param schema: The schema defining the expected types. + :return: A new dictionary with only the valid entries according to the schema. + """ + valid_data = {} + for key, value in data.items(): + expected_type = schema.get("properties", {}).get(key, {}).get("type", None) + if expected_type: + if expected_type == "number" and value is not None: + print(key,value) + try: + converted_value = int(value) + except ValueError: + try: + converted_value = float(value) + except ValueError: + continue + valid_data[key] = converted_value + elif expected_type == "string" and isinstance(value, str): + valid_data[key] = value + return valid_data + + + +def validate_schema(data: dict): + # This code validates the schema of the provided JSON data + # and drops any entries that do not match the schema, effectively + # filtering out incorrect data. + + # Load the schema + with open('watch_schema.json', 'r') as file: + schema = json.load(file) + + json_data = filter_invalid_entries(data, schema) + + # Validate the JSON, on failure throw exception + validate(instance=json_data, schema=schema) + return json_data + diff --git a/test.ipynb b/src/data_collection/test.ipynb similarity index 100% rename from test.ipynb rename to src/data_collection/test.ipynb diff --git a/src/data_collection/watch_schema.json b/src/data_collection/watch_schema.json new file mode 100644 index 0000000..25f32ee --- /dev/null +++ b/src/data_collection/watch_schema.json @@ -0,0 +1,34 @@ +{ + "type": "object", + "properties": { + "Brand": {"type": "string"}, + "Model": {"type": "string"}, + "Reference Number": {"type": "string"}, + "Case Material": {"type": "string"}, + "Case Diameter": {"type": "number"}, + "Case Thickness": {"type": "number"}, + "Lug Width": {"type": "number"}, + "Lug-to-Lug": {"type": "number"}, + "Dial Color": {"type": "string"}, + "Crystal Type": {"type": "string"}, + "Water Resistance": {"type": "string"}, + "Movement": {"type": "string"}, + "Caliber": {"type": "string"}, + "Movement Type": {"type": "string"}, + "Power Reserve": {"type": "string"}, + "Bracelet/Strap Material": {"type": "string"}, + "Clasp Type": {"type": "string"}, + "Product Weight": {"type": "string"}, + "Features": {"type": "string"}, + "Price": {"type": "number"}, + "Availability": {"type": "string"}, + "Photo URL": {"type": "string"}, + "Merchant Name": {"type": "string"}, + "Product URL": {"type": "string"} + }, + "required": [ + "Brand", + "Model" + ], + "additionalProperties": false +} \ No newline at end of file