Add reader and parser for RSS feed

pi-sigma · May 23, 2024 · a7ab252 · a7ab252
1 parent e15e0b4
commit a7ab252
Show file tree

Hide file tree

Showing 42 changed files with 1,569 additions and 1,500 deletions.
diff --git a/.github/workflows/django.yml b/.github/workflows/django.yml
@@ -58,10 +58,7 @@ jobs:
           SECRET_KEY: dummy
           DJANGO_ENV: BASE
           SECURE_SSL_REDIRECT: False
-        run: |
-          pytest src/articles/tests/unit/
-          pytest src/articles/tests/integration/
-          pytest src/scraper/tests/
+        run: pytest
 
   #
   # Migrations

diff --git a/config/scraper.py b/config/scraper.py
diff --git a/config/settings/__init__.py b/config/settings/__init__.py
@@ -1,8 +1,5 @@
 """
-Settings are loaded depending on the value of the DJANGO_ENV environment variable,
-
-On the production server, DJANGO_ENV should be left undefined
-(hence the production settings are loaded by default).
+Settings are loaded depending on the DJANGO_ENV environment variable.
 """
 
 from decouple import config

diff --git a/config/settings/base.py b/config/settings/base.py
@@ -5,7 +5,7 @@
 
 from decouple import Csv, config
 
-from .. import scraper
+from .. import tasks
 
 BASE_DIR = Path(__file__).resolve().parent.parent.parent
 
@@ -189,11 +189,21 @@
 CELERY_BROKER_URL = config("CELERY_BROKER_URL", "redis://localhost:6379")
 CELERY_RESULT_BACKEND = config("CELERY_RESULT_BACKEND", "redis://localhost:6379")
 CELERY_BEAT_SCHEDULE = {
-    "get_articles_en": {
+    "scrape_articles_en": {
         "task": "articles.tasks.get_articles",
-        "schedule": scraper.tasks["magazines"]["en"]["schedule"],
+        "schedule": tasks.scrape["articles"]["en"]["schedule"],
         "kwargs": {
             "language": "en",
+            "titles": tasks.scrape["articles"]["en"]["titles"],
+        }
+    },
+    "get_articles_from_feed_en": {
+        "task": "articles.tasks.get_articles",
+        "schedule": tasks.feed["articles"]["en"]["schedule"],
+        "kwargs": {
+            "language": "en",
+            "titles": tasks.feed["articles"]["en"]["titles"],
+            "time_delta": tasks.feed["articles"]["en"]["schedule"],
         }
     }
 }
diff --git a/config/tasks.py b/config/tasks.py
@@ -0,0 +1,30 @@
+scrape = {
+    "articles": {
+        "en": {
+            "schedule": 3,  # minutes? Celery beat reads ints as seconds - TODO confirm
+            "titles": [
+                "Al Jazeera",
+                "Associated Press",
+                "Consortium News",
+                "Current Affairs",
+                "NPR",
+                "Reuters",
+                "The Atlantic",
+                "UPI",
+            ]
+        },
+    },
+}
+feed = {
+    "articles": {
+        "en": {
+            "schedule": 3,  # minutes? Celery beat reads ints as seconds - TODO confirm
+            "titles": [
+                "Christian Science Monitor",
+                "New York Times",
+                "The Guardian",
+                "The Intercept",
+            ]
+        },
+    },
+}
diff --git a/fixtures/feeds.json b/fixtures/feeds.json
@@ -0,0 +1,34 @@
+[
+{
+    "model": "articles.feed",
+    "pk": 1,
+    "fields": {
+        "source": 3,
+        "url": "https://rss.csmonitor.com/feeds/world"
+    }
+},
+{
+    "model": "articles.feed",
+    "pk": 2,
+    "fields": {
+        "source": 6,
+        "url": "https://rss.nytimes.com/services/xml/rss/nyt/World.xml"
+    }
+},
+{
+    "model": "articles.feed",
+    "pk": 3,
+    "fields": {
+        "source": 10,
+        "url": "https://theintercept.com/feed/?lang=en"
+    }
+},
+{
+    "model": "articles.feed",
+    "pk": 4,
+    "fields": {
+        "source": 13,
+        "url": "https://www.theguardian.com/world/rss"
+    }
+}
+]