Skip to content

Commit

Permalink
Add reader and parser for RSS feed
Browse files Browse the repository at this point in the history
  • Loading branch information
pi-sigma committed May 23, 2024
1 parent e15e0b4 commit a7ab252
Show file tree
Hide file tree
Showing 42 changed files with 1,569 additions and 1,500 deletions.
5 changes: 1 addition & 4 deletions .github/workflows/django.yml
Original file line number Diff line number Diff line change
Expand Up @@ -58,10 +58,7 @@ jobs:
SECRET_KEY: dummy
DJANGO_ENV: BASE
SECURE_SSL_REDIRECT: False
run: |
pytest src/articles/tests/unit/
pytest src/articles/tests/integration/
pytest src/scraper/tests/
run: pytest

#
# Migrations
Expand Down
22 changes: 0 additions & 22 deletions config/scraper.py

This file was deleted.

5 changes: 1 addition & 4 deletions config/settings/__init__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
"""
Settings are loaded depending on the value of the DJANGO_ENV environment variable,
On the production server, DJANGO_ENV should be left undefined
(hence the production settings are loaded by default).
Settings are loaded depending on the DJANGO_ENV environment variable.
"""

from decouple import config
Expand Down
16 changes: 13 additions & 3 deletions config/settings/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

from decouple import Csv, config

from .. import scraper
from .. import tasks

BASE_DIR = Path(__file__).resolve().parent.parent.parent

Expand Down Expand Up @@ -189,11 +189,21 @@
CELERY_BROKER_URL = config("CELERY_BROKER_URL", "redis://localhost:6379")
CELERY_RESULT_BACKEND = config("CELERY_RESULT_BACKEND", "redis://localhost:6379")
CELERY_BEAT_SCHEDULE = {
"get_articles_en": {
"scrape_articles_en": {
"task": "articles.tasks.get_articles",
"schedule": scraper.tasks["magazines"]["en"]["schedule"],
"schedule": tasks.scrape["articles"]["en"]["schedule"],
"kwargs": {
"language": "en",
"titles": tasks.scrape["articles"]["en"]["titles"],
}
},
"get_articles_from_feed_en": {
"task": "articles.tasks.get_articles",
"schedule": tasks.feed["articles"]["en"]["schedule"],
"kwargs": {
"language": "en",
"titles": tasks.feed["articles"]["en"]["titles"],
"time_delta": tasks.feed["articles"]["en"]["schedule"],
}
}
}
30 changes: 30 additions & 0 deletions config/tasks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Settings for the scraping tasks, keyed first by content type ("articles")
# and then by language code ("en"). Each language entry carries the task
# schedule and the titles of the sources the task should process.
# NOTE(review): config/settings/base.py passes "schedule" unchanged into
# CELERY_BEAT_SCHEDULE, and Celery beat reads a bare number as seconds --
# confirm that the "minutes" unit noted below is really what is applied.
scrape = {
    "articles": {
        "en": {
            "schedule": 3,  # minutes
            "titles": [
                "Al Jazeera",
                "Associated Press",
                "Consortium News",
                "Current Affairs",
                "NPR",
                "Reuters",
                "The Atlantic",
                "UPI",
            ],
        },
    },
}
# Settings for the RSS-feed tasks, laid out as content type ("articles") ->
# language code ("en") -> schedule and source titles.
# NOTE(review): config/settings/base.py uses "schedule" both as the Celery
# beat schedule and as the task's "time_delta" kwarg. Celery beat reads a bare
# number as seconds -- confirm both consumers agree on the "minutes" unit
# noted below.
feed = {
    "articles": {
        "en": {
            "schedule": 3,  # minutes
            "titles": [
                "Christian Science Monitor",
                "New York Times",
                "The Guardian",
                "The Intercept",
            ],
        },
    },
}
34 changes: 34 additions & 0 deletions fixtures/feeds.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
[
{
"model": "articles.feed",
"pk": 1,
"fields": {
"source": 3,
"url": "https://rss.csmonitor.com/feeds/world"
}
},
{
"model": "articles.feed",
"pk": 2,
"fields": {
"source": 6,
"url": "https://rss.nytimes.com/services/xml/rss/nyt/World.xml"
}
},
{
"model": "articles.feed",
"pk": 3,
"fields": {
"source": 10,
"url": "https://theintercept.com/feed/?lang=en"
}
},
{
"model": "articles.feed",
"pk": 4,
"fields": {
"source": 13,
"url": "https://www.theguardian.com/world/rss"
}
}
]
Loading

0 comments on commit a7ab252

Please sign in to comment.