Skip to content

Commit

Permalink
feat: replace celery with django-background-task
Browse files Browse the repository at this point in the history
  • Loading branch information
joaquimds committed Sep 17, 2024
1 parent cb917fb commit 7720ea0
Show file tree
Hide file tree
Showing 7 changed files with 13 additions and 62 deletions.
9 changes: 1 addition & 8 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ wagtail-linkchecker
===================

A tool/plugin to assist with finding broken links on your wagtail site.
This tool works asynchronously using celery.
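This tool works asynchronously using django-background-tasks. Queued scans are only executed while a task processor is running, e.g. ``python manage.py process_tasks``.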

Installing
==========
Expand All @@ -21,10 +21,6 @@ To use, firstly you will need to add ``wagtaillinkchecker`` to your ``INSTALLED_
There will now be an extra item on the settings panel of the wagtailadmin. Inside here you can enable or disable automated
scanning (See below for more detail) or conduct a scan.

For scans to be conducted from the admin, you must be running a celery daemon.
You can run the celery worker with ``celery -A my_app_name worker -l info``. See the `Celery Documentation <http://docs.celeryproject.org/en/latest/index.html>`_ for more information.
For production you'll want to run celery as a daemon using something like systemd. See `Celery Daemonization <http://docs.celeryproject.org/en/latest/userguide/daemonizing.html#daemonizing>`_ for more information.

Conducting a scan
-----------------
Conducting a scan will scan all of your wagtail pages, and detect all images and anchors with a ``src`` or ``href`` respectively.
Expand All @@ -46,8 +42,5 @@ Command options
``--do-not-send-mail``
Don't send an email to administrators once scan is complete.

``--run-synchronously``
Skip celery and run command synchronously (useful for automated scanning)

``-v 2``
Show more output in the logs
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
author="Neon Jungle",
author_email="[email protected]",
url="https://github.com/neon-jungle/wagtail-linkchecker/",
install_requires=["wagtail>=5.0,<6", "requests>=2.9.1", "celery>=5.0,<6"],
install_requires=["wagtail>=5.0,<6", "requests>=2.9.1", "django-background-tasks>=1.2.5,<3"],
zip_safe=False,
license="BSD License",
packages=find_packages(),
Expand Down
8 changes: 1 addition & 7 deletions wagtaillinkchecker/management/commands/linkcheck.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,20 +17,14 @@ def add_arguments(self, parser):
action="store_true",
help="Do not send mails when finding broken links",
)
parser.add_argument(
"--run-synchronously",
action="store_true",
help="Run checks synchronously (avoid the need for Celery)",
)

def handle(self, *args, **kwargs):
site = Site.objects.filter(is_default_site=True).first()
pages = site.root_page.get_descendants(inclusive=True).live().public()
run_sync = kwargs.get("run_synchronously") or False
verbosity = kwargs.get("verbosity") or 1

print(f"Scanning {len(pages)} pages...")
scan = broken_link_scan(site, run_sync, verbosity)
scan = broken_link_scan(site, verbosity)
total_links = ScanLink.objects.filter(scan=scan, crawled=True)
broken_links = ScanLink.objects.filter(scan=scan, broken=True)
print(
Expand Down
7 changes: 2 additions & 5 deletions wagtaillinkchecker/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,13 +102,10 @@ def __str__(self):
def page_is_deleted(self):
return self.page_deleted and self.page_slug

def check_link(self, run_sync=False, verbosity=1):
def check_link(self, verbosity=1):
from wagtaillinkchecker.tasks import check_link

if run_sync:
check_link(self.pk, run_sync=run_sync, verbosity=verbosity)
else:
check_link.apply_async((self.pk, run_sync, verbosity))
check_link(self.pk, verbosity=verbosity)


@receiver(pre_delete, sender=Page)
Expand Down
30 changes: 2 additions & 28 deletions wagtaillinkchecker/scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,32 +8,6 @@
from wagtaillinkchecker import HTTP_STATUS_CODES


def get_celery_worker_status():
ERROR_KEY = "ERROR"
try:
from celery import current_app

broker_url = current_app.conf.broker_url
if broker_url.startswith("sqlalchemy"):
# Can't get stats with sqlalchemy broker
return {}

insp = current_app.control.inspect()
d = insp.stats()
if not d:
d = {ERROR_KEY: "No running Celery workers were found."}
except IOError as e:
from errno import errorcode

msg = "Error connecting to the backend: " + str(e)
if len(e.args) > 0 and errorcode.get(e.args[0]) == "ECONNREFUSED":
msg += " Check that the RabbitMQ server is running."
d = {ERROR_KEY: msg}
except ImportError as e:
d = {ERROR_KEY: str(e)}
return d


class Link(Exception):
def __init__(self, url, page, status_code=None, error=None, site=None):
self.url = url
Expand Down Expand Up @@ -135,7 +109,7 @@ def clean_url(url, site):
return url


def broken_link_scan(site, run_sync=False, verbosity=1):
def broken_link_scan(site, verbosity=1):
from wagtaillinkchecker.models import Scan, ScanLink

pages = site.root_page.get_descendants(inclusive=True).live().public()
Expand All @@ -149,6 +123,6 @@ def broken_link_scan(site, run_sync=False, verbosity=1):
ScanLink.objects.get(url=url, scan=scan)
except ScanLink.DoesNotExist:
link = ScanLink.objects.create(url=page.full_url, page=page, scan=scan)
link.check_link(run_sync, verbosity=verbosity)
link.check_link(verbosity=verbosity)

return scan
9 changes: 4 additions & 5 deletions wagtaillinkchecker/tasks.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from celery import shared_task
from background_task import background
from wagtaillinkchecker.scanner import get_url, clean_url
from wagtaillinkchecker.models import ScanLink
from bs4 import BeautifulSoup
Expand All @@ -8,10 +8,9 @@
from django.utils import timezone


@shared_task
@background(schedule=5)
def check_link(
link_pk,
run_sync=False,
verbosity=1,
):
link = ScanLink.objects.get(pk=link_pk)
Expand Down Expand Up @@ -40,7 +39,7 @@ def check_link(
if link_href:
try:
new_link = link.scan.add_link(page=link.page, url=link_href)
new_link.check_link(run_sync, verbosity)
new_link.check_link(verbosity)
except IntegrityError:
pass

Expand All @@ -52,7 +51,7 @@ def check_link(
if image_src:
try:
new_link = link.scan.add_link(page=link.page, url=image_src)
new_link.check_link(run_sync, verbosity)
new_link.check_link(verbosity)
except IntegrityError:
pass
link.crawled = True
Expand Down
10 changes: 2 additions & 8 deletions wagtaillinkchecker/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from wagtaillinkchecker.forms import SitePreferencesForm
from wagtaillinkchecker.models import SitePreferences, Scan
from wagtaillinkchecker.pagination import paginate
from wagtaillinkchecker.scanner import broken_link_scan, get_celery_worker_status
from wagtaillinkchecker.scanner import broken_link_scan

from wagtail.admin import messages
from wagtail.models import Site
Expand Down Expand Up @@ -81,12 +81,6 @@ def settings(request):

def run_scan(request):
site = Site.find_for_request(request)
celery_status = get_celery_worker_status()
if "ERROR" not in celery_status:
broken_link_scan(site)
else:
messages.warning(
request, _("No celery workers are running, the scan was not conducted.")
)
broken_link_scan(site)

return redirect("wagtaillinkchecker")

0 comments on commit 7720ea0

Please sign in to comment.