diff --git a/README.rst b/README.rst
index ff6790f..a24eeea 100644
--- a/README.rst
+++ b/README.rst
@@ -3,7 +3,7 @@ wagtail-linkchecker
 ===================
 
 A tool/plugin to assist with finding broken links on your wagtail site.
-This tool works asynchronously using celery.
+This tool works asynchronously using django-background-tasks.
 
 Installing
 ==========
@@ -21,10 +21,6 @@ To use, firstly you will need to add ``wagtaillinkchecker`` to your ``INSTALLED_
 There will now be an extra item on the settings panel of the wagtailadmin.
 Inside here you can enable or disable automated scanning (See below for more detail) or conduct a scan.
 
-For scans to be conducted from the admin, you must be running a celery daemon.
-You can run the celery worker with ``celery -A my_app_name worker -l info``. See the `Celery Documentation `_ for more information.
-For production you'll want to run celery as a daemon using something like systemd. See `Celery Daemonization `_ for more information.
-
 Conducting a scan
 -----------------
 Conducting a scan will scan all of your wagtail pages, and detect all images and anchors with a ``src`` or ``href`` respectively.
@@ -46,8 +42,5 @@ Command options
 ``--do-not-send-mail``
   Don't send an email to administrators once scan is complete.
 
-``--run-synchronously``
-  Skip celery and run command synchronously (useful for automated scanning)
-
 ``-v 2``
   Show more output in the logs
diff --git a/setup.py b/setup.py
index 4279311..26a980d 100755
--- a/setup.py
+++ b/setup.py
@@ -16,7 +16,7 @@
     author="Neon Jungle",
     author_email="developers@neonjungle.studio",
     url="https://github.com/neon-jungle/wagtail-linkchecker/",
-    install_requires=["wagtail>=5.0,<6", "requests>=2.9.1", "celery>=5.0,<6"],
+    install_requires=["wagtail>=5.0,<6", "requests>=2.9.1", "django-background-tasks>=1.2.5,<3"],
     zip_safe=False,
     license="BSD License",
     packages=find_packages(),
diff --git a/wagtaillinkchecker/management/commands/linkcheck.py b/wagtaillinkchecker/management/commands/linkcheck.py
index 8fa2fd3..a98fde2 100644
--- a/wagtaillinkchecker/management/commands/linkcheck.py
+++ b/wagtaillinkchecker/management/commands/linkcheck.py
@@ -17,20 +17,14 @@ def add_arguments(self, parser):
             action="store_true",
             help="Do not send mails when finding broken links",
         )
-        parser.add_argument(
-            "--run-synchronously",
-            action="store_true",
-            help="Run checks synchronously (avoid the need for Celery)",
-        )
 
     def handle(self, *args, **kwargs):
         site = Site.objects.filter(is_default_site=True).first()
         pages = site.root_page.get_descendants(inclusive=True).live().public()
-        run_sync = kwargs.get("run_synchronously") or False
         verbosity = kwargs.get("verbosity") or 1
 
         print(f"Scanning {len(pages)} pages...")
-        scan = broken_link_scan(site, run_sync, verbosity)
+        scan = broken_link_scan(site, verbosity)
         total_links = ScanLink.objects.filter(scan=scan, crawled=True)
         broken_links = ScanLink.objects.filter(scan=scan, broken=True)
         print(
diff --git a/wagtaillinkchecker/models.py b/wagtaillinkchecker/models.py
index 6f095a3..73681f8 100644
--- a/wagtaillinkchecker/models.py
+++ b/wagtaillinkchecker/models.py
@@ -102,13 +102,10 @@ def __str__(self):
     def page_is_deleted(self):
         return self.page_deleted and self.page_slug
 
-    def check_link(self, run_sync=False, verbosity=1):
+    def check_link(self, verbosity=1):
         from wagtaillinkchecker.tasks import check_link
 
-        if run_sync:
-            check_link(self.pk, run_sync=run_sync, verbosity=verbosity)
-        else:
-            check_link.apply_async((self.pk, run_sync, verbosity))
+        check_link(self.pk, verbosity=verbosity)
 
 
 @receiver(pre_delete, sender=Page)
diff --git a/wagtaillinkchecker/scanner.py b/wagtaillinkchecker/scanner.py
index 35bebcd..00c07df 100644
--- a/wagtaillinkchecker/scanner.py
+++ b/wagtaillinkchecker/scanner.py
@@ -8,32 +8,6 @@
 from wagtaillinkchecker import HTTP_STATUS_CODES
 
 
-def get_celery_worker_status():
-    ERROR_KEY = "ERROR"
-    try:
-        from celery import current_app
-
-        broker_url = current_app.conf.broker_url
-        if broker_url.startswith("sqlalchemy"):
-            # Can't get stats with sqlalchemy broker
-            return {}
-
-        insp = current_app.control.inspect()
-        d = insp.stats()
-        if not d:
-            d = {ERROR_KEY: "No running Celery workers were found."}
-    except IOError as e:
-        from errno import errorcode
-
-        msg = "Error connecting to the backend: " + str(e)
-        if len(e.args) > 0 and errorcode.get(e.args[0]) == "ECONNREFUSED":
-            msg += " Check that the RabbitMQ server is running."
-        d = {ERROR_KEY: msg}
-    except ImportError as e:
-        d = {ERROR_KEY: str(e)}
-    return d
-
-
 class Link(Exception):
     def __init__(self, url, page, status_code=None, error=None, site=None):
         self.url = url
@@ -135,7 +109,7 @@ def clean_url(url, site):
     return url
 
 
-def broken_link_scan(site, run_sync=False, verbosity=1):
+def broken_link_scan(site, verbosity=1):
     from wagtaillinkchecker.models import Scan, ScanLink
 
     pages = site.root_page.get_descendants(inclusive=True).live().public()
@@ -149,6 +123,6 @@
             ScanLink.objects.get(url=url, scan=scan)
         except ScanLink.DoesNotExist:
             link = ScanLink.objects.create(url=page.full_url, page=page, scan=scan)
-            link.check_link(run_sync, verbosity=verbosity)
+            link.check_link(verbosity=verbosity)
 
     return scan
diff --git a/wagtaillinkchecker/tasks.py b/wagtaillinkchecker/tasks.py
index dfd4629..5949a20 100644
--- a/wagtaillinkchecker/tasks.py
+++ b/wagtaillinkchecker/tasks.py
@@ -1,4 +1,4 @@
-from celery import shared_task
+from background_task import background
 from wagtaillinkchecker.scanner import get_url, clean_url
 from wagtaillinkchecker.models import ScanLink
 from bs4 import BeautifulSoup
@@ -8,10 +8,9 @@
 from django.utils import timezone
 
 
-@shared_task
+@background(schedule=5)
 def check_link(
     link_pk,
-    run_sync=False,
     verbosity=1,
 ):
     link = ScanLink.objects.get(pk=link_pk)
@@ -40,7 +39,7 @@
             if link_href:
                 try:
                     new_link = link.scan.add_link(page=link.page, url=link_href)
-                    new_link.check_link(run_sync, verbosity)
+                    new_link.check_link(verbosity)
                 except IntegrityError:
                     pass
 
@@ -52,7 +51,7 @@
             if image_src:
                 try:
                     new_link = link.scan.add_link(page=link.page, url=image_src)
-                    new_link.check_link(run_sync, verbosity)
+                    new_link.check_link(verbosity)
                 except IntegrityError:
                     pass
     link.crawled = True
diff --git a/wagtaillinkchecker/views.py b/wagtaillinkchecker/views.py
index 416e72e..37bd183 100644
--- a/wagtaillinkchecker/views.py
+++ b/wagtaillinkchecker/views.py
@@ -8,7 +8,7 @@
 from wagtaillinkchecker.forms import SitePreferencesForm
 from wagtaillinkchecker.models import SitePreferences, Scan
 from wagtaillinkchecker.pagination import paginate
-from wagtaillinkchecker.scanner import broken_link_scan, get_celery_worker_status
+from wagtaillinkchecker.scanner import broken_link_scan
 from wagtail.admin import messages
 from wagtail.models import Site
 
@@ -81,12 +81,6 @@ def settings(request):
 
 def run_scan(request):
     site = Site.find_for_request(request)
-    celery_status = get_celery_worker_status()
-    if "ERROR" not in celery_status:
-        broken_link_scan(site)
-    else:
-        messages.warning(
-            request, _("No celery workers are running, the scan was not conducted.")
-        )
+    broken_link_scan(site)
 
     return redirect("wagtaillinkchecker")
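Because django-background-tasks keeps its task queue in the database, each call to the ``@background``-decorated ``check_link`` above only records a ``Task`` row; nothing is crawled until a worker process picks it up. A minimal sketch of the host-project setup this assumes follows: the settings module location and the surrounding app list are illustrative, while the ``background_task`` app label and the ``process_tasks`` command come from the django-background-tasks documentation.

    # settings.py (illustrative location): register the app that owns the Task
    # table which the @background-decorated check_link task is queued into.
    INSTALLED_APPS = [
        # ... existing Django/Wagtail apps, plus "wagtaillinkchecker" ...
        "background_task",
    ]

After adding the app, run ``python manage.py migrate`` to create its tables, then keep ``python manage.py process_tasks`` running so queued link checks are executed; this worker takes the place of the Celery daemon that the README no longer documents.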