From e7b66b43940bb6c0a79fcde87aee24c114327de2 Mon Sep 17 00:00:00 2001 From: Joaquim d'Souza Date: Tue, 17 Sep 2024 16:34:54 +0200 Subject: [PATCH] feat: upgrade wagtail version --- .gitignore | 3 +- setup.py | 36 ++-- wagtaillinkchecker/__init__.py | 188 ++++++++---------- wagtaillinkchecker/apps.py | 2 +- wagtaillinkchecker/forms.py | 3 +- .../management/commands/linkcheck.py | 57 +++--- wagtaillinkchecker/migrations/0001_initial.py | 25 ++- .../migrations/0002_auto_20160209_1533.py | 22 +- .../migrations/0003_store_scan_results.py | 90 ++++++--- .../migrations/0004_auto_20180829_1922.py | 7 +- .../migrations/0005_auto_20180922_1835.py | 7 +- wagtaillinkchecker/models.py | 39 ++-- wagtaillinkchecker/pagination.py | 3 +- wagtaillinkchecker/scanner.py | 84 ++++---- wagtaillinkchecker/tasks.py | 26 ++- wagtaillinkchecker/urls.py | 19 +- wagtaillinkchecker/utils.py | 15 -- wagtaillinkchecker/views.py | 101 ++++------ wagtaillinkchecker/wagtail_hooks.py | 29 +-- 19 files changed, 384 insertions(+), 372 deletions(-) delete mode 100644 wagtaillinkchecker/utils.py diff --git a/.gitignore b/.gitignore index 839cf2e..ca2a902 100644 --- a/.gitignore +++ b/.gitignore @@ -3,4 +3,5 @@ dist/ venv/ build/ -.vscode/ \ No newline at end of file +.vscode/ +.venv \ No newline at end of file diff --git a/setup.py b/setup.py index f3d11b0..4279311 100755 --- a/setup.py +++ b/setup.py @@ -3,39 +3,31 @@ Install wagtail-linkchecker using setuptools """ -with open('README.rst', 'r') as f: +with open("README.rst", "r") as f: readme = f.read() from setuptools import find_packages, setup setup( - name='wagtail-linkchecker', - version='0.6.0', + name="wagtail-linkchecker", + version="0.7.1", description="A tool to assist with finding broken links on your wagtail site.", long_description=readme, - author='Neon Jungle', - author_email='developers@neonjungle.studio', - url='https://github.com/neon-jungle/wagtail-linkchecker/', - - install_requires=[ - 'wagtail>=1.0', - 'requests>=2.9.1', - 'celery>=5.0,<6' - ], + author="Neon Jungle", + author_email="developers@neonjungle.studio", + url="https://github.com/neon-jungle/wagtail-linkchecker/", + install_requires=["wagtail>=5.0,<6", "requests>=2.9.1", "celery>=5.0,<6"], zip_safe=False, - license='BSD License', - + license="BSD License", packages=find_packages(), - include_package_data=True, package_data={}, - classifiers=[ - 'Environment :: Web Environment', - 'Intended Audience :: Developers', - 'Operating System :: OS Independent', - 'Programming Language :: Python', - 'Framework :: Django', - 'License :: OSI Approved :: BSD License', + "Environment :: Web Environment", + "Intended Audience :: Developers", + "Operating System :: OS Independent", + "Programming Language :: Python", + "Framework :: Django", + "License :: OSI Approved :: BSD License", ], ) diff --git a/wagtaillinkchecker/__init__.py b/wagtaillinkchecker/__init__.py index 7f1068a..5109c33 100644 --- a/wagtaillinkchecker/__init__.py +++ b/wagtaillinkchecker/__init__.py @@ -1,105 +1,91 @@ -__version__ = '0.1.0' +__version__ = "0.1.0" -default_app_config = 'wagtaillinkchecker.apps.WagtailLinkchekerAppConfig' +default_app_config = "wagtaillinkchecker.apps.WagtailLinkchekerAppConfig" HTTP_STATUS_CODES = { - 100: ('Continue', 'Request received, please continue'), - 101: ('Switching Protocols', - 'Switching to new protocol; obey Upgrade header'), - 102: ('Processing', 'WebDAV; RFC 2518'), - - 200: ('OK', 'Request fulfilled, document follows'), - 201: ('Created', 'Document created, URL follows'), - 202: ('Accepted', - 'Request accepted, processing continues off-line'), - 203: ('Non-Authoritative Information', 'Request fulfilled from cache'), - 204: ('No Content', 'Request fulfilled, nothing follows'), - 205: ('Reset Content', 'Clear input form for further input.'), - 206: ('Partial Content', 'Partial content follows.'), - 207: ('Multi-Status', 'WebDAV; RFC 4918'), - 208: ('Already Reported', 'WebDAV; RFC 5842'), - 226: ('IM Used', 'RFC 3229'), - - 300: ('Multiple Choices', - 'Object has several resources -- see URI list'), - 301: ('Moved Permanently', 'Object moved permanently -- see URI list'), - 302: ('Found', 'Object moved temporarily -- see URI list'), - 303: ('See Other', 'Object moved -- see Method and URL list'), - 304: ('Not Modified', - 'Document has not changed since given time'), - 305: ('Use Proxy', - 'You must use proxy specified in Location to access this ' - 'resource.'), - 306: ('Switch Proxy', 'Subsequent requests should use the specified proxy'), - 307: ('Temporary Redirect', - 'Object moved temporarily -- see URI list'), - 308: ('Permanent Redirect', 'Object moved permanently'), - - 400: ('Bad Request', - 'Bad request syntax or unsupported method'), - 401: ('Unauthorized', - 'No permission -- see authorization schemes'), - 402: ('Payment Required', - 'No payment -- see charging schemes'), - 403: ('Forbidden', - 'Request forbidden -- authorization will not help'), - 404: ('Not Found', 'Nothing matches the given URI'), - 405: ('Method Not Allowed', - 'Specified method is invalid for this resource.'), - 406: ('Not Acceptable', 'URI not available in preferred format.'), - 407: ('Proxy Authentication Required', 'You must authenticate with ' - 'this proxy before proceeding.'), - 408: ('Request Timeout', 'Request timed out; try again later.'), - 409: ('Conflict', 'Request conflict.'), - 410: ('Gone', - 'URI no longer exists and has been permanently removed.'), - 411: ('Length Required', 'Client must specify Content-Length.'), - 412: ('Precondition Failed', 'Precondition in headers is false.'), - 413: ('Payload Too Large', 'Payload is too large.'), - 414: ('Request-URI Too Long', 'URI is too long.'), - 415: ('Unsupported Media Type', 'Entity body in unsupported format.'), - 416: ('Requested Range Not Satisfiable', - 'Cannot satisfy request range.'), - 417: ('Expectation Failed', - 'Expect condition could not be satisfied.'), - 418: ("I'm a teapot", 'The HTCPCP server is a teapot'), - 419: ('Authentication Timeout', 'previously valid authentication has expired'), - 420: ('Method Failure / Enhance Your Calm', 'Spring Framework / Twitter'), - 422: ('Unprocessable Entity', 'WebDAV; RFC 4918'), - 423: ('Locked', 'WebDAV; RFC 4918'), - 424: ('Failed Dependency / Method Failure', 'WebDAV; RFC 4918'), - 425: ('Unordered Collection', 'Internet draft'), - 426: ('Upgrade Required', 'client should switch to a different protocol'), - 428: ('Precondition Required', 'RFC 6585'), - 429: ('Too Many Requests', 'RFC 6585'), - 431: ('Request Header Fields Too Large', 'RFC 6585'), - 440: ('Login Timeout', 'Microsoft'), - 444: ('No Response', 'Nginx'), - 449: ('Retry With', 'Microsoft'), - 450: ('Blocked by Windows Parental Controls', 'Microsoft'), - 451: ('Unavailable For Legal Reasons', 'RFC 7725'), - 494: ('Request Header Too Large', 'Nginx'), - 495: ('Cert Error', 'Nginx'), - 496: ('No Cert', 'Nginx'), - 497: ('HTTP to HTTPS', 'Nginx'), - 498: ('Token expired/invalid', 'Esri'), - 499: ('Client Closed Request', 'Nginx'), - - 500: ('Internal Server Error', 'Server got itself in trouble'), - 501: ('Not Implemented', - 'Server does not support this operation'), - 502: ('Bad Gateway', 'Invalid responses from another server/proxy.'), - 503: ('Service Unavailable', - 'The server cannot process the request due to a high load'), - 504: ('Gateway Timeout', - 'The gateway server did not receive a timely response'), - 505: ('HTTP Version Not Supported', 'Cannot fulfill request.'), - 506: ('Variant Also Negotiates', 'RFC 2295'), - 507: ('Insufficient Storage', 'WebDAV; RFC 4918'), - 508: ('Loop Detected', 'WebDAV; RFC 5842'), - 509: ('Bandwidth Limit Exceeded', 'Apache bw/limited extension'), - 510: ('Not Extended', 'RFC 2774'), - 511: ('Network Authentication Required', 'RFC 6585'), - 598: ('Network read timeout error', 'Unknown'), - 599: ('Network connect timeout error', 'Unknown'), + 100: ("Continue", "Request received, please continue"), + 101: ("Switching Protocols", "Switching to new protocol; obey Upgrade header"), + 102: ("Processing", "WebDAV; RFC 2518"), + 200: ("OK", "Request fulfilled, document follows"), + 201: ("Created", "Document created, URL follows"), + 202: ("Accepted", "Request accepted, processing continues off-line"), + 203: ("Non-Authoritative Information", "Request fulfilled from cache"), + 204: ("No Content", "Request fulfilled, nothing follows"), + 205: ("Reset Content", "Clear input form for further input."), + 206: ("Partial Content", "Partial content follows."), + 207: ("Multi-Status", "WebDAV; RFC 4918"), + 208: ("Already Reported", "WebDAV; RFC 5842"), + 226: ("IM Used", "RFC 3229"), + 300: ("Multiple Choices", "Object has several resources -- see URI list"), + 301: ("Moved Permanently", "Object moved permanently -- see URI list"), + 302: ("Found", "Object moved temporarily -- see URI list"), + 303: ("See Other", "Object moved -- see Method and URL list"), + 304: ("Not Modified", "Document has not changed since given time"), + 305: ( + "Use Proxy", + "You must use proxy specified in Location to access this " "resource.", + ), + 306: ("Switch Proxy", "Subsequent requests should use the specified proxy"), + 307: ("Temporary Redirect", "Object moved temporarily -- see URI list"), + 308: ("Permanent Redirect", "Object moved permanently"), + 400: ("Bad Request", "Bad request syntax or unsupported method"), + 401: ("Unauthorized", "No permission -- see authorization schemes"), + 402: ("Payment Required", "No payment -- see charging schemes"), + 403: ("Forbidden", "Request forbidden -- authorization will not help"), + 404: ("Not Found", "Nothing matches the given URI"), + 405: ("Method Not Allowed", "Specified method is invalid for this resource."), + 406: ("Not Acceptable", "URI not available in preferred format."), + 407: ( + "Proxy Authentication Required", + "You must authenticate with " "this proxy before proceeding.", + ), + 408: ("Request Timeout", "Request timed out; try again later."), + 409: ("Conflict", "Request conflict."), + 410: ("Gone", "URI no longer exists and has been permanently removed."), + 411: ("Length Required", "Client must specify Content-Length."), + 412: ("Precondition Failed", "Precondition in headers is false."), + 413: ("Payload Too Large", "Payload is too large."), + 414: ("Request-URI Too Long", "URI is too long."), + 415: ("Unsupported Media Type", "Entity body in unsupported format."), + 416: ("Requested Range Not Satisfiable", "Cannot satisfy request range."), + 417: ("Expectation Failed", "Expect condition could not be satisfied."), + 418: ("I'm a teapot", "The HTCPCP server is a teapot"), + 419: ("Authentication Timeout", "previously valid authentication has expired"), + 420: ("Method Failure / Enhance Your Calm", "Spring Framework / Twitter"), + 422: ("Unprocessable Entity", "WebDAV; RFC 4918"), + 423: ("Locked", "WebDAV; RFC 4918"), + 424: ("Failed Dependency / Method Failure", "WebDAV; RFC 4918"), + 425: ("Unordered Collection", "Internet draft"), + 426: ("Upgrade Required", "client should switch to a different protocol"), + 428: ("Precondition Required", "RFC 6585"), + 429: ("Too Many Requests", "RFC 6585"), + 431: ("Request Header Fields Too Large", "RFC 6585"), + 440: ("Login Timeout", "Microsoft"), + 444: ("No Response", "Nginx"), + 449: ("Retry With", "Microsoft"), + 450: ("Blocked by Windows Parental Controls", "Microsoft"), + 451: ("Unavailable For Legal Reasons", "RFC 7725"), + 494: ("Request Header Too Large", "Nginx"), + 495: ("Cert Error", "Nginx"), + 496: ("No Cert", "Nginx"), + 497: ("HTTP to HTTPS", "Nginx"), + 498: ("Token expired/invalid", "Esri"), + 499: ("Client Closed Request", "Nginx"), + 500: ("Internal Server Error", "Server got itself in trouble"), + 501: ("Not Implemented", "Server does not support this operation"), + 502: ("Bad Gateway", "Invalid responses from another server/proxy."), + 503: ( + "Service Unavailable", + "The server cannot process the request due to a high load", + ), + 504: ("Gateway Timeout", "The gateway server did not receive a timely response"), + 505: ("HTTP Version Not Supported", "Cannot fulfill request."), + 506: ("Variant Also Negotiates", "RFC 2295"), + 507: ("Insufficient Storage", "WebDAV; RFC 4918"), + 508: ("Loop Detected", "WebDAV; RFC 5842"), + 509: ("Bandwidth Limit Exceeded", "Apache bw/limited extension"), + 510: ("Not Extended", "RFC 2774"), + 511: ("Network Authentication Required", "RFC 6585"), + 598: ("Network read timeout error", "Unknown"), + 599: ("Network connect timeout error", "Unknown"), } diff --git a/wagtaillinkchecker/apps.py b/wagtaillinkchecker/apps.py index 5a8ad0b..7ca2cf4 100644 --- a/wagtaillinkchecker/apps.py +++ b/wagtaillinkchecker/apps.py @@ -2,5 +2,5 @@ class WagtailLinkchekerAppConfig(AppConfig): - name = 'wagtaillinkchecker' + name = "wagtaillinkchecker" verbose_name = "Wagtail Link Checker" diff --git a/wagtaillinkchecker/forms.py b/wagtaillinkchecker/forms.py index 0aff22f..805d29c 100644 --- a/wagtaillinkchecker/forms.py +++ b/wagtaillinkchecker/forms.py @@ -4,7 +4,6 @@ class SitePreferencesForm(forms.ModelForm): - class Meta: model = SitePreferences - exclude = ('site', ) + exclude = ("site",) diff --git a/wagtaillinkchecker/management/commands/linkcheck.py b/wagtaillinkchecker/management/commands/linkcheck.py index ee6d458..8fa2fd3 100644 --- a/wagtaillinkchecker/management/commands/linkcheck.py +++ b/wagtaillinkchecker/management/commands/linkcheck.py @@ -6,41 +6,39 @@ from wagtaillinkchecker.scanner import broken_link_scan from wagtaillinkchecker.models import ScanLink -from wagtaillinkchecker import utils -if utils.is_wagtail_version_more_than_equal_to_2_0(): - from wagtail.core.models import PageRevision, Site -else: - from wagtail.wagtailcore.models import PageRevision, Site +from wagtail.models import PageRevision, Site class Command(BaseCommand): def add_arguments(self, parser): parser.add_argument( - '--do-not-send-mail', - action='store_true', - help='Do not send mails when finding broken links', + "--do-not-send-mail", + action="store_true", + help="Do not send mails when finding broken links", ) parser.add_argument( - '--run-synchronously', - action='store_true', - help='Run checks synchronously (avoid the need for Celery)', + "--run-synchronously", + action="store_true", + help="Run checks synchronously (avoid the need for Celery)", ) def handle(self, *args, **kwargs): site = Site.objects.filter(is_default_site=True).first() pages = site.root_page.get_descendants(inclusive=True).live().public() - run_sync = kwargs.get('run_synchronously') or False - verbosity = kwargs.get('verbosity') or 1 + run_sync = kwargs.get("run_synchronously") or False + verbosity = kwargs.get("verbosity") or 1 - print(f'Scanning {len(pages)} pages...') + print(f"Scanning {len(pages)} pages...") scan = broken_link_scan(site, run_sync, verbosity) total_links = ScanLink.objects.filter(scan=scan, crawled=True) broken_links = ScanLink.objects.filter(scan=scan, broken=True) - print(f'Found {len(total_links)} total links, with {len(broken_links)} broken links.') + print( + f"Found {len(total_links)} total links, with {len(broken_links)} broken links." + ) - if kwargs.get('do_not_send_mail'): - print(f'Will not send any emails') + if kwargs.get("do_not_send_mail"): + print("Will not send any emails") return messages = [] @@ -49,27 +47,30 @@ def handle(self, *args, **kwargs): user = None user_email = settings.DEFAULT_FROM_EMAIL if revisions: - revision = revisions.latest('created_at') + revision = revisions.latest("created_at") user = revision.user - user_email = revision.user.email if revision.user else '' + user_email = revision.user.email if revision.user else "" page_broken_links = [] for link in broken_links: if link.page == page: page_broken_links.append(link) email_message = render_to_string( - 'wagtaillinkchecker/emails/broken_links.html', { - 'page_broken_links': page_broken_links, - 'user': user, - 'page': page, - 'base_url': site.root_url, - 'site_name': settings.WAGTAIL_SITE_NAME, - }) + "wagtaillinkchecker/emails/broken_links.html", + { + "page_broken_links": page_broken_links, + "user": user, + "page": page, + "base_url": site.root_url, + "site_name": settings.WAGTAIL_SITE_NAME, + }, + ) email = EmailMessage( 'Broken links on page "%s"' % (page.title), email_message, settings.DEFAULT_FROM_EMAIL, - [user_email]) - email.content_subtype = 'html' + [user_email], + ) + email.content_subtype = "html" messages.append(email) connection = mail.get_connection() diff --git a/wagtaillinkchecker/migrations/0001_initial.py b/wagtaillinkchecker/migrations/0001_initial.py index 051ac9a..38161f0 100644 --- a/wagtaillinkchecker/migrations/0001_initial.py +++ b/wagtaillinkchecker/migrations/0001_initial.py @@ -7,20 +7,33 @@ class Migration(migrations.Migration): - initial = True dependencies = [ - ('wagtailcore', '0023_alter_page_revision_on_delete_behaviour'), + ("wagtailcore", "0023_alter_page_revision_on_delete_behaviour"), ] operations = [ migrations.CreateModel( - name='SitePreferences', + name="SitePreferences", fields=[ - ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), - ('automated_scanning', models.BooleanField(default=False)), - ('site', models.OneToOneField(on_delete=django.db.models.deletion.CASCADE, to='wagtailcore.Site')), + ( + "id", + models.AutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ("automated_scanning", models.BooleanField(default=False)), + ( + "site", + models.OneToOneField( + on_delete=django.db.models.deletion.CASCADE, + to="wagtailcore.Site", + ), + ), ], ), ] diff --git a/wagtaillinkchecker/migrations/0002_auto_20160209_1533.py b/wagtaillinkchecker/migrations/0002_auto_20160209_1533.py index dde3ace..6973b1a 100644 --- a/wagtaillinkchecker/migrations/0002_auto_20160209_1533.py +++ b/wagtaillinkchecker/migrations/0002_auto_20160209_1533.py @@ -7,20 +7,26 @@ class Migration(migrations.Migration): - dependencies = [ - ('wagtaillinkchecker', '0001_initial'), + ("wagtaillinkchecker", "0001_initial"), ] operations = [ migrations.AlterField( - model_name='sitepreferences', - name='automated_scanning', - field=models.BooleanField(default=False, help_text='Conduct automated sitewide scans for broken links, and send emails if a problem is found.'), + model_name="sitepreferences", + name="automated_scanning", + field=models.BooleanField( + default=False, + help_text="Conduct automated sitewide scans for broken links, and send emails if a problem is found.", + ), ), migrations.AlterField( - model_name='sitepreferences', - name='site', - field=models.OneToOneField(editable=False, on_delete=django.db.models.deletion.CASCADE, to='wagtailcore.Site'), + model_name="sitepreferences", + name="site", + field=models.OneToOneField( + editable=False, + on_delete=django.db.models.deletion.CASCADE, + to="wagtailcore.Site", + ), ), ] diff --git a/wagtaillinkchecker/migrations/0003_store_scan_results.py b/wagtaillinkchecker/migrations/0003_store_scan_results.py index 6c0f316..b8844ff 100644 --- a/wagtaillinkchecker/migrations/0003_store_scan_results.py +++ b/wagtaillinkchecker/migrations/0003_store_scan_results.py @@ -7,45 +7,85 @@ class Migration(migrations.Migration): - dependencies = [ - ('wagtailcore', '0033_remove_golive_expiry_help_text'), - ('wagtaillinkchecker', '0002_auto_20160209_1533'), + ("wagtailcore", "0033_remove_golive_expiry_help_text"), + ("wagtaillinkchecker", "0002_auto_20160209_1533"), ] operations = [ migrations.CreateModel( - name='Scan', + name="Scan", fields=[ - ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), - ('scan_finished', models.DateTimeField(blank=True, null=True)), - ('scan_started', models.DateTimeField(auto_now_add=True)), - ('site', models.ForeignKey(editable=False, on_delete=django.db.models.deletion.CASCADE, to='wagtailcore.Site')), + ( + "id", + models.AutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ("scan_finished", models.DateTimeField(blank=True, null=True)), + ("scan_started", models.DateTimeField(auto_now_add=True)), + ( + "site", + models.ForeignKey( + editable=False, + on_delete=django.db.models.deletion.CASCADE, + to="wagtailcore.Site", + ), + ), ], ), migrations.CreateModel( - name='ScanLink', + name="ScanLink", fields=[ - ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), - ('url', models.URLField()), - ('crawled', models.BooleanField(default=False)), - ('invalid', models.BooleanField(default=False)), - ('broken', models.BooleanField(default=False)), - ('status_code', models.IntegerField(blank=True, null=True)), - ('error_text', models.TextField(blank=True, null=True)), - ('page_deleted', models.BooleanField(default=False)), - ('page_slug', models.CharField(blank=True, max_length=128, null=True)), - ('page', models.ForeignKey(null=True, on_delete=django.db.models.deletion.SET_NULL, to='wagtailcore.Page')), - ('scan', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='links', to='wagtaillinkchecker.Scan')), + ( + "id", + models.AutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ("url", models.URLField()), + ("crawled", models.BooleanField(default=False)), + ("invalid", models.BooleanField(default=False)), + ("broken", models.BooleanField(default=False)), + ("status_code", models.IntegerField(blank=True, null=True)), + ("error_text", models.TextField(blank=True, null=True)), + ("page_deleted", models.BooleanField(default=False)), + ("page_slug", models.CharField(blank=True, max_length=128, null=True)), + ( + "page", + models.ForeignKey( + null=True, + on_delete=django.db.models.deletion.SET_NULL, + to="wagtailcore.Page", + ), + ), + ( + "scan", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="links", + to="wagtaillinkchecker.Scan", + ), + ), ], ), migrations.AlterField( - model_name='sitepreferences', - name='automated_scanning', - field=models.BooleanField(default=False, help_text='Conduct automated sitewide scans for broken links, and send emails if a problem is found.', verbose_name='Automated Scanning'), + model_name="sitepreferences", + name="automated_scanning", + field=models.BooleanField( + default=False, + help_text="Conduct automated sitewide scans for broken links, and send emails if a problem is found.", + verbose_name="Automated Scanning", + ), ), migrations.AlterUniqueTogether( - name='scanlink', - unique_together=set([('url', 'scan')]), + name="scanlink", + unique_together=set([("url", "scan")]), ), ] diff --git a/wagtaillinkchecker/migrations/0004_auto_20180829_1922.py b/wagtaillinkchecker/migrations/0004_auto_20180829_1922.py index dab22ad..ab34e70 100644 --- a/wagtaillinkchecker/migrations/0004_auto_20180829_1922.py +++ b/wagtaillinkchecker/migrations/0004_auto_20180829_1922.py @@ -4,15 +4,14 @@ class Migration(migrations.Migration): - dependencies = [ - ('wagtaillinkchecker', '0003_store_scan_results'), + ("wagtaillinkchecker", "0003_store_scan_results"), ] operations = [ migrations.AlterField( - model_name='scanlink', - name='page_slug', + model_name="scanlink", + name="page_slug", field=models.CharField(blank=True, max_length=512, null=True), ), ] diff --git a/wagtaillinkchecker/migrations/0005_auto_20180922_1835.py b/wagtaillinkchecker/migrations/0005_auto_20180922_1835.py index 2bd415b..6b98662 100644 --- a/wagtaillinkchecker/migrations/0005_auto_20180922_1835.py +++ b/wagtaillinkchecker/migrations/0005_auto_20180922_1835.py @@ -4,15 +4,14 @@ class Migration(migrations.Migration): - dependencies = [ - ('wagtaillinkchecker', '0004_auto_20180829_1922'), + ("wagtaillinkchecker", "0004_auto_20180829_1922"), ] operations = [ migrations.AlterField( - model_name='scanlink', - name='url', + model_name="scanlink", + name="url", field=models.URLField(max_length=500), ), ] diff --git a/wagtaillinkchecker/models.py b/wagtaillinkchecker/models.py index 2139299..6f095a3 100644 --- a/wagtaillinkchecker/models.py +++ b/wagtaillinkchecker/models.py @@ -1,27 +1,22 @@ -from sys import version from django.db import models from django.db.models.signals import pre_delete from django.dispatch import receiver from django.utils.translation import ugettext_lazy as _ -from wagtaillinkchecker import utils - -if utils.is_wagtail_version_more_than_equal_to_2_0(): - from wagtail.core.models import Site - from wagtail.core.models import Page -else: - from wagtail.wagtailcore.models import Site - from wagtail.wagtailcore.models import Page +from wagtail.models import Site +from wagtail.models import Page class SitePreferences(models.Model): site = models.OneToOneField( - Site, unique=True, db_index=True, editable=False, on_delete=models.CASCADE) + Site, unique=True, db_index=True, editable=False, on_delete=models.CASCADE + ) automated_scanning = models.BooleanField( default=False, help_text=_( - 'Conduct automated sitewide scans for broken links, and send emails if a problem is found.'), - verbose_name=_('Automated Scanning') + "Conduct automated sitewide scans for broken links, and send emails if a problem is found." + ), + verbose_name=_("Automated Scanning"), ) @@ -29,7 +24,8 @@ class Scan(models.Model): scan_finished = models.DateTimeField(blank=True, null=True) scan_started = models.DateTimeField(auto_now_add=True) site = models.ForeignKey( - Site, db_index=True, editable=False, on_delete=models.CASCADE) + Site, db_index=True, editable=False, on_delete=models.CASCADE + ) @property def is_finished(self): @@ -39,14 +35,17 @@ def add_link(self, url=None, page=None): return ScanLink.objects.create(scan=self, url=url, page=page) def result(self): - return _('{0} broken links found out of {1} links'.format(self.broken_link_count(), self.links.count())) + return _( + "{0} broken links found out of {1} links".format( + self.broken_link_count(), self.links.count() + ) + ) def __str__(self): - return 'Scan - {0}'.format(self.scan_started.strftime('%d/%m/%Y')) + return "Scan - {0}".format(self.scan_started.strftime("%d/%m/%Y")) class ScanLinkQuerySet(models.QuerySet): - def valid(self): return self.filter(invalid=False) @@ -67,8 +66,7 @@ def working_links(self): class ScanLink(models.Model): - scan = models.ForeignKey(Scan, related_name='links', - on_delete=models.CASCADE) + scan = models.ForeignKey(Scan, related_name="links", on_delete=models.CASCADE) url = models.URLField(max_length=500) # If the link has been crawled @@ -95,7 +93,7 @@ class ScanLink(models.Model): objects = ScanLinkQuerySet.as_manager() class Meta: - unique_together = [('url', 'scan')] + unique_together = [("url", "scan")] def __str__(self): return self.url @@ -116,4 +114,5 @@ def check_link(self, run_sync=False, verbosity=1): @receiver(pre_delete, sender=Page) def delete_tag(instance, **kwargs): ScanLink.objects.filter(page=instance).update( - page_deleted=True, page_slug=instance.slug) + page_deleted=True, page_slug=instance.slug + ) diff --git a/wagtaillinkchecker/pagination.py b/wagtaillinkchecker/pagination.py index 8a61edd..f48cfa2 100644 --- a/wagtaillinkchecker/pagination.py +++ b/wagtaillinkchecker/pagination.py @@ -1,8 +1,7 @@ -from django.conf import settings from django.core.paginator import Paginator, EmptyPage -def paginate(request, items, page_key='page'): +def paginate(request, items, page_key="page"): paginator = Paginator(items, 50) try: diff --git a/wagtaillinkchecker/scanner.py b/wagtaillinkchecker/scanner.py index c5cde45..6466629 100644 --- a/wagtaillinkchecker/scanner.py +++ b/wagtaillinkchecker/scanner.py @@ -12,15 +12,17 @@ def get_celery_worker_status(): ERROR_KEY = "ERROR" try: from celery.task.control import inspect + insp = inspect() d = insp.stats() if not d: - d = {ERROR_KEY: 'No running Celery workers were found.'} + d = {ERROR_KEY: "No running Celery workers were found."} except IOError as e: from errno import errorcode + msg = "Error connecting to the backend: " + str(e) - if len(e.args) > 0 and errorcode.get(e.args[0]) == 'ECONNREFUSED': - msg += ' Check that the RabbitMQ server is running.' + if len(e.args) > 0 and errorcode.get(e.args[0]) == "ECONNREFUSED": + msg += " Check that the RabbitMQ server is running." d = {ERROR_KEY: msg} except ImportError as e: d = {ERROR_KEY: str(e)} @@ -28,7 +30,6 @@ def get_celery_worker_status(): class Link(Exception): - def __init__(self, url, page, status_code=None, error=None, site=None): self.url = url self.status_code = status_code @@ -42,15 +43,24 @@ def message(self): return self.error elif self.status_code in range(100, 300): message = "Success" - elif self.status_code in range(500, 600) and self.url.startswith(self.site.root_url): - message = str(self.status_code) + ': ' + \ - _('Internal server error, please notify the site administrator.') + elif self.status_code in range(500, 600) and self.url.startswith( + self.site.root_url + ): + message = ( + str(self.status_code) + + ": " + + _("Internal server error, please notify the site administrator.") + ) else: try: - message = str(self.status_code) + ': ' + \ - client.responses[self.status_code] + '.' + message = ( + str(self.status_code) + + ": " + + client.responses[self.status_code] + + "." + ) except KeyError: - message = str(self.status_code) + ': ' + _('Unknown error.') + message = str(self.status_code) + ": " + _("Unknown error.") return message def __str__(self): @@ -67,51 +77,53 @@ def __hash__(self): def get_url(url, page, site): data = { - 'url': url, - 'page': page, - 'site': site, - 'error': False, - 'invalid_schema': False + "url": url, + "page": page, + "site": site, + "error": False, + "invalid_schema": False, } response = None try: response = requests.get(url, verify=True) - data['response'] = response + data["response"] = response except (requests.exceptions.InvalidSchema, requests.exceptions.MissingSchema): - data['invalid_schema'] = True + data["invalid_schema"] = True return data - except requests.exceptions.ConnectionError as e: - data['error'] = True - data['error_message'] = _('There was an error connecting to this site') + except requests.exceptions.ConnectionError: + data["error"] = True + data["error_message"] = _("There was an error connecting to this site") return data except requests.exceptions.RequestException as e: - data['error'] = True - data['status_code'] = response.status_code - data['error_message'] = type(e).__name__ + ': ' + str(e) + data["error"] = True + data["status_code"] = response.status_code + data["error_message"] = type(e).__name__ + ": " + str(e) return data else: if response.status_code not in range(100, 400): - error_message_for_status_code = HTTP_STATUS_CODES.get( - response.status_code) - data['error'] = True - data['status_code'] = response.status_code + error_message_for_status_code = HTTP_STATUS_CODES.get(response.status_code) + data["error"] = True + data["status_code"] = response.status_code if error_message_for_status_code: - data['error_message'] = error_message_for_status_code[0] + data["error_message"] = error_message_for_status_code[0] else: if response.status_code in range(400, 500): - data['error_message'] = 'Client error' + data["error_message"] = "Client error" elif response.status_code in range(500, 600): - data['error_message'] = 'Server Error' + data["error_message"] = "Server Error" else: - data['error_message'] = "Error: Unknown HTTP Status Code '{0}'".format( - response.status_code) + data["error_message"] = ( + "Error: Unknown HTTP Status Code '{0}'".format( + response.status_code + ) + ) return data def clean_url(url, site): - if url and url != '#': - if url.startswith('/'): + if url and url != "#": + if url.startswith("/"): url = site.root_url + url else: return None @@ -120,6 +132,7 @@ def clean_url(url, site): def broken_link_scan(site, run_sync=False, verbosity=1): from wagtaillinkchecker.models import Scan, ScanLink + pages = site.root_page.get_descendants(inclusive=True).live().public() scan = Scan.objects.create(site=site) @@ -130,8 +143,7 @@ def broken_link_scan(site, run_sync=False, verbosity=1): print(f"Checking {url}") ScanLink.objects.get(url=url, scan=scan) except ScanLink.DoesNotExist: - link = ScanLink.objects.create( - url=page.full_url, page=page, scan=scan) + link = ScanLink.objects.create(url=page.full_url, page=page, scan=scan) link.check_link(run_sync, verbosity=verbosity) return scan diff --git a/wagtaillinkchecker/tasks.py b/wagtaillinkchecker/tasks.py index 5a30550..dfd4629 100644 --- a/wagtaillinkchecker/tasks.py +++ b/wagtaillinkchecker/tasks.py @@ -9,27 +9,31 @@ @shared_task -def check_link(link_pk, run_sync=False, verbosity=1, ): +def check_link( + link_pk, + run_sync=False, + verbosity=1, +): link = ScanLink.objects.get(pk=link_pk) site = link.scan.site url = get_url(link.url, link.page, site) - link.status_code = url.get('status_code') + link.status_code = url.get("status_code") - if url['error']: + if url["error"]: link.broken = True - link.error_text = url['error_message'] + link.error_text = url["error_message"] - elif url['invalid_schema']: + elif url["invalid_schema"]: link.invalid = True - link.error_text = _('Link was invalid') + link.error_text = _("Link was invalid") elif link.page.full_url == link.url: - soup = BeautifulSoup(url['response'].content, 'html5lib') - anchors = soup.find_all('a') - images = soup.find_all('img') + soup = BeautifulSoup(url["response"].content, "html5lib") + anchors = soup.find_all("a") + images = soup.find_all("img") for anchor in anchors: - link_href = anchor.get('href') + link_href = anchor.get("href") link_href = clean_url(link_href, site) if verbosity > 1: print(f"cleaned link_href: {link_href}") @@ -41,7 +45,7 @@ def check_link(link_pk, run_sync=False, verbosity=1, ): pass for image in images: - image_src = image.get('src') + image_src = image.get("src") image_src = clean_url(image_src, site) if verbosity > 1: print(f"cleaned image_src: {image_src}") diff --git a/wagtaillinkchecker/urls.py b/wagtaillinkchecker/urls.py index e3b0dff..e98c7c0 100644 --- a/wagtaillinkchecker/urls.py +++ b/wagtaillinkchecker/urls.py @@ -5,14 +5,13 @@ from wagtaillinkchecker import views urlpatterns = [ - url(r'^$', views.index, - name='wagtaillinkchecker'), - url(r'^settings/$', views.settings, - name='wagtaillinkchecker_settings'), - url(r'^scan/$', views.run_scan, - name='wagtaillinkchecker_runscan'), - url(r'^scan/(?P\d+)/$', views.scan, - name='wagtaillinkchecker_scan'), - url(r'^scan/(?P\d+)/delete$', views.delete, - name='wagtaillinkchecker_delete'), + url(r"^$", views.index, name="wagtaillinkchecker"), + url(r"^settings/$", views.settings, name="wagtaillinkchecker_settings"), + url(r"^scan/$", views.run_scan, name="wagtaillinkchecker_runscan"), + url(r"^scan/(?P\d+)/$", views.scan, name="wagtaillinkchecker_scan"), + url( + r"^scan/(?P\d+)/delete$", + views.delete, + name="wagtaillinkchecker_delete", + ), ] diff --git a/wagtaillinkchecker/utils.py b/wagtaillinkchecker/utils.py deleted file mode 100644 index 9321dbb..0000000 --- a/wagtaillinkchecker/utils.py +++ /dev/null @@ -1,15 +0,0 @@ -import re -from wagtail import __version__ as WAGTAIL_VERSION - - -def is_wagtail_version_more_than_equal_to_2_5(): - expression = '^((2.([5-9]{1,}|([1-9]{1,}[0-9]{1,}))(.\d+)*)|(([3-9]{1,})(.\d+)*))$' - - return re.search(expression, WAGTAIL_VERSION) - - -def is_wagtail_version_more_than_equal_to_2_0(): - expression = '^((2.([0-9]{1,}|([1-9]{1,}[0-9]{1,}))(.\d+)*)|(([3-9]{1,})(.\d+)*))$' - - return re.search(expression, WAGTAIL_VERSION) - diff --git a/wagtaillinkchecker/views.py b/wagtaillinkchecker/views.py index b8dd68f..05a76d7 100644 --- a/wagtaillinkchecker/views.py +++ b/wagtaillinkchecker/views.py @@ -3,10 +3,7 @@ from django.shortcuts import get_object_or_404 from django.shortcuts import redirect, render -try: - from django.utils.lru_cache import lru_cache -except ModuleNotFoundError: - from functools import lru_cache +from functools import lru_cache from django.utils.translation import ugettext_lazy as _ @@ -14,65 +11,53 @@ from wagtaillinkchecker.models import SitePreferences, Scan from wagtaillinkchecker.pagination import paginate from wagtaillinkchecker.scanner import broken_link_scan, get_celery_worker_status -from wagtaillinkchecker import utils -if utils.is_wagtail_version_more_than_equal_to_2_0(): - from wagtail.admin import messages - from wagtail.admin.edit_handlers import (ObjectList, - extract_panel_definitions_from_model_class) - from wagtail.core.models import Site -else: - from wagtail.wagtailadmin import messages - from wagtail.wagtailadmin.edit_handlers import (ObjectList, - extract_panel_definitions_from_model_class) - from wagtail.wagtailcore.models import Site +from wagtail.admin import messages +from wagtail.admin.panels import ObjectList, extract_panel_definitions_from_model_class +from wagtail.models import Site @lru_cache() def get_edit_handler(model): - panels = extract_panel_definitions_from_model_class(model, ['site']) + panels = extract_panel_definitions_from_model_class(model, ["site"]) - if utils.is_wagtail_version_more_than_equal_to_2_5(): - return ObjectList(panels).bind_to(model=model) - else: - return ObjectList(panels).bind_to_model(model) + return ObjectList(panels).bind_to(model=model) def scan(request, scan_pk): scan = get_object_or_404(Scan, pk=scan_pk) - return render(request, 'wagtaillinkchecker/scan.html', { - 'scan': scan - }) + return render(request, "wagtaillinkchecker/scan.html", {"scan": scan}) def index(request): - from django.conf import settings - site = Site.find_for_request(request) - scans = Scan.objects.filter(site=site).order_by('-scan_started') + scans = Scan.objects.filter(site=site).order_by("-scan_started") paginator, page = paginate(request, scans) - return render(request, 'wagtaillinkchecker/index.html', { - 'page': page, - 'paginator': paginator, - 'scans': scans - }) + return render( + request, + "wagtaillinkchecker/index.html", + {"page": page, "paginator": paginator, "scans": scans}, + ) def delete(request, scan_pk): scan = get_object_or_404(Scan, pk=scan_pk) - if request.method == 'POST': + if request.method == "POST": scan.delete() - messages.success(request, _( - 'The scan results were successfully deleted.')) - return redirect('wagtaillinkchecker') + messages.success(request, _("The scan results were successfully deleted.")) + return redirect("wagtaillinkchecker") - return render(request, 'wagtaillinkchecker/delete.html', { - 'scan': scan, - }) + return render( + request, + "wagtaillinkchecker/delete.html", + { + "scan": scan, + }, + ) def settings(request): @@ -87,34 +72,36 @@ def settings(request): form = SitePreferencesForm(request.POST, instance=instance) if form.is_valid(): form.save() - messages.success(request, _( - 'Link checker settings have been updated.')) - return redirect('wagtaillinkchecker_settings') + messages.success(request, _("Link checker settings have been updated.")) + return redirect("wagtaillinkchecker_settings") else: - messages.error(request, _( - 'The form could not be saved due to validation errors')) + messages.error( + request, _("The form could not be saved due to validation errors") + ) else: form = SitePreferencesForm(instance=instance) - if utils.is_wagtail_version_more_than_equal_to_2_5(): - edit_handler = object_list.bind_to( - instance=SitePreferences, form=form, request=request) - else: - edit_handler = object_list.bind_to_instance( - instance=SitePreferences, form=form, request=request) + edit_handler = object_list.bind_to( + instance=SitePreferences, form=form, request=request + ) - return render(request, 'wagtaillinkchecker/settings.html', { - 'form': form, - 'edit_handler': edit_handler, - }) + return render( + request, + "wagtaillinkchecker/settings.html", + { + "form": form, + "edit_handler": edit_handler, + }, + ) def run_scan(request): site = Site.find_for_request(request) celery_status = get_celery_worker_status() - if 'ERROR' not in celery_status: + if "ERROR" not in celery_status: broken_link_scan(site) else: - messages.warning(request, _( - 'No celery workers are running, the scan was not conducted.')) + messages.warning( + request, _("No celery workers are running, the scan was not conducted.") + ) - return redirect('wagtaillinkchecker') + return redirect("wagtaillinkchecker") diff --git a/wagtaillinkchecker/wagtail_hooks.py b/wagtaillinkchecker/wagtail_hooks.py index 6dbbc73..05da6fe 100644 --- a/wagtaillinkchecker/wagtail_hooks.py +++ b/wagtaillinkchecker/wagtail_hooks.py @@ -1,37 +1,28 @@ from __future__ import unicode_literals -from django import __version__ as DJANGO_VERSION from django.conf.urls import include, url from django.utils.translation import ugettext_lazy as _ from wagtaillinkchecker import urls -from wagtaillinkchecker import utils -if utils.is_wagtail_version_more_than_equal_to_2_0(): - from django import urls as urlresolvers -else: - from django.core import urlresolvers +from django import urls as urlresolvers -if utils.is_wagtail_version_more_than_equal_to_2_0(): - from wagtail.admin.menu import MenuItem - from wagtail.core import hooks -else: - from wagtail.wagtailadmin.menu import MenuItem - from wagtail.wagtailcore import hooks +from wagtail.admin.menu import MenuItem +from wagtail import hooks -@hooks.register('register_admin_urls') +@hooks.register("register_admin_urls") def register_admin_urls(): return [ - url(r'^link-checker/', include(urls)), + url(r"^link-checker/", include(urls)), ] -@hooks.register('register_settings_menu_item') +@hooks.register("register_settings_menu_item") def register_menu_settings(): return MenuItem( - _('Link Checker'), - urlresolvers.reverse('wagtaillinkchecker'), - classnames='icon icon-link', - order=300 + _("Link Checker"), + urlresolvers.reverse("wagtaillinkchecker"), + classnames="icon icon-link", + order=300, )