From 43ab4e3d6fa245750a74215e43b3b792dd017317 Mon Sep 17 00:00:00 2001 From: Joaquim d'Souza Date: Thu, 3 Oct 2024 12:14:25 +0200 Subject: [PATCH] fix: add timeout to get_url http request --- wagtaillinkchecker/scanner.py | 2 +- wagtaillinkchecker/tasks.py | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/wagtaillinkchecker/scanner.py b/wagtaillinkchecker/scanner.py index e17ab0f..b10e75d 100644 --- a/wagtaillinkchecker/scanner.py +++ b/wagtaillinkchecker/scanner.py @@ -64,7 +64,7 @@ def get_url(url, page, site): } response = None try: - response = requests.get(url, verify=True) + response = requests.get(url, verify=True, timeout=30) data["response"] = response except (requests.exceptions.InvalidSchema, requests.exceptions.MissingSchema): data["invalid_schema"] = True diff --git a/wagtaillinkchecker/tasks.py b/wagtaillinkchecker/tasks.py index 2e3a1d0..0d26946 100644 --- a/wagtaillinkchecker/tasks.py +++ b/wagtaillinkchecker/tasks.py @@ -22,8 +22,12 @@ def check_link( def check_link_sync(link_pk, verbosity=1): link = ScanLink.objects.get(pk=link_pk) site = link.scan.site + if verbosity > 1: + print(f"Checking {link.url}") url = get_url(link.url, link.page, site) link.status_code = url.get("status_code") + if verbosity > 1: + print(f"Link is {'broken' if url['error'] else 'OK'}") if url["error"]: link.broken = True