From 2454a4ac047fb989e3f7d299ff2473aaab8bf61a Mon Sep 17 00:00:00 2001 From: Joaquim d'Souza Date: Tue, 17 Sep 2024 22:24:14 +0200 Subject: [PATCH] feat: add iframes --- wagtaillinkchecker/tasks.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/wagtaillinkchecker/tasks.py b/wagtaillinkchecker/tasks.py index e960d31..ccd3f5d 100644 --- a/wagtaillinkchecker/tasks.py +++ b/wagtaillinkchecker/tasks.py @@ -34,6 +34,7 @@ def check_link_sync(link_pk, verbosity=1): soup = BeautifulSoup(url["response"].content, "html5lib") anchors = soup.find_all("a") images = soup.find_all("img") + iframes = soup.find_all("iframe") for anchor in anchors: link_href = anchor.get("href") @@ -47,6 +48,18 @@ def check_link_sync(link_pk, verbosity=1): except IntegrityError: pass + for iframe in iframes: + link_href = iframe.get("src") + link_href = clean_url(link_href, site) + if verbosity > 1: + print(f"cleaned iframe link_href: {link_href}") + if link_href: + try: + new_link = link.scan.add_link(page=link.page, url=link_href) + new_link.check_link(verbosity) + except IntegrityError: + pass + for image in images: image_src = image.get("src") image_src = clean_url(image_src, site)