Skip to content

Commit

Permalink
Now uses the Python standard library's webbrowser module instead of Playwright
Browse files Browse the repository at this point in the history
  • Loading branch information
anshtiwatne committed Feb 6, 2022
1 parent f904b76 commit 9f74272
Showing 1 changed file with 4 additions and 15 deletions.
19 changes: 4 additions & 15 deletions crawler.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@
import copy
import re
from types import NoneType
import webbrowser
import requests
from playwright.sync_api import sync_playwright


START_PAGE = "/wiki/Special:Random"
Expand Down Expand Up @@ -40,21 +40,10 @@ def crawl():
raise Exception("Arrived at a page with no links")
elif wiki_page in pages:
raise Exception("Stuck in a loop")
pages.add(wiki_page)

pages.add(wiki_page)
yield wiki_page


def main():
"""Go to every link yielded by the crawl function"""

with sync_playwright() as sync:
browser = sync.chromium.launch(headless=False, slow_mo=50)
page = browser.new_page()

for link in crawl():
page.goto(f"https://wikipedia.org{link}")


if __name__ == "__main__":
main()
for wiki_page in crawl():
    # Open each crawled page in the user's default browser.
    # START_PAGE ("/wiki/Special:Random") already begins with "/", and the
    # yielded paths presumably do too (confirm against crawl()), so strip any
    # leading slash before joining to avoid "https://wikipedia.org//wiki/...".
    webbrowser.open(f"https://wikipedia.org/{wiki_page.lstrip('/')}")

0 comments on commit 9f74272

Please sign in to comment.