-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcrikey.py
51 lines (41 loc) · 1.6 KB
/
crikey.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
from playwright.sync_api import sync_playwright
import re, time # non PEP lol
class Crikey:
    """Crikey - create screenshots and test websites using Playwright with Python.

    * any URL will work with ``snip``
    * customization is still needed for most URL-string building
    * would be nice to include some loops over lists
    """

    def now(self):
        """Return the current local time formatted as ``YYYYmmdd-HHMMSS``."""
        return time.strftime("%Y%m%d-%H%M%S")

    def clean(self, dirt=None):
        """Turn a URL into a string that is safe to embed in a file name.

        A leading ``http://`` or ``https://`` is dropped, then every character
        that is not a word character or a hyphen is replaced one-for-one with
        an underscore.

        Args:
            dirt: The URL (or any string) to sanitize.

        Returns:
            The sanitized string, or ``None`` (after printing ``need string``)
            when ``dirt`` is ``None``.
        """
        if dirt is None:
            print("need string")
            return None
        without_scheme = re.sub(r'^https?://', '', dirt)
        # Replace everything outside [\w-], not just "?.=&/": characters such
        # as "*", ":", "#", "~" and "%" also occur in the URLs built by
        # build_auto_query and are awkward or invalid in file names.
        return re.sub(r'[^\w-]', '_', without_scheme)

    def snip(self, url=None, screenshot=None, timeout=6000, settle=3):
        """Open ``url`` in headless Chromium and save a full-page screenshot.

        Args:
            url: Address of the page to capture. Required.
            screenshot: Output path. Defaults to
                ``<now()>_<clean(url)>.png`` in the current directory.
            timeout: Navigation timeout handed to ``page.goto`` (Playwright
                expects milliseconds).
            settle: Seconds to sleep after the ``load`` event before taking
                the screenshot.

        Returns:
            The path the screenshot was written to.

        Raises:
            ValueError: If ``url`` is ``None``.
        """
        # Fail before starting a browser instead of inside page.goto(None).
        if url is None:
            raise ValueError("snip() needs a url to visit")
        if screenshot is None:
            screenshot = f"{self.now()}_{self.clean(url)}.png"
        with sync_playwright() as p:
            browser = p.chromium.launch()
            try:
                page = browser.new_page()
                page.goto(url, timeout=timeout, wait_until="load")
                time.sleep(settle)
                page.screenshot(path=screenshot, full_page=True)
                print(f"{page.title()}\n{url}\n{screenshot}")
            finally:
                # Release the browser even when navigation or capture fails.
                browser.close()
        return screenshot

    def build_auto_query(self, region="orangecounty",  # craigslist fields search strings
                         make="BMW", model="*",
                         min_yr='2008', max_yr="2018", query=None):
        """Build a craigslist ``cta`` search URL.

        Values are inserted into the URL as given; no percent-encoding is
        applied, so pass already URL-safe strings.

        Args:
            region: Subdomain placed in front of ``.craigslist.org``.
            make: First half of the ``auto_make_model`` field.
            model: Second half of the ``auto_make_model`` field (joined to
                ``make`` with ``%20``).
            min_yr: Value of ``min_auto_year``.
            max_yr: Value of ``max_auto_year``.
            query: Value of the ``query`` field. Defaults to ``make`` in lower
                case, so the text query follows the requested make.

        Returns:
            The assembled URL as a string.
        """
        if query is None:
            query = str(make).lower()
        # Adjacent f-strings instead of backslash continuations inside one
        # literal: with continuations, any indentation of the following lines
        # ends up inside the URL.
        return (
            f"https://{region}.craigslist.org/search/cta"
            f"?auto_make_model={make}%20{model}&auto_transmission=1"
            f"&max_auto_year={max_yr}&min_auto_year={min_yr}"
            f"&query={query}#search=1~gallery~0~0"
        )
if __name__ == "__main__":
    # Script entry point: build the default craigslist search URL and
    # capture a screenshot of the resulting page.
    app = Crikey()
    app.snip(app.build_auto_query())