6-core-scraper.py
import sys
from itertools import chain
from multiprocessing import Pool

import requests
from bs4 import BeautifulSoup


def mailfinder(url):
    """Collect links to hotel detail pages that expose an email address."""
    r = requests.get(url)
    soup = BeautifulSoup(r.text, 'lxml')
    # Each listing sits in a "city_tab" div; keep only those offering "View Email ID".
    listings = soup.find_all("div", {"class": "city_tab"})
    hotel_ref = [i.find('a', href=True)['href'] for i in listings if "View Email ID" in i.text]
    return hotel_ref


def mailextractor(ref):
    """Fetch a detail page and return the comma-separated email addresses on it."""
    r = requests.get(ref)
    soup = BeautifulSoup(r.text, 'lxml')
    cells = soup.find_all("td", {"class": "table_space_td_right1"})
    # The last matching cell holds the email list.
    return cells[-1].text.split(',')


if __name__ == "__main__":
    _, url, start, end = sys.argv
    # Build the paginated listing URLs for the requested page range.
    pages = [url + f"/pag={i}/" for i in range(int(start), int(end) + 1)]
    # Scrape the listing pages in parallel to gather detail-page links.
    with Pool(12) as p:
        mp = p.map(mailfinder, pages)
    hotel_refs = list(chain(*mp))
    # Visit every detail page in parallel and pull out its email addresses.
    with Pool(12) as p:
        mails = p.map(mailextractor, hotel_refs)
    # Write one space-separated line of addresses per hotel.
    with open('mail.txt', 'w') as f:
        for item in mails:
            f.write(f"{' '.join(item)}\n")
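
Usage sketch: the script takes a base listing URL plus a start and end page number on the command line; the /pag=N/ suffix is appended by the code above and is specific to whatever directory site the author targeted. A hypothetical invocation, with example.com standing in for the real site:

    # scrapes /pag=1/ through /pag=5/ of the listing and writes the addresses to mail.txt
    python 6-core-scraper.py "https://www.example.com/hotels-in-delhi" 1 5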