forked from patrickbollmann/flathunt-easy
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathflathunt.py
102 lines (85 loc) · 3.18 KB
/
flathunt.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
import requests
import re
import time
import telegram_send
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
# Search-result URLs to poll, one per portal.  Fill each in with the URL of a
# saved search (filters applied) on the respective site.
url_immonet = ""
url_immowelt = ""
url_wg_gesucht = ""
url_ebay = ""

# The polling loop in this script loads every one of these URLs on each pass,
# and an empty URL cannot be fetched.  Refuse to start while any of them is
# still blank and say which ones are missing, instead of failing later in the
# middle of a scrape.
_missing_urls = [
    name
    for name, value in (
        ("url_immonet", url_immonet),
        ("url_immowelt", url_immowelt),
        ("url_wg_gesucht", url_wg_gesucht),
        ("url_ebay", url_ebay),
    )
    if not value
]
if _missing_urls:
    print("Please change empty string with appropriate URL depending on search criteria")
    print(f"Missing: {', '.join(_missing_urls)}")
    print("Example https://www.immonet.de/immobiliensuche/beta?sortby=0&suchart=1&objecttype=1&marketingtype=2&parentcat=1&toprice=1000&locationname=M%C3%BCnchen&t235=a&locationIds=4916")
    print("========================")
    # quit() is injected by the `site` module for interactive use and is not
    # guaranteed to exist (e.g. under `python -S`); raising SystemExit is the
    # equivalent that always works and keeps the same exit status.
    raise SystemExit

# Text file holding every listing link already reported, one link per line.
FILE_PATH = "flats.txt"
# wg-gesucht listing links end in ".<numeric id>.html".  Compiled once, outside
# the polling loop.  A raw string is used because "\." in a normal string
# literal is an invalid escape sequence, and the dot before "html" is escaped
# so it only matches a literal dot.
WG_GESUCHT_LISTING_RE = re.compile(r"\.[0-9]{3,}\.html$")

# Seconds to wait for an HTTP response before giving up, so a stalled
# connection cannot block the polling loop forever.
REQUEST_TIMEOUT = 30


def _record_if_new(link, known_flats, new_flats):
    """Append *link* to *new_flats* unless it is already in *known_flats*.

    *known_flats* is updated as well, so a link that occurs several times on
    one page (or on several portals) is reported only once per pass.
    """
    if link in known_flats:
        return
    known_flats.add(link)
    print("NEW FLAT ", link)
    new_flats.append(link)


# Poll all portals forever: collect listing links that were not reported yet,
# announce them via Telegram, remember them in FILE_PATH, then sleep.
while True:
    # Links reported on earlier passes.  A missing file simply means this is
    # the first run.  Trailing newlines are stripped so that a final line
    # without "\n" still matches.
    try:
        with open(FILE_PATH) as f:
            known_flats = {line.rstrip("\n") for line in f}
    except FileNotFoundError:
        known_flats = set()

    # start web browser
    options = webdriver.ChromeOptions()
    options.add_argument("headless")
    options.add_argument('--disable-blink-features=AutomationControlled')
    driver = webdriver.Chrome(options=options)  # Needs .exe path for Windows

    new_flats = []
    try:
        # check immonet (loaded in the browser; wait before reading the DOM)
        driver.get(url_immonet)
        time.sleep(5)
        soup = BeautifulSoup(driver.page_source, 'html.parser')
        for anchor in soup.find_all('a', href=True):
            if "expose" in anchor["href"]:
                _record_if_new(anchor["href"], known_flats, new_flats)

        # check immowelt
        print("check immowelt")
        r = requests.get(url_immowelt, timeout=REQUEST_TIMEOUT)
        soup = BeautifulSoup(r.text, 'html.parser')
        for anchor in soup.find_all('a', href=True):
            if "expose" in anchor["href"]:
                _record_if_new(anchor["href"], known_flats, new_flats)

        # check wg-gesucht
        print("check wg-gesucht")
        r = requests.get(url_wg_gesucht, timeout=REQUEST_TIMEOUT)
        soup = BeautifulSoup(r.text, 'html.parser')
        for anchor in soup.find_all('a', href=True):
            if WG_GESUCHT_LISTING_RE.search(anchor["href"]):
                link = "https://www.wg-gesucht.de/" + anchor["href"]
                _record_if_new(link, known_flats, new_flats)

        # check ebay
        print("check ebay-kleinanzeigen")
        driver.get(url_ebay)
        # Wait *before* taking the page source; sleeping afterwards (as the
        # code did previously) has no effect on what gets parsed.
        time.sleep(2)
        # Only keep the part of the page before this marker text; links after
        # it are ignored (presumably suggestions outside the actual search).
        html = driver.page_source.split("Alternative Anzeigen in der Umgebung")[0]
        soup = BeautifulSoup(html, 'html.parser')
        for anchor in soup.find_all('a', href=True):
            if "/s-anzeige/" not in anchor["href"]:
                continue
            link = "https://www.ebay-kleinanzeigen.de" + anchor["href"]
            _record_if_new(link, known_flats, new_flats)
    finally:
        # Always shut the browser down, even if a portal check raised, so
        # failed passes do not leave Chrome processes behind.
        driver.quit()

    if new_flats:
        print("sending telegram msg")
        telegram_send.send(messages=["Es wurden neue Wohnungen gefunden!"])
        telegram_send.send(messages=new_flats)
        # Persist only after sending, so links are re-announced on the next
        # start if the notification failed.
        with open(FILE_PATH, "a") as f:
            f.writelines(flat + "\n" for flat in new_flats)
    else:
        print("No new flats found!!!")

    # Pause five minutes between passes.
    time.sleep(300)