data.py
# coding: utf-8
import urllib.parse
from collections import namedtuple

import requests
from bs4 import BeautifulSoup

TrainInfo = namedtuple('TrainInfo', 'departure arrival transportation')
class TrainInfoQueryBuilder(object):
    __base_url = 'https://reiseauskunft.bahn.de/bin/query.exe/dn?'

    def __init__(self):
        # 'start': '1' runs the search immediately instead of showing the query form.
        self.params = {'start': '1'}

    def with_route(self, start, destination):
        # 'S' and 'Z' are the start and destination station parameters.
        self.params['S'] = start
        self.params['Z'] = destination
        return self

    def with_departure_time(self, time):
        self.params['time'] = time
        return self

    def include_regional_train(self):
        self.params['REQ0JourneyProduct_prod_section_0_3'] = '1'
        return self

    def include_interurban_train(self):
        self.params['REQ0JourneyProduct_prod_section_0_4'] = '1'
        return self

    def include_metro(self):
        self.params['REQ0JourneyProduct_prod_section_0_7'] = '1'
        return self

    def include_tram(self):
        self.params['REQ0JourneyProduct_prod_section_0_8'] = '1'
        return self

    def build(self):
        return self.__base_url + urllib.parse.urlencode(self.params)
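

# Example of the builder's output (placeholder station names and time, not taken
# from this repository):
#   TrainInfoQueryBuilder().with_route('A-Stadt', 'B-Stadt').with_departure_time('08:00').build()
# yields a URL of the form
#   https://reiseauskunft.bahn.de/bin/query.exe/dn?start=1&S=A-Stadt&Z=B-Stadt&time=08%3A00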
class DataFetcher(object):
    def __init__(self, proxies=None, ignore_ssl=False):
        self.proxies = proxies
        self.ignore_ssl = ignore_ssl
        if ignore_ssl:
            # Suppress the InsecureRequestWarning raised when certificate checks are disabled.
            requests.packages.urllib3.disable_warnings()

    def fetch_data(self, url):
        return requests.get(url, proxies=self.proxies, verify=not self.ignore_ssl)
def _soup(content):
    return BeautifulSoup(content, 'html.parser')


def create_timetable(response_html):
    response = _soup(response_html)
    # Each connection is rendered as a 'firstrow' (departure time, products) and a
    # 'last' row (arrival time); pair them up by position.
    table_top_rows = response('tr', 'firstrow')
    table_bottom_rows = response('tr', 'last')
    times = []
    for top_row, bottom_row in zip(table_top_rows, table_bottom_rows):
        departure = get_time(top_row)
        arrival = get_time(bottom_row)
        means_of_transportation = get_means_of_transportation(top_row)
        times.append(TrainInfo(departure, arrival, means_of_transportation))
    return times


def get_means_of_transportation(row_html):
    row = _soup(str(row_html))
    means_of_transportation = row('td', 'products')
    return 'N/A' if not means_of_transportation else means_of_transportation[0].get_text().replace('\n', '')


def get_time(row_html):
    row = _soup(str(row_html))
    return row('td', 'time')[0].get_text().replace('\n', '')
def get_timetable(url, proxies=None, ignore_ssl=False):
    """Fetch the result page for a prepared query URL and parse it into TrainInfo tuples."""
    fetcher = DataFetcher(proxies, ignore_ssl)
    response = fetcher.fetch_data(url)
    return create_timetable(response.text)
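

# Minimal usage sketch (not part of the original module): the station names and
# departure time below are illustrative placeholders, and running it requires
# network access to reiseauskunft.bahn.de.
if __name__ == '__main__':
    query_url = (TrainInfoQueryBuilder()
                 .with_route('Frankfurt(Main)Hbf', 'Mainz Hbf')  # assumed example stations
                 .with_departure_time('08:00')                   # assumed example time
                 .include_regional_train()
                 .include_interurban_train()
                 .build())
    for train in get_timetable(query_url):
        print(train.departure, train.arrival, train.transportation)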