-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathHTML Parser.py
47 lines (40 loc) · 1.38 KB
/
HTML Parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
from html.parser import HTMLParser
# create a subclass and override the handler methods
class MyHTMLParser1(HTMLParser):
    """Echo every tag event to stdout, followed by the tag's attributes.

    Opening tags are reported under the ``Start`` label, closing tags under
    ``End`` and XHTML-style self-closing tags (``<br/>``) under ``Empty``.
    Attributes are listed one per line as ``-> name > value``.
    """

    @staticmethod
    def _print_attributes(attrs):
        """Print one ``-> name > value`` line per ``(name, value)`` pair.

        Both parts are rendered with ``str()``, so a ``None`` value is shown
        as the text ``None``.  A falsy ``attrs`` prints nothing.
        """
        for name, value in attrs or ():
            print("-> ", name, " > ", value, sep="")

    def handle_starttag(self, tag, attrs):
        """Report an opening tag and its attributes."""
        print("Start :", tag)
        self._print_attributes(attrs)

    def handle_endtag(self, tag):
        """Report a closing tag."""
        print("End :", tag)

    def handle_startendtag(self, tag, attrs):
        """Report a self-closing tag and its attributes."""
        print("Empty :", tag)
        self._print_attributes(attrs)
class MyHTMLParser2(HTMLParser):
    """Print the comments and the non-blank text found in an HTML document.

    Each item is written to stdout as a ``>>> ...`` header line followed by
    the raw content on the next line(s).
    """

    def handle_comment(self, data):
        """Print a comment, labelled by whether its text contains a newline."""
        header = (
            ">>> Multi-line Comment\n"
            if "\n" in data
            else ">>> Single-line Comment\n"
        )
        print(header + data)

    def handle_data(self, data):
        """Print a text chunk unless it consists only of whitespace."""
        if not data.strip():
            return
        # The chunk is printed untrimmed; strip() is used only as the filter.
        print(">>> Data\n" + data)
if __name__ == '__main__':
    # Script entry point.  Stdin layout: the first line selects the parser
    # (1 -> MyHTMLParser1, any other integer -> MyHTMLParser2), the second
    # line is the number of HTML lines that follow.
    use_tag_parser = int(input('Parser1 or Parser2?!')) == 1
    line_count = int(input())
    if use_tag_parser:
        parser = MyHTMLParser1()  # instantiate the parser and feed it some HTML
        # input() drops the line terminator.  Put it back so that a tag or
        # attribute list continuing on the next line is not glued onto the
        # previous token ("<a" + "href=..." must not become "<ahref=...").
        html = "".join(input() + "\n" for _ in range(line_count))
    else:
        parser = MyHTMLParser2()
        # Trailing whitespace is trimmed from every line, and each line is
        # newline-terminated so multi-line comments can be recognised.
        html = "".join(input().rstrip() + "\n" for _ in range(line_count))
    parser.feed(html)
    # Force processing of anything the parser is still buffering; this is
    # done for whichever parser was selected.
    parser.close()