Skip to content

Commit

Permalink
💄 - Black dom_parser
Browse files (browse the repository at this point in the history)
  • Loading branch information
drinfernoo committed Nov 24, 2022
1 parent 2ea807a commit b4697ff
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 19 deletions.
11 changes: 2 additions & 9 deletions providerModules/a4kOfficial/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,21 +47,14 @@ def check_url(url):

def get_all_relative_py_files(file):
files = os.listdir(os.path.dirname(file))
return [
filename[:-3]
for filename in files
if not filename.startswith("__") and filename.endswith(".py")
]
return [filename[:-3] for filename in files if not filename.startswith("__") and filename.endswith(".py")]


def parseDOM(html, name="", attrs=None, ret=False):
if attrs:
import re

attrs = dict(
(key, re.compile(value + ("$" if value else "")))
for key, value in attrs.items()
)
attrs = dict((key, re.compile(value + ("$" if value else ""))) for key, value in attrs.items())
from providerModules.a4kOfficial import dom_parser

results = dom_parser.parse_dom(html, name, attrs, ret)
Expand Down
27 changes: 17 additions & 10 deletions providerModules/a4kOfficial/dom_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,13 @@


def __get_dom_content(html, name, match):
if match.endswith('/>'): return ''
if match.endswith('/>'):
return ''

# override tag name with tag from match if possible
tag = re.match('<([^\s/>]+)', match)
if tag: name = tag.group(1)
if tag:
name = tag.group(1)

start_str = '<%s' % name
end_str = "</%s" % name
Expand All @@ -48,11 +50,11 @@ def __get_dom_content(html, name, match):
if start == -1 and end == -1:
result = ''
elif start > -1 and end > -1:
result = html[start + len(match):end]
result = html[start + len(match) : end]
elif end > -1:
result = html[:end]
elif start > -1:
result = html[start + len(match):]
result = html[start + len(match) :]
else:
result = ''

Expand Down Expand Up @@ -97,22 +99,26 @@ def __get_dom_elements(item, name, attrs):

def __get_attribs(element):
attribs = {}
for match in re.finditer('''\s+(?P<key>[^=]+)=\s*(?:(?P<delim>["'])(?P<value1>.*?)(?P=delim)|(?P<value2>[^"'][^>\s]*))''', element):
for match in re.finditer(
'''\s+(?P<key>[^=]+)=\s*(?:(?P<delim>["'])(?P<value1>.*?)(?P=delim)|(?P<value2>[^"'][^>\s]*))''', element
):
match = match.groupdict()
value1 = match.get('value1')
value2 = match.get('value2')
value = value1 if value1 is not None else value2
if value is None: continue
if value is None:
continue
attribs[match['key'].lower().strip()] = value
return attribs


def parse_dom(html, name='', attrs=None, req=False, exclude_comments=False):
if attrs is None: attrs = {}
if attrs is None:
attrs = {}
name = name.strip()
if isinstance(html, str) or isinstance(html, DomMatch):
html = [html]
elif isinstance(html, bytes):# and six.PY2:
elif isinstance(html, bytes): # and six.PY2:
try:
html = [html.decode("utf-8")] # Replace with chardet thingy
except:
Expand Down Expand Up @@ -145,10 +151,11 @@ def parse_dom(html, name='', attrs=None, req=False, exclude_comments=False):
results = []
for element in __get_dom_elements(item, name, attrs):
attribs = __get_attribs(element)
if req and not req <= set(attribs.keys()): continue
if req and not req <= set(attribs.keys()):
continue
temp = __get_dom_content(item, name, element).strip()
results.append(DomMatch(attribs, temp))
item = item[item.find(temp, item.find(element)):]
item = item[item.find(temp, item.find(element)) :]
all_results += results

return all_results

0 comments on commit b4697ff

Please sign in to comment.