Skip to content

Commit 6e87524

Browse files
Merge pull request #349 from vss-2/issue-348
[python] Fix missing html5lib dependency #348
2 parents c7376ed + f6fd5af commit 6e87524

File tree

4 files changed

+282
-186
lines changed

4 files changed

+282
-186
lines changed

python-package/geobr/list_geobr.py

+7 -3
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22
import pandas as pd
33
from io import StringIO
44
from urllib.error import HTTPError
5-
5+
import re
66

77
def list_geobr():
88
"""Prints available functions, according to latest README.md file
@@ -19,8 +19,12 @@ def list_geobr():
1919

2020
try:
2121
html_data = get("https://github.com/ipeaGIT/geobr/blob/master/README.md").text
22-
23-
df = pd.read_html(StringIO(html_data))[1]
22+
find_emoji = html_data.index("👉")
23+
html_data = html_data[find_emoji:]
24+
escaped_data = html_data.replace("\\u003c", "<").replace("\\u003e", ">")
25+
tables = re.findall("<table>(.+?)</table>", escaped_data)
26+
available_datasets = "<table>" + tables[0].replace("\\n", "") + "</table>"
27+
df = pd.DataFrame(pd.read_html(StringIO(available_datasets))[0])
2428

2529
except HTTPError:
2630
print(

0 commit comments

Comments
 (0)