Skip to content

Commit

Permalink
fix: id with weird path should also be extracted
Browse files Browse the repository at this point in the history
  • Loading branch information
l3d00m committed Sep 8, 2024
1 parent c33a2d6 commit b91b53a
Show file tree
Hide file tree
Showing 3 changed files with 3 additions and 4 deletions.
3 changes: 0 additions & 3 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,3 @@ max-line-length = 160
# Define setup.py command aliases here
test = pytest

[tool:pytest]
collect_ignore = ['setup.py']

2 changes: 1 addition & 1 deletion sked_parser/scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def create_id(sked_path, faculty_short, current_sem_str, extracted_semester):
# Unquote the URL first
sked_path = unquote(sked_path)
# Get a basic id from the url page, which is the last part excluding the .extension
id_re = re.compile(r'\w/.+/(.+?)\.+(html|csv)', re.IGNORECASE)
id_re = re.compile(r'\w/(?:.*/)?(.+?)\.+(html|csv)', re.IGNORECASE)
m = id_re.search(sked_path)
if not m:
raise Exception(f"Path {sked_path} did not match to ID regex, so we can't extract an ID")
Expand Down
2 changes: 2 additions & 0 deletions tests/test_scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,8 @@ def sked_path(part_str): return f"e/semester/{part_str}.html"
# Multiple special chars
in_str = "b-.-eit"
assert create_id(sked_path(in_str), faculty_short, current_sem_str, extracted_semester) == "e_b_eit_1_ws"
# Special URL for faculty E
assert create_id("e/E-IST.html", faculty_short, current_sem_str, extracted_semester) == "e_ist_1_ws"
# Complicated semester specification
in_str = "PSA_M_1. Semester_Schwerpunkt"
assert create_id(sked_path(in_str), faculty_short, current_sem_str,
Expand Down

0 comments on commit b91b53a

Please sign in to comment.