Skip to content

Commit

Permalink
strip common punct from around nums
Browse files Browse the repository at this point in the history
  • Loading branch information
longhotsummer committed Feb 10, 2025
1 parent 90bae07 commit 29d3d1f
Show file tree
Hide file tree
Showing 2 changed files with 80 additions and 1 deletion.
6 changes: 5 additions & 1 deletion indigo/analysis/refs/provisions.py
Original file line number Diff line number Diff line change
Expand Up @@ -320,16 +320,20 @@ def find_numbered_hier_element(self, root: Element, names: Optional[List[str]],
names = [f'{{{ns}}}{n}' for n in names]
dead_ends = [f'{{{ns}}}{n}' for n in ['quotedStructure', 'embeddedStructure', 'content']]
not_outside_of = None if not_outside_of is None else [f'{{{ns}}}{n}' for n in not_outside_of]
clean_num = self.clean_num(num)

# do a breadth-first search, starting at root, and walk upwards, expanding until we find something or reach the top
for elem in bfs_upward_search(root, names, dead_ends, not_outside_of):
# ignore matches to the root element, which avoids things like section (1)(a)(a) matching the same (a) twice
if elem == root:
continue
num_elem = elem.find('a:num', {'a': ns})
if num_elem is not None and num_elem.text.rstrip(".") == num:
if num_elem is not None and self.clean_num(num_elem.text) == clean_num:
return elem

def clean_num(self, num):
    """Normalise a provision number so differently punctuated forms compare equal.

    Surrounding whitespace is dropped, then brackets are stripped from both
    ends, together with any trailing "." or "º". The trailing characters are
    stripped as one set so that a closing bracket hidden behind a dot or an
    ordinal marker (e.g. "(a)." or "1).") is removed as well.

    :param num: raw number text, e.g. "26.", "(2)", "a)" or "26Bº"; may be
        None (lxml reports the text of an empty element as None).
    :return: the bare number, e.g. "26", "2", "a" or "26B"; an empty string
        when ``num`` is None.
    """
    if num is None:
        # An element without text has nothing to match against; returning an
        # empty string avoids an AttributeError in the caller's comparison.
        return ""
    # Strip the trailing punctuation in a single pass: doing "()" first and
    # ".º" second would leave the ")" behind in inputs such as "(a).".
    return num.strip().lstrip("()").rstrip("().º")


class ProvisionRefsMatcher(CitationMatcher):
""" Finds internal references to sections in documents, of the form:
Expand Down
75 changes: 75 additions & 0 deletions indigo/tests/test_provision_refs.py
Original file line number Diff line number Diff line change
Expand Up @@ -494,6 +494,81 @@ def test_local_sections_synonyms(self):
etree.tostring(actual, encoding='unicode')
)

def test_num_punct(self):
# Checks that references still resolve when the target <num> text carries
# punctuation: nums can be surrounded by () or have . or º at the end.
#
# Input fixture: section 7 refers to "26", "26(a)", "26(a)(1)(iii)" and "26B".
# The targets use a different punctuation style at each level: "26." for the
# section, "a)" for the subsection, "1)" and "(2)" for the paragraphs, and
# "26Bº" for section 26B.
doc = AkomaNtosoDocument(document_fixture(xml="""
<section eId="sec_7">
<num>7.</num>
<heading>Section 7</heading>
<content>
<p>As given in section 26, blah.</p>
<p>As given in section 26(a), blah.</p>
<p>As given in section 26(a)(1)(iii), blah.</p>
<p>As given in section 26B, blah.</p>
</content>
</section>
<section eId="sec_26">
<num>26.</num>
<heading>Important heading</heading>
<subsection eId="sec_26__subsec_a">
<num>a)</num>
<paragraph eId="sec_26__subsec_a__para_1">
<num>1)</num>
</paragraph>
<paragraph eId="sec_26__subsec_a__para_2">
<num>(2)</num>
</paragraph>
</subsection>
</section>
<section eId="sec_26B">
<num>26Bº</num>
<heading>Another important heading</heading>
<content>
<p>Another important provision.</p>
</content>
</section>
"""))

# Expected output: the same document with each reference wrapped in a <ref>
# pointing at the matching eId. In the third paragraph only "26(a)(1)" is
# linked and "(iii)" stays outside the <ref> -- presumably because the fixture
# has no element numbered (iii) under paragraph 1.
expected = AkomaNtosoDocument(document_fixture(xml="""
<section eId="sec_7">
<num>7.</num>
<heading>Section 7</heading>
<content>
<p>As given in section <ref href="#sec_26">26</ref>, blah.</p>
<p>As given in section <ref href="#sec_26__subsec_a">26(a)</ref>, blah.</p>
<p>As given in section <ref href="#sec_26__subsec_a__para_1">26(a)(1)</ref>(iii), blah.</p>
<p>As given in section <ref href="#sec_26B">26B</ref>, blah.</p>
</content>
</section>
<section eId="sec_26">
<num>26.</num>
<heading>Important heading</heading>
<subsection eId="sec_26__subsec_a">
<num>a)</num>
<paragraph eId="sec_26__subsec_a__para_1">
<num>1)</num>
</paragraph>
<paragraph eId="sec_26__subsec_a__para_2">
<num>(2)</num>
</paragraph>
</subsection>
</section>
<section eId="sec_26B">
<num>26Bº</num>
<heading>Another important heading</heading>
<content>
<p>Another important provision.</p>
</content>
</section>
"""))

# Re-parse the input into a fresh tree, run the finder over it, then compare
# the serialised tree with the expected document.
# NOTE(review): markup_xml_matches appears to modify `actual` in place, since
# its return value is not used -- confirm against the finder implementation.
actual = etree.fromstring(doc.to_xml())
self.finder.markup_xml_matches(self.frbr_uri, actual)
self.assertEqual(
expected.to_xml(encoding='unicode'),
etree.tostring(actual, encoding='unicode')
)

def test_local_ambiugous_levels(self):
doc = AkomaNtosoDocument(document_fixture(xml="""
<section eId="sec_7">
Expand Down

0 comments on commit 29d3d1f

Please sign in to comment.