From 298e388e942ea861ba46f1b3b9d1c56aacad624a Mon Sep 17 00:00:00 2001 From: grossir Date: Wed, 10 Jan 2024 18:18:12 +0000 Subject: [PATCH] =?UTF-8?q?Deploying=20to=20gh-pages=20from=20@=20freelawp?= =?UTF-8?q?roject/eyecite@a759aac298c1a19a9312c6bf66567a90cbc6d669=20?= =?UTF-8?q?=F0=9F=9A=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- models.html | 364 +++++++++++++++++++++++++++++++++------------------- 1 file changed, 231 insertions(+), 133 deletions(-) diff --git a/models.html b/models.html index 9c5d9d6..5685a55 100644 --- a/models.html +++ b/models.html @@ -28,7 +28,7 @@

Module eyecite.models

import re
 from collections import UserString
-from dataclasses import dataclass, field
+from dataclasses import asdict, dataclass, field
 from datetime import datetime
 from typing import (
     Any,
@@ -43,7 +43,7 @@ 

Module eyecite.models

cast, ) -from eyecite.utils import HashableDict +from eyecite.utils import hash_sha256 ResourceType = Hashable @@ -88,7 +88,7 @@

Module eyecite.models

) -@dataclass(eq=True, unsafe_hash=True) +@dataclass(eq=False, unsafe_hash=False) class CitationBase: """Base class for objects returned by `eyecite.find.get_citations`. We define several subclasses of this class below, representing the various @@ -107,7 +107,7 @@

Module eyecite.models

def __post_init__(self): """Set up groups and metadata.""" # Allow groups to be used in comparisons: - self.groups = HashableDict(self.token.groups) + self.groups = self.token.groups # Make metadata a self.Metadata object: self.metadata = ( self.Metadata(**self.metadata) @@ -129,21 +129,52 @@

Module eyecite.models

+ ")" ) + def __hash__(self) -> int: + """In general, citations are considered equivalent if they have the + same group values (i.e., the same regex group content that is extracted + from the matched text). Subclasses may override this method in order to + specify equivalence behavior that is more appropriate for certain + kinds of citations (e.g., see CaseCitation override). + + self.groups typically contains different keys for different objects: + + FullLawCitation (non-exhaustive and non-guaranteed): + - chapter + - reporter + - law_section + - issue + - page + - docket_number + - pamphlet + - title + + FullJournalCitation (non-exhaustive and non-guaranteed): + - volume + - reporter + - page + + FullCaseCitation (see CaseCitation.__hash__() notes) + """ + return hash( + hash_sha256( + {**dict(self.groups.items()), **{"class": type(self).__name__}} + ) + ) + + def __eq__(self, other): + """This method is inherited by all subclasses and should not be + overridden. It implements object equality in exactly the same way as + defined in an object's __hash__() function, which should be overridden + instead if desired. + """ + return self.__hash__() == other.__hash__() + @dataclass(eq=True, unsafe_hash=True) class Metadata: """Define fields on self.metadata.""" parenthetical: Optional[str] = None - def comparison_hash(self) -> int: - """Return hash that will be the same if two cites are semantically - equivalent, unless the citation is a CaseCitation missing a page. - """ - if isinstance(self, CaseCitation) and self.groups["page"] is None: - return id(self) - else: - return hash((type(self), tuple(self.groups.items()))) - def corrected_citation(self): """Return citation with any variations normalized.""" return self.matched_text() @@ -198,7 +229,7 @@

Module eyecite.models

return start, end -@dataclass(eq=True, unsafe_hash=True, repr=False) +@dataclass(eq=False, unsafe_hash=False, repr=False) class ResourceCitation(CitationBase): """Base class for a case, law, or journal citation. Could be short or long.""" @@ -222,6 +253,26 @@

Module eyecite.models

) super().__post_init__() + def __hash__(self) -> int: + """ResourceCitation objects are hashed in the same way as their + parent class (CitationBase) objects, except that we also take into + consideration the all_editions field. + """ + return hash( + hash_sha256( + { + **dict(self.groups.items()), + **{ + "all_editions": sorted( + [asdict(e) for e in self.all_editions], + key=lambda d: d["short_name"], # type: ignore + ), + "class": type(self).__name__, + }, + } + ) + ) + @dataclass(eq=True, unsafe_hash=True) class Metadata(CitationBase.Metadata): """Define fields on self.metadata.""" @@ -229,11 +280,6 @@

Module eyecite.models

pin_cite: Optional[str] = None year: Optional[str] = None - def comparison_hash(self) -> int: - """Return hash that will be the same if two cites are semantically - equivalent.""" - return hash((super().comparison_hash(), self.all_editions)) - def add_metadata(self, words: "Tokens"): """Extract metadata from text before and after citation.""" self.guess_edition() @@ -276,13 +322,13 @@

Module eyecite.models

self.edition_guess = editions[0] -@dataclass(eq=True, unsafe_hash=True, repr=False) +@dataclass(eq=False, unsafe_hash=False, repr=False) class FullCitation(ResourceCitation): """Abstract base class indicating that a citation fully identifies a resource.""" -@dataclass(eq=True, unsafe_hash=True, repr=False) +@dataclass(eq=False, unsafe_hash=False, repr=False) class FullLawCitation(FullCitation): """Citation to a source from `reporters_db/laws.json`.""" @@ -319,7 +365,7 @@

Module eyecite.models

return "".join(parts) -@dataclass(eq=True, unsafe_hash=True, repr=False) +@dataclass(eq=False, unsafe_hash=False, repr=False) class FullJournalCitation(FullCitation): """Citation to a source from `reporters_db/journals.json`.""" @@ -345,12 +391,43 @@

Module eyecite.models

return "".join(parts) -@dataclass(eq=True, unsafe_hash=True, repr=False) +@dataclass(eq=False, unsafe_hash=False, repr=False) class CaseCitation(ResourceCitation): """Convenience class which represents a single citation found in a document. """ + def __hash__(self) -> int: + """CaseCitation objects that have the same volume, reporter, and page + are considered equivalent, unless the citation is missing a page, in + which case the object's hash will be unique for safety. + + self.groups for CaseCitation objects usually contains these keys: + - page (guaranteed here: https://github.com/freelawproject/reporters-db/blob/main/tests.py#L129) # noqa: E501 + - reporter (guaranteed here: https://github.com/freelawproject/reporters-db/blob/main/tests.py#L129) # noqa: E501 + - volume (almost always present, but some tax court citations don't have volumes) # noqa: E501 + - reporter_nominative (sometimes) + - volumes_nominative (sometimes) + """ + if self.groups["page"] is None: + return id(self) + else: + return hash( + hash_sha256( + { + **{ + k: self.groups[k] + for k in ["volume", "page"] + if k in self.groups + }, + **{ + "reporter": self.corrected_reporter(), + "class": type(self).__name__, + }, + } + ) + ) + @dataclass(eq=True, unsafe_hash=True) class Metadata(FullCitation.Metadata): """Define fields on self.metadata.""" @@ -367,7 +444,7 @@

Module eyecite.models

self.metadata.court = "scotus" -@dataclass(eq=True, unsafe_hash=True, repr=False) +@dataclass(eq=False, unsafe_hash=False, repr=False) class FullCaseCitation(CaseCitation, FullCitation): """Convenience class which represents a standard, fully named citation, i.e., the kind of citation that marks the first time a document is cited. @@ -417,7 +494,7 @@

Module eyecite.models

return "".join(parts) -@dataclass(eq=True, unsafe_hash=True, repr=False) +@dataclass(eq=False, unsafe_hash=False, repr=False) class ShortCaseCitation(CaseCitation): """Convenience class which represents a short form citation, i.e., the kind of citation made after a full citation has already appeared. This kind of @@ -447,7 +524,7 @@

Module eyecite.models

return "".join(parts) -@dataclass(eq=True, unsafe_hash=True, repr=False) +@dataclass(eq=False, unsafe_hash=False, repr=False) class SupraCitation(CitationBase): """Convenience class which represents a 'supra' citation, i.e., a citation to something that is above in the document. Like a short form citation, @@ -486,7 +563,7 @@

Module eyecite.models

return "".join(parts) -@dataclass(eq=True, unsafe_hash=True, repr=False) +@dataclass(eq=False, unsafe_hash=False, repr=False) class IdCitation(CitationBase): """Convenience class which represents an 'id' or 'ibid' citation, i.e., a citation to the document referenced immediately prior. An 'id' citation is @@ -497,6 +574,10 @@

Module eyecite.models

Example: "... foo bar," id., at 240 """ + def __hash__(self) -> int: + """IdCitation objects are always considered unique for safety.""" + return id(self) + @dataclass(eq=True, unsafe_hash=True) class Metadata(CitationBase.Metadata): """Define fields on self.metadata.""" @@ -511,7 +592,7 @@

Module eyecite.models

return "".join(parts) -@dataclass(eq=True, unsafe_hash=True, repr=False) +@dataclass(eq=False, unsafe_hash=False, repr=False) class UnknownCitation(CitationBase): """Convenience class which represents an unknown citation. A recognized citation should theoretically be parsed as a CaseCitation, FullLawCitation, @@ -519,16 +600,9 @@

Module eyecite.models

a naive catch-all. """ - -def NonopinionCitation(*args, **kwargs): - from warnings import warn - - warn( - """NonopinionCitation will be deprecated in eyecite 2.5.0. - Please use UnknownCitation instead.""", - DeprecationWarning, - ) - return UnknownCitation(*args, **kwargs) + def __hash__(self) -> int: + """UnknownCitation objects are always considered unique for safety.""" + return id(self) @dataclass(eq=True, unsafe_hash=True) @@ -675,13 +749,20 @@

Module eyecite.models

def __hash__(self): """Resources are the same if their citations are semantically - equivalent. + equivalent, as defined by their hash function. Note: Resources composed of citations with missing page numbers are NOT considered the same, even if their other attributes are identical. This is to avoid potential false positives. """ - return self.citation.comparison_hash() + return hash( + hash_sha256( + { + "citation": hash(self.citation), + "class": type(self).__name__, + } + ) + ) def __eq__(self, other): return self.__hash__() == other.__hash__()
@@ -692,29 +773,6 @@

Module eyecite.models

-

Functions

-
-
-def NonopinionCitation(*args, **kwargs) -
-
-
-
- -Expand source code - -
def NonopinionCitation(*args, **kwargs):
-    from warnings import warn
-
-    warn(
-        """NonopinionCitation will be deprecated in eyecite 2.5.0.
-        Please use UnknownCitation instead.""",
-        DeprecationWarning,
-    )
-    return UnknownCitation(*args, **kwargs)
-
-
-

Classes

@@ -735,6 +793,37 @@

Classes

document. """ + def __hash__(self) -> int: + """CaseCitation objects that have the same volume, reporter, and page + are considered equivalent, unless the citation is missing a page, in + which case the object's hash will be unique for safety. + + self.groups for CaseCitation objects usually contains these keys: + - page (guaranteed here: https://github.com/freelawproject/reporters-db/blob/main/tests.py#L129) # noqa: E501 + - reporter (guaranteed here: https://github.com/freelawproject/reporters-db/blob/main/tests.py#L129) # noqa: E501 + - volume (almost always present, but some tax court citations don't have volumes) # noqa: E501 + - reporter_nominative (sometimes) + - volumes_nominative (sometimes) + """ + if self.groups["page"] is None: + return id(self) + else: + return hash( + hash_sha256( + { + **{ + k: self.groups[k] + for k in ["volume", "page"] + if k in self.groups + }, + **{ + "reporter": self.corrected_reporter(), + "class": type(self).__name__, + }, + } + ) + ) + @dataclass(eq=True, unsafe_hash=True) class Metadata(FullCitation.Metadata): """Define fields on self.metadata.""" @@ -809,7 +898,6 @@

Inherited members