diff --git a/models.html b/models.html
index 9c5d9d6..5685a55 100644
--- a/models.html
+++ b/models.html
@@ -28,7 +28,7 @@
eyecite.models
import re
from collections import UserString
-from dataclasses import dataclass, field
+from dataclasses import asdict, dataclass, field
from datetime import datetime
from typing import (
Any,
@@ -43,7 +43,7 @@ Module eyecite.models
cast,
)
-from eyecite.utils import HashableDict
+from eyecite.utils import hash_sha256
ResourceType = Hashable
@@ -88,7 +88,7 @@ Module eyecite.models
)
-@dataclass(eq=True, unsafe_hash=True)
+@dataclass(eq=False, unsafe_hash=False)
class CitationBase:
"""Base class for objects returned by `eyecite.find.get_citations`. We
define several subclasses of this class below, representing the various
@@ -107,7 +107,7 @@ Module eyecite.models
def __post_init__(self):
"""Set up groups and metadata."""
# Allow groups to be used in comparisons:
- self.groups = HashableDict(self.token.groups)
+ self.groups = self.token.groups
# Make metadata a self.Metadata object:
self.metadata = (
self.Metadata(**self.metadata)
@@ -129,21 +129,52 @@ Module eyecite.models
+ ")"
)
+ def __hash__(self) -> int:
+ """In general, citations are considered equivalent if they have the
+ same group values (i.e., the same regex group content that is extracted
+ from the matched text). Subclasses may override this method in order to
+ specify equivalence behavior that is more appropriate for certain
+ kinds of citations (e.g., see CaseCitation override).
+
+ self.groups typically contains different keys for different objects:
+
+ FullLawCitation (non-exhaustive and non-guaranteed):
+ - chapter
+ - reporter
+ - law_section
+ - issue
+ - page
+ - docket_number
+ - pamphlet
+ - title
+
+ FullJournalCitation (non-exhaustive and non-guaranteed):
+ - volume
+ - reporter
+ - page
+
+ FullCaseCitation (see CaseCitation.__hash__() notes)
+ """
+ return hash(
+ hash_sha256(
+ {**dict(self.groups.items()), **{"class": type(self).__name__}}
+ )
+ )
+
+ def __eq__(self, other):
+ """This method is inherited by all subclasses and should not be
+ overridden. It implements object equality in exactly the same way as
+ defined in an object's __hash__() function, which should be overridden
+ instead if desired.
+ """
+ return self.__hash__() == other.__hash__()
+
@dataclass(eq=True, unsafe_hash=True)
class Metadata:
"""Define fields on self.metadata."""
parenthetical: Optional[str] = None
- def comparison_hash(self) -> int:
- """Return hash that will be the same if two cites are semantically
- equivalent, unless the citation is a CaseCitation missing a page.
- """
- if isinstance(self, CaseCitation) and self.groups["page"] is None:
- return id(self)
- else:
- return hash((type(self), tuple(self.groups.items())))
-
def corrected_citation(self):
"""Return citation with any variations normalized."""
return self.matched_text()
@@ -198,7 +229,7 @@ Module eyecite.models
return start, end
-@dataclass(eq=True, unsafe_hash=True, repr=False)
+@dataclass(eq=False, unsafe_hash=False, repr=False)
class ResourceCitation(CitationBase):
"""Base class for a case, law, or journal citation. Could be short or
long."""
@@ -222,6 +253,26 @@ Module eyecite.models
)
super().__post_init__()
+ def __hash__(self) -> int:
+ """ResourceCitation objects are hashed in the same way as their
+ parent class (CitationBase) objects, except that we also take into
+ consideration the all_editions field.
+ """
+ return hash(
+ hash_sha256(
+ {
+ **dict(self.groups.items()),
+ **{
+ "all_editions": sorted(
+ [asdict(e) for e in self.all_editions],
+ key=lambda d: d["short_name"], # type: ignore
+ ),
+ "class": type(self).__name__,
+ },
+ }
+ )
+ )
+
@dataclass(eq=True, unsafe_hash=True)
class Metadata(CitationBase.Metadata):
"""Define fields on self.metadata."""
@@ -229,11 +280,6 @@ Module eyecite.models
pin_cite: Optional[str] = None
year: Optional[str] = None
- def comparison_hash(self) -> int:
- """Return hash that will be the same if two cites are semantically
- equivalent."""
- return hash((super().comparison_hash(), self.all_editions))
-
def add_metadata(self, words: "Tokens"):
"""Extract metadata from text before and after citation."""
self.guess_edition()
@@ -276,13 +322,13 @@ Module eyecite.models
self.edition_guess = editions[0]
-@dataclass(eq=True, unsafe_hash=True, repr=False)
+@dataclass(eq=False, unsafe_hash=False, repr=False)
class FullCitation(ResourceCitation):
"""Abstract base class indicating that a citation fully identifies a
resource."""
-@dataclass(eq=True, unsafe_hash=True, repr=False)
+@dataclass(eq=False, unsafe_hash=False, repr=False)
class FullLawCitation(FullCitation):
"""Citation to a source from `reporters_db/laws.json`."""
@@ -319,7 +365,7 @@ Module eyecite.models
return "".join(parts)
-@dataclass(eq=True, unsafe_hash=True, repr=False)
+@dataclass(eq=False, unsafe_hash=False, repr=False)
class FullJournalCitation(FullCitation):
"""Citation to a source from `reporters_db/journals.json`."""
@@ -345,12 +391,43 @@ Module eyecite.models
return "".join(parts)
-@dataclass(eq=True, unsafe_hash=True, repr=False)
+@dataclass(eq=False, unsafe_hash=False, repr=False)
class CaseCitation(ResourceCitation):
"""Convenience class which represents a single citation found in a
document.
"""
+ def __hash__(self) -> int:
+ """CaseCitation objects that have the same volume, reporter, and page
+ are considered equivalent, unless the citation is missing a page, in
+ which case the object's hash will be unique for safety.
+
+ self.groups for CaseCitation objects usually contains these keys:
+ - page (guaranteed here: https://github.com/freelawproject/reporters-db/blob/main/tests.py#L129) # noqa: E501
+ - reporter (guaranteed here: https://github.com/freelawproject/reporters-db/blob/main/tests.py#L129) # noqa: E501
+ - volume (almost always present, but some tax court citations don't have volumes) # noqa: E501
+ - reporter_nominative (sometimes)
+ - volumes_nominative (sometimes)
+ """
+ if self.groups["page"] is None:
+ return id(self)
+ else:
+ return hash(
+ hash_sha256(
+ {
+ **{
+ k: self.groups[k]
+ for k in ["volume", "page"]
+ if k in self.groups
+ },
+ **{
+ "reporter": self.corrected_reporter(),
+ "class": type(self).__name__,
+ },
+ }
+ )
+ )
+
@dataclass(eq=True, unsafe_hash=True)
class Metadata(FullCitation.Metadata):
"""Define fields on self.metadata."""
@@ -367,7 +444,7 @@ Module eyecite.models
self.metadata.court = "scotus"
-@dataclass(eq=True, unsafe_hash=True, repr=False)
+@dataclass(eq=False, unsafe_hash=False, repr=False)
class FullCaseCitation(CaseCitation, FullCitation):
"""Convenience class which represents a standard, fully named citation,
i.e., the kind of citation that marks the first time a document is cited.
@@ -417,7 +494,7 @@ Module eyecite.models
return "".join(parts)
-@dataclass(eq=True, unsafe_hash=True, repr=False)
+@dataclass(eq=False, unsafe_hash=False, repr=False)
class ShortCaseCitation(CaseCitation):
"""Convenience class which represents a short form citation, i.e., the kind
of citation made after a full citation has already appeared. This kind of
@@ -447,7 +524,7 @@ Module eyecite.models
return "".join(parts)
-@dataclass(eq=True, unsafe_hash=True, repr=False)
+@dataclass(eq=False, unsafe_hash=False, repr=False)
class SupraCitation(CitationBase):
"""Convenience class which represents a 'supra' citation, i.e., a citation
to something that is above in the document. Like a short form citation,
@@ -486,7 +563,7 @@ Module eyecite.models
return "".join(parts)
-@dataclass(eq=True, unsafe_hash=True, repr=False)
+@dataclass(eq=False, unsafe_hash=False, repr=False)
class IdCitation(CitationBase):
"""Convenience class which represents an 'id' or 'ibid' citation, i.e., a
citation to the document referenced immediately prior. An 'id' citation is
@@ -497,6 +574,10 @@ Module eyecite.models
Example: "... foo bar," id., at 240
"""
+ def __hash__(self) -> int:
+ """IdCitation objects are always considered unique for safety."""
+ return id(self)
+
@dataclass(eq=True, unsafe_hash=True)
class Metadata(CitationBase.Metadata):
"""Define fields on self.metadata."""
@@ -511,7 +592,7 @@ Module eyecite.models
return "".join(parts)
-@dataclass(eq=True, unsafe_hash=True, repr=False)
+@dataclass(eq=False, unsafe_hash=False, repr=False)
class UnknownCitation(CitationBase):
"""Convenience class which represents an unknown citation. A recognized
citation should theoretically be parsed as a CaseCitation, FullLawCitation,
@@ -519,16 +600,9 @@ Module eyecite.models
a naive catch-all.
"""
-
-def NonopinionCitation(*args, **kwargs):
- from warnings import warn
-
- warn(
- """NonopinionCitation will be deprecated in eyecite 2.5.0.
- Please use UnknownCitation instead.""",
- DeprecationWarning,
- )
- return UnknownCitation(*args, **kwargs)
+ def __hash__(self) -> int:
+ """UnknownCitation objects are always considered unique for safety."""
+ return id(self)
@dataclass(eq=True, unsafe_hash=True)
@@ -675,13 +749,20 @@ Module eyecite.models
def __hash__(self):
"""Resources are the same if their citations are semantically
- equivalent.
+ equivalent, as defined by their hash function.
Note: Resources composed of citations with missing page numbers are
NOT considered the same, even if their other attributes are identical.
This is to avoid potential false positives.
"""
- return self.citation.comparison_hash()
+ return hash(
+ hash_sha256(
+ {
+ "citation": hash(self.citation),
+ "class": type(self).__name__,
+ }
+ )
+ )
def __eq__(self, other):
return self.__hash__() == other.__hash__()
@@ -692,29 +773,6 @@ eyecite.models
-def NonopinionCitation(*args, **kwargs)
-
def NonopinionCitation(*args, **kwargs):
- from warnings import warn
-
- warn(
- """NonopinionCitation will be deprecated in eyecite 2.5.0.
- Please use UnknownCitation instead.""",
- DeprecationWarning,
- )
- return UnknownCitation(*args, **kwargs)
-Metadata
add_metadata
comparison_hash
corrected_citation
corrected_citation_full
corrected_reporter
-def comparison_hash(self) ‑> int
-
-Return hash that will be the same if two cites are semantically
-equivalent, unless the citation is a CaseCitation missing a page.
def comparison_hash(self) -> int:
- """Return hash that will be the same if two cites are semantically
- equivalent, unless the citation is a CaseCitation missing a page.
- """
- if isinstance(self, CaseCitation) and self.groups["page"] is None:
- return id(self)
- else:
- return hash((type(self), tuple(self.groups.items())))
-
def corrected_citation(self)
Metadata
add_metadata
comparison_hash
corrected_citation
corrected_reporter
dump
Metadata
add_metadata
comparison_hash
corrected_citation
corrected_citation_full
corrected_reporter
Metadata
add_metadata
comparison_hash
corrected_citation
corrected_citation_full
corrected_reporter
Metadata
add_metadata
comparison_hash
corrected_citation
corrected_citation_full
corrected_reporter
CitationBase
:
Metadata
comparison_hash
corrected_citation
corrected_citation_full
dump
-def comparison_hash(self) ‑> int
-
-Return hash that will be the same if two cites are semantically
-equivalent.
def comparison_hash(self) -> int:
- """Return hash that will be the same if two cites are semantically
- equivalent."""
- return hash((super().comparison_hash(), self.all_editions))
-
def corrected_citation(self)
Metadata
add_metadata
comparison_hash
corrected_citation
corrected_reporter
dump
CitationBase
:
Metadata
comparison_hash
corrected_citation
corrected_citation_full
dump
CitationBase
:
Metadata
comparison_hash
corrected_citation
corrected_citation_full
dump
eyecite