-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathutils.py
178 lines (131 loc) · 4.73 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
from __future__ import annotations

import re
import statistics
import warnings
from typing import TYPE_CHECKING, Any, Type, Union

import en_core_web_sm
import pandas as pd
from google.generativeai.types.discuss_types import ChatResponse
from openai.openai_object import OpenAIObject
from pandas.core.frame import DataFrame
from titlecase import titlecase

from mappings import CAP_CIRCUIT_MAPPING, STATE_SLUG_MAPPING
from settings import SCDB_JUSTICE_MAPPING_PATH
# Module-level pipeline object returned by `en_core_web_sm.load()`; it is
# created once, as a side effect of importing this module.
SPACY_NLP = en_core_web_sm.load()
###################################
# Types
###################################
# `typing.TYPE_CHECKING` is False at runtime, so the imports and aliases in
# this block are visible to static type checkers only.
# NOTE(review): nesting assumes every alias lives under the guard, because
# `APIBackend` is only imported there — confirm against the upstream file.
if TYPE_CHECKING:
    from api import APIBackend
    from models import CourtCase
    AnswerType = dict[str, str]
    APIBackendType = Type[APIBackend]
    APIResponseObjectType = Union[OpenAIObject, ChatResponse]
###################################
# General utility functions
###################################
def print_progress(iteration: int, total_iterations: int, prefix: str = "") -> None:
    """Draw a 50-character text progress bar on standard output.

    Args:
        iteration: Zero-based index of the step that just finished; it is
            converted to a one-based count before anything is computed.
        total_iterations: Total number of steps.
        prefix: Text written in front of the bar.

    The line starts with a carriage return so successive calls overwrite each
    other; a newline is written only when the final step is reached.
    """
    bar_width = 50
    completed = iteration + 1

    percent = format(100 * (completed / float(total_iterations)), ".1f")
    filled = int(bar_width * completed // total_iterations)
    bar = "█" * filled + "-" * (bar_width - filled)

    if completed == total_iterations:
        end = "\n"
    else:
        end = " "

    print(f"\r{prefix} Progress: |{bar}| {percent}% complete.", end=end, flush=True)
def rstrip_period(s: str) -> str:
    """Return ``s`` with every trailing ``"."`` character removed."""
    without_trailing_periods = s.rstrip(".")
    return without_trailing_periods
def titlecase_callback(word, **kwargs) -> str | None:
    """Callback handed to ``titlecase`` by ``format_case_name``.

    Returns the lowercased word when it is exactly one of ``"v."``, ``"vs."``,
    ``"et"`` or ``"al."`` (the comparison is case-sensitive), and ``None`` for
    every other word. Extra keyword arguments are accepted and ignored.
    """
    keep_lowercase = ("v.", "vs.", "et", "al.")
    return word.lower() if word in keep_lowercase else None
def format_case_name(case_name: str) -> str:
    """Normalize the capitalization of a case name.

    The name is lowercased, passed through ``titlecase`` with
    ``titlecase_callback``, and then any ``V.`` / ``Vs.`` that directly follows
    a period and one whitespace character is rewritten to lowercase.
    """
    formatted = titlecase(case_name.lower(), callback=titlecase_callback)

    # Each pair is (pattern, replacement); they are applied in this order.
    fixups = (
        (r"\.\sV\.\s", ". v. "),
        (r"\.\sVs\.\s", ". vs. "),
    )
    for pattern, replacement in fixups:
        formatted = re.sub(pattern, replacement, formatted)
    return formatted
def get_greedy_log_probs(greedy_response_object: OpenAIObject) -> float:
    """Return the mean token log-probability of the response's first choice.

    Reads ``choices[0]["logprobs"]["token_logprobs"]`` from the response object
    and averages the values with ``statistics.fmean``.
    """
    first_choice = greedy_response_object.choices[0]
    token_logprobs = first_choice["logprobs"]["token_logprobs"]
    return statistics.fmean(token_logprobs)
def agreement_cutoff(score: str) -> int:
    """Binarize a numeric score given as text.

    The score is parsed as a float and truncated to an integer; the result is
    ``0`` when that integer is at most 70 and ``100`` otherwise.
    """
    truncated = int(float(score))
    if truncated <= 70:
        return 0
    return 100
def noop(x: Any) -> Any:
    """Identity function: hand back the argument unchanged."""
    unchanged = x
    return unchanged
###################################
# Helpers to prepare SCOTUS, COA, USDC data
###################################
# Justice lookup table, read once at import time from the CSV located at
# SCDB_JUSTICE_MAPPING_PATH.
SCDB_JUSTICE_MAPPING: DataFrame = pd.read_csv(
    SCDB_JUSTICE_MAPPING_PATH, index_col=False
)
# `Series.astype` returns a new Series instead of converting in place, so the
# result must be assigned back for the integer cast to take effect.
SCDB_JUSTICE_MAPPING["scdb_id"] = SCDB_JUSTICE_MAPPING["scdb_id"].astype("int")
def get_judge_name_from_scdb_id(scdb_id: int | str | None) -> dict[str, str]:
    """Look up a justice's name in ``SCDB_JUSTICE_MAPPING`` by ``scdb_id``.

    Args:
        scdb_id: Identifier to match against the ``scdb_id`` column. Only
            values whose type is exactly ``int`` are looked up.

    Returns:
        A dict with keys ``"first_name"`` and ``"last_name"`` taken from the
        ``first`` and ``last`` columns of the matching row.

    Raises:
        TypeError: If ``scdb_id`` is not exactly an ``int``.
        Warning: If the id does not match exactly one row.
    """
    if type(scdb_id) is not int:
        raise TypeError

    matches: DataFrame = SCDB_JUSTICE_MAPPING.loc[
        SCDB_JUSTICE_MAPPING["scdb_id"] == scdb_id
    ]
    if len(matches) != 1:
        raise Warning(f"Justice ID not in SCDB_JUSTICE_MAPPING: {scdb_id}")

    row = matches.iloc[0]
    return {
        "first_name": row["first"],
        "last_name": row["last"],
    }
def get_disposition_from_scdb_id(id: int | None) -> str:
    """Translate a disposition code into a text label.

    Code 2 yields ``"affirm"``; codes 3, 4, 5 and 8 yield ``"reverse"``; any
    other value, including ``None``, yields the empty string.
    """
    # Checked in order; the first group containing the code decides the label.
    label_by_codes = (
        ((2,), "affirm"),
        ((3, 4, 5, 8), "reverse"),
    )
    for codes, label in label_by_codes:
        if id in codes:
            return label
    return ""
def get_disposition_from_songer_id(id: int | None) -> int | None:
    """Translate a disposition code into a binary flag.

    Code 1 yields ``1``; codes 2, 3, 4 and 7 yield ``0``; any other value,
    including ``None``, yields ``None``.
    """
    # Checked in order; the first group containing the code decides the flag.
    flag_by_codes = (
        ((1,), 1),
        ((2, 3, 4, 7), 0),
    )
    for codes, flag in flag_by_codes:
        if id in codes:
            return flag
    return None
def get_case_citation_for_scotus_case(case: CourtCase) -> str:
    """Return the first usable citation stored on ``case``.

    ``us_citation`` is tried first, then ``sct_citation``. A value is usable
    when it is truthy and ``pd.isna`` does not report it as missing.

    Raises:
        KeyError: If neither attribute holds a usable citation.
    """
    # Attributes are read lazily, in priority order.
    for attribute in ("us_citation", "sct_citation"):
        citation = getattr(case, attribute)
        if citation and not pd.isna(citation):
            return citation
    raise KeyError
def get_circuit_from_cap_id(id: int) -> int:
    """Look up ``id`` in ``CAP_CIRCUIT_MAPPING``.

    Returns the mapped value, or ``99`` when the lookup raises ``KeyError``.
    """
    try:
        circuit = CAP_CIRCUIT_MAPPING[id]
    except KeyError:
        return 99
    return circuit
def get_state_from_cap_slug(slug: str) -> str:
    """Look up ``slug`` in ``STATE_SLUG_MAPPING``.

    Returns the mapped value, or the entry stored under ``"misc"`` when the
    lookup raises ``KeyError``. Any other exception propagates unchanged.
    """
    try:
        state = STATE_SLUG_MAPPING[slug]
    except KeyError:
        state = STATE_SLUG_MAPPING["misc"]
    return state
def get_majority_author_from_cap_dict(case: dict) -> str | None:
    """Return the author of the first majority or rehearing opinion.

    Walks ``case["data"]["opinions"]`` in order and returns the ``"author"`` of
    the first entry whose ``"type"`` is ``"majority"`` or ``"rehearing"`` and
    whose author is truthy. Returns ``None`` when no entry qualifies.
    """
    accepted_types = ("majority", "rehearing")
    authors = (
        opinion["author"]
        for opinion in case["data"]["opinions"]
        if opinion["type"] in accepted_types and opinion["author"]
    )
    return next(authors, None)
def get_majority_opinion_from_cap_dict(case: dict) -> str | None:
    """Return the text of the first majority or rehearing opinion.

    ``case["casebody"]`` is a string that is evaluated into a nested dict; its
    ``["data"]["opinions"]`` list is walked in order and the ``"text"`` of the
    first entry whose ``"type"`` is ``"majority"`` or ``"rehearing"`` is
    returned. Returns ``None`` when no entry qualifies.
    """
    # NOTE(review): eval() executes whatever expression the string contains.
    # If this data can come from an untrusted source, consider a literal-only
    # parser (ast.literal_eval / json.loads) after confirming the stored format.
    opinions = eval(case["casebody"])["data"]["opinions"]
    accepted_types = ("majority", "rehearing")
    texts = (
        opinion["text"] for opinion in opinions if opinion["type"] in accepted_types
    )
    return next(texts, None)
def get_citation_from_cap_dict(citations: dict) -> str:
    """Return the ``"cite"`` of the first entry typed ``"official"``.

    ``citations`` is iterated, and each item is indexed by ``"type"`` and
    ``"cite"``.

    Raises:
        KeyError: If no entry has ``"type" == "official"``.
    """
    not_found = object()
    official_cites = (
        entry["cite"] for entry in citations if entry["type"] == "official"
    )
    cite = next(official_cites, not_found)
    if cite is not_found:
        raise KeyError
    return cite
def get_importance_from_cap_dict(case: dict) -> float:
    """Return the PageRank percentile stored in a case record.

    ``case["analysis"]`` is a string that is evaluated into a nested dict, from
    which ``["pagerank"]["percentile"]`` is read.

    Returns:
        The stored percentile, or ``0.5`` when a key on the lookup path is
        missing. In the fallback case a warning naming ``case["id"]`` is
        emitted.
    """
    try:
        # NOTE(review): eval() executes whatever expression the string
        # contains. If this data can come from an untrusted source, consider a
        # literal-only parser after confirming the stored format.
        return eval(case["analysis"])["pagerank"]["percentile"]
    except KeyError:
        # Instantiating Warning(...) on its own emits nothing; warnings.warn
        # is what actually surfaces the message to the caller.
        warnings.warn(f'No PageRank key for {case["id"]}', stacklevel=2)
        return 0.5