Skip to content

Commit

Permalink
Merge pull request #50 from smart-on-fhir/mikix/coverage
Browse files Browse the repository at this point in the history
ci: add coverage check
  • Loading branch information
mikix authored Jul 8, 2024
2 parents 075d567 + a04be98 commit db5928e
Show file tree
Hide file tree
Showing 16 changed files with 158 additions and 250 deletions.
18 changes: 14 additions & 4 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ concurrency:
jobs:
unittest:
name: unit tests
runs-on: ubuntu-22.04
runs-on: ubuntu-latest
strategy:
matrix:
# don't go crazy with the Python versions as they eat up CI minutes
Expand All @@ -23,7 +23,7 @@ jobs:
- uses: actions/checkout@v4

- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}

Expand All @@ -34,10 +34,20 @@ jobs:
- name: Test with pytest
run: |
python -m pytest
python -m pytest --cov=chart_review --cov-report=xml
- name: Check coverage report
if: github.ref != 'refs/heads/main'
uses: orgoro/[email protected]
with:
coverageFile: coverage.xml
token: ${{ secrets.GITHUB_TOKEN }}
thresholdAll: .99
thresholdNew: 1
thresholdModified: 1

lint:
runs-on: ubuntu-22.04
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4

Expand Down
21 changes: 1 addition & 20 deletions chart_review/agree.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,30 +62,11 @@ def confusion_matrix(
elif not truth_positive and not annotator_positive:
TN.append(key)
else:
raise Exception("Guard: Impossible comparison of reviewers")
raise Exception("Guard: Impossible comparison of reviewers") # pragma: no cover

return {"TP": TP, "FN": FN, "FP": FP, "TN": TN}


def append_matrix(first: dict, second: dict) -> dict:
"""
Append two different confusion_matrix matrix dictionaries.
For example, (Annotator1 VS NLP) appended to (Annotator2 vs NLP).
TODO: Warning: assumes first and second have no overlapping NoteRange,
may not be applicable for other studies.
:param first: confusion_matrix matrix
:param second: confusion_matrix matrix
:return:
"""
added = {}
for header in ["TP", "FP", "FN", "TN"]:
added[header] = first[header] + second[header]
return added


def score_kappa(matrix: dict) -> float:
"""
Computes Cohen kappa for pair-wise annotators.
Expand Down
5 changes: 3 additions & 2 deletions chart_review/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,9 @@ def main_cli(argv: list[str] = None) -> None:
args = parser.parse_args(argv)
args.func(args)
except Exception as exc:
sys.exit(str(exc))
print(str(exc), file=sys.stderr)
sys.exit(1)


if __name__ == "__main__":
main_cli()
main_cli() # pragma: no cover
5 changes: 1 addition & 4 deletions chart_review/cohort.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from typing import Iterable

from chart_review.common import guard_iter, guard_in
from chart_review import agree, common, config, errors, external, simplify, types


Expand Down Expand Up @@ -86,13 +85,12 @@ def class_labels(self):

def _select_labels(self, label_pick: str = None) -> Iterable[str]:
if label_pick:
guard_in(label_pick, self.class_labels)
return [label_pick]
else:
return self.class_labels

def confusion_matrix(
self, truth: str, annotator: str, note_range: Iterable, label_pick: str = None
self, truth: str, annotator: str, note_range: types.NoteSet, label_pick: str = None
) -> dict:
"""
This is the rollup of counting each symptom only once, not multiple times.
Expand All @@ -104,7 +102,6 @@ def confusion_matrix(
:return: dict
"""
labels = self._select_labels(label_pick)
note_range = set(guard_iter(note_range))
return agree.confusion_matrix(
self.annotations,
truth,
Expand Down
6 changes: 2 additions & 4 deletions chart_review/commands/accuracy.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,9 @@ def print_accuracy(args: argparse.Namespace) -> None:
annotator = args.annotator

if truth not in reader.note_range:
print(f"Unrecognized annotator '{truth}'")
return
raise ValueError(f"Unrecognized annotator '{truth}'")
if annotator not in reader.note_range:
print(f"Unrecognized annotator '{annotator}'")
return
raise ValueError(f"Unrecognized annotator '{annotator}'")

# Grab the intersection of ranges
note_range = set(reader.note_range[truth])
Expand Down
62 changes: 0 additions & 62 deletions chart_review/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,65 +58,3 @@ def write_text(path: str, text: str) -> None:

with open(path, "w") as f:
f.write(text)


def csv_map(csv_file: str) -> dict:
"""
Get a simple map of csv file contents. For more complex handling, use pandas.
:param csv_file: path to CSV file with rows "key,value"
:return: dict {key:value}
"""
res = {}
with open(csv_file, "r") as fp:
for line in fp.readlines():
line = line.replace('"', "").strip()
key, val = line.split(",")
res[key] = val
return res


###############################################################################
# Helper Functions: Pretty Print JSON, useful for Testing/Debugging
###############################################################################
def print_json(jsonable) -> None:
"""
:param jsonable: dict or list of serializable object
"""
if isinstance(jsonable, dict):
print(json.dumps(jsonable, indent=4))
elif isinstance(jsonable, list):
print(json.dumps(jsonable, indent=4))
else:
print(jsonable)


def print_line(heading=None) -> None:
"""
:param heading: optionally give a header to the line seperator
"""
seperator = "##############################################################"
if heading:
print(f"\n{seperator}\n{heading}\n{seperator}")
else:
print(seperator)


###############################################################################
# Helper Functions: enum type smoothing
###############################################################################
def guard_iter(object) -> Iterable:
if isinstance(object, Enum):
return guard_iter(object.value)
elif isinstance(object, EnumMeta):
return guard_iter(object.value)
elif isinstance(object, Iterable):
return object
else:
raise Exception(f"expected Iterable|Enum but got {type(object)}")


def guard_in(entry, strict: Iterable):
if entry in strict:
return entry
else:
raise Exception(f"expected entry {entry} to be in {strict}")
6 changes: 3 additions & 3 deletions chart_review/external.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,16 +106,16 @@ def merge_external(
config: dict,
) -> None:
"""Loads an external csv file annotator and merges them into an existing simple dict"""
if filename := config.get("filename"):
if isinstance(config, dict) and (filename := config.get("filename")):
full_filename = os.path.join(project_dir, filename)
detected_id_type, label_map = _load_csv_labels(full_filename)
else:
sys.exit(f"Did not understand config for external annotator '{name}'")
raise ValueError(f"Did not understand config for external annotator '{name}'")

# Inspect exported json to see if it has the metadata we'll need.
for row in exported_json:
if "docref_mappings" not in row.get("data", {}):
sys.exit(
raise ValueError(
"Your Label Studio export does not include DocRef/Encounter ID mapping metadata!\n"
"Consider re-uploading your notes using Cumulus ETL's chart-review command."
)
Expand Down
140 changes: 0 additions & 140 deletions chart_review/labelstudio.py

This file was deleted.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ line-length = 100
tests = [
"ddt",
"pytest",
"pytest-cov",
]
dev = [
"bandit[toml]",
Expand Down
7 changes: 7 additions & 0 deletions tests/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,10 @@ def capture_stdout() -> io.StringIO:
stdout = io.StringIO()
with contextlib.redirect_stdout(stdout):
yield stdout

@staticmethod
@contextlib.contextmanager
def capture_stderr() -> io.StringIO:
stderr = io.StringIO()
with contextlib.redirect_stderr(stderr):
yield stderr
2 changes: 1 addition & 1 deletion tests/data/external/enc.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
BLARG_ID,SYMPTOM
ENC_ID,SYMPTOM
"ABC-Enc","happy"
"ABC","ignored"
"ABC-Enc","tired"
Expand Down
1 change: 1 addition & 0 deletions tests/data/ignore/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@ ignore:
- 3-Enc
- Encounter/Anon-4-Enc
- 5
- Encounter/Unknown # just test we don't blow up
Loading

0 comments on commit db5928e

Please sign in to comment.