Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat!: reorganize CLI commands #44

Merged
merged 1 commit into from
Jun 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion chart_review/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
"""Chart Review public entry point"""

__version__ = "1.3.0"
__version__ = "2.0.0"
17 changes: 6 additions & 11 deletions chart_review/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,23 +3,18 @@
import argparse
import sys

import chart_review
from chart_review.commands import accuracy, info
from chart_review.commands import accuracy, default, ids, labels


def define_parser() -> argparse.ArgumentParser:
"""Fills out an argument parser with all the CLI options."""
parser = argparse.ArgumentParser()
default.make_subparser(parser)

parser.add_argument(
"--version",
action="version",
version=f"chart-review {chart_review.__version__}",
)

subparsers = parser.add_subparsers(required=True)
accuracy.make_subparser(subparsers.add_parser("accuracy"))
info.make_subparser(subparsers.add_parser("info"))
subparsers = parser.add_subparsers()
accuracy.make_subparser(subparsers.add_parser("accuracy", help="calculate F1 and Kappa scores"))
ids.make_subparser(subparsers.add_parser("ids", help="map Label Studio IDs to FHIR IDs"))
labels.make_subparser(subparsers.add_parser("labels", help="show label usage by annotator"))

return parser

Expand Down
20 changes: 16 additions & 4 deletions chart_review/cli_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,30 @@

import argparse

from chart_review import cohort, config

def add_project_args(parser: argparse.ArgumentParser) -> None:

def add_project_args(parser: argparse.ArgumentParser, is_global: bool = False) -> None:
group = parser.add_argument_group("configuration")
group.add_argument(
"--project-dir",
default=".",
"-p",
default=None if is_global else argparse.SUPPRESS,
metavar="DIR",
help=(
"Directory holding project files, "
"directory holding project files, "
"like labelstudio-export.json (default: current dir)"
),
)
group.add_argument(
"--config", "-c", metavar="PATH", help="Config file (default: [project-dir]/config.yaml)"
"--config",
"-c",
default=None if is_global else argparse.SUPPRESS,
metavar="PATH",
help="config file (default: [project-dir]/config.yaml)",
)


def get_cohort_reader(args: argparse.Namespace) -> cohort.CohortReader:
    """
    Build a cohort reader from parsed command line arguments.

    Reads ``args.project_dir`` and ``args.config`` (the values registered by
    ``add_project_args``) and feeds them into a ``config.ProjectConfig``, which is
    then wrapped in a ``cohort.CohortReader``.

    :param args: parsed CLI arguments holding ``project_dir`` and ``config`` attributes
    :returns: a reader built on top of the resulting project configuration
    """
    project_dir = args.project_dir
    config_path = args.config
    return cohort.CohortReader(
        config.ProjectConfig(project_dir=project_dir, config_path=config_path)
    )
47 changes: 47 additions & 0 deletions chart_review/commands/default.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
"""Methods for showing config & calculated setup info."""

import argparse

import rich
import rich.box
import rich.table

import chart_review
from chart_review import cli_utils, console_utils


def print_info(args: argparse.Namespace) -> None:
    """
    Print a per-annotator chart summary for the project on the console.

    One table row is emitted per annotator (in sorted order), holding the annotator
    name, how many charts they cover, and a condensed rendering of those chart IDs.
    The table is followed by the ignored-charts report and a hint about ``--help``.

    :param args: parsed CLI arguments, used to locate and load the project
    """
    cohort_reader = cli_utils.get_cohort_reader(args)
    out = rich.get_console()

    # Charts
    summary = rich.table.Table("Annotator", "Chart Count", "Chart IDs", box=rich.box.ROUNDED)
    for name in sorted(cohort_reader.note_range):
        note_ids = cohort_reader.note_range[name]
        chart_count = str(len(note_ids))
        chart_ids = console_utils.pretty_note_range(note_ids)
        summary.add_row(name, chart_count, chart_ids)

    out.print(summary)
    console_utils.print_ignored_charts(cohort_reader)

    # Blank separator line, then nudge the user toward the subcommands/options.
    out.print()
    out.print("Pass --help to see more options.")


def make_subparser(parser: argparse.ArgumentParser) -> None:
    """
    Configure the given parser for the default (no subcommand) invocation.

    Registers the project arguments in their global form, adds a ``--version`` flag,
    and makes ``print_info`` the handler stored in the ``func`` default.

    :param parser: the parser to fill out
    """
    cli_utils.add_project_args(parser, is_global=True)

    version_text = f"chart-review {chart_review.__version__}"
    parser.add_argument("--version", action="version", version=version_text)

    parser.set_defaults(func=print_info)
50 changes: 50 additions & 0 deletions chart_review/commands/ids.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import argparse
import csv
import sys

from chart_review import cli_utils


def make_subparser(parser: argparse.ArgumentParser) -> None:
    """
    Configure the parser for the ``ids`` command.

    Sets ``print_ids`` as the ``func`` default and registers the shared project arguments.

    :param parser: the subcommand parser to fill out
    """
    parser.set_defaults(func=print_ids)
    cli_utils.add_project_args(parser)
Comment on lines +8 to +10

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

re: this and labels, maybe not now, but as you add some of the other commands, should this move to a centralized location/base class?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My plan was to let each command own its own specific logic, but provide all the shared code via cli_utils.py or other places.

Like you see above with cli_utils.add_project_args, but more of that.



def print_ids(args: argparse.Namespace) -> None:
    """
    Write a CSV mapping of every chart ID to its FHIR IDs on stdout.

    Each row holds the chart ID, the original FHIR ID, and the anonymized FHIR ID.
    A chart gets one row for its Encounter (when either Encounter ID is present) plus
    one row per DocumentReference mapping. Charts without any mapped IDs still get a
    single row with empty ID columns.

    CSV was chosen as the more technically-oriented format because it was not clear
    how to present this information in a way that is sensible to a casual console
    user. In the future, this could get fancier.

    :param args: parsed CLI arguments, used to locate and load the project
    """
    cohort_reader = cli_utils.get_cohort_reader(args)

    out = csv.writer(sys.stdout)
    out.writerow(["chart_id", "original_fhir_id", "anonymized_fhir_id"])

    for chart in cohort_reader.ls_export:
        chart_id = str(chart["id"])
        data = chart.get("data", {})
        rows_written = 0

        # Encounter IDs come first; either side may be absent on its own.
        encounter_orig = ""
        if "enc_id" in data:
            encounter_orig = f"Encounter/{data['enc_id']}"
        encounter_anon = ""
        if "anon_id" in data:
            encounter_anon = f"Encounter/{data['anon_id']}"
        if encounter_orig or encounter_anon:
            out.writerow([chart_id, encounter_orig, encounter_anon])
            rows_written += 1

        # Then one row per DocRef mapping (original ID -> anonymized ID).
        for docref_orig, docref_anon in data.get("docref_mappings", {}).items():
            out.writerow(
                [
                    chart_id,
                    f"DocumentReference/{docref_orig}",
                    f"DocumentReference/{docref_anon}",
                ]
            )
            rows_written += 1

        if rows_written == 0:
            # Every chart ID must appear at least once, so it is clear that the chart
            # is part of the Label Studio export even though no IDs are mapped to it.
            out.writerow([chart_id, None, None])
154 changes: 0 additions & 154 deletions chart_review/commands/info.py

This file was deleted.

51 changes: 51 additions & 0 deletions chart_review/commands/labels.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import argparse

import rich
import rich.box
import rich.table
import rich.text

from chart_review import cli_utils, console_utils, types


def make_subparser(parser: argparse.ArgumentParser) -> None:
    """
    Configure the parser for the ``labels`` command.

    Sets ``print_labels`` as the ``func`` default and registers the shared project arguments.

    :param parser: the subcommand parser to fill out
    """
    parser.set_defaults(func=print_labels)
    cli_utils.add_project_args(parser)


def print_labels(args: argparse.Namespace) -> None:
    """
    Print a table of label usage, overall and per annotator, on the console.

    For every class label, the table lists how many notes carry that label: first
    across the union of all annotators (shown as "Any"), then in a separate section
    for each annotator. The ignored-charts report is printed after the table.

    :param args: parsed CLI arguments, used to locate and load the project
    """
    cohort_reader = cli_utils.get_cohort_reader(args)

    # Gather, for every annotator, the set of notes mentioning each label.
    # Labels and annotators are both ordered case-insensitively for display.
    ordered_labels = sorted(cohort_reader.class_labels, key=str.casefold)
    by_annotator: dict[str, dict[str, types.NoteSet]] = {}  # annotator -> label -> note IDs
    union_by_label: dict[str, types.NoteSet] = {}  # label -> note IDs from any annotator
    for annotator, mentions in cohort_reader.annotations.mentions.items():
        per_label = {}
        by_annotator[annotator] = per_label
        for label in ordered_labels:
            matching = {note_id for note_id, labels in mentions.items() if label in labels}
            per_label[label] = matching
            union_by_label.setdefault(label, types.NoteSet()).update(matching)

    table = rich.table.Table("Annotator", "Chart Count", "Label", box=rich.box.ROUNDED)

    # Summary rows come first: counts over the union of all annotators.
    for label in ordered_labels:
        union_count = len(union_by_label.get(label, {}))
        table.add_row(rich.text.Text("Any", style="italic"), str(union_count), label)

    # Then each annotator gets their own boxed section.
    for annotator in sorted(by_annotator, key=str.casefold):
        table.add_section()
        for label, note_ids in by_annotator[annotator].items():
            table.add_row(annotator, str(len(note_ids)), label)

    rich.get_console().print(table)
    console_utils.print_ignored_charts(cohort_reader)
Loading
Loading