-
Notifications
You must be signed in to change notification settings - Fork 16
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
77263ec
commit dd3858e
Showing
9 changed files
with
116 additions
and
47 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,12 +1,10 @@ | ||
from .preprocessors import TextPreProcessor | ||
from .postprocessors import TextPostProcessor | ||
from .model import Model | ||
from .base import BaseComponent, Component | ||
|
||
__all__ = [ | ||
"TextPreProcessor", | ||
"TextPostProcessor", | ||
"Model", | ||
"BaseComponent", | ||
"Component", | ||
] |
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,24 +1,50 @@ | ||
from typing import Any | ||
from healthchain.io.cdaconnector import CdaConnector | ||
from healthchain.pipeline.base import BasePipeline | ||
from healthchain.pipeline.components.preprocessors import TextPreProcessor | ||
from healthchain.pipeline.components.postprocessors import TextPostProcessor | ||
from healthchain.pipeline.components.model import Model | ||
from healthchain.pipeline.modelrouter import ModelRouter | ||
|
||
|
||
# TODO: Implement this pipeline in full | ||
class MedicalCodingPipeline(BasePipeline):
    """
    A pipeline for medical coding tasks using NLP models.

    This pipeline is configured to process clinical documents using a medical NLP model
    for tasks like named entity recognition and linking (NER+L). It uses CDA format
    for input and output handling.

    Examples:
        >>> # Using with SpaCy/MedCAT
        >>> pipeline = MedicalCodingPipeline.load("medcatlite")
        >>>
        >>> # Using with Hugging Face
        >>> pipeline = MedicalCodingPipeline.load(
        ...     "bert-base-uncased",
        ...     task="ner"
        ... )
        >>> results = pipeline(documents)
    """

    def configure_pipeline(self, model_name: str, **model_kwargs: Any) -> None:
        """
        Configure the pipeline with a medical NLP model and CDA connectors.

        Args:
            model_name: Name or path of the model to load
            **model_kwargs: Additional configuration for the model, passed
                through unchanged to ``ModelRouter.get_integration``

        Raises:
            ValueError: If no appropriate integration can be found for the model
            ImportError: If required dependencies are not installed
        """
        # One connector instance serves as both the input and the output side.
        cda_connector = CdaConnector()

        # Resolve the model before registering anything on the pipeline, so a
        # failed lookup does not leave a half-configured pipeline behind.
        try:
            model = ModelRouter.get_integration(model_name, **model_kwargs)
        except (ValueError, ImportError) as e:
            # Re-raise the same exception type with pipeline context added;
            # "from e" keeps the original error explicitly chained.
            raise type(e)(
                f"Failed to configure pipeline with model '{model_name}'. Error: {e}"
            ) from e

        self.add_input(cda_connector)
        self.add_node(model, stage="ner+l")
        self.add_output(cda_connector)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
from healthchain.pipeline.components.base import BaseComponent | ||
from healthchain.pipeline.components.integrations import ( | ||
SpacyComponent, | ||
HuggingFaceComponent, | ||
) | ||
import re | ||
from typing import Any | ||
|
||
|
||
class ModelRouter:
    """
    A router that selects the appropriate integration component based on the model name.

    This is an internal utility class used by pipelines to determine which integration
    to use for a given model.
    """

    # Patterns are compiled once at class creation rather than on every call.
    # They are applied with ``fullmatch``, so no explicit ^/$ anchors are needed
    # and a name with a trailing newline is not accepted.

    # SpaCy models typically follow these patterns
    _SPACY_PATTERNS = tuple(
        re.compile(pattern)
        for pattern in (
            r"en_core_.*",  # standard spacy models (also covers scispacy "en_core_sci_*")
            r".*/spacy/.*",  # local spacy model paths
            r"medcatlite",  # medcat model
        )
    )

    # Hugging Face models typically include these patterns
    _HF_PATTERNS = tuple(
        re.compile(pattern)
        for pattern in (
            r"bert-.*",
            r"gpt-.*",
            r"t5-.*",
            r"distilbert-.*",
            r".*/huggingface/.*",
        )
    )

    @staticmethod
    def get_integration(model_name: str, **kwargs: Any) -> "BaseComponent":
        """
        Determine and return the appropriate integration component for the given model.

        SpaCy patterns are checked before Hugging Face patterns, so a name that
        satisfies both is routed to SpaCy.

        Args:
            model_name: Name or path of the model to load
            **kwargs: Additional arguments for the integration component.
                Only ``task`` is read (Hugging Face models, defaulting to
                ``"text-classification"``); other keys are currently ignored.

        Returns:
            An initialized integration component (SpacyComponent, HuggingFaceComponent, etc.)

        Raises:
            ValueError: If ``model_name`` matches none of the known patterns
        """
        # Check for SpaCy models
        if any(p.fullmatch(model_name) for p in ModelRouter._SPACY_PATTERNS):
            return SpacyComponent(model_name)

        # Check for Hugging Face models
        if any(p.fullmatch(model_name) for p in ModelRouter._HF_PATTERNS):
            return HuggingFaceComponent(
                model=model_name,
                task=kwargs.get("task", "text-classification"),
            )

        raise ValueError(
            f"Could not determine appropriate integration for model: {model_name}"
        )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters