Skip to content

Commit

Permalink
predict_no_context (#31)
Browse files Browse the repository at this point in the history
  • Loading branch information
phlobo authored Mar 6, 2024
1 parent 2662e8e commit e3b461e
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 1 deletion.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "xmen"
version = "1.0.4"
version = "1.0.5"
description = "An extensible toolkit for Cross-lingual (x) Medical Entity Normalization."
license = "Apache-2.0"
authors = ["Florian Borchert <[email protected]>"]
Expand Down
37 changes: 37 additions & 0 deletions xmen/linkers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from xmen.log import logger

from xmen.reranking import Reranker
from xmen.data import from_spans


class EntityLinker(ABC):
Expand All @@ -31,6 +32,42 @@ def get_logger(self):
def predict(self, passages: list, entities: list) -> list:
    """
    Placeholder for the linker's prediction step; this base implementation performs no work
    and returns None, so concrete linkers are expected to provide their own version.

    Args:
    - passages (list): Passages to run prediction on (exact element structure is not visible here - TODO confirm).
    - entities (list): Entities belonging to the passages (exact element structure is not visible here - TODO confirm).
    """

def predict_no_context(
self, entities: str | list[str], label: str | list[str] = None, batch_size: int = None
) -> list:
"""
Generates candidate concepts for the given entities (one or more) without any context.
Args:
- entities (str | list[str]): The entity or entities for which to generate candidates.
- label (str | list[str]): The label or labels for the entities. If a single label is provided, it will be used for all entities.
- batch_size (int): The batch size to use for prediction. If None, the default batch size of the model will be used.
"""
is_str = False
if isinstance(entities, str):
is_str = True
entities = [entities]
assert label is None or isinstance(label, str)
label = [label]
elif label is None or isinstance(label, str):
label = [label] * len(entities)
assert len(entities) == len(label)

spans = []
sentences = []
indices = []
for e, l in zip(entities, label):
indices.append(len(sentences))
spans.append([{"char_start_index": 0, "char_end_index": len(e), "label": l, "span": e}])
sentences.append(e)
ds = from_spans(entities=spans, sentences=sentences)
result = self.predict_batch(ds, batch_size)
if is_str:
assert len(result["entities"]) == 1
return result["entities"][0]
else:
return result["entities"]


class RerankedLinker(EntityLinker):
def __init__(self, linker: EntityLinker, ranker: Reranker):
Expand Down

0 comments on commit e3b461e

Please sign in to comment.