Skip to content

Commit

Permalink
Change API endpoints from GET with query parameters to POST with a JSON request body
Browse files Browse the repository at this point in the history
  • Loading branch information
cjer committed Aug 19, 2021
1 parent 83f8160 commit df5bb69
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 28 deletions.
5 changes: 3 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@ RUN apt-get update
RUN apt-get --no-install-recommends -y install git \
ca-certificates
RUN update-ca-certificates

RUN apt-get --no-install-recommends -y install python3-pip
RUN rm -rf /var/lib/apt/lists/*

COPY . /NEMO

Expand All @@ -12,7 +15,5 @@ WORKDIR /NEMO/
RUN cd /NEMO/ \
&& gunzip data/*.gz || true

RUN apt-get --no-install-recommends -y install python3-pip
RUN rm -rf /var/lib/apt/lists/*
RUN cd /NEMO/ \
&& pip install -r requirements_cpu_only.txt -f https://download.pytorch.org/whl/torch_stable.html
61 changes: 35 additions & 26 deletions api_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,17 @@ class MorphModelName(str, Enum):
description="Name of an available morph model.",
)


class NEMOQuery(BaseModel):
    """Request body shared by all NEMO NER endpoints.

    Carries the input text and a flag saying whether it is already
    tokenized, replacing the former per-endpoint query parameters.
    """

    # Raw input text; multiple sentences are separated by newline characters
    # (see the example payload below).
    sentences: str
    # True when *sentences* is already tokenized; defaults to False.
    tokenized: Optional[bool] = False

    class Config:
        # Example payload surfaced in the OpenAPI/Swagger documentation.
        schema_extra = {
            "example": {
                "sentences": "עשרות אנשים מגיעים מתאילנד לישראל.\nתופעה זו התבררה אתמול בוועדת העבודה והרווחה של הכנסת.",
                "tokenized": False,
            }
        }
#response models
class NEMODoc(BaseModel):
tokenized_text: List[str]
Expand Down Expand Up @@ -294,16 +304,16 @@ def load_all_models():
loaded_models[model] = m


@app.get("/run_ner_model/",
@app.post("/run_ner_model/",
response_model=List[NCRFPreds],
summary="Get NER sequence label predictions, no morphological segmentation"
)
def run_ner_model(sentences: str=sent_query,
model_name: ModelName=ModelName.token_single,
tokenized: Optional[bool]=tokenized_query):
def run_ner_model(q: NEMOQuery,
model_name: Optional[ModelName]=ModelName.token_single,
):
model = loaded_models[model_name]
temp_input = temporary_filename()
tok_sents = create_input_file(sentences, temp_input, tokenized)
tok_sents = create_input_file(q.sentences, temp_input, q.tokenized)
preds = ncrf_decode(model['model'], model['data'], temp_input)
response = []
for t, p in zip(tok_sents, preds):
Expand All @@ -312,13 +322,13 @@ def run_ner_model(sentences: str=sent_query,
return response


@app.get("/multi_to_single/", response_model=List[TokenMultiDoc],
@app.post("/multi_to_single/", response_model=List[TokenMultiDoc],
summary="Use token-multi model to get token-level NER labels. No morphological segmentation."
)
def multi_to_single(sentences: str=sent_query,
def multi_to_single(q: NEMOQuery,
multi_model_name: Optional[MultiModelName]=multi_model_query,
tokenized: Optional[bool]=tokenized_query):
model_out = run_ner_model(sentences, multi_model_name, tokenized)
):
model_out = run_ner_model(q, multi_model_name)
tok_sents, ner_multi_preds = zip(*[(x.tokenized_text, x.ncrf_preds) for x in model_out])
ner_single_preds = [[fix_multi_biose(label) for label in sent] for sent in ner_multi_preds]

Expand All @@ -331,14 +341,14 @@ def multi_to_single(sentences: str=sent_query,
return response


@app.get("/multi_align_hybrid/",
@app.post("/multi_align_hybrid/",
response_model=List[HybridDoc],
summary="Use token-multi model for MD and NER labels"
)
def multi_align_hybrid(sentences: str=sent_query,
def multi_align_hybrid(q: NEMOQuery,
multi_model_name: Optional[MultiModelName]=multi_model_query,
tokenized: Optional[bool]=tokenized_query):
model_out = run_ner_model(sentences, multi_model_name, tokenized)
):
model_out = run_ner_model(q, multi_model_name)
tok_sents, ner_multi_preds = zip(*[(x.tokenized_text, x.ncrf_preds) for x in model_out])
ner_single_preds = [[fix_multi_biose(label) for label in sent] for sent in ner_multi_preds]
ma_lattice = run_yap_hebma(tok_sents)
Expand All @@ -365,14 +375,14 @@ def multi_align_hybrid(sentences: str=sent_query,
return response


@app.get("/morph_yap/",
@app.post("/morph_yap/",
response_model=List[MorphNERDoc],
summary="Standard pipeline - use yap for morpho-syntax, then use NER morph model for NER labels"
)
def morph_yap(sentences: str=sent_query,
def morph_yap(q: NEMOQuery,
morph_model_name: Optional[MorphModelName]=morph_model_query,
tokenized: Optional[bool]=tokenized_query):
tok_sents = get_sents(sentences, tokenized)
):
tok_sents = get_sents(q.sentences, q.tokenized)
yap_out = run_yap_joint(tok_sents)
md_sents = (bclm.get_sentences_list(nemo.read_lattices(yap_out['md_lattice']), ['form']).apply(lambda x: [t[0] for t in x] )).to_list()
model = loaded_models[morph_model_name]
Expand All @@ -397,15 +407,14 @@ def morph_yap(sentences: str=sent_query,
flatten = lambda l: [item for sublist in l for item in sublist]


@app.get("/morph_hybrid/",
@app.post("/morph_hybrid/",
response_model=List[MorphHybridDoc] ,
summary="Segment using hybrid method (w/ token-multi). Then get NER labels with morph model.")
def morph_hybrid(sentences: str=sent_query,
def morph_hybrid(q: NEMOQuery,
multi_model_name: Optional[MultiModelName]=multi_model_query,
morph_model_name: Optional[MorphModelName]=morph_model_query,
tokenized: Optional[bool]=tokenized_query,
align_tokens: Optional[bool] = False):
model_out = run_ner_model(sentences, multi_model_name, tokenized)
model_out = run_ner_model(q, multi_model_name)
tok_sents, ner_multi_preds = zip(*[(x.tokenized_text, x.ncrf_preds) for x in model_out])
ner_single_preds = [[fix_multi_biose(label) for label in sent] for sent in ner_multi_preds]
ma_lattice = run_yap_hebma(tok_sents)
Expand Down Expand Up @@ -460,18 +469,18 @@ def morph_hybrid(sentences: str=sent_query,
return response


@app.post("/morph_hybrid_align_tokens/",
          response_model=List[MorphHybridDoc],
          summary="Segment using hybrid method (w/ token-multi). Then get NER labels with morph model + align with tokens to get token-level NER.")
def morph_hybrid_align_tokens(q: NEMOQuery,
                              multi_model_name: Optional[MultiModelName] = multi_model_query,
                              morph_model_name: Optional[MorphModelName] = morph_model_query,
                              ):
    """Run the hybrid morph pipeline with token alignment enabled.

    Thin wrapper around ``morph_hybrid`` that forces ``align_tokens=True``
    so morph-level NER predictions are projected back onto the original
    tokens.
    """
    # BUG FIX: the previous version passed ``q.sentences`` (a str) where
    # ``morph_hybrid`` now expects the whole NEMOQuery, and also passed the
    # removed ``tokenized`` positional, which landed on ``align_tokens`` and
    # collided with the explicit ``align_tokens=True`` keyword (TypeError).
    # The tokenized flag now travels inside ``q.tokenized``, matching the
    # other POST endpoints, so pass the query object through unchanged.
    return morph_hybrid(q, multi_model_name, morph_model_name, align_tokens=True)


#
# @app.get("/run_separate_nemo/")
# @app.post("/run_separate_nemo/")
# def run_separate_nemo(command: str, model_name: str, sentence: str):
# if command in available_commands:
# if command == 'run_ner_model':
Expand Down

0 comments on commit df5bb69

Please sign in to comment.