Skip to content

Commit

Permalink
Release 1.4.0
Browse files Browse the repository at this point in the history
  • Loading branch information
IgnatovFedor authored Oct 17, 2023
2 parents 34d240c + b5f52cc commit 168e979
Show file tree
Hide file tree
Showing 30 changed files with 3,570 additions and 2,041 deletions.
16 changes: 10 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,15 +26,19 @@ Please leave us [your feedback](https://forms.gle/i64fowQmiVhMMC7f9) on how we c

**Models**

[Named Entity Recognition](http://docs.deeppavlov.ai/en/master/features/models/NER.html) | [Intent/Sentence Classification](http://docs.deeppavlov.ai/en/master/features/models/classifiers.html) |
[Named Entity Recognition](http://docs.deeppavlov.ai/en/master/features/models/NER.html) | [Intent/Sentence Classification](http://docs.deeppavlov.ai/en/master/features/models/classification.html) |

[Question Answering over Text (SQuAD)](http://docs.deeppavlov.ai/en/master/features/models/SQuAD.html) | [Knowledge Base Question Answering](http://docs.deeppavlov.ai/en/master/features/models/kbqa.html)
[Question Answering over Text (SQuAD)](http://docs.deeppavlov.ai/en/master/features/models/SQuAD.html) | [Knowledge Base Question Answering](http://docs.deeppavlov.ai/en/master/features/models/KBQA.html)

[Sentence Similarity/Ranking](http://docs.deeppavlov.ai/en/master/features/models/neural_ranking.html) | [TF-IDF Ranking](http://docs.deeppavlov.ai/en/master/features/models/tfidf_ranking.html)
[Sentence Similarity/Ranking](http://docs.deeppavlov.ai/en/master/features/models/neural_ranking.html) | [TF-IDF Ranking](http://docs.deeppavlov.ai/en/master/features/models/tfidf_ranking.html)

[Automatic Spelling Correction](http://docs.deeppavlov.ai/en/master/features/models/spelling_correction.html) | [Entity Linking](http://docs.deeppavlov.ai/en/master/features/models/entity_linking.html)
[Syntactic Parsing](http://docs.deeppavlov.ai/en/master/features/models/syntax_parser.html) | [Morphological Tagging](http://docs.deeppavlov.ai/en/master/features/models/morpho_tagger.html)

[Open Domain Questions Answering](http://docs.deeppavlov.ai/en/master/features/models/odqa.html) | [Russian SuperGLUE](http://docs.deeppavlov.ai/en/master/features/models/superglue.html)
[Automatic Spelling Correction](http://docs.deeppavlov.ai/en/master/features/models/spelling_correction.html) | [Entity Extraction](http://docs.deeppavlov.ai/en/master/features/models/entity_extraction.html)

[Open Domain Question Answering](http://docs.deeppavlov.ai/en/master/features/models/ODQA.html) | [Russian SuperGLUE](http://docs.deeppavlov.ai/en/master/features/models/superglue.html)

[Relation Extraction](http://docs.deeppavlov.ai/en/master/features/models/relation_extraction.html)

**Embeddings**

Expand Down Expand Up @@ -179,7 +183,7 @@ from deeppavlov import evaluate_model
model = evaluate_model(<config_path>, install=True, download=True)
```

DeepPavlov also [allows](https://docs.deeppavlov.ai/en/master/features/python.html) to build a model from components for
DeepPavlov also [allows](https://docs.deeppavlov.ai/en/master/intro/python.html) you to build a model from components for
inference using Python.

## License
Expand Down
2 changes: 1 addition & 1 deletion deeppavlov/_meta.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = '1.3.0'
__version__ = '1.4.0'
__author__ = 'Neural Networks and Deep Learning lab, MIPT'
__description__ = 'An open source library for building end-to-end dialog systems and training chatbots.'
__keywords__ = ['NLP', 'NER', 'SQUAD', 'Intents', 'Chatbot']
Expand Down
9 changes: 7 additions & 2 deletions deeppavlov/models/kbqa/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,9 +123,11 @@ def correct_variables(query_triplets: List[str], answer_ent: List[str], query_in
for j in range(len(triplet_elements)):
if triplet_elements[j] not in ent_var and triplet_elements[j].startswith("?"):
triplet_elements[j] = query_info["mid_var"]
break
if triplet_elements[j].startswith("?") \
and triplet_elements[j] not in [query_info["mid_var"], query_info["unk_var"]]:
triplet_elements[j] = query_info["unk_var"]
break
query_triplets[i] = " ".join(triplet_elements)
query_triplets[i] = query_triplets[i].replace(ent_var, query_info["unk_var"])
return query_triplets
Expand Down Expand Up @@ -174,8 +176,11 @@ def make_sparql_query(query_info: Tuple[List[str], List[str], List[str], Dict[st
query_triplets = [fill_slots(elem, entities, types, rels, delete_rel_prefix=True) for elem in query_triplets]
query_triplets = correct_variables(query_triplets, answer_ent, query_info_dict)
filled_queries = []
for triplets_p in list(itertools.permutations(query_triplets)):
filled_queries.append(query_from_triplets(triplets_p, answer_ent, query_info_dict))
if any(["qualifier" in filter_info_element for filter_info_element in filter_info]):
filled_queries.append(query_from_triplets(query_triplets, answer_ent, query_info_dict))
else:
for triplets_p in list(itertools.permutations(query_triplets)):
filled_queries.append(query_from_triplets(triplets_p, answer_ent, query_info_dict))
return filled_queries


Expand Down
15 changes: 11 additions & 4 deletions deeppavlov/models/preprocessors/torch_transformers_preprocessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -510,7 +510,7 @@ def __init__(self,
subword_mask_mode: str = "first",
return_features: bool = False,
**kwargs):
self._re_tokenizer = re.compile(r"[\w']+|[^\w ]")
self._re_tokenizer = re.compile(r"[\d]+[\d\.,]+[\d]+|[\w'\.:@]+|[^\w ]")
self.provide_subword_tags = provide_subword_tags
self.mode = kwargs.get('mode')
self.max_seq_length = max_seq_length
Expand All @@ -536,9 +536,16 @@ def __call__(self,
for s in tokens:
tokens_list = []
tokens_offsets_list = []
for elem in re.finditer(self._re_tokenizer, s):
tokens_list.append(elem[0])
tokens_offsets_list.append((elem.start(), elem.end()))
matches = tuple(re.finditer(self._re_tokenizer, s))
for i, elem in enumerate(matches):
if (i == len(matches) - 1) and (elem[0][-1] == '.'):
tokens_list.append(elem[0][:-1])
tokens_list.append('.')
tokens_offsets_list.append((elem.start(), elem.end() - 1))
tokens_offsets_list.append((elem.end() - 1, elem.end()))
else:
tokens_list.append(elem[0])
tokens_offsets_list.append((elem.start(), elem.end()))
tokens_batch.append(tokens_list)
tokens_offsets_batch.append(tokens_offsets_list)
tokens = tokens_batch
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,8 @@ def train_on_batch(self,
labels=b_labels).loss
if self.crf is not None:
self.crf(y, y_masks)
if self.is_data_parallel:
loss = loss.mean()
self._make_step(loss)

return {'loss': loss.item()}
Expand Down
3 changes: 2 additions & 1 deletion deeppavlov/requirements/transformers.txt
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
transformers>=4.13.0,<4.25.0
transformers>=4.13.0,<4.25.0;python_version<"3.8"
transformers==4.30.0;python_version>="3.8"
Loading

0 comments on commit 168e979

Please sign in to comment.