forked from facebookresearch/fairseq
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
adding support for Italian Umberto ( umberto.commoncrawl and umberto.…
…wikipedia ) from Musixmatch (facebookresearch#1008) Summary: Pull Request resolved: fairinternal/fairseq-py#1008 # Before submitting - [ ] Was this discussed/approved via a GitHub issue? (no need for typos, doc improvements) - [ ] Did you read the [contributor guideline](https://github.com/pytorch/fairseq/blob/master/CONTRIBUTING.md)? - [ ] Did you make sure to update the docs? - [ ] Did you write any new necessary tests? ## What does this PR do? Fixes # (issue). ## PR review Anyone in the community is free to review the PR once the tests have passed. If we didn't discuss your PR in GitHub issues there's a high chance it will not be merged. ## Did you have fun? Make sure you had fun coding 🙃 Pull Request resolved: facebookresearch#1639 Differential Revision: D19555377 Pulled By: myleott fbshipit-source-id: 8ef2b6635a2c609f6ed7dd8ba403eba0787590d8
- Loading branch information
1 parent
4f71c63
commit cce6dcb
Showing
4 changed files
with
76 additions
and
50 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
# Copyright (c) Facebook, Inc. and its affiliates. | ||
# | ||
# This source code is licensed under the MIT license found in the | ||
# LICENSE file in the root directory of this source tree. | ||
""" | ||
CamemBERT: a Tasty French Language Model | ||
""" | ||
|
||
from fairseq.models import register_model | ||
|
||
from .hub_interface import RobertaHubInterface | ||
from .model import RobertaModel | ||
|
||
|
||
@register_model('camembert')
class CamembertModel(RobertaModel):
    """``RobertaModel`` subclass registered with fairseq under the name ``camembert``."""

    @classmethod
    def hub_models(cls):
        """Return the mapping from published model names to their archive URLs."""
        return {
            # Served over HTTPS: the archive holds a checkpoint that is
            # deserialized after download, so it must not be fetched in clear text.
            'camembert.v0': 'https://dl.fbaipublicfiles.com/fairseq/models/camembert.v0.tar.gz',
        }

    @classmethod
    def from_pretrained(
        cls,
        model_name_or_path,
        checkpoint_file='model.pt',
        data_name_or_path='.',
        bpe='sentencepiece',
        **kwargs
    ):
        """Load a pretrained model and wrap it in a ``RobertaHubInterface``.

        Args:
            model_name_or_path: forwarded to ``hub_utils.from_pretrained``
                together with ``hub_models()`` as the archive map; presumably
                either a key of that map or a path/URL -- confirm against
                ``hub_utils``.
            checkpoint_file: checkpoint file name forwarded to ``hub_utils``.
            data_name_or_path: data location forwarded to ``hub_utils``.
            bpe: name of the BPE scheme forwarded to ``hub_utils``.
            **kwargs: extra keyword arguments forwarded unchanged.

        Returns:
            A ``RobertaHubInterface`` built from the loaded args, the task and
            the first of the loaded models.
        """
        # Function-scope import, kept as in the original implementation.
        from fairseq import hub_utils

        loaded = hub_utils.from_pretrained(
            model_name_or_path,
            checkpoint_file,
            data_name_or_path,
            archive_map=cls.hub_models(),
            bpe=bpe,
            load_checkpoint_heads=True,
            **kwargs
        )
        return RobertaHubInterface(loaded['args'], loaded['task'], loaded['models'][0])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
# Copyright (c) Facebook, Inc. and its affiliates. | ||
# | ||
# This source code is licensed under the MIT license found in the | ||
# LICENSE file in the root directory of this source tree. | ||
""" | ||
Unsupervised Cross-lingual Representation Learning at Scale | ||
""" | ||
|
||
from fairseq.models import register_model | ||
|
||
from .hub_interface import RobertaHubInterface | ||
from .model import RobertaModel | ||
|
||
|
||
@register_model('xlmr')
class XLMRModel(RobertaModel):
    """``RobertaModel`` subclass registered with fairseq under the name ``xlmr``."""

    @classmethod
    def hub_models(cls):
        """Return the mapping from published model names to their archive URLs."""
        # Served over HTTPS: the archives hold checkpoints that are
        # deserialized after download, so they must not be fetched in clear text.
        return {
            'xlmr.base': 'https://dl.fbaipublicfiles.com/fairseq/models/xlmr.base.tar.gz',
            'xlmr.large': 'https://dl.fbaipublicfiles.com/fairseq/models/xlmr.large.tar.gz',
        }

    @classmethod
    def from_pretrained(
        cls,
        model_name_or_path,
        checkpoint_file='model.pt',
        data_name_or_path='.',
        bpe='sentencepiece',
        **kwargs
    ):
        """Load a pretrained model and wrap it in a ``RobertaHubInterface``.

        Args:
            model_name_or_path: forwarded to ``hub_utils.from_pretrained``
                together with ``hub_models()`` as the archive map; presumably
                either a key of that map or a path/URL -- confirm against
                ``hub_utils``.
            checkpoint_file: checkpoint file name forwarded to ``hub_utils``.
            data_name_or_path: data location forwarded to ``hub_utils``.
            bpe: name of the BPE scheme forwarded to ``hub_utils``.
            **kwargs: extra keyword arguments forwarded unchanged.

        Returns:
            A ``RobertaHubInterface`` built from the loaded args, the task and
            the first of the loaded models.
        """
        # Function-scope import, kept as in the original implementation.
        from fairseq import hub_utils

        loaded = hub_utils.from_pretrained(
            model_name_or_path,
            checkpoint_file,
            data_name_or_path,
            archive_map=cls.hub_models(),
            bpe=bpe,
            load_checkpoint_heads=True,
            **kwargs
        )
        return RobertaHubInterface(loaded['args'], loaded['task'], loaded['models'][0])