From cce6dcb1cca85955a82879ea5064fe8202e8f412 Mon Sep 17 00:00:00 2001
From: Simone Francia
Date: Fri, 24 Jan 2020 09:58:19 -0800
Subject: [PATCH] Add support for Italian UmBERTo (umberto.commoncrawl and
 umberto.wikipedia) from Musixmatch (#1008)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Summary:
Pull Request resolved: https://github.com/fairinternal/fairseq-py/pull/1008

# Before submitting

- [ ] Was this discussed/approved via a Github issue? (no need for typos, doc improvements)
- [ ] Did you read the [contributor guideline](https://github.com/pytorch/fairseq/blob/master/CONTRIBUTING.md)?
- [ ] Did you make sure to update the docs?
- [ ] Did you write any new necessary tests?

## What does this PR do?
Fixes # (issue).

## PR review
Anyone in the community is free to review the PR once the tests have passed.
If we didn't discuss your PR in Github issues, there's a high chance it will
not be merged.

## Did you have fun?
Make sure you had fun coding 🙃

Pull Request resolved: https://github.com/pytorch/fairseq/pull/1639

Differential Revision: D19555377

Pulled By: myleott

fbshipit-source-id: 8ef2b6635a2c609f6ed7dd8ba403eba0787590d8
---
 examples/roberta/README.md                |  5 ++-
 fairseq/models/roberta/model.py           | 48 ------------------------
 fairseq/models/roberta/model_camembert.py | 36 +++++++++++++++++++
 fairseq/models/roberta/model_xlmr.py      | 37 ++++++++++++++++++++
 4 files changed, 76 insertions(+), 50 deletions(-)
 create mode 100644 fairseq/models/roberta/model_camembert.py
 create mode 100644 fairseq/models/roberta/model_xlmr.py

diff --git a/examples/roberta/README.md b/examples/roberta/README.md
index 0c603f2002..f9250f2ea4 100644
--- a/examples/roberta/README.md
+++ b/examples/roberta/README.md
@@ -8,8 +8,9 @@ RoBERTa iterates on BERT's pretraining procedure, including training the model l
 
 ### What's New:
 
-- November 2019: French model (CamemBERT) is available [CamemBERT](https://github.com/pytorch/fairseq/tree/master/examples/camembert).
-- November 2019: Multilingual encoder (XLM-RoBERTa) is available [XLM-R](https://github.com/pytorch/fairseq/tree/master/examples/xlmr).
+- January 2020: Italian model (UmBERTo) is available from [Musixmatch Research](https://github.com/musixmatchresearch): [UmBERTo](https://github.com/musixmatchresearch/umberto).
+- November 2019: French model (CamemBERT) is available: [CamemBERT](https://github.com/pytorch/fairseq/tree/master/examples/camembert).
+- November 2019: Multilingual encoder (XLM-RoBERTa) is available: [XLM-R](https://github.com/pytorch/fairseq/tree/master/examples/xlmr).
 - September 2019: TensorFlow and TPU support via the [transformers library](https://github.com/huggingface/transformers).
 - August 2019: RoBERTa is now supported in the [pytorch-transformers library](https://github.com/huggingface/pytorch-transformers).
 - August 2019: Added [tutorial for finetuning on WinoGrande](https://github.com/pytorch/fairseq/tree/master/examples/roberta/wsc#roberta-training-on-winogrande-dataset).
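
For context, a minimal usage sketch (editor's illustration, not part of the
patch): the names announced above ('xlmr.base', 'xlmr.large', 'camembert.v0')
are exposed through torch.hub just like the existing RoBERTa checkpoints. The
input sentence is illustrative, and the sentencepiece package must be
installed since these models use sentencepiece BPE.

    import torch

    # 'xlmr.large' matches the hub_models() key registered further down in
    # this patch; torch.hub downloads and caches the archive on first use.
    xlmr = torch.hub.load('pytorch/fairseq', 'xlmr.large')
    xlmr.eval()  # disable dropout so feature extraction is deterministic

    tokens = xlmr.encode('Hello world!')      # LongTensor of sentencepiece ids
    features = xlmr.extract_features(tokens)  # (1, seq_len, 1024) for xlmr.large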
diff --git a/fairseq/models/roberta/model.py b/fairseq/models/roberta/model.py
index 925298c204..95a5ff33de 100644
--- a/fairseq/models/roberta/model.py
+++ b/fairseq/models/roberta/model.py
@@ -199,53 +199,6 @@ def upgrade_state_dict_named(self, state_dict, name):
                 state_dict[prefix + 'classification_heads.' + k] = v
 
 
-@register_model('xlmr')
-class XLMRModel(RobertaModel):
-    @classmethod
-    def hub_models(cls):
-        return {
-            'xlmr.base': 'http://dl.fbaipublicfiles.com/fairseq/models/xlmr.base.tar.gz',
-            'xlmr.large': 'http://dl.fbaipublicfiles.com/fairseq/models/xlmr.large.tar.gz',
-        }
-
-    @classmethod
-    def from_pretrained(cls, model_name_or_path, checkpoint_file='model.pt', data_name_or_path='.', bpe='sentencepiece', **kwargs):
-        from fairseq import hub_utils
-        x = hub_utils.from_pretrained(
-            model_name_or_path,
-            checkpoint_file,
-            data_name_or_path,
-            archive_map=cls.hub_models(),
-            bpe=bpe,
-            load_checkpoint_heads=True,
-            **kwargs,
-        )
-        return RobertaHubInterface(x['args'], x['task'], x['models'][0])
-
-
-@register_model('camembert')
-class CamembertModel(RobertaModel):
-    @classmethod
-    def hub_models(cls):
-        return {
-            'camembert.v0': 'http://dl.fbaipublicfiles.com/fairseq/models/camembert.v0.tar.gz',
-        }
-
-    @classmethod
-    def from_pretrained(cls, model_name_or_path, checkpoint_file='model.pt', data_name_or_path='.', bpe='sentencepiece', **kwargs):
-        from fairseq import hub_utils
-        x = hub_utils.from_pretrained(
-            model_name_or_path,
-            checkpoint_file,
-            data_name_or_path,
-            archive_map=cls.hub_models(),
-            bpe=bpe,
-            load_checkpoint_heads=True,
-            **kwargs,
-        )
-        return RobertaHubInterface(x['args'], x['task'], x['models'][0])
-
-
 class RobertaLMHead(nn.Module):
     """Head for masked language modeling."""
 
@@ -413,5 +366,4 @@ def xlm_architecture(args):
     args.encoder_embed_dim = getattr(args, 'encoder_embed_dim', 1280)
     args.encoder_ffn_embed_dim = getattr(args, 'encoder_ffn_embed_dim', 1280*4)
     args.encoder_attention_heads = getattr(args, 'encoder_attention_heads', 16)
-
     base_architecture(args)

diff --git a/fairseq/models/roberta/model_camembert.py b/fairseq/models/roberta/model_camembert.py
new file mode 100644
index 0000000000..b62e3e3197
--- /dev/null
+++ b/fairseq/models/roberta/model_camembert.py
@@ -0,0 +1,36 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+"""
+CamemBERT: a Tasty French Language Model
+"""
+
+from fairseq.models import register_model
+
+from .hub_interface import RobertaHubInterface
+from .model import RobertaModel
+
+
+@register_model('camembert')
+class CamembertModel(RobertaModel):
+
+    @classmethod
+    def hub_models(cls):
+        return {
+            'camembert.v0': 'http://dl.fbaipublicfiles.com/fairseq/models/camembert.v0.tar.gz',
+        }
+
+    @classmethod
+    def from_pretrained(cls, model_name_or_path, checkpoint_file='model.pt', data_name_or_path='.', bpe='sentencepiece', **kwargs):
+        from fairseq import hub_utils
+        x = hub_utils.from_pretrained(
+            model_name_or_path,
+            checkpoint_file,
+            data_name_or_path,
+            archive_map=cls.hub_models(),
+            bpe=bpe,
+            load_checkpoint_heads=True,
+            **kwargs,
+        )
+        return RobertaHubInterface(x['args'], x['task'], x['models'][0])
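
A minimal sketch of exercising the class above directly (assuming the
camembert.v0 archive has been downloaded and extracted; the local path is a
placeholder). Note that from_pretrained returns a RobertaHubInterface rather
than the model class itself, so the usual helpers (encode, fill_mask,
extract_features) are available on the result.

    # Import straight from the new module added by this patch.
    from fairseq.models.roberta.model_camembert import CamembertModel

    camembert = CamembertModel.from_pretrained('/path/to/camembert.v0')
    camembert.eval()  # disable dropout

    # Query the top-3 completions for the masked token.
    results = camembert.fill_mask('Le camembert est <mask> :)', topk=3)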
+""" +Unsupervised Cross-lingual Representation Learning at Scale +""" + +from fairseq.models import register_model + +from .hub_interface import RobertaHubInterface +from .model import RobertaModel + + +@register_model('xlmr') +class XLMRModel(RobertaModel): + + @classmethod + def hub_models(cls): + return { + 'xlmr.base': 'http://dl.fbaipublicfiles.com/fairseq/models/xlmr.base.tar.gz', + 'xlmr.large': 'http://dl.fbaipublicfiles.com/fairseq/models/xlmr.large.tar.gz', + } + + @classmethod + def from_pretrained(cls, model_name_or_path, checkpoint_file='model.pt', data_name_or_path='.', bpe='sentencepiece', **kwargs): + from fairseq import hub_utils + x = hub_utils.from_pretrained( + model_name_or_path, + checkpoint_file, + data_name_or_path, + archive_map=cls.hub_models(), + bpe=bpe, + load_checkpoint_heads=True, + **kwargs, + ) + return RobertaHubInterface(x['args'], x['task'], x['models'][0])