diff --git a/examples/language_model/README.md b/examples/language_model/README.md index d598cd0e20..4b041146e3 100644 --- a/examples/language_model/README.md +++ b/examples/language_model/README.md @@ -82,7 +82,7 @@ $ fairseq-train --task language_modeling data-bin/wikitext-103 \ --max-epoch 35 --arch fconv_lm_dauphin_wikitext103 --optimizer nag \ --lr 1.0 --lr-scheduler reduce_lr_on_plateau --lr-shrink 0.5 \ --clip-norm 0.1 --dropout 0.2 --weight-decay 5e-06 --criterion adaptive_loss \ - --adaptive-softmax-cutoff 10000,20000,200000 --max-tokens 1024 --tokens-per-sample 1024 + --adaptive-softmax-cutoff 10000,20000,200000 --max-tokens 1024 --tokens-per-sample 1024 \ --ddp-backend=no_c10d # Evaluate: