diff --git a/README.md b/README.md
index b800f42..1477cd1 100644
--- a/README.md
+++ b/README.md
@@ -159,7 +159,7 @@ python finetune/finetune.py \
 The size of the SE dataset is better manageable when using streaming. We also have to precise the split of the dataset that is used. For more details, check the [dataset's page](https://huggingface.co/datasets/ArmelR/stack-exchange-instruction) on 🤗. Similarly we can modify the command to account for the availability of GPUs
 
 ```bash
-python -m torch.distributed.launch \
+python -m torch.distributed.run \
         --nproc_per_node number_of_gpus finetune/finetune.py \
         --model_path="bigcode/starcoder"\
         --dataset_name="ArmelR/stack-exchange-instruction"\
diff --git a/finetune/finetune.py b/finetune/finetune.py
index 96ab961..a7f6ac9 100644
--- a/finetune/finetune.py
+++ b/finetune/finetune.py
@@ -76,7 +76,6 @@ def get_args():
     parser.add_argument("--num_warmup_steps", type=int, default=100)
     parser.add_argument("--weight_decay", type=float, default=0.05)
 
-    parser.add_argument("--local_rank", type=int, default=0)
     parser.add_argument("--no_fp16", action="store_false")
     parser.add_argument("--bf16", action="store_true", default=True)
     parser.add_argument("--no_gradient_checkpointing", action="store_false", default=False)
@@ -309,4 +308,4 @@ def main(args):
 
     logging.set_verbosity_error()
 
-    main(args)
\ No newline at end of file
+    main(args)