Skip to content

Commit 17d3df6

Browse files
committed
use torchrun instead of torch.distributed.launch
torch.distributed.launch deprecated
1 parent 1458729 commit 17d3df6

21 files changed

+124
-125
lines changed

argparser.py

-1
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,6 @@ def get_argparser():
7777
parser = argparse.ArgumentParser()
7878

7979
# Performance Options
80-
parser.add_argument("--local_rank", type=int, default=0)
8180
parser.add_argument("--random_seed", type=int, default=42, help="random seed (default: 42)")
8281
parser.add_argument("--num_workers", type=int, default=4, help='number of workers (default: 1)')
8382

run.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ def get_dataset(opts):
135135

136136
def main(opts):
137137
dist.init_process_group(backend='nccl', init_method='env://')
138-
device_id, device = opts.local_rank, torch.device(opts.local_rank)
138+
device_id, device = int(os.environ['LOCAL_RANK']), torch.device(int(os.environ['LOCAL_RANK']))
139139
rank, world_size = dist.get_rank(), dist.get_world_size()
140140
torch.cuda.set_device(device_id)
141141

scripts/ade/joint.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ echo "Writing in ${RESULTSFILE}"
3131
BATCH_SIZE=8
3232
EPOCHS=60
3333

34-
CUDA_VISIBLE_DEVICES=${GPU} python3 -m torch.distributed.launch --master_port ${PORT} --nproc_per_node=${NB_GPU} run.py --date ${START_DATE} --data_root ${DATA_ROOT} --overlap --batch_size ${BATCH_SIZE} --dataset ${DATASET} --name ${NAME} --task ${TASK} --lr 0.02 --epochs ${EPOCHS} --method ${METHOD} --opt_level O1 ${OPTIONS}
34+
CUDA_VISIBLE_DEVICES=${GPU} torchrun --master_port ${PORT} --nproc_per_node=${NB_GPU} run.py --date ${START_DATE} --data_root ${DATA_ROOT} --overlap --batch_size ${BATCH_SIZE} --dataset ${DATASET} --name ${NAME} --task ${TASK} --lr 0.02 --epochs ${EPOCHS} --method ${METHOD} --opt_level O1 ${OPTIONS}
3535

3636
echo ${SCREENNAME}
3737

scripts/ade/lgkd_ade_100-10.sh

+6-6
Original file line numberDiff line numberDiff line change
@@ -44,12 +44,12 @@ EPOCHS=60
4444
PREV_KD=1
4545
NOVEL_KD=1
4646

47-
CUDA_VISIBLE_DEVICES=${GPU} python3 -m torch.distributed.launch --master_port ${PORT} --nproc_per_node=${NB_GPU} run.py --date ${START_DATE} --data_root ${DATA_ROOT} --overlap --batch_size ${INITIAL_BATCH_SIZE} --dataset ${DATASET} --name ${NAME} --task ${TASK} --step 0 --lr 0.02 --epochs ${INITIAL_EPOCHS} --method ${METHOD} --opt_level O1 ${OPTIONS}
48-
CUDA_VISIBLE_DEVICES=${GPU} python3 -m torch.distributed.launch --master_port ${PORT} --nproc_per_node=${NB_GPU} run.py --date ${START_DATE} --data_root ${DATA_ROOT} --overlap --batch_size ${BATCH_SIZE} --dataset ${DATASET} --name ${NAME} --task ${TASK} --step 1 --lr 0.002 --epochs ${EPOCHS} --method ${METHOD} --opt_level O1 ${OPTIONS} --prev_kd ${PREV_KD} --novel_kd ${NOVEL_KD}
49-
CUDA_VISIBLE_DEVICES=${GPU} python3 -m torch.distributed.launch --master_port ${PORT} --nproc_per_node=${NB_GPU} run.py --date ${START_DATE} --data_root ${DATA_ROOT} --overlap --batch_size ${BATCH_SIZE} --dataset ${DATASET} --name ${NAME} --task ${TASK} --step 2 --lr 0.002 --epochs ${EPOCHS} --method ${METHOD} --opt_level O1 ${OPTIONS} --prev_kd ${PREV_KD} --novel_kd ${NOVEL_KD}
50-
CUDA_VISIBLE_DEVICES=${GPU} python3 -m torch.distributed.launch --master_port ${PORT} --nproc_per_node=${NB_GPU} run.py --date ${START_DATE} --data_root ${DATA_ROOT} --overlap --batch_size ${BATCH_SIZE} --dataset ${DATASET} --name ${NAME} --task ${TASK} --step 3 --lr 0.002 --epochs ${EPOCHS} --method ${METHOD} --opt_level O1 ${OPTIONS} --prev_kd ${PREV_KD} --novel_kd ${NOVEL_KD}
51-
CUDA_VISIBLE_DEVICES=${GPU} python3 -m torch.distributed.launch --master_port ${PORT} --nproc_per_node=${NB_GPU} run.py --date ${START_DATE} --data_root ${DATA_ROOT} --overlap --batch_size ${BATCH_SIZE} --dataset ${DATASET} --name ${NAME} --task ${TASK} --step 4 --lr 0.002 --epochs ${EPOCHS} --method ${METHOD} --opt_level O1 ${OPTIONS} --prev_kd ${PREV_KD} --novel_kd ${NOVEL_KD}
52-
CUDA_VISIBLE_DEVICES=${GPU} python3 -m torch.distributed.launch --master_port ${PORT} --nproc_per_node=${NB_GPU} run.py --date ${START_DATE} --data_root ${DATA_ROOT} --overlap --batch_size ${BATCH_SIZE} --dataset ${DATASET} --name ${NAME} --task ${TASK} --step 5 --lr 0.002 --epochs ${EPOCHS} --method ${METHOD} --opt_level O1 ${OPTIONS} --prev_kd ${PREV_KD} --novel_kd ${NOVEL_KD}
47+
CUDA_VISIBLE_DEVICES=${GPU} torchrun --master_port ${PORT} --nproc_per_node=${NB_GPU} run.py --date ${START_DATE} --data_root ${DATA_ROOT} --overlap --batch_size ${INITIAL_BATCH_SIZE} --dataset ${DATASET} --name ${NAME} --task ${TASK} --step 0 --lr 0.02 --epochs ${INITIAL_EPOCHS} --method ${METHOD} --opt_level O1 ${OPTIONS}
48+
CUDA_VISIBLE_DEVICES=${GPU} torchrun --master_port ${PORT} --nproc_per_node=${NB_GPU} run.py --date ${START_DATE} --data_root ${DATA_ROOT} --overlap --batch_size ${BATCH_SIZE} --dataset ${DATASET} --name ${NAME} --task ${TASK} --step 1 --lr 0.002 --epochs ${EPOCHS} --method ${METHOD} --opt_level O1 ${OPTIONS} --prev_kd ${PREV_KD} --novel_kd ${NOVEL_KD}
49+
CUDA_VISIBLE_DEVICES=${GPU} torchrun --master_port ${PORT} --nproc_per_node=${NB_GPU} run.py --date ${START_DATE} --data_root ${DATA_ROOT} --overlap --batch_size ${BATCH_SIZE} --dataset ${DATASET} --name ${NAME} --task ${TASK} --step 2 --lr 0.002 --epochs ${EPOCHS} --method ${METHOD} --opt_level O1 ${OPTIONS} --prev_kd ${PREV_KD} --novel_kd ${NOVEL_KD}
50+
CUDA_VISIBLE_DEVICES=${GPU} torchrun --master_port ${PORT} --nproc_per_node=${NB_GPU} run.py --date ${START_DATE} --data_root ${DATA_ROOT} --overlap --batch_size ${BATCH_SIZE} --dataset ${DATASET} --name ${NAME} --task ${TASK} --step 3 --lr 0.002 --epochs ${EPOCHS} --method ${METHOD} --opt_level O1 ${OPTIONS} --prev_kd ${PREV_KD} --novel_kd ${NOVEL_KD}
51+
CUDA_VISIBLE_DEVICES=${GPU} torchrun --master_port ${PORT} --nproc_per_node=${NB_GPU} run.py --date ${START_DATE} --data_root ${DATA_ROOT} --overlap --batch_size ${BATCH_SIZE} --dataset ${DATASET} --name ${NAME} --task ${TASK} --step 4 --lr 0.002 --epochs ${EPOCHS} --method ${METHOD} --opt_level O1 ${OPTIONS} --prev_kd ${PREV_KD} --novel_kd ${NOVEL_KD}
52+
CUDA_VISIBLE_DEVICES=${GPU} torchrun --master_port ${PORT} --nproc_per_node=${NB_GPU} run.py --date ${START_DATE} --data_root ${DATA_ROOT} --overlap --batch_size ${BATCH_SIZE} --dataset ${DATASET} --name ${NAME} --task ${TASK} --step 5 --lr 0.002 --epochs ${EPOCHS} --method ${METHOD} --opt_level O1 ${OPTIONS} --prev_kd ${PREV_KD} --novel_kd ${NOVEL_KD}
5353
python3 average_csv.py ${RESULTSFILE}
5454

5555
echo ${SCREENNAME}

scripts/ade/lgkd_ade_100-50.sh

+2-2
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,8 @@ EPOCHS=60
4444
PREV_KD=1
4545
NOVEL_KD=20
4646

47-
CUDA_VISIBLE_DEVICES=${GPU} python3 -m torch.distributed.launch --master_port ${PORT} --nproc_per_node=${NB_GPU} run.py --date ${START_DATE} --data_root ${DATA_ROOT} --overlap --batch_size ${INITIAL_BATCH_SIZE} --dataset ${DATASET} --name ${NAME} --task ${TASK} --step 0 --lr 0.02 --epochs ${INITIAL_EPOCHS} --method ${METHOD} --opt_level O1 ${OPTIONS}
48-
CUDA_VISIBLE_DEVICES=${GPU} python3 -m torch.distributed.launch --master_port ${PORT} --nproc_per_node=${NB_GPU} run.py --date ${START_DATE} --data_root ${DATA_ROOT} --overlap --batch_size ${BATCH_SIZE} --dataset ${DATASET} --name ${NAME} --task ${TASK} --step 1 --lr 0.002 --epochs ${EPOCHS} --method ${METHOD} --opt_level O1 ${OPTIONS} --prev_kd ${PREV_KD} --novel_kd ${NOVEL_KD}
47+
CUDA_VISIBLE_DEVICES=${GPU} torchrun --master_port ${PORT} --nproc_per_node=${NB_GPU} run.py --date ${START_DATE} --data_root ${DATA_ROOT} --overlap --batch_size ${INITIAL_BATCH_SIZE} --dataset ${DATASET} --name ${NAME} --task ${TASK} --step 0 --lr 0.02 --epochs ${INITIAL_EPOCHS} --method ${METHOD} --opt_level O1 ${OPTIONS}
48+
CUDA_VISIBLE_DEVICES=${GPU} torchrun --master_port ${PORT} --nproc_per_node=${NB_GPU} run.py --date ${START_DATE} --data_root ${DATA_ROOT} --overlap --batch_size ${BATCH_SIZE} --dataset ${DATASET} --name ${NAME} --task ${TASK} --step 1 --lr 0.002 --epochs ${EPOCHS} --method ${METHOD} --opt_level O1 ${OPTIONS} --prev_kd ${PREV_KD} --novel_kd ${NOVEL_KD}
4949
python3 average_csv.py ${RESULTSFILE}
5050

5151
echo ${SCREENNAME}

scripts/ade/lgkd_ade_50.sh

+3-3
Original file line numberDiff line numberDiff line change
@@ -44,9 +44,9 @@ EPOCHS=60
4444
PREV_KD=2
4545
NOVEL_KD=10
4646

47-
CUDA_VISIBLE_DEVICES=${GPU} python3 -m torch.distributed.launch --master_port ${PORT} --nproc_per_node=${NB_GPU} run.py --date ${START_DATE} --data_root ${DATA_ROOT} --overlap --batch_size ${INITIAL_BATCH_SIZE} --dataset ${DATASET} --name ${NAME} --task ${TASK} --step 0 --lr 0.02 --epochs ${INITIAL_EPOCHS} --method ${METHOD} --opt_level O1 ${OPTIONS}
48-
CUDA_VISIBLE_DEVICES=${GPU} python3 -m torch.distributed.launch --master_port ${PORT} --nproc_per_node=${NB_GPU} run.py --date ${START_DATE} --data_root ${DATA_ROOT} --overlap --batch_size ${BATCH_SIZE} --dataset ${DATASET} --name ${NAME} --task ${TASK} --step 1 --lr 0.002 --epochs ${EPOCHS} --method ${METHOD} --opt_level O1 ${OPTIONS} --prev_kd ${PREV_KD} --novel_kd ${NOVEL_KD}
49-
CUDA_VISIBLE_DEVICES=${GPU} python3 -m torch.distributed.launch --master_port ${PORT} --nproc_per_node=${NB_GPU} run.py --date ${START_DATE} --data_root ${DATA_ROOT} --overlap --batch_size ${BATCH_SIZE} --dataset ${DATASET} --name ${NAME} --task ${TASK} --step 2 --lr 0.002 --epochs ${EPOCHS} --method ${METHOD} --opt_level O1 ${OPTIONS} --prev_kd ${PREV_KD} --novel_kd ${NOVEL_KD}
47+
CUDA_VISIBLE_DEVICES=${GPU} torchrun --master_port ${PORT} --nproc_per_node=${NB_GPU} run.py --date ${START_DATE} --data_root ${DATA_ROOT} --overlap --batch_size ${INITIAL_BATCH_SIZE} --dataset ${DATASET} --name ${NAME} --task ${TASK} --step 0 --lr 0.02 --epochs ${INITIAL_EPOCHS} --method ${METHOD} --opt_level O1 ${OPTIONS}
48+
CUDA_VISIBLE_DEVICES=${GPU} torchrun --master_port ${PORT} --nproc_per_node=${NB_GPU} run.py --date ${START_DATE} --data_root ${DATA_ROOT} --overlap --batch_size ${BATCH_SIZE} --dataset ${DATASET} --name ${NAME} --task ${TASK} --step 1 --lr 0.002 --epochs ${EPOCHS} --method ${METHOD} --opt_level O1 ${OPTIONS} --prev_kd ${PREV_KD} --novel_kd ${NOVEL_KD}
49+
CUDA_VISIBLE_DEVICES=${GPU} torchrun --master_port ${PORT} --nproc_per_node=${NB_GPU} run.py --date ${START_DATE} --data_root ${DATA_ROOT} --overlap --batch_size ${BATCH_SIZE} --dataset ${DATASET} --name ${NAME} --task ${TASK} --step 2 --lr 0.002 --epochs ${EPOCHS} --method ${METHOD} --opt_level O1 ${OPTIONS} --prev_kd ${PREV_KD} --novel_kd ${NOVEL_KD}
5050
python3 average_csv.py ${RESULTSFILE}
5151

5252
echo ${SCREENNAME}

scripts/ade/plop_ade_100-10.sh

+6-6
Original file line numberDiff line numberDiff line change
@@ -42,12 +42,12 @@ BATCH_SIZE=8
4242
INITIAL_EPOCHS=60
4343
EPOCHS=60
4444

45-
CUDA_VISIBLE_DEVICES=${GPU} python3 -m torch.distributed.launch --master_port ${PORT} --nproc_per_node=${NB_GPU} run.py --date ${START_DATE} --data_root ${DATA_ROOT} --overlap --batch_size ${INITIAL_BATCH_SIZE} --dataset ${DATASET} --name ${NAME} --task ${TASK} --step 0 --lr 0.02 --epochs ${INITIAL_EPOCHS} --method ${METHOD} --opt_level O1 ${OPTIONS}
46-
CUDA_VISIBLE_DEVICES=${GPU} python3 -m torch.distributed.launch --master_port ${PORT} --nproc_per_node=${NB_GPU} run.py --date ${START_DATE} --data_root ${DATA_ROOT} --overlap --batch_size ${BATCH_SIZE} --dataset ${DATASET} --name ${NAME} --task ${TASK} --step 1 --lr 0.002 --epochs ${EPOCHS} --method ${METHOD} --opt_level O1 ${OPTIONS}
47-
CUDA_VISIBLE_DEVICES=${GPU} python3 -m torch.distributed.launch --master_port ${PORT} --nproc_per_node=${NB_GPU} run.py --date ${START_DATE} --data_root ${DATA_ROOT} --overlap --batch_size ${BATCH_SIZE} --dataset ${DATASET} --name ${NAME} --task ${TASK} --step 2 --lr 0.002 --epochs ${EPOCHS} --method ${METHOD} --opt_level O1 ${OPTIONS}
48-
CUDA_VISIBLE_DEVICES=${GPU} python3 -m torch.distributed.launch --master_port ${PORT} --nproc_per_node=${NB_GPU} run.py --date ${START_DATE} --data_root ${DATA_ROOT} --overlap --batch_size ${BATCH_SIZE} --dataset ${DATASET} --name ${NAME} --task ${TASK} --step 3 --lr 0.002 --epochs ${EPOCHS} --method ${METHOD} --opt_level O1 ${OPTIONS}
49-
CUDA_VISIBLE_DEVICES=${GPU} python3 -m torch.distributed.launch --master_port ${PORT} --nproc_per_node=${NB_GPU} run.py --date ${START_DATE} --data_root ${DATA_ROOT} --overlap --batch_size ${BATCH_SIZE} --dataset ${DATASET} --name ${NAME} --task ${TASK} --step 4 --lr 0.002 --epochs ${EPOCHS} --method ${METHOD} --opt_level O1 ${OPTIONS}
50-
CUDA_VISIBLE_DEVICES=${GPU} python3 -m torch.distributed.launch --master_port ${PORT} --nproc_per_node=${NB_GPU} run.py --date ${START_DATE} --data_root ${DATA_ROOT} --overlap --batch_size ${BATCH_SIZE} --dataset ${DATASET} --name ${NAME} --task ${TASK} --step 5 --lr 0.002 --epochs ${EPOCHS} --method ${METHOD} --opt_level O1 ${OPTIONS}
45+
CUDA_VISIBLE_DEVICES=${GPU} torchrun --master_port ${PORT} --nproc_per_node=${NB_GPU} run.py --date ${START_DATE} --data_root ${DATA_ROOT} --overlap --batch_size ${INITIAL_BATCH_SIZE} --dataset ${DATASET} --name ${NAME} --task ${TASK} --step 0 --lr 0.02 --epochs ${INITIAL_EPOCHS} --method ${METHOD} --opt_level O1 ${OPTIONS}
46+
CUDA_VISIBLE_DEVICES=${GPU} torchrun --master_port ${PORT} --nproc_per_node=${NB_GPU} run.py --date ${START_DATE} --data_root ${DATA_ROOT} --overlap --batch_size ${BATCH_SIZE} --dataset ${DATASET} --name ${NAME} --task ${TASK} --step 1 --lr 0.002 --epochs ${EPOCHS} --method ${METHOD} --opt_level O1 ${OPTIONS}
47+
CUDA_VISIBLE_DEVICES=${GPU} torchrun --master_port ${PORT} --nproc_per_node=${NB_GPU} run.py --date ${START_DATE} --data_root ${DATA_ROOT} --overlap --batch_size ${BATCH_SIZE} --dataset ${DATASET} --name ${NAME} --task ${TASK} --step 2 --lr 0.002 --epochs ${EPOCHS} --method ${METHOD} --opt_level O1 ${OPTIONS}
48+
CUDA_VISIBLE_DEVICES=${GPU} torchrun --master_port ${PORT} --nproc_per_node=${NB_GPU} run.py --date ${START_DATE} --data_root ${DATA_ROOT} --overlap --batch_size ${BATCH_SIZE} --dataset ${DATASET} --name ${NAME} --task ${TASK} --step 3 --lr 0.002 --epochs ${EPOCHS} --method ${METHOD} --opt_level O1 ${OPTIONS}
49+
CUDA_VISIBLE_DEVICES=${GPU} torchrun --master_port ${PORT} --nproc_per_node=${NB_GPU} run.py --date ${START_DATE} --data_root ${DATA_ROOT} --overlap --batch_size ${BATCH_SIZE} --dataset ${DATASET} --name ${NAME} --task ${TASK} --step 4 --lr 0.002 --epochs ${EPOCHS} --method ${METHOD} --opt_level O1 ${OPTIONS}
50+
CUDA_VISIBLE_DEVICES=${GPU} torchrun --master_port ${PORT} --nproc_per_node=${NB_GPU} run.py --date ${START_DATE} --data_root ${DATA_ROOT} --overlap --batch_size ${BATCH_SIZE} --dataset ${DATASET} --name ${NAME} --task ${TASK} --step 5 --lr 0.002 --epochs ${EPOCHS} --method ${METHOD} --opt_level O1 ${OPTIONS}
5151
python3 average_csv.py ${RESULTSFILE}
5252

5353
echo ${SCREENNAME}

0 commit comments

Comments
 (0)