From da64982964dfb48e693335320f032180ce710f76 Mon Sep 17 00:00:00 2001 From: Mike Walmsley Date: Fri, 3 Nov 2023 17:42:47 -0400 Subject: [PATCH] cache tweak --- only_for_me/narval/requirements.txt | 1 + only_for_me/narval/train.py | 5 +++-- only_for_me/narval/train.sh | 4 +--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/only_for_me/narval/requirements.txt b/only_for_me/narval/requirements.txt index 9f22e9df..0daeb251 100644 --- a/only_for_me/narval/requirements.txt +++ b/only_for_me/narval/requirements.txt @@ -12,6 +12,7 @@ astropy pandas matplotlib fastparquet # replacing pyarrow below +webdataset # below already required by packages above # tqdm diff --git a/only_for_me/narval/train.py b/only_for_me/narval/train.py index aac758f6..c90d67c2 100644 --- a/only_for_me/narval/train.py +++ b/only_for_me/narval/train.py @@ -117,7 +117,8 @@ color=args.color, resize_after_crop=args.resize_after_crop, # hardware parameters - gpus=args.gpus, + # gpus=args.gpus, + gpus=0, nodes=args.nodes, mixed_precision=args.mixed_precision, wandb_logger=wandb_logger, @@ -125,7 +126,7 @@ num_workers=11, # system has 24 cpu, 12 cpu per gpu, leave a little wiggle room random_state=random_state, learning_rate=1e-3, - cache_dir=os.environ['SLURM_TMPDIR'] + cache_dir=os.environ['SLURM_TMPDIR'] + '/cache' ) wandb.finish() \ No newline at end of file diff --git a/only_for_me/narval/train.sh b/only_for_me/narval/train.sh index 9ab4851b..1299b1da 100644 --- a/only_for_me/narval/train.sh +++ b/only_for_me/narval/train.sh @@ -11,9 +11,7 @@ nvidia-smi PYTHON=/home/walml/envs/zoobot39_dev/bin/python -cp -r /project/def-bovy/walml/data/roots/galaxy_mnist $SLURM_TMPDIR/walml/finetune/data/ - -ls $SLURM_TMPDIR/walml/finetune/data/galaxy_mnist +mkdir $SLURM_TMPDIR/cache export NCCL_BLOCKING_WAIT=1 #Set this environment variable if you wish to use the NCCL backend for inter-GPU communication. # export MASTER_ADDR=$(hostname) #Store the master node’s IP address in the MASTER_ADDR environment variable.