Skip to content

Commit

Permalink
cache tweak
Browse files (browse the repository at this point in the history)
  • Loading branch information
mwalmsley committed Nov 3, 2023
1 parent 6e0c87e commit da64982
Show file tree
Hide file tree
Showing 3 changed files with 5 additions and 5 deletions.
1 change: 1 addition & 0 deletions only_for_me/narval/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ astropy
pandas
matplotlib
fastparquet # replacing pyarrow below
webdataset
# below already required by packages above

# tqdm
Expand Down
5 changes: 3 additions & 2 deletions only_for_me/narval/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,15 +117,16 @@
color=args.color,
resize_after_crop=args.resize_after_crop,
# hardware parameters
gpus=args.gpus,
# gpus=args.gpus,
gpus=0,
nodes=args.nodes,
mixed_precision=args.mixed_precision,
wandb_logger=wandb_logger,
prefetch_factor=4,
num_workers=11, # system has 24 CPUs (12 per GPU); leave a little wiggle room
random_state=random_state,
learning_rate=1e-3,
cache_dir=os.environ['SLURM_TMPDIR']
cache_dir=os.environ['SLURM_TMPDIR'] + '/cache'
)

wandb.finish()
4 changes: 1 addition & 3 deletions only_for_me/narval/train.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,7 @@ nvidia-smi

PYTHON=/home/walml/envs/zoobot39_dev/bin/python

cp -r /project/def-bovy/walml/data/roots/galaxy_mnist $SLURM_TMPDIR/walml/finetune/data/

ls $SLURM_TMPDIR/walml/finetune/data/galaxy_mnist
mkdir $SLURM_TMPDIR/cache

export NCCL_BLOCKING_WAIT=1 #Set this environment variable if you wish to use the NCCL backend for inter-GPU communication.
# export MASTER_ADDR=$(hostname) #Store the master node’s IP address in the MASTER_ADDR environment variable.
Expand Down

0 comments on commit da64982

Please sign in to comment.