diff --git a/examples/async_bin_relocation_fwbw_drq/async_drq_randomized.py b/examples/async_bin_relocation_fwbw_drq/async_drq_randomized.py
index d7a009cd..c5fb76e0 100644
--- a/examples/async_bin_relocation_fwbw_drq/async_drq_randomized.py
+++ b/examples/async_bin_relocation_fwbw_drq/async_drq_randomized.py
@@ -49,7 +49,7 @@
 flags.DEFINE_integer("seed", 42, "Random seed.")
 flags.DEFINE_bool("save_model", False, "Whether to save model.")
 flags.DEFINE_integer("batch_size", 256, "Batch size.")
-flags.DEFINE_integer("utd_ratio", 4, "UTD ratio.")
+flags.DEFINE_integer("critic_actor_ratio", 4, "critic to actor update ratio.")
 
 flags.DEFINE_integer("max_steps", 1000000, "Maximum number of training steps.")
 flags.DEFINE_integer("replay_buffer_capacity", 200000, "Replay buffer capacity.")
@@ -374,7 +374,7 @@ def stats_callback(type: str, payload: dict) -> dict:
            continue
        # run n-1 critic updates and 1 critic + actor update.
        # This makes training on GPU faster by reducing the large batch transfer time from CPU to GPU
-        for critic_step in range(FLAGS.utd_ratio - 1):
+        for critic_step in range(FLAGS.critic_actor_ratio - 1):
            with timer.context("sample_replay_buffer"):
                batch = next(replay_iterator)
                demo_batch = next(demo_iterator)
diff --git a/examples/async_bin_relocation_fwbw_drq/run_actor.sh b/examples/async_bin_relocation_fwbw_drq/run_actor.sh
index 380a427c..a887cd2a 100644
--- a/examples/async_bin_relocation_fwbw_drq/run_actor.sh
+++ b/examples/async_bin_relocation_fwbw_drq/run_actor.sh
@@ -8,7 +8,7 @@ python async_drq_randomized.py "$@" \
     --seed 0 \
     --random_steps 200 \
     --training_starts 200 \
-    --utd_ratio 4 \
+    --critic_actor_ratio 4 \
     --batch_size 256 \
     --eval_period 2000 \
     --encoder_type resnet-pretrained \
diff --git a/examples/async_bin_relocation_fwbw_drq/run_bw_learner.sh b/examples/async_bin_relocation_fwbw_drq/run_bw_learner.sh
index 683471c4..ade37842 100644
--- a/examples/async_bin_relocation_fwbw_drq/run_bw_learner.sh
+++ b/examples/async_bin_relocation_fwbw_drq/run_bw_learner.sh
@@ -7,7 +7,7 @@ python async_drq_randomized.py "$@" \
     --seed 0 \
     --random_steps 200 \
     --training_starts 200 \
-    --utd_ratio 4 \
+    --critic_actor_ratio 4 \
     --batch_size 256 \
     --eval_period 2000 \
     --encoder_type resnet-pretrained \
diff --git a/examples/async_bin_relocation_fwbw_drq/run_fw_learner.sh b/examples/async_bin_relocation_fwbw_drq/run_fw_learner.sh
index e5b8a563..67d16533 100644
--- a/examples/async_bin_relocation_fwbw_drq/run_fw_learner.sh
+++ b/examples/async_bin_relocation_fwbw_drq/run_fw_learner.sh
@@ -7,7 +7,7 @@ python async_drq_randomized.py "$@" \
     --seed 0 \
     --random_steps 200 \
     --training_starts 200 \
-    --utd_ratio 4 \
+    --critic_actor_ratio 4 \
     --batch_size 256 \
     --eval_period 2000 \
     --encoder_type resnet-pretrained \
diff --git a/examples/async_cable_route_drq/async_drq_randomized.py b/examples/async_cable_route_drq/async_drq_randomized.py
index e94ea72b..e3a3b1a7 100644
--- a/examples/async_cable_route_drq/async_drq_randomized.py
+++ b/examples/async_cable_route_drq/async_drq_randomized.py
@@ -49,7 +49,7 @@
 flags.DEFINE_integer("seed", 42, "Random seed.")
 flags.DEFINE_bool("save_model", False, "Whether to save model.")
 flags.DEFINE_integer("batch_size", 256, "Batch size.")
-flags.DEFINE_integer("utd_ratio", 4, "UTD ratio.")
+flags.DEFINE_integer("critic_actor_ratio", 4, "critic to actor update ratio.")
 
 flags.DEFINE_integer("max_steps", 1000000, "Maximum number of training steps.")
 flags.DEFINE_integer("replay_buffer_capacity", 200000, "Replay buffer capacity.")
@@ -285,7 +285,7 @@ def stats_callback(type: str, payload: dict) -> dict:
    for step in tqdm.tqdm(range(FLAGS.max_steps), dynamic_ncols=True, desc="learner"):
        # run n-1 critic updates and 1 critic + actor update.
        # This makes training on GPU faster by reducing the large batch transfer time from CPU to GPU
-        for critic_step in range(FLAGS.utd_ratio - 1):
+        for critic_step in range(FLAGS.critic_actor_ratio - 1):
            with timer.context("sample_replay_buffer"):
                batch = next(replay_iterator)
                demo_batch = next(demo_iterator)
diff --git a/examples/async_cable_route_drq/run_actor.sh b/examples/async_cable_route_drq/run_actor.sh
index f6531b01..eb47a1cc 100644
--- a/examples/async_cable_route_drq/run_actor.sh
+++ b/examples/async_cable_route_drq/run_actor.sh
@@ -8,7 +8,7 @@ python async_drq_randomized.py "$@" \
     --seed 0 \
     --random_steps 0 \
     --training_starts 200 \
-    --utd_ratio 4 \
+    --critic_actor_ratio 4 \
     --batch_size 256 \
     --eval_period 2000 \
     --encoder_type resnet-pretrained \
diff --git a/examples/async_cable_route_drq/run_learner.sh b/examples/async_cable_route_drq/run_learner.sh
index 2705bd17..a8355d9b 100644
--- a/examples/async_cable_route_drq/run_learner.sh
+++ b/examples/async_cable_route_drq/run_learner.sh
@@ -7,7 +7,7 @@ python async_drq_randomized.py "$@" \
     --seed 0 \
     --random_steps 600 \
     --training_starts 200 \
-    --utd_ratio 4 \
+    --critic_actor_ratio 4 \
     --batch_size 256 \
     --eval_period 2000 \
     --encoder_type resnet-pretrained \
diff --git a/examples/async_drq_sim/async_drq_sim.py b/examples/async_drq_sim/async_drq_sim.py
index e8f7698f..29f55ba0 100644
--- a/examples/async_drq_sim/async_drq_sim.py
+++ b/examples/async_drq_sim/async_drq_sim.py
@@ -45,7 +45,7 @@
 flags.DEFINE_integer("seed", 42, "Random seed.")
 flags.DEFINE_bool("save_model", False, "Whether to save model.")
 flags.DEFINE_integer("batch_size", 256, "Batch size.")
-flags.DEFINE_integer("utd_ratio", 4, "UTD ratio.")
+flags.DEFINE_integer("critic_actor_ratio", 4, "critic to actor update ratio.")
 
 flags.DEFINE_integer("max_steps", 1000000, "Maximum number of training steps.")
 flags.DEFINE_integer("replay_buffer_capacity", 200000, "Replay buffer capacity.")
@@ -258,7 +258,7 @@ def stats_callback(type: str, payload: dict) -> dict:
    for step in tqdm.tqdm(range(FLAGS.max_steps), dynamic_ncols=True, desc="learner"):
        # run n-1 critic updates and 1 critic + actor update.
        # This makes training on GPU faster by reducing the large batch transfer time from CPU to GPU
-        for critic_step in range(FLAGS.utd_ratio - 1):
+        for critic_step in range(FLAGS.critic_actor_ratio - 1):
            with timer.context("sample_replay_buffer"):
                batch = next(replay_iterator)
 
diff --git a/examples/async_drq_sim/run_actor.sh b/examples/async_drq_sim/run_actor.sh
index e7c187d6..1cf4557a 100644
--- a/examples/async_drq_sim/run_actor.sh
+++ b/examples/async_drq_sim/run_actor.sh
@@ -7,7 +7,7 @@ python async_drq_sim.py "$@" \
     --seed 0 \
     --random_steps 1000 \
     --training_starts 1000 \
-    --utd_ratio 4 \
+    --critic_actor_ratio 4 \
     --batch_size 256 \
     --eval_period 2000 \
     --encoder_type resnet-pretrained \
diff --git a/examples/async_drq_sim/run_learner.sh b/examples/async_drq_sim/run_learner.sh
index 08fbf923..4836e6c9 100644
--- a/examples/async_drq_sim/run_learner.sh
+++ b/examples/async_drq_sim/run_learner.sh
@@ -6,7 +6,7 @@ python async_drq_sim.py "$@" \
     --seed 0 \
     --random_steps 1000 \
     --training_starts 1000 \
-    --utd_ratio 4 \
+    --critic_actor_ratio 4 \
     --eval_period 2000 \
     --encoder_type resnet-pretrained \
     # --demo_path franka_lift_cube_image_20_trajs.pkl \
diff --git a/examples/async_pcb_insert_drq/async_drq_randomized.py b/examples/async_pcb_insert_drq/async_drq_randomized.py
index 99f4439a..9a1a0596 100644
--- a/examples/async_pcb_insert_drq/async_drq_randomized.py
+++ b/examples/async_pcb_insert_drq/async_drq_randomized.py
@@ -50,7 +50,7 @@
 flags.DEFINE_integer("seed", 42, "Random seed.")
 flags.DEFINE_bool("save_model", False, "Whether to save model.")
 flags.DEFINE_integer("batch_size", 256, "Batch size.")
-flags.DEFINE_integer("utd_ratio", 4, "UTD ratio.")
+flags.DEFINE_integer("critic_actor_ratio", 4, "critic to actor update ratio.")
 
 flags.DEFINE_integer("max_steps", 1000000, "Maximum number of training steps.")
 flags.DEFINE_integer("replay_buffer_capacity", 200000, "Replay buffer capacity.")
@@ -341,7 +341,7 @@ def stats_callback(type: str, payload: dict) -> dict:
    for step in tqdm.tqdm(range(FLAGS.max_steps), dynamic_ncols=True, desc="learner"):
        # run n-1 critic updates and 1 critic + actor update.
        # This makes training on GPU faster by reducing the large batch transfer time from CPU to GPU
-        for critic_step in range(FLAGS.utd_ratio - 1):
+        for critic_step in range(FLAGS.critic_actor_ratio - 1):
            with timer.context("sample_replay_buffer"):
                batch = next(replay_iterator)
                demo_batch = next(demo_iterator)
diff --git a/examples/async_pcb_insert_drq/run_actor.sh b/examples/async_pcb_insert_drq/run_actor.sh
index 904238e8..41596b0e 100644
--- a/examples/async_pcb_insert_drq/run_actor.sh
+++ b/examples/async_pcb_insert_drq/run_actor.sh
@@ -8,7 +8,7 @@ python async_drq_randomized.py "$@" \
     --seed 0 \
     --random_steps 0 \
     --training_starts 200 \
-    --utd_ratio 4 \
+    --critic_actor_ratio 4 \
     --batch_size 256 \
     --eval_period 2000 \
     --encoder_type resnet-pretrained \
diff --git a/examples/async_pcb_insert_drq/run_learner.sh b/examples/async_pcb_insert_drq/run_learner.sh
index a930daef..ad47bfe4 100644
--- a/examples/async_pcb_insert_drq/run_learner.sh
+++ b/examples/async_pcb_insert_drq/run_learner.sh
@@ -7,7 +7,7 @@ python async_drq_randomized.py "$@" \
     --seed 0 \
     --random_steps 1000 \
     --training_starts 200 \
-    --utd_ratio 4 \
+    --critic_actor_ratio 4 \
     --batch_size 256 \
     --eval_period 2000 \
     --encoder_type resnet-pretrained \
diff --git a/examples/async_peg_insert_drq/async_drq_randomized.py b/examples/async_peg_insert_drq/async_drq_randomized.py
index 9331698f..81cc4a8f 100644
--- a/examples/async_peg_insert_drq/async_drq_randomized.py
+++ b/examples/async_peg_insert_drq/async_drq_randomized.py
@@ -45,8 +45,7 @@
 flags.DEFINE_integer("max_traj_length", 100, "Maximum length of trajectory.")
 flags.DEFINE_integer("seed", 42, "Random seed.")
 flags.DEFINE_bool("save_model", False, "Whether to save model.")
-flags.DEFINE_integer("batch_size", 256, "Batch size.")
-flags.DEFINE_integer("utd_ratio", 4, "UTD ratio.")
+flags.DEFINE_integer("critic_actor_ratio", 4, "critic to actor update ratio.")
 
 flags.DEFINE_integer("max_steps", 1000000, "Maximum number of training steps.")
 flags.DEFINE_integer("replay_buffer_capacity", 200000, "Replay buffer capacity.")
@@ -276,7 +275,7 @@ def stats_callback(type: str, payload: dict) -> dict:
    for step in tqdm.tqdm(range(FLAGS.max_steps), dynamic_ncols=True, desc="learner"):
        # run n-1 critic updates and 1 critic + actor update.
        # This makes training on GPU faster by reducing the large batch transfer time from CPU to GPU
-        for critic_step in range(FLAGS.utd_ratio - 1):
+        for critic_step in range(FLAGS.critic_actor_ratio - 1):
            with timer.context("sample_replay_buffer"):
                batch = next(replay_iterator)
                demo_batch = next(demo_iterator)
diff --git a/examples/async_peg_insert_drq/run_actor.sh b/examples/async_peg_insert_drq/run_actor.sh
index 3e82387e..9b34b2a3 100644
--- a/examples/async_peg_insert_drq/run_actor.sh
+++ b/examples/async_peg_insert_drq/run_actor.sh
@@ -8,7 +8,7 @@ python async_drq_randomized.py "$@" \
     --seed 0 \
     --random_steps 0 \
     --training_starts 200 \
-    --utd_ratio 4 \
+    --critic_actor_ratio 4 \
     --batch_size 256 \
     --eval_period 2000 \
     --encoder_type resnet-pretrained \
diff --git a/examples/async_peg_insert_drq/run_learner.sh b/examples/async_peg_insert_drq/run_learner.sh
index 6d7e047f..c2823a19 100644
--- a/examples/async_peg_insert_drq/run_learner.sh
+++ b/examples/async_peg_insert_drq/run_learner.sh
@@ -7,7 +7,7 @@ python async_drq_randomized.py "$@" \
     --seed 0 \
     --random_steps 1000 \
     --training_starts 200 \
-    --utd_ratio 4 \
+    --critic_actor_ratio 4 \
     --batch_size 256 \
     --eval_period 2000 \
     --encoder_type resnet-pretrained \
diff --git a/examples/async_sac_state_sim/async_sac_state_sim.py b/examples/async_sac_state_sim/async_sac_state_sim.py
index ecf81bfb..5994208e 100644
--- a/examples/async_sac_state_sim/async_sac_state_sim.py
+++ b/examples/async_sac_state_sim/async_sac_state_sim.py
@@ -36,7 +36,7 @@
 flags.DEFINE_integer("seed", 42, "Random seed.")
 flags.DEFINE_bool("save_model", False, "Whether to save model.")
 flags.DEFINE_integer("batch_size", 256, "Batch size.")
-flags.DEFINE_integer("utd_ratio", 8, "UTD ratio.")
+flags.DEFINE_integer("critic_actor_ratio", 8, "critic to actor update ratio.")
 
 flags.DEFINE_integer("max_steps", 1000000, "Maximum number of training steps.")
 flags.DEFINE_integer("replay_buffer_capacity", 1000000, "Replay buffer capacity.")
@@ -284,7 +284,7 @@ def main(_):
    )
    replay_iterator = replay_buffer.get_iterator(
        sample_args={
-            "batch_size": FLAGS.batch_size * FLAGS.utd_ratio,
+            "batch_size": FLAGS.batch_size * FLAGS.critic_actor_ratio,
        },
        device=sharding.replicate(),
    )
diff --git a/examples/async_sac_state_sim/run_actor.sh b/examples/async_sac_state_sim/run_actor.sh
index 17730616..ce4ff879 100644
--- a/examples/async_sac_state_sim/run_actor.sh
+++ b/examples/async_sac_state_sim/run_actor.sh
@@ -8,7 +8,7 @@ python async_sac_state_sim.py "$@" \
     --seed 0 \
     --random_steps 1000 \
     --training_starts 1000 \
-    --utd_ratio 8 \
+    --critic_actor_ratio 8 \
     --batch_size 256 \
     --eval_period 2000 \
     --debug
diff --git a/examples/async_sac_state_sim/run_learner.sh b/examples/async_sac_state_sim/run_learner.sh
index 98715d16..f5a2cb83 100644
--- a/examples/async_sac_state_sim/run_learner.sh
+++ b/examples/async_sac_state_sim/run_learner.sh
@@ -7,7 +7,7 @@ python async_sac_state_sim.py "$@" \
     --seed 0 \
     --random_steps 1000 \
     --training_starts 1000 \
-    --utd_ratio 8 \
+    --critic_actor_ratio 8 \
     --batch_size 256 \
     --eval_period 2000 \
     --debug # wandb is disabled when debug
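Note on the rename: this patch changes only the flag name, not the loop logic, so `critic_actor_ratio` keeps the behavior documented in the learner-loop comment above: for every joint critic + actor update the learner first takes n-1 critic-only gradient steps. Below is a minimal, self-contained sketch of that schedule for orientation only; `ToyAgent`, `update_critics`, and `update` are illustrative stand-ins, not the DrQ/SAC agent API in this repo.

# sketch_critic_actor_ratio.py -- hypothetical illustration, not repo code
import random


class ToyAgent:
    """Stand-in for the RL agent; the placeholder updates mimic a (new_agent, info) return."""

    def update_critics(self, batch):
        # critic-only gradient step (placeholder)
        return self, {"critic_loss": sum(batch) / len(batch)}

    def update(self, batch):
        # joint critic + actor gradient step (placeholder)
        return self, {"critic_loss": sum(batch) / len(batch), "actor_loss": 0.0}


def learner_step(agent, sample_batch, critic_actor_ratio=4):
    """Run n-1 critic-only updates, then 1 critic + actor update."""
    for _ in range(critic_actor_ratio - 1):
        agent, _ = agent.update_critics(sample_batch())
    return agent.update(sample_batch())


if __name__ == "__main__":
    rng = random.Random(0)
    sample_batch = lambda: [rng.random() for _ in range(256)]  # fake 256-sample batch
    agent, info = learner_step(ToyAgent(), sample_batch, critic_actor_ratio=4)
    print(info)

In the SAC state example above, the same ratio also scales data movement: the replay iterator is built with batch_size * critic_actor_ratio so one large batch per learner step covers all of the updates.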