diff --git a/.vscode/tasks.json b/.vscode/tasks.json index f7896a038b4..967f8f9f3e4 100644 --- a/.vscode/tasks.json +++ b/.vscode/tasks.json @@ -4,7 +4,6 @@ "version": "2.0.0", "tasks": [ { - // setup python env "label": "setup_python_env", "type": "shell", "linux": { @@ -15,7 +14,6 @@ } }, { - // run formatter "label": "run_formatter", "type": "shell", "linux": { @@ -23,7 +21,8 @@ }, "windows": { "command": "${workspaceFolder}/isaaclab.bat --format" - } + }, + "problemMatcher": [] } ] } diff --git a/=1.4.2 b/=1.4.2 new file mode 100644 index 00000000000..9a4925bdea1 --- /dev/null +++ b/=1.4.2 @@ -0,0 +1,27 @@ +Requirement already satisfied: skrl in /home/xuezhi/miniconda3/envs/env_isaaclab/lib/python3.10/site-packages (1.4.1) +Collecting skrl + Downloading skrl-1.4.2-py3-none-any.whl.metadata (4.8 kB) +Requirement already satisfied: gymnasium in /home/xuezhi/miniconda3/envs/env_isaaclab/lib/python3.10/site-packages (from skrl) (1.0.0) +Requirement already satisfied: packaging in /home/xuezhi/.local/lib/python3.10/site-packages (from skrl) (24.2) +Requirement already satisfied: tensorboard in /home/xuezhi/miniconda3/envs/env_isaaclab/lib/python3.10/site-packages (from skrl) (2.18.0) +Requirement already satisfied: tqdm in /home/xuezhi/miniconda3/envs/env_isaaclab/lib/python3.10/site-packages (from skrl) (4.67.1) +Requirement already satisfied: numpy>=1.21.0 in /home/xuezhi/miniconda3/envs/env_isaaclab/lib/python3.10/site-packages (from gymnasium->skrl) (1.26.4) +Requirement already satisfied: cloudpickle>=1.2.0 in /home/xuezhi/miniconda3/envs/env_isaaclab/lib/python3.10/site-packages (from gymnasium->skrl) (3.1.1) +Requirement already satisfied: typing-extensions>=4.3.0 in /home/xuezhi/miniconda3/envs/env_isaaclab/lib/python3.10/site-packages (from gymnasium->skrl) (4.12.2) +Requirement already satisfied: farama-notifications>=0.0.1 in /home/xuezhi/miniconda3/envs/env_isaaclab/lib/python3.10/site-packages (from gymnasium->skrl) (0.0.4) +Requirement already satisfied: absl-py>=0.4 in /home/xuezhi/miniconda3/envs/env_isaaclab/lib/python3.10/site-packages (from tensorboard->skrl) (2.1.0) +Requirement already satisfied: grpcio>=1.48.2 in /home/xuezhi/miniconda3/envs/env_isaaclab/lib/python3.10/site-packages (from tensorboard->skrl) (1.70.0) +Requirement already satisfied: markdown>=2.6.8 in /home/xuezhi/miniconda3/envs/env_isaaclab/lib/python3.10/site-packages (from tensorboard->skrl) (3.7) +Requirement already satisfied: protobuf!=4.24.0,>=3.19.6 in /home/xuezhi/miniconda3/envs/env_isaaclab/lib/python3.10/site-packages (from tensorboard->skrl) (3.20.3) +Requirement already satisfied: setuptools>=41.0.0 in /home/xuezhi/miniconda3/envs/env_isaaclab/lib/python3.10/site-packages (from tensorboard->skrl) (75.8.0) +Requirement already satisfied: six>1.9 in /home/xuezhi/miniconda3/envs/env_isaaclab/lib/python3.10/site-packages (from tensorboard->skrl) (1.17.0) +Requirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in /home/xuezhi/miniconda3/envs/env_isaaclab/lib/python3.10/site-packages (from tensorboard->skrl) (0.7.2) +Requirement already satisfied: werkzeug>=1.0.1 in /home/xuezhi/miniconda3/envs/env_isaaclab/lib/python3.10/site-packages (from tensorboard->skrl) (3.1.3) +Requirement already satisfied: MarkupSafe>=2.1.1 in /home/xuezhi/miniconda3/envs/env_isaaclab/lib/python3.10/site-packages (from werkzeug>=1.0.1->tensorboard->skrl) (3.0.2) +Downloading skrl-1.4.2-py3-none-any.whl (403 kB) +Installing collected packages: skrl + Attempting uninstall: skrl + Found existing 
installation: skrl 1.4.1 + Uninstalling skrl-1.4.1: + Successfully uninstalled skrl-1.4.1 +Successfully installed skrl-1.4.2 diff --git a/materials/textures/joint1.png b/materials/textures/joint1.png new file mode 100644 index 00000000000..4d46d2ae1bd Binary files /dev/null and b/materials/textures/joint1.png differ diff --git a/materials/textures/joint2.png b/materials/textures/joint2.png new file mode 100644 index 00000000000..ba2700b3522 Binary files /dev/null and b/materials/textures/joint2.png differ diff --git a/materials/textures/joint3.png b/materials/textures/joint3.png new file mode 100644 index 00000000000..ba2700b3522 Binary files /dev/null and b/materials/textures/joint3.png differ diff --git a/materials/textures/joint4.png b/materials/textures/joint4.png new file mode 100644 index 00000000000..ba2700b3522 Binary files /dev/null and b/materials/textures/joint4.png differ diff --git a/materials/textures/joint6.png b/materials/textures/joint6.png new file mode 100644 index 00000000000..406e3f77687 Binary files /dev/null and b/materials/textures/joint6.png differ diff --git a/materials/textures/joint7.png b/materials/textures/joint7.png new file mode 100644 index 00000000000..ed421fafe0d Binary files /dev/null and b/materials/textures/joint7.png differ diff --git a/scripts/demos/arms.py b/scripts/demos/arms.py index 7d80ae54efb..0a0e270ab2c 100644 --- a/scripts/demos/arms.py +++ b/scripts/demos/arms.py @@ -52,6 +52,7 @@ KINOVA_JACO2_N6S300_CFG, KINOVA_GEN3_N7_CFG, SAWYER_CFG, + RIDGEBACK_FRANKA_PANDA_CFG, ) # isort: on @@ -83,7 +84,7 @@ def design_scene() -> tuple[dict, list[list[float]]]: # Create separate groups called "Origin1", "Origin2", "Origin3" # Each group will have a mount and a robot on top of it - origins = define_origins(num_origins=6, spacing=2.0) + origins = define_origins(num_origins=7, spacing=2.0) # Origin 1 with Franka Panda prim_utils.create_prim("/World/Origin1", "Xform", translation=origins[0]) @@ -149,6 +150,14 @@ def design_scene() -> tuple[dict, list[list[float]]]: sawyer_arm_cfg.init_state.pos = (0.0, 0.0, 1.03) sawyer = Articulation(cfg=sawyer_arm_cfg) + # Origin 5 with mobile franka + prim_utils.create_prim("/World/Origin7", "Xform", translation=origins[6]) + # -- Robot + mobileFranka_cfg = RIDGEBACK_FRANKA_PANDA_CFG.replace(prim_path="/World/Origin7/Robot") + mobileFranka_cfg.init_state.pos = (0.0, 0.0, 0.0) + mobileFranka_cfg.init_state.rot = (0.7071068, 0.0, 0.7071068, 0.0) + mobileFranka = Articulation(cfg=mobileFranka_cfg) + # return the scene information scene_entities = { "franka_panda": franka_panda, @@ -157,6 +166,7 @@ def design_scene() -> tuple[dict, list[list[float]]]: "kinova_j2n6s300": kinova_j2n6s300, "kinova_gen3n7": kinova_gen3n7, "sawyer": sawyer, + "mobile_franka": mobileFranka, } return scene_entities, origins diff --git a/source/isaaclab_assets/isaaclab_assets/robots/.vscode/extensions.json b/source/isaaclab_assets/isaaclab_assets/robots/.vscode/extensions.json new file mode 100644 index 00000000000..86e786b31f9 --- /dev/null +++ b/source/isaaclab_assets/isaaclab_assets/robots/.vscode/extensions.json @@ -0,0 +1,8 @@ +{ + // See http://go.microsoft.com/fwlink/?LinkId=827846 + // for the documentation about the extensions.json format + "recommendations": [ + "ms-python.python", + "ms-python.vscode-pylance" + ], +} diff --git a/source/isaaclab_assets/isaaclab_assets/robots/.vscode/launch.json b/source/isaaclab_assets/isaaclab_assets/robots/.vscode/launch.json new file mode 100644 index 00000000000..8a22f73af48 --- /dev/null +++ 
b/source/isaaclab_assets/isaaclab_assets/robots/.vscode/launch.json @@ -0,0 +1,25 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "name": "Python: Attach (windows-x86_64/linux-x86_64)", + "type": "python", + "request": "attach", + "pathMappings": [ + { + "localRoot": "${workspaceFolder}", + "remoteRoot": "${workspaceFolder}" + }], + "port": 3000, + "host": "127.0.0.1", + "subProcess": true, + "runtimeArgs": [ + "--preserve-symlinks", + "--preserve-symlinks-main" + ] + } + ] +} diff --git a/source/isaaclab_assets/isaaclab_assets/robots/.vscode/settings.json b/source/isaaclab_assets/isaaclab_assets/robots/.vscode/settings.json new file mode 100644 index 00000000000..63a0b3e70ae --- /dev/null +++ b/source/isaaclab_assets/isaaclab_assets/robots/.vscode/settings.json @@ -0,0 +1,10 @@ +{ + "editor.rulers": [120], + "python.languageServer": "Pylance", + "python.formatting.provider": "black", + "python.formatting.blackArgs": ["--line-length", "120"], + "python.linting.enabled": true, + "python.linting.pylintEnabled": false, + "python.linting.flake8Enabled": true, + "python.linting.flake8Args": ["--max-line-length=120"] +} diff --git a/source/isaaclab_assets/isaaclab_assets/robots/__init__.py b/source/isaaclab_assets/isaaclab_assets/robots/__init__.py index c23cf8e3ce2..f7041fd1add 100644 --- a/source/isaaclab_assets/isaaclab_assets/robots/__init__.py +++ b/source/isaaclab_assets/isaaclab_assets/robots/__init__.py @@ -17,6 +17,7 @@ from .humanoid import * from .humanoid_28 import * from .kinova import * +from .mobile_franka import * from .quadcopter import * from .ridgeback_franka import * from .sawyer import * diff --git a/source/isaaclab_assets/isaaclab_assets/robots/mobile_franka.py b/source/isaaclab_assets/isaaclab_assets/robots/mobile_franka.py new file mode 100644 index 00000000000..34f7c74e7ed --- /dev/null +++ b/source/isaaclab_assets/isaaclab_assets/robots/mobile_franka.py @@ -0,0 +1,82 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers. +# All rights reserved. 
+# +# SPDX-License-Identifier: BSD-3-Clause + +import isaaclab.sim as sim_utils +from isaaclab.actuators import ImplicitActuatorCfg +from isaaclab.assets import ArticulationCfg +from isaaclab.utils.assets import ISAACLAB_NUCLEUS_DIR + +## +# Configuration +## + +MOBILE_FRANKA_CFG = ArticulationCfg( + spawn=sim_utils.UsdFileCfg( + usd_path=f"/home/xuezhi/Downloads/ridgeback_franka6_instanceable.usd", + rigid_props=sim_utils.RigidBodyPropertiesCfg( + rigid_body_enabled=True, + max_linear_velocity=1000.0, + max_angular_velocity=1000.0, + max_depenetration_velocity=100.0, + enable_gyroscopic_forces=True, + ), + articulation_props=sim_utils.ArticulationRootPropertiesCfg( + enabled_self_collisions=False, + solver_position_iteration_count=12, + solver_velocity_iteration_count=1, + sleep_threshold=0.005, + stabilization_threshold=0.001, + ), + ), + init_state=ArticulationCfg.InitialStateCfg( + joint_pos={ + # base + "dummy_base_prismatic_x_joint": 0.0, + "dummy_base_prismatic_y_joint": 0.0, + "dummy_base_revolute_z_joint": 0.0, + # franka_panda + "panda_joint1": 0.0, + "panda_joint2": -1.0, + "panda_joint3": 0.0, + "panda_joint4": -2.2, + "panda_joint5": 0.0, + "panda_joint6": 2.4, + "panda_joint7": 0.8, + "panda_finger_joint1": 0.035, + "panda_finger_joint2": 0.035, + }, + joint_vel={".*": 0.0}, + ), + actuators={ + "arm_actuators": ImplicitActuatorCfg( + joint_names_expr=["panda_joint[1-7]"], + effort_limit=87.0, + velocity_limit=2.175, + stiffness=400.0, + damping=80.0, + ), + "gripper_actuators": ImplicitActuatorCfg( + joint_names_expr=["panda_finger_joint1", "panda_finger_joint2"], + effort_limit=200.0, + velocity_limit=0.2, + stiffness=100000.0, + damping=1000.0, + ), + "base_actuators": ImplicitActuatorCfg( + joint_names_expr=["dummy_base_prismatic_x_joint", "dummy_base_prismatic_y_joint"], + effort_limit=1000.0, + velocity_limit=100.0, # Assuming position control + stiffness=999999986991104.0, + damping=100000.0, + ), + "base_rot_actuators": ImplicitActuatorCfg( + joint_names_expr=["dummy_base_revolute_z_joint"], + effort_limit=1000.0, + velocity_limit=100.0, # Assuming position control + stiffness=17453292716032.0, + damping=1745.32922, + ), + }, +) diff --git a/source/isaaclab_assets/isaaclab_assets/robots/ridgeback_franka.py b/source/isaaclab_assets/isaaclab_assets/robots/ridgeback_franka.py index 0e1a4a8415b..9bd7e8a8fec 100644 --- a/source/isaaclab_assets/isaaclab_assets/robots/ridgeback_franka.py +++ b/source/isaaclab_assets/isaaclab_assets/robots/ridgeback_franka.py @@ -24,14 +24,15 @@ RIDGEBACK_FRANKA_PANDA_CFG = ArticulationCfg( spawn=sim_utils.UsdFileCfg( usd_path=f"{ISAAC_NUCLEUS_DIR}/Robots/Clearpath/RidgebackFranka/ridgeback_franka.usd", + # usd_path=f"/home/xuezhi/Downloads/ridgeback_franka6_instanceable.usd", articulation_props=sim_utils.ArticulationRootPropertiesCfg(enabled_self_collisions=False), activate_contact_sensors=False, ), init_state=ArticulationCfg.InitialStateCfg( joint_pos={ # base - "dummy_base_prismatic_y_joint": 0.0, "dummy_base_prismatic_x_joint": 0.0, + "dummy_base_prismatic_y_joint": 0.0, "dummy_base_revolute_z_joint": 0.0, # franka arm "panda_joint1": 0.0, diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/cart_double_pendulum/agents/skrl_mappo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/direct/cart_double_pendulum/agents/skrl_mappo_cfg.yaml index dcd794f57a5..479c40fed7c 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/direct/cart_double_pendulum/agents/skrl_mappo_cfg.yaml +++ 
b/source/isaaclab_tasks/isaaclab_tasks/direct/cart_double_pendulum/agents/skrl_mappo_cfg.yaml @@ -1,5 +1,4 @@ -seed: 42 - +seed: 500 # Models are instantiated using skrl's model instantiator utility # https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html @@ -78,5 +77,5 @@ agent: # https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html trainer: class: SequentialTrainer - timesteps: 4800 + timesteps: 6000 environment_info: log diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/cart_double_pendulum/cart_double_pendulum_env.py b/source/isaaclab_tasks/isaaclab_tasks/direct/cart_double_pendulum/cart_double_pendulum_env.py index 5d956e5c073..b94e968b8d0 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/direct/cart_double_pendulum/cart_double_pendulum_env.py +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/cart_double_pendulum/cart_double_pendulum_env.py @@ -53,14 +53,14 @@ class CartDoublePendulumEnvCfg(DirectMARLEnvCfg): pendulum_action_scale = 50.0 # [Nm] # reward scales - rew_scale_alive = 1.0 - rew_scale_terminated = -2.0 - rew_scale_cart_pos = 0 - rew_scale_cart_vel = -0.01 - rew_scale_pole_pos = -1.0 - rew_scale_pole_vel = -0.01 - rew_scale_pendulum_pos = -1.0 - rew_scale_pendulum_vel = -0.01 + eps_alive = 1.0 + eps_terminated = -2.0 + eps_cart_pos = 0 + eps_cart_vel = -0.01 + eps_pole_pos = -1.0 + eps_pole_vel = -0.01 + eps_pendulum_pos = -1.0 + eps_pendulum_vel = -0.01 class CartDoublePendulumEnv(DirectMARLEnv): @@ -124,23 +124,29 @@ def _get_observations(self) -> dict[str, torch.Tensor]: return observations def _get_rewards(self) -> dict[str, torch.Tensor]: - total_reward = compute_rewards( - self.cfg.rew_scale_alive, - self.cfg.rew_scale_terminated, - self.cfg.rew_scale_cart_pos, - self.cfg.rew_scale_cart_vel, - self.cfg.rew_scale_pole_pos, - self.cfg.rew_scale_pole_vel, - self.cfg.rew_scale_pendulum_pos, - self.cfg.rew_scale_pendulum_vel, - self.joint_pos[:, self._cart_dof_idx[0]], - self.joint_vel[:, self._cart_dof_idx[0]], - normalize_angle(self.joint_pos[:, self._pole_dof_idx[0]]), - self.joint_vel[:, self._pole_dof_idx[0]], - normalize_angle(self.joint_pos[:, self._pendulum_dof_idx[0]]), - self.joint_vel[:, self._pendulum_dof_idx[0]], - math.prod(self.terminated_dict.values()), + P_cart_0, P_pendulum_0, Delta_P_cart, Delta_P_pendulum, total_reward = compute_rewards( + 1.0, # alpha + 1.0, # beta + self.cfg.eps_alive, # eps_alive + self.cfg.eps_terminated, # eps_terminated + self.cfg.eps_cart_vel, # eps_cart_vel + self.cfg.eps_pole_pos, # eps_pole_pos + self.cfg.eps_pole_vel, # eps_pole_vel + self.cfg.eps_pendulum_pos, # eps_pendulum_pos + self.cfg.eps_pendulum_vel, # eps_pendulum_vel + self.joint_vel[:, self._cart_dof_idx[0]], # cart_vel + normalize_angle(self.joint_pos[:, self._pole_dof_idx[0]]), # pole_pos + self.joint_vel[:, self._pole_dof_idx[0]], # pole_vel + normalize_angle(self.joint_pos[:, self._pendulum_dof_idx[0]]), # pendulum_pos + self.joint_vel[:, self._pendulum_dof_idx[0]], # pendulum_vel + math.prod(self.terminated_dict.values()), # reset_terminated ) + if "log" not in self.extras: + self.extras["log"] = dict() + self.extras["log"]["P_cart_0"] = P_cart_0.mean() + self.extras["log"]["P_pendulum_0"] = P_pendulum_0.mean() + self.extras["log"]["Delta_P_cart"] = Delta_P_cart.mean() + self.extras["log"]["Delta_P_pendulum"] = Delta_P_pendulum.mean() return total_reward def _get_dones(self) -> tuple[dict[str, torch.Tensor], dict[str, torch.Tensor]]: @@ -193,15 +199,15 @@ def normalize_angle(angle): @torch.jit.script def compute_rewards( - 
rew_scale_alive: float, - rew_scale_terminated: float, - rew_scale_cart_pos: float, - rew_scale_cart_vel: float, - rew_scale_pole_pos: float, - rew_scale_pole_vel: float, - rew_scale_pendulum_pos: float, - rew_scale_pendulum_vel: float, - cart_pos: torch.Tensor, + alpha: float, + beta: float, + eps_alive: float, + eps_terminated: float, + eps_cart_vel: float, + eps_pole_pos: float, + eps_pole_vel: float, + eps_pendulum_pos: float, + eps_pendulum_vel: float, cart_vel: torch.Tensor, pole_pos: torch.Tensor, pole_vel: torch.Tensor, @@ -209,18 +215,28 @@ def compute_rewards( pendulum_vel: torch.Tensor, reset_terminated: torch.Tensor, ): - rew_alive = rew_scale_alive * (1.0 - reset_terminated.float()) - rew_termination = rew_scale_terminated * reset_terminated.float() - rew_pole_pos = rew_scale_pole_pos * torch.sum(torch.square(pole_pos).unsqueeze(dim=1), dim=-1) - rew_pendulum_pos = rew_scale_pendulum_pos * torch.sum( - torch.square(pole_pos + pendulum_pos).unsqueeze(dim=1), dim=-1 + # Base reward components + P_cart_0 = ( + eps_alive * (1.0 - reset_terminated.float()) + + eps_terminated * reset_terminated.float() + + eps_cart_vel * torch.sum(torch.abs(cart_vel).unsqueeze(dim=1), dim=-1) ) - rew_cart_vel = rew_scale_cart_vel * torch.sum(torch.abs(cart_vel).unsqueeze(dim=1), dim=-1) - rew_pole_vel = rew_scale_pole_vel * torch.sum(torch.abs(pole_vel).unsqueeze(dim=1), dim=-1) - rew_pendulum_vel = rew_scale_pendulum_vel * torch.sum(torch.abs(pendulum_vel).unsqueeze(dim=1), dim=-1) - - total_reward = { - "cart": rew_alive + rew_termination + rew_pole_pos + rew_cart_vel + rew_pole_vel, - "pendulum": rew_alive + rew_termination + rew_pendulum_pos + rew_pendulum_vel, - } - return total_reward + + P_pendulum_0 = eps_alive * (1.0 - reset_terminated.float()) + eps_terminated * reset_terminated.float() + + # Cooperative (mutualistic) terms + Delta_P_cart = eps_pole_pos * torch.sum(torch.square(pole_pos).unsqueeze(dim=1), dim=-1) + eps_pole_vel * torch.sum( + torch.abs(pole_vel).unsqueeze(dim=1), dim=-1 + ) + + Delta_P_pendulum = eps_pendulum_pos * torch.sum( + torch.square(pole_pos + pendulum_pos).unsqueeze(dim=1), dim=-1 + ) + eps_pendulum_vel * torch.sum(torch.abs(pendulum_vel).unsqueeze(dim=1), dim=-1) + + # Final rewards incorporating mutualistic principles + R_cart = alpha * P_cart_0 + beta * Delta_P_cart + R_pendulum = alpha * P_pendulum_0 + beta * Delta_P_pendulum + + total_reward = {"cart": R_cart, "pendulum": R_pendulum} + + return P_cart_0, P_pendulum_0, Delta_P_cart, Delta_P_pendulum, total_reward diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/mobile_franka/__init__.py b/source/isaaclab_tasks/isaaclab_tasks/direct/mobile_franka/__init__.py new file mode 100644 index 00000000000..019c967d5ef --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/mobile_franka/__init__.py @@ -0,0 +1,26 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers. +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +""" +Mobile Franka MARL environment. +""" + +import gymnasium as gym + +from . import agents + +## +# Register Gym environments.
+## + +gym.register( + id="MobileFrankaMARL", + entry_point=f"{__name__}.mobile_franka_marl:MobileFrankaEnv", + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": f"{__name__}.mobile_franka_marl_cfg:MobileFrankaMARLCfg", + "skrl_mappo_cfg_entry_point": f"{agents.__name__}:skrl_mappo_cfg.yaml", + }, +) diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/mobile_franka/agents/__init__.py b/source/isaaclab_tasks/isaaclab_tasks/direct/mobile_franka/agents/__init__.py new file mode 100644 index 00000000000..e75ca2bc3f9 --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/mobile_franka/agents/__init__.py @@ -0,0 +1,4 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers. +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/mobile_franka/agents/skrl_mappo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/direct/mobile_franka/agents/skrl_mappo_cfg.yaml new file mode 100644 index 00000000000..bca15cf352d --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/mobile_franka/agents/skrl_mappo_cfg.yaml @@ -0,0 +1,82 @@ +seed: 42 + +# Models are instantiated using skrl's model instantiator utility +# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html + +models: + separate: True + policy: # see gaussian_model parameters + class: GaussianMixin + clip_actions: False + clip_log_std: True + min_log_std: -20.0 + max_log_std: 2.0 + initial_log_std: 0.0 + network: + - name: net + input: STATES + layers: [512, 512, 256, 128] + activations: elu + output: ACTIONS + value: # see deterministic_model parameters + class: DeterministicMixin + clip_actions: False + network: + - name: net + input: STATES + layers: [512, 512, 256, 128] + activations: elu + output: ONE + + +# Rollout memory +# https://skrl.readthedocs.io/en/latest/api/memories/random.html +memory: + class: RandomMemory + memory_size: -1 # automatically determined (same as agent:rollouts) + + +# MAPPO agent configuration (field names are from MAPPO_DEFAULT_CONFIG) +# https://skrl.readthedocs.io/en/latest/api/multi_agents/mappo.html +agent: + class: MAPPO + rollouts: 16 + learning_epochs: 5 + mini_batches: 4 + discount_factor: 0.99 + lambda: 0.95 + learning_rate: 5.0e-04 + learning_rate_scheduler: KLAdaptiveLR + learning_rate_scheduler_kwargs: + kl_threshold: 0.016 + state_preprocessor: RunningStandardScaler + state_preprocessor_kwargs: null + shared_state_preprocessor: RunningStandardScaler + shared_state_preprocessor_kwargs: null + value_preprocessor: RunningStandardScaler + value_preprocessor_kwargs: null + random_timesteps: 0 + learning_starts: 0 + grad_norm_clip: 1.0 + ratio_clip: 0.2 + value_clip: 0.2 + clip_predicted_values: True + entropy_loss_scale: 0.0 + value_loss_scale: 2.0 + kl_threshold: 0.0 + rewards_shaper_scale: 1.0 + time_limit_bootstrap: False + # logging and checkpoint + experiment: + directory: "mobile_franka_mappo" + experiment_name: "" + write_interval: auto + checkpoint_interval: auto + + +# Sequential trainer +# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html +trainer: + class: SequentialTrainer + timesteps: 36000 + environment_info: log diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/mobile_franka/mobile_franka_marl.py b/source/isaaclab_tasks/isaaclab_tasks/direct/mobile_franka/mobile_franka_marl.py new file mode 100644 index 00000000000..8033ffcf846 --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/mobile_franka/mobile_franka_marl.py @@ -0,0 +1,353 @@ +# 
Copyright (c) 2022-2025, The Isaac Lab Project Developers. +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +# Isaac Lab 2.0.1 +from __future__ import annotations + +import numpy as np +import torch +from collections.abc import Sequence + +import isaaclab.sim as sim_utils +from isaaclab.assets import Articulation, RigidObject +from isaaclab.envs import DirectMARLEnv +from isaaclab.markers import VisualizationMarkers +from isaaclab.sim.spawners.from_files import GroundPlaneCfg, spawn_ground_plane +from isaaclab.utils.math import sample_uniform, saturate + +from .mobile_franka_marl_cfg import MobileFrankaMARLCfg + + +class MobileFrankaEnv(DirectMARLEnv): + cfg: MobileFrankaMARLCfg + + def __init__(self, cfg: MobileFrankaMARLCfg, render_mode: str | None = None, **kwargs): + super().__init__(cfg, render_mode, **kwargs) + + self.action_scale = 7.5 + # self.start_position_noise = 0.0 + # self.start_rotation_noise = 0.0 + # self.num_props = 4 + self.dof_vel_scale = 0.1 + self.dist_reward_scale = 2.0 + self.rot_reward_scale = 0.5 + self.around_handle_reward_scale = 10.0 + self.open_reward_scale = 7.5 + self.finger_dist_reward_scale = 100.0 + self.action_penalty_scale = 0.01 + self.finger_close_reward_scale = 10.0 + + # self.distX_offset = 0.04 + # self.control_frequency = 120.0/2 + # self.dt=1/self.control_frequency + self.num_franka_dofs = self.mobilefranka.num_joints + self._num_actions = 10 + + # buffers for franka targets + self.franka_dof_targets = torch.zeros( + (self.num_envs, self.num_franka_dofs), dtype=torch.float, device=self.device + ) + self.franka_prev_targets = torch.zeros( + (self.num_envs, self.num_franka_dofs), dtype=torch.float, device=self.device + ) + self.franka_curr_targets = torch.zeros( + (self.num_envs, self.num_franka_dofs), dtype=torch.float, device=self.device + ) + + # list of actuated joints + self.actuated_dof_indices = list() + for joint_name in cfg.actuated_joint_names: + self.actuated_dof_indices.append(self.mobilefranka.joint_names.index(joint_name)) + + # list of mobile base joints + self.actuated_mov_indices = list() + for joint_name in cfg.mobile_base_names: + self.actuated_mov_indices.append(self.mobilefranka.joint_names.index(joint_name)) + + # finger bodies + self.finger_bodies = list() + for body_name in self.cfg.finger_body_names: + self.finger_bodies.append(self.mobilefranka.body_names.index(body_name)) + self.finger_bodies.sort() + self.num_finger = len(self.finger_bodies) + + # xy base joints + self.xy_base_indices = list() + for joint_name in cfg.xy_base_names: + self.xy_base_indices.append(self.mobilefranka.joint_names.index(joint_name)) + + # set the ranges for the target randomization + self.x_lim = [-3, 3] + self.y_lim = [-3, 3] + self.z_lim = [0.2, 1.2] + + # joint limits + joint_pos_limits = self.mobilefranka.root_physx_view.get_dof_limits().to(self.device) + self.lower_limits = joint_pos_limits[..., 0] + self.upper_limits = joint_pos_limits[..., 1] + # print("lower_limits", self.lower_limits[1,self.actuated_mov_indices], "upper_limits", self.upper_limits[1,:]) + + self.target_positions = torch.zeros((self.num_envs, 3), dtype=torch.float, device=self.device) + self.target_positions[:, :] = torch.tensor([2.0, 0.0, 0.5], device=self.device) + self.goal_rot = torch.zeros((self.num_envs, 4), dtype=torch.float, device=self.device) + self.goal_rot[:, 0] = 1.0 + + # initialize goal marker + self.goal_markers = VisualizationMarkers(self.cfg.goal_object_cfg) + # Set the default joint positions for the mobile franka + 
self.mobilefranka.data.default_joint_pos[:, :] = torch.tensor( + [0.0, 0.0, 0.0, 0.0, -0.7856, 0.0, -2.356, 0.0, 1.572, 0.7854, 0.035, 0.035], device=self.device + ) # base_x, base_y, base_z, joint1-7, finger1-2 + self.mobilefranka.data.default_joint_vel[:, :] = torch.tensor( + [0.0] * self.num_franka_dofs, device=self.device + ) # base_x, base_y, base_z, joint1-7, finger1-2 (12) + self.default_joint_pos = self.mobilefranka.data.default_joint_pos + self.default_joint_vel = self.mobilefranka.data.default_joint_vel + + def _setup_scene(self): + # add MobileFranka and goal object + self.mobilefranka = Articulation(self.cfg.mobile_franka_cfg) + + # add ground plane + spawn_ground_plane(prim_path="/World/ground", cfg=GroundPlaneCfg()) + # clone and replicate (no need to filter for this environment) + self.scene.clone_environments(copy_from_source=False) + # add articulation to scene - we must register to scene to randomize with EventManager + self.scene.articulations["mobilefranka"] = self.mobilefranka + # self.scene.rigid_objects["target_cube"] = self.target_cube + # add lights + light_cfg = sim_utils.DomeLightCfg(intensity=2000.0, color=(0.75, 0.75, 0.75)) + light_cfg.func("/World/Light", light_cfg) + + def _pre_physics_step(self, actions: dict[str, torch.Tensor]) -> None: + self.actions = actions + + def _apply_action(self) -> None: + # print(f"Action franka shape: {self.actions['franka'].shape}, base shape: {self.actions['base'].shape}") + # joints + self.franka_curr_targets[:, self.actuated_dof_indices] = scale( + self.actions["franka"], + self.lower_limits[:, self.actuated_dof_indices], + self.upper_limits[:, self.actuated_dof_indices], + ) + self.franka_curr_targets[:, self.actuated_dof_indices] = ( + self.cfg.act_moving_average * self.franka_curr_targets[:, self.actuated_dof_indices] + + (1.0 - self.cfg.act_moving_average) * self.franka_prev_targets[:, self.actuated_dof_indices] + ) + self.franka_curr_targets[:, self.actuated_dof_indices] = saturate( + self.franka_curr_targets[:, self.actuated_dof_indices], + self.lower_limits[:, self.actuated_dof_indices], + self.upper_limits[:, self.actuated_dof_indices], + ) + + # Last 2 values for mobile base (x, y position) + self.franka_curr_targets[:, self.actuated_mov_indices] = scale( + self.actions["base"], + self.lower_limits[:, self.actuated_mov_indices], + self.upper_limits[:, self.actuated_mov_indices], + ) + self.franka_curr_targets[:, self.actuated_mov_indices] = ( + self.cfg.act_moving_average * self.franka_curr_targets[:, self.actuated_mov_indices] + + (1.0 - self.cfg.act_moving_average) * self.franka_prev_targets[:, self.actuated_mov_indices] + ) + self.franka_curr_targets[:, self.actuated_mov_indices] = saturate( + self.franka_curr_targets[:, self.actuated_mov_indices], + self.lower_limits[:, self.actuated_mov_indices], + self.upper_limits[:, self.actuated_mov_indices], + ) + + # save current targets + self.franka_curr_targets[:, self.actuated_dof_indices] = self.franka_curr_targets[:, self.actuated_dof_indices] + self.franka_curr_targets[:, self.actuated_mov_indices] = self.franka_curr_targets[:, self.actuated_mov_indices] + + # set targets + self.mobilefranka.set_joint_position_target( + self.franka_curr_targets[:, self.actuated_dof_indices], joint_ids=self.actuated_dof_indices + ) + self.mobilefranka.set_joint_position_target( + self.franka_curr_targets[:, self.actuated_mov_indices], joint_ids=self.actuated_mov_indices + ) + + def _get_observations(self) -> dict[str, torch.Tensor]: + # print("joint position", 
self.mobilefranka.data.joint_pos) + observations = { + "franka": torch.cat( + ( + # -------arm-------- + # DOF positions (12) + unscale(self.joint_pos, self.lower_limits, self.upper_limits), + # DOF velocities (12) + self.dof_vel_scale * self.joint_vel, + # finger positions (3*2) + self.finger_body_pos.view(self.num_envs, self.num_finger * 3), + # actions (7) + self.actions["franka"], + # actions (3) + self.actions["base"], + # positions (3) + self.target_positions, + ), + dim=-1, + ), + "base": torch.cat( + ( + # -------base-------- + # DOF positions (3) + unscale(self.joint_pos, self.lower_limits, self.upper_limits), + # DOF velocities (3) + self.dof_vel_scale * self.joint_vel, + # finger positions (3*2) + self.finger_body_pos.view(self.num_envs, self.num_finger * 3), + # actions (7) + self.actions["franka"], + # actions (3) + self.actions["base"], + # positions (3) + self.target_positions, + ), + dim=-1, + ), + } + return observations + + def _get_states(self) -> torch.Tensor: + states = torch.cat( + ( + # DOF positions (12) + unscale(self.joint_pos, self.lower_limits, self.upper_limits), + # DOF velocities (12) + self.dof_vel_scale * self.joint_vel, + # finger positions (3*2) + self.finger_body_pos.view(self.num_envs, self.num_finger * 3), + # actions (7) + self.actions["franka"], + # actions (3) + self.actions["base"], + # positions (3) + self.target_positions, + ), + dim=-1, + ) + return states + + def _get_rewards(self) -> dict[str, torch.Tensor]: + # Calculate distance from each finger to the target separately + finger_to_target_dists = torch.zeros((self.num_envs, self.num_finger), device=self.device) + + # For each finger, calculate its distance to target + for i in range(self.num_finger): + finger_to_target_dists[:, i] = torch.norm(self.finger_body_pos[:, i] - self.target_positions, p=2, dim=-1) + + # Mean distance across all fingers to target + goal_dist = torch.mean(finger_to_target_dists, dim=1) + + rew_dist = 5 * torch.exp(-self.cfg.dist_reward_scale * goal_dist) + + # log reward components + if "log" not in self.extras: + self.extras["log"] = dict() + self.extras["log"]["dist_reward"] = rew_dist.mean() + self.extras["log"]["dist_goal"] = goal_dist.mean() + + return {"franka": rew_dist, "base": rew_dist} + + def _get_dones(self) -> tuple[dict[str, torch.Tensor], dict[str, torch.Tensor]]: + self.joint_pos = self.mobilefranka.data.joint_pos + out_of_bounds = torch.any(torch.abs(self.joint_pos[:, self.xy_base_indices]) > self.cfg.max_base_pos, dim=1) + time_out = self.episode_length_buf >= self.max_episode_length - 1 + + terminated = {agent: out_of_bounds for agent in self.cfg.possible_agents} + time_outs = {agent: time_out for agent in self.cfg.possible_agents} + return terminated, time_outs + + def _reset_idx(self, env_ids: Sequence[int] | torch.Tensor | None): + if env_ids is None: + env_ids = self.mobilefranka._ALL_INDICES + # reset articulation and rigid body attributes + super()._reset_idx(env_ids) + + # reset goals + self._reset_target_pose(env_ids) + + # reset franka + # delta_max = self.upper_limits[env_ids] - self.default_joint_pos[env_ids] + # delta_min = self.lower_limits[env_ids] - self.default_joint_pos[env_ids] + + # dof_pos_noise = sample_uniform(-1.0, 1.0, (len(env_ids), self.num_franka_dofs), device=self.device) + # rand_delta = delta_min + (delta_max - delta_min) * 0.5 * dof_pos_noise + # dof_pos = self.default_joint_pos[env_ids] + self.cfg.reset_dof_pos_noise * rand_delta + + # dof_vel_noise = sample_uniform(-1.0, 1.0, (len(env_ids), self.num_franka_dofs), 
device=self.device) + # dof_vel = self.default_joint_vel[env_ids] + self.cfg.reset_dof_vel_noise * dof_vel_noise + + # # print("dof_pos", dof_pos[env_ids,0:3]) + # dof_pos[env_ids,0:3]=torch.tensor([0.0, 0.0, 0.0], device=self.device) + # dof_vel[env_ids,0:3]=torch.tensor([0.0, 0.0, 0.0], device=self.device) + + # Reset franka - get default joint positions + dof_pos = self.default_joint_pos[env_ids] + self.franka_prev_targets[env_ids] = dof_pos + self.franka_curr_targets[env_ids] = dof_pos + self.franka_dof_targets[env_ids] = dof_pos + + # Get default root state and modify it + default_root_state = self.mobilefranka.data.default_root_state[env_ids].clone() + + # Add environment origins for proper placement in each env + default_root_state[:, :2] += self.scene.env_origins[env_ids, :2] # Only x,y + + # Important: Set Z position to proper ground contact height + # This depends on your robot's geometry - adjust as needed + # robot_base_height = 0.05 # Height of robot base from ground + # default_root_state[:, 2] = robot_base_height # Set appropriate Z height + + # Write the corrected pose and velocity + self.mobilefranka.write_root_pose_to_sim(default_root_state[:, :7], env_ids) + self.mobilefranka.write_root_velocity_to_sim(default_root_state[:, 7:], env_ids) + + # Write joint states (positions first, then velocities) + self.mobilefranka.write_joint_state_to_sim( + self.default_joint_pos[env_ids], self.default_joint_vel[env_ids], env_ids=env_ids + ) + self.mobilefranka.reset(env_ids) + + # No need for long sleep - robots should be stable on ground + # Compute intermediate values for observation + self._compute_intermediate_values() + + def _reset_target_pose(self, env_ids): + # Reset goal position + rand_pos = sample_uniform(0.0, 1.0, (len(env_ids), 3), device=self.device) + pos = torch.zeros((len(env_ids), 3), device=self.device) + pos[:, 0] = rand_pos[:, 0] * (self.x_lim[1] - self.x_lim[0]) + self.x_lim[0] + pos[:, 1] = rand_pos[:, 1] * (self.y_lim[1] - self.y_lim[0]) + self.y_lim[0] + pos[:, 2] = rand_pos[:, 2] * (self.z_lim[1] - self.z_lim[0]) + self.z_lim[0] + self.target_positions[env_ids] = pos + + # Reset goal rotation + rot = torch.zeros((len(env_ids), 4), dtype=torch.float, device=self.device) + rot[:, 0] = 1.0 + self.goal_rot[env_ids] = rot + goal_pos = self.target_positions + self.scene.env_origins + self.goal_markers.visualize(goal_pos, self.goal_rot) + + def _compute_intermediate_values(self): + self.finger_body_pos = self.mobilefranka.data.body_pos_w[:, self.finger_bodies] + self.finger_body_pos -= self.scene.env_origins.repeat((1, self.num_finger)).reshape( + self.num_envs, self.num_finger, 3 + ) + + self.joint_pos = self.mobilefranka.data.joint_pos + self.joint_vel = self.mobilefranka.data.joint_vel + + +@torch.jit.script +def scale(x, lower, upper): + return 0.5 * (x + 1.0) * (upper - lower) + lower + + +@torch.jit.script +def unscale(x, lower, upper): + return (2.0 * x - upper - lower) / (upper - lower) diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/mobile_franka/mobile_franka_marl_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/direct/mobile_franka/mobile_franka_marl_cfg.py new file mode 100644 index 00000000000..5e37460f456 --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/mobile_franka/mobile_franka_marl_cfg.py @@ -0,0 +1,148 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers. +# All rights reserved.
+# +# SPDX-License-Identifier: BSD-3-Clause + +from isaaclab_assets.robots.mobile_franka import MOBILE_FRANKA_CFG +from isaaclab_assets.robots.ridgeback_franka import RIDGEBACK_FRANKA_PANDA_CFG + +import isaaclab.sim as sim_utils +from isaaclab.assets import ArticulationCfg, RigidObjectCfg +from isaaclab.envs import DirectMARLEnvCfg +from isaaclab.markers import VisualizationMarkersCfg +from isaaclab.scene import InteractiveSceneCfg +from isaaclab.sim import PhysxCfg, SimulationCfg +from isaaclab.sim.spawners.materials.physics_materials_cfg import RigidBodyMaterialCfg + +# from isaaclab.managers import EventTermCfg as EventTerm, SceneEntityCfg +from isaaclab.utils import configclass + + +@configclass +class MobileFrankaMARLCfg(DirectMARLEnvCfg): + # Environment settings + decimation = 2 + episode_length_s = 500 / (120 / 2) # Adjusted for control frequency + possible_agents = ["franka", "base"] + action_spaces = {"franka": 7, "base": 3} + observation_spaces = {"franka": 40, "base": 40} + state_space = -1 + + # Simulation settings + sim: SimulationCfg = SimulationCfg( + dt=1 / 120, + render_interval=decimation, + gravity=(0.0, 0.0, -9.81), + physics_material=RigidBodyMaterialCfg( + static_friction=1.0, + dynamic_friction=1.0, + ), + physx=PhysxCfg( + # solver_position_iteration_count=12, + # solver_velocity_iteration_count=6, + # contact_offset=0.005, + # rest_offset=0.0, + bounce_threshold_velocity=0.2, + # enable_sleeping=True, + # max_depenetration_velocity=1000.0, + ), + ) + + # Robot configuration + mobile_franka_cfg: ArticulationCfg = MOBILE_FRANKA_CFG.replace(prim_path="/World/envs/env_.*/MobileFranka").replace( + init_state=ArticulationCfg.InitialStateCfg( + pos=(0.0, 0.0, 0.0), + # rot=(0.7071068, 0.0, 0.7071068, 0.0), + # rot=(1.0, 0.0, 0.0, 0.0), + joint_pos={".*": 0.0}, + ), + # solver_position_iteration_count=12, + # solver_velocity_iteration_count=1, + # enable_self_collisions=False, + # enable_gyroscopic_forces=True, + ) + + actuated_joint_names = [ + "panda_joint1", + "panda_joint2", + "panda_joint3", + "panda_joint4", + "panda_joint5", + "panda_joint6", + "panda_joint7", + ] + + mobile_base_names = [ + "dummy_base_prismatic_x_joint", + "dummy_base_prismatic_y_joint", + "dummy_base_revolute_z_joint", + ] + + xy_base_names = [ + "dummy_base_prismatic_x_joint", + "dummy_base_prismatic_y_joint", + ] + + finger_joint_names = [ + "panda_finger_joint1", + "panda_finger_joint2", + ] + + finger_body_names = [ + "panda_leftfinger", + # "panda_finger2", + ] + + # object configuration + # target_cube_cfg: RigidObjectCfg = RigidObjectCfg( + # prim_path="/World/envs/env_.*/object", + # spawn=sim_utils.SphereCfg( + # radius=0.1, + # visual_material=sim_utils.PreviewSurfaceCfg(diffuse_color=(1.0, 0.0, 0.0)), + # physics_material=sim_utils.RigidBodyMaterialCfg(static_friction=0.7), + # rigid_props=sim_utils.RigidBodyPropertiesCfg( + # kinematic_enabled=False, + # disable_gravity=False, + # enable_gyroscopic_forces=True, + # solver_position_iteration_count=8, + # solver_velocity_iteration_count=0, + # sleep_threshold=0.005, + # stabilization_threshold=0.0025, + # max_depenetration_velocity=1000.0, + # ), + # collision_props=sim_utils.CollisionPropertiesCfg(), + # mass_props=sim_utils.MassPropertiesCfg(density=500.0), + # ), + # init_state=RigidObjectCfg.InitialStateCfg(pos=(2.0, 0.0, 0.5), rot=(1.0, 0.0, 0.0, 0.0)), + # ) + # goal object + goal_object_cfg: VisualizationMarkersCfg = VisualizationMarkersCfg( + prim_path="/Visuals/goal_marker", + markers={ + "goal": sim_utils.SphereCfg( + 
radius=0.1, + visual_material=sim_utils.PreviewSurfaceCfg(diffuse_color=(1.0, 0.0, 0.0)), + ), + }, + ) + + # Scene settings + scene: InteractiveSceneCfg = InteractiveSceneCfg(num_envs=512, env_spacing=3.0, replicate_physics=True) + + action_scale = 7.5 + dof_velocity_scale = 0.1 + max_base_pos = 3.0 + + # Reward scales + dist_reward_scale = 20 + rot_reward_scale = 0.5 + around_handle_reward_scale = 10.0 + open_reward_scale = 7.5 + finger_dist_reward_scale = 100.0 + action_penalty_scale = 0.01 + finger_close_reward_scale = 10.0 + act_moving_average = 1.0 + # Reset noise + reset_position_noise = 0.0 + reset_dof_pos_noise = 0.0 + reset_dof_vel_noise = 0.0 diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand_over/agents/skrl_mappo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand_over/agents/skrl_mappo_cfg.yaml index f67cc31b249..634c62869c7 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand_over/agents/skrl_mappo_cfg.yaml +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand_over/agents/skrl_mappo_cfg.yaml @@ -1,4 +1,4 @@ -seed: 42 +seed: 500 # Models are instantiated using skrl's model instantiator utility diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand_over/shadow_hand_over_env.py b/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand_over/shadow_hand_over_env.py index 6e01214f015..e313f93a0f9 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand_over/shadow_hand_over_env.py +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand_over/shadow_hand_over_env.py @@ -408,6 +408,17 @@ def _compute_intermediate_values(self): @torch.jit.script def scale(x, lower, upper): + """ + Scales the input tensor x from the range [-1, 1] to the range [lower, upper]. + + Args: + x (torch.Tensor): Input tensor to be scaled. + lower (torch.Tensor): Lower bound of the target range. + upper (torch.Tensor): Upper bound of the target range. + + Returns: + torch.Tensor: Scaled tensor. + """ return 0.5 * (x + 1.0) * (upper - lower) + lower @@ -418,6 +429,18 @@ def unscale(x, lower, upper): @torch.jit.script def randomize_rotation(rand0, rand1, x_unit_tensor, y_unit_tensor): + """ + Randomizes the rotation based on random values and unit tensors. + + Args: + rand0 (torch.Tensor): Random values for the first rotation axis. + rand1 (torch.Tensor): Random values for the second rotation axis. + x_unit_tensor (torch.Tensor): Unit tensor for the x-axis. + y_unit_tensor (torch.Tensor): Unit tensor for the y-axis. + + Returns: + torch.Tensor: The resulting quaternion after applying the random rotations. + """ return quat_mul( quat_from_angle_axis(rand0 * np.pi, x_unit_tensor), quat_from_angle_axis(rand1 * np.pi, y_unit_tensor) ) diff --git a/source/isaaclab_tasks/isaaclab_tasks/utils/parse_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/utils/parse_cfg.py index 93fa7547b1f..262efa2214d 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/utils/parse_cfg.py +++ b/source/isaaclab_tasks/isaaclab_tasks/utils/parse_cfg.py @@ -51,6 +51,7 @@ def load_cfg_from_registry(task_name: str, entry_point_key: str) -> dict | objec Raises: ValueError: If the entry point key is not available in the gym registry for the task. """ + # obtain the configuration entry point cfg_entry_point = gym.spec(task_name).kwargs.get(entry_point_key) # check if entry point exists
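
The cart-double-pendulum change above recasts each agent's reward as R_agent = alpha * P_agent_0 + beta * Delta_P_agent: an individual base term (alive/termination/velocity penalties) plus a cooperative term driven by the shared pole and pendulum state, with the component means also written to extras["log"]. Below is a minimal, self-contained sketch of that composition for the "cart" agent only; the alpha/beta weights and the sample tensors are illustrative values, not taken from the environment code.

import torch

# Illustrative weights and per-environment sample values (assumed for this sketch).
alpha, beta = 1.0, 1.0
eps_alive, eps_terminated, eps_cart_vel = 1.0, -2.0, -0.01

reset_terminated = torch.tensor([0.0, 1.0])  # env 0 alive, env 1 terminated
cart_vel = torch.tensor([0.5, -1.0])         # cart joint velocity per env

# Individual ("base") term for the cart agent, mirroring P_cart_0 in compute_rewards.
P_cart_0 = (
    eps_alive * (1.0 - reset_terminated)
    + eps_terminated * reset_terminated
    + eps_cart_vel * torch.abs(cart_vel)
)

# Cooperative term; in the environment this is computed from the pole state.
Delta_P_cart = torch.tensor([-0.02, -0.10])

# Final per-agent reward combines the individual and cooperative parts.
R_cart = alpha * P_cart_0 + beta * Delta_P_cart
print(R_cart)  # tensor([ 0.9750, -2.1100])

With the trainer option environment_info: log in the skrl configs above, the logged P_cart_0, P_pendulum_0, and Delta_P terms should let the individual and cooperative contributions be tracked separately during training.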