diff --git a/.vscode/tasks.json b/.vscode/tasks.json index f7896a038b4..967f8f9f3e4 100644 --- a/.vscode/tasks.json +++ b/.vscode/tasks.json @@ -4,7 +4,6 @@ "version": "2.0.0", "tasks": [ { - // setup python env "label": "setup_python_env", "type": "shell", "linux": { @@ -15,7 +14,6 @@ } }, { - // run formatter "label": "run_formatter", "type": "shell", "linux": { @@ -23,7 +21,8 @@ }, "windows": { "command": "${workspaceFolder}/isaaclab.bat --format" - } + }, + "problemMatcher": [] } ] } diff --git a/=1.4.2 b/=1.4.2 new file mode 100644 index 00000000000..9a4925bdea1 --- /dev/null +++ b/=1.4.2 @@ -0,0 +1,27 @@ +Requirement already satisfied: skrl in /home/xuezhi/miniconda3/envs/env_isaaclab/lib/python3.10/site-packages (1.4.1) +Collecting skrl + Downloading skrl-1.4.2-py3-none-any.whl.metadata (4.8 kB) +Requirement already satisfied: gymnasium in /home/xuezhi/miniconda3/envs/env_isaaclab/lib/python3.10/site-packages (from skrl) (1.0.0) +Requirement already satisfied: packaging in /home/xuezhi/.local/lib/python3.10/site-packages (from skrl) (24.2) +Requirement already satisfied: tensorboard in /home/xuezhi/miniconda3/envs/env_isaaclab/lib/python3.10/site-packages (from skrl) (2.18.0) +Requirement already satisfied: tqdm in /home/xuezhi/miniconda3/envs/env_isaaclab/lib/python3.10/site-packages (from skrl) (4.67.1) +Requirement already satisfied: numpy>=1.21.0 in /home/xuezhi/miniconda3/envs/env_isaaclab/lib/python3.10/site-packages (from gymnasium->skrl) (1.26.4) +Requirement already satisfied: cloudpickle>=1.2.0 in /home/xuezhi/miniconda3/envs/env_isaaclab/lib/python3.10/site-packages (from gymnasium->skrl) (3.1.1) +Requirement already satisfied: typing-extensions>=4.3.0 in /home/xuezhi/miniconda3/envs/env_isaaclab/lib/python3.10/site-packages (from gymnasium->skrl) (4.12.2) +Requirement already satisfied: farama-notifications>=0.0.1 in /home/xuezhi/miniconda3/envs/env_isaaclab/lib/python3.10/site-packages (from gymnasium->skrl) (0.0.4) +Requirement already satisfied: absl-py>=0.4 in /home/xuezhi/miniconda3/envs/env_isaaclab/lib/python3.10/site-packages (from tensorboard->skrl) (2.1.0) +Requirement already satisfied: grpcio>=1.48.2 in /home/xuezhi/miniconda3/envs/env_isaaclab/lib/python3.10/site-packages (from tensorboard->skrl) (1.70.0) +Requirement already satisfied: markdown>=2.6.8 in /home/xuezhi/miniconda3/envs/env_isaaclab/lib/python3.10/site-packages (from tensorboard->skrl) (3.7) +Requirement already satisfied: protobuf!=4.24.0,>=3.19.6 in /home/xuezhi/miniconda3/envs/env_isaaclab/lib/python3.10/site-packages (from tensorboard->skrl) (3.20.3) +Requirement already satisfied: setuptools>=41.0.0 in /home/xuezhi/miniconda3/envs/env_isaaclab/lib/python3.10/site-packages (from tensorboard->skrl) (75.8.0) +Requirement already satisfied: six>1.9 in /home/xuezhi/miniconda3/envs/env_isaaclab/lib/python3.10/site-packages (from tensorboard->skrl) (1.17.0) +Requirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in /home/xuezhi/miniconda3/envs/env_isaaclab/lib/python3.10/site-packages (from tensorboard->skrl) (0.7.2) +Requirement already satisfied: werkzeug>=1.0.1 in /home/xuezhi/miniconda3/envs/env_isaaclab/lib/python3.10/site-packages (from tensorboard->skrl) (3.1.3) +Requirement already satisfied: MarkupSafe>=2.1.1 in /home/xuezhi/miniconda3/envs/env_isaaclab/lib/python3.10/site-packages (from werkzeug>=1.0.1->tensorboard->skrl) (3.0.2) +Downloading skrl-1.4.2-py3-none-any.whl (403 kB) +Installing collected packages: skrl + Attempting uninstall: skrl + Found existing 
installation: skrl 1.4.1 + Uninstalling skrl-1.4.1: + Successfully uninstalled skrl-1.4.1 +Successfully installed skrl-1.4.2 diff --git a/materials/textures/joint1.png b/materials/textures/joint1.png new file mode 100644 index 00000000000..4d46d2ae1bd Binary files /dev/null and b/materials/textures/joint1.png differ diff --git a/materials/textures/joint2.png b/materials/textures/joint2.png new file mode 100644 index 00000000000..ba2700b3522 Binary files /dev/null and b/materials/textures/joint2.png differ diff --git a/materials/textures/joint3.png b/materials/textures/joint3.png new file mode 100644 index 00000000000..ba2700b3522 Binary files /dev/null and b/materials/textures/joint3.png differ diff --git a/materials/textures/joint4.png b/materials/textures/joint4.png new file mode 100644 index 00000000000..ba2700b3522 Binary files /dev/null and b/materials/textures/joint4.png differ diff --git a/materials/textures/joint6.png b/materials/textures/joint6.png new file mode 100644 index 00000000000..406e3f77687 Binary files /dev/null and b/materials/textures/joint6.png differ diff --git a/materials/textures/joint7.png b/materials/textures/joint7.png new file mode 100644 index 00000000000..ed421fafe0d Binary files /dev/null and b/materials/textures/joint7.png differ diff --git a/scripts/demos/arms.py b/scripts/demos/arms.py index 7d80ae54efb..0a0e270ab2c 100644 --- a/scripts/demos/arms.py +++ b/scripts/demos/arms.py @@ -52,6 +52,7 @@ KINOVA_JACO2_N6S300_CFG, KINOVA_GEN3_N7_CFG, SAWYER_CFG, + RIDGEBACK_FRANKA_PANDA_CFG, ) # isort: on @@ -83,7 +84,7 @@ def design_scene() -> tuple[dict, list[list[float]]]: # Create separate groups called "Origin1", "Origin2", "Origin3" # Each group will have a mount and a robot on top of it - origins = define_origins(num_origins=6, spacing=2.0) + origins = define_origins(num_origins=7, spacing=2.0) # Origin 1 with Franka Panda prim_utils.create_prim("/World/Origin1", "Xform", translation=origins[0]) @@ -149,6 +150,14 @@ def design_scene() -> tuple[dict, list[list[float]]]: sawyer_arm_cfg.init_state.pos = (0.0, 0.0, 1.03) sawyer = Articulation(cfg=sawyer_arm_cfg) + # Origin 5 with mobile franka + prim_utils.create_prim("/World/Origin7", "Xform", translation=origins[6]) + # -- Robot + mobileFranka_cfg = RIDGEBACK_FRANKA_PANDA_CFG.replace(prim_path="/World/Origin7/Robot") + mobileFranka_cfg.init_state.pos = (0.0, 0.0, 0.0) + mobileFranka_cfg.init_state.rot = (0.7071068, 0.0, 0.7071068, 0.0) + mobileFranka = Articulation(cfg=mobileFranka_cfg) + # return the scene information scene_entities = { "franka_panda": franka_panda, @@ -157,6 +166,7 @@ def design_scene() -> tuple[dict, list[list[float]]]: "kinova_j2n6s300": kinova_j2n6s300, "kinova_gen3n7": kinova_gen3n7, "sawyer": sawyer, + "mobile_franka": mobileFranka, } return scene_entities, origins diff --git a/source/isaaclab_assets/isaaclab_assets/robots/.vscode/extensions.json b/source/isaaclab_assets/isaaclab_assets/robots/.vscode/extensions.json new file mode 100644 index 00000000000..86e786b31f9 --- /dev/null +++ b/source/isaaclab_assets/isaaclab_assets/robots/.vscode/extensions.json @@ -0,0 +1,8 @@ +{ + // See http://go.microsoft.com/fwlink/?LinkId=827846 + // for the documentation about the extensions.json format + "recommendations": [ + "ms-python.python", + "ms-python.vscode-pylance" + ], +} diff --git a/source/isaaclab_assets/isaaclab_assets/robots/.vscode/launch.json b/source/isaaclab_assets/isaaclab_assets/robots/.vscode/launch.json new file mode 100644 index 00000000000..8a22f73af48 --- /dev/null +++ 
b/source/isaaclab_assets/isaaclab_assets/robots/.vscode/launch.json @@ -0,0 +1,25 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "name": "Python: Attach (windows-x86_64/linux-x86_64)", + "type": "python", + "request": "attach", + "pathMappings": [ + { + "localRoot": "${workspaceFolder}", + "remoteRoot": "${workspaceFolder}" + }], + "port": 3000, + "host": "127.0.0.1", + "subProcess": true, + "runtimeArgs": [ + "--preserve-symlinks", + "--preserve-symlinks-main" + ] + } + ] +} diff --git a/source/isaaclab_assets/isaaclab_assets/robots/.vscode/settings.json b/source/isaaclab_assets/isaaclab_assets/robots/.vscode/settings.json new file mode 100644 index 00000000000..63a0b3e70ae --- /dev/null +++ b/source/isaaclab_assets/isaaclab_assets/robots/.vscode/settings.json @@ -0,0 +1,10 @@ +{ + "editor.rulers": [120], + "python.languageServer": "Pylance", + "python.formatting.provider": "black", + "python.formatting.blackArgs": ["--line-length", "120"], + "python.linting.enabled": true, + "python.linting.pylintEnabled": false, + "python.linting.flake8Enabled": true, + "python.linting.flake8Args": ["--max-line-length=120"] +} diff --git a/source/isaaclab_assets/isaaclab_assets/robots/__init__.py b/source/isaaclab_assets/isaaclab_assets/robots/__init__.py index c23cf8e3ce2..f7041fd1add 100644 --- a/source/isaaclab_assets/isaaclab_assets/robots/__init__.py +++ b/source/isaaclab_assets/isaaclab_assets/robots/__init__.py @@ -17,6 +17,7 @@ from .humanoid import * from .humanoid_28 import * from .kinova import * +from .mobile_franka import * from .quadcopter import * from .ridgeback_franka import * from .sawyer import * diff --git a/source/isaaclab_assets/isaaclab_assets/robots/mobile_franka.py b/source/isaaclab_assets/isaaclab_assets/robots/mobile_franka.py new file mode 100644 index 00000000000..34f7c74e7ed --- /dev/null +++ b/source/isaaclab_assets/isaaclab_assets/robots/mobile_franka.py @@ -0,0 +1,82 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers. +# All rights reserved. 
+# +# SPDX-License-Identifier: BSD-3-Clause + +import isaaclab.sim as sim_utils +from isaaclab.actuators import ImplicitActuatorCfg +from isaaclab.assets import ArticulationCfg +from isaaclab.utils.assets import ISAACLAB_NUCLEUS_DIR + +## +# Configuration +## + +MOBILE_FRANKA_CFG = ArticulationCfg( + spawn=sim_utils.UsdFileCfg( + usd_path=f"/home/xuezhi/Downloads/ridgeback_franka6_instanceable.usd", + rigid_props=sim_utils.RigidBodyPropertiesCfg( + rigid_body_enabled=True, + max_linear_velocity=1000.0, + max_angular_velocity=1000.0, + max_depenetration_velocity=100.0, + enable_gyroscopic_forces=True, + ), + articulation_props=sim_utils.ArticulationRootPropertiesCfg( + enabled_self_collisions=False, + solver_position_iteration_count=12, + solver_velocity_iteration_count=1, + sleep_threshold=0.005, + stabilization_threshold=0.001, + ), + ), + init_state=ArticulationCfg.InitialStateCfg( + joint_pos={ + # base + "dummy_base_prismatic_x_joint": 0.0, + "dummy_base_prismatic_y_joint": 0.0, + "dummy_base_revolute_z_joint": 0.0, + # franka_panda + "panda_joint1": 0.0, + "panda_joint2": -1.0, + "panda_joint3": 0.0, + "panda_joint4": -2.2, + "panda_joint5": 0.0, + "panda_joint6": 2.4, + "panda_joint7": 0.8, + "panda_finger_joint1": 0.035, + "panda_finger_joint2": 0.035, + }, + joint_vel={".*": 0.0}, + ), + actuators={ + "arm_actuators": ImplicitActuatorCfg( + joint_names_expr=["panda_joint[1-7]"], + effort_limit=87.0, + velocity_limit=2.175, + stiffness=400.0, + damping=80.0, + ), + "gripper_actuators": ImplicitActuatorCfg( + joint_names_expr=["panda_finger_joint1", "panda_finger_joint2"], + effort_limit=200.0, + velocity_limit=0.2, + stiffness=100000.0, + damping=1000.0, + ), + "base_actuators": ImplicitActuatorCfg( + joint_names_expr=["dummy_base_prismatic_x_joint", "dummy_base_prismatic_y_joint"], + effort_limit=1000.0, + velocity_limit=100.0, # Assuming position control + stiffness=999999986991104.0, + damping=100000.0, + ), + "base_rot_actuators": ImplicitActuatorCfg( + joint_names_expr=["dummy_base_revolute_z_joint"], + effort_limit=1000.0, + velocity_limit=100.0, # Assuming position control + stiffness=17453292716032.0, + damping=1745.32922, + ), + }, +) diff --git a/source/isaaclab_assets/isaaclab_assets/robots/ridgeback_franka.py b/source/isaaclab_assets/isaaclab_assets/robots/ridgeback_franka.py index 0e1a4a8415b..9bd7e8a8fec 100644 --- a/source/isaaclab_assets/isaaclab_assets/robots/ridgeback_franka.py +++ b/source/isaaclab_assets/isaaclab_assets/robots/ridgeback_franka.py @@ -24,14 +24,15 @@ RIDGEBACK_FRANKA_PANDA_CFG = ArticulationCfg( spawn=sim_utils.UsdFileCfg( usd_path=f"{ISAAC_NUCLEUS_DIR}/Robots/Clearpath/RidgebackFranka/ridgeback_franka.usd", + # usd_path=f"/home/xuezhi/Downloads/ridgeback_franka6_instanceable.usd", articulation_props=sim_utils.ArticulationRootPropertiesCfg(enabled_self_collisions=False), activate_contact_sensors=False, ), init_state=ArticulationCfg.InitialStateCfg( joint_pos={ # base - "dummy_base_prismatic_y_joint": 0.0, "dummy_base_prismatic_x_joint": 0.0, + "dummy_base_prismatic_y_joint": 0.0, "dummy_base_revolute_z_joint": 0.0, # franka arm "panda_joint1": 0.0, diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/cart_double_pendulum/agents/skrl_mappo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/direct/cart_double_pendulum/agents/skrl_mappo_cfg.yaml index dcd794f57a5..479c40fed7c 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/direct/cart_double_pendulum/agents/skrl_mappo_cfg.yaml +++ 
b/source/isaaclab_tasks/isaaclab_tasks/direct/cart_double_pendulum/agents/skrl_mappo_cfg.yaml @@ -1,5 +1,4 @@ -seed: 42 - +seed: 500 # Models are instantiated using skrl's model instantiator utility # https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html @@ -78,5 +77,5 @@ agent: # https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html trainer: class: SequentialTrainer - timesteps: 4800 + timesteps: 6000 environment_info: log diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/cart_double_pendulum/cart_double_pendulum_env.py b/source/isaaclab_tasks/isaaclab_tasks/direct/cart_double_pendulum/cart_double_pendulum_env.py index 5d956e5c073..b94e968b8d0 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/direct/cart_double_pendulum/cart_double_pendulum_env.py +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/cart_double_pendulum/cart_double_pendulum_env.py @@ -53,14 +53,14 @@ class CartDoublePendulumEnvCfg(DirectMARLEnvCfg): pendulum_action_scale = 50.0 # [Nm] # reward scales - rew_scale_alive = 1.0 - rew_scale_terminated = -2.0 - rew_scale_cart_pos = 0 - rew_scale_cart_vel = -0.01 - rew_scale_pole_pos = -1.0 - rew_scale_pole_vel = -0.01 - rew_scale_pendulum_pos = -1.0 - rew_scale_pendulum_vel = -0.01 + eps_alive = 1.0 + eps_terminated = -2.0 + eps_cart_pos = 0 + eps_cart_vel = -0.01 + eps_pole_pos = -1.0 + eps_pole_vel = -0.01 + eps_pendulum_pos = -1.0 + eps_pendulum_vel = -0.01 class CartDoublePendulumEnv(DirectMARLEnv): @@ -124,23 +124,29 @@ def _get_observations(self) -> dict[str, torch.Tensor]: return observations def _get_rewards(self) -> dict[str, torch.Tensor]: - total_reward = compute_rewards( - self.cfg.rew_scale_alive, - self.cfg.rew_scale_terminated, - self.cfg.rew_scale_cart_pos, - self.cfg.rew_scale_cart_vel, - self.cfg.rew_scale_pole_pos, - self.cfg.rew_scale_pole_vel, - self.cfg.rew_scale_pendulum_pos, - self.cfg.rew_scale_pendulum_vel, - self.joint_pos[:, self._cart_dof_idx[0]], - self.joint_vel[:, self._cart_dof_idx[0]], - normalize_angle(self.joint_pos[:, self._pole_dof_idx[0]]), - self.joint_vel[:, self._pole_dof_idx[0]], - normalize_angle(self.joint_pos[:, self._pendulum_dof_idx[0]]), - self.joint_vel[:, self._pendulum_dof_idx[0]], - math.prod(self.terminated_dict.values()), + P_cart_0, P_pendulum_0, Delta_P_cart, Delta_P_pendulum, total_reward = compute_rewards( + 1.0, # alpha + 1.0, # beta + self.cfg.eps_alive, # eps_alive + self.cfg.eps_terminated, # eps_terminated + self.cfg.eps_cart_vel, # eps_cart_vel + self.cfg.eps_pole_pos, # eps_pole_pos + self.cfg.eps_pole_vel, # eps_pole_vel + self.cfg.eps_pendulum_pos, # eps_pendulum_pos + self.cfg.eps_pendulum_vel, # eps_pendulum_vel + self.joint_vel[:, self._cart_dof_idx[0]], # cart_vel + normalize_angle(self.joint_pos[:, self._pole_dof_idx[0]]), # pole_pos + self.joint_vel[:, self._pole_dof_idx[0]], # pole_vel + normalize_angle(self.joint_pos[:, self._pendulum_dof_idx[0]]), # pendulum_pos + self.joint_vel[:, self._pendulum_dof_idx[0]], # pendulum_vel + math.prod(self.terminated_dict.values()), # reset_terminated ) + if "log" not in self.extras: + self.extras["log"] = dict() + self.extras["log"]["P_cart_0"] = P_cart_0.mean() + self.extras["log"]["P_pendulum_0"] = P_pendulum_0.mean() + self.extras["log"]["Delta_P_cart"] = Delta_P_cart.mean() + self.extras["log"]["Delta_P_pendulum"] = Delta_P_pendulum.mean() return total_reward def _get_dones(self) -> tuple[dict[str, torch.Tensor], dict[str, torch.Tensor]]: @@ -193,15 +199,15 @@ def normalize_angle(angle): @torch.jit.script def compute_rewards( - 
rew_scale_alive: float, - rew_scale_terminated: float, - rew_scale_cart_pos: float, - rew_scale_cart_vel: float, - rew_scale_pole_pos: float, - rew_scale_pole_vel: float, - rew_scale_pendulum_pos: float, - rew_scale_pendulum_vel: float, - cart_pos: torch.Tensor, + alpha: float, + beta: float, + eps_alive: float, + eps_terminated: float, + eps_cart_vel: float, + eps_pole_pos: float, + eps_pole_vel: float, + eps_pendulum_pos: float, + eps_pendulum_vel: float, cart_vel: torch.Tensor, pole_pos: torch.Tensor, pole_vel: torch.Tensor, @@ -209,18 +215,28 @@ def compute_rewards( pendulum_vel: torch.Tensor, reset_terminated: torch.Tensor, ): - rew_alive = rew_scale_alive * (1.0 - reset_terminated.float()) - rew_termination = rew_scale_terminated * reset_terminated.float() - rew_pole_pos = rew_scale_pole_pos * torch.sum(torch.square(pole_pos).unsqueeze(dim=1), dim=-1) - rew_pendulum_pos = rew_scale_pendulum_pos * torch.sum( - torch.square(pole_pos + pendulum_pos).unsqueeze(dim=1), dim=-1 + # Base reward components + P_cart_0 = ( + eps_alive * (1.0 - reset_terminated.float()) + + eps_terminated * reset_terminated.float() + + eps_cart_vel * torch.sum(torch.abs(cart_vel).unsqueeze(dim=1), dim=-1) ) - rew_cart_vel = rew_scale_cart_vel * torch.sum(torch.abs(cart_vel).unsqueeze(dim=1), dim=-1) - rew_pole_vel = rew_scale_pole_vel * torch.sum(torch.abs(pole_vel).unsqueeze(dim=1), dim=-1) - rew_pendulum_vel = rew_scale_pendulum_vel * torch.sum(torch.abs(pendulum_vel).unsqueeze(dim=1), dim=-1) - - total_reward = { - "cart": rew_alive + rew_termination + rew_pole_pos + rew_cart_vel + rew_pole_vel, - "pendulum": rew_alive + rew_termination + rew_pendulum_pos + rew_pendulum_vel, - } - return total_reward + + P_pendulum_0 = eps_alive * (1.0 - reset_terminated.float()) + eps_terminated * reset_terminated.float() + + # Cooperative (mutualistic) terms + Delta_P_cart = eps_pole_pos * torch.sum(torch.square(pole_pos).unsqueeze(dim=1), dim=-1) + eps_pole_vel * torch.sum( + torch.abs(pole_vel).unsqueeze(dim=1), dim=-1 + ) + + Delta_P_pendulum = eps_pendulum_pos * torch.sum( + torch.square(pole_pos + pendulum_pos).unsqueeze(dim=1), dim=-1 + ) + eps_pendulum_vel * torch.sum(torch.abs(pendulum_vel).unsqueeze(dim=1), dim=-1) + + # Final rewards incorporating mutualistic principles + R_cart = alpha * P_cart_0 + beta * Delta_P_cart + R_pendulum = alpha * P_pendulum_0 + beta * Delta_P_pendulum + + total_reward = {"cart": R_cart, "pendulum": R_pendulum} + + return P_cart_0, P_pendulum_0, Delta_P_cart, Delta_P_pendulum, total_reward diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/mobile_franka/__init__.py b/source/isaaclab_tasks/isaaclab_tasks/direct/mobile_franka/__init__.py new file mode 100644 index 00000000000..019c967d5ef --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/mobile_franka/__init__.py @@ -0,0 +1,26 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers. +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +""" +Mobile Franka MARL environment. +""" + +import gymnasium as gym + +from . import agents + +## +# Register Gym environments.
+## + +gym.register( + id="MobileFrankaMARL", + entry_point=f"{__name__}.mobile_franka_marl:MobileFrankaEnv", + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": f"{__name__}.mobile_franka_marl_cfg:MobileFrankaMARLCfg", + "skrl_mappo_cfg_entry_point": f"{agents.__name__}:skrl_mappo_cfg.yaml", + }, +) diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/mobile_franka/agents/__init__.py b/source/isaaclab_tasks/isaaclab_tasks/direct/mobile_franka/agents/__init__.py new file mode 100644 index 00000000000..e75ca2bc3f9 --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/mobile_franka/agents/__init__.py @@ -0,0 +1,4 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers. +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/mobile_franka/agents/skrl_mappo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/direct/mobile_franka/agents/skrl_mappo_cfg.yaml new file mode 100644 index 00000000000..bca15cf352d --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/mobile_franka/agents/skrl_mappo_cfg.yaml @@ -0,0 +1,82 @@ +seed: 42 + +# Models are instantiated using skrl's model instantiator utility +# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html + +models: + separate: True + policy: # see gaussian_model parameters + class: GaussianMixin + clip_actions: False + clip_log_std: True + min_log_std: -20.0 + max_log_std: 2.0 + initial_log_std: 0.0 + network: + - name: net + input: STATES + layers: [512, 512, 256, 128] + activations: elu + output: ACTIONS + value: # see deterministic_model parameters + class: DeterministicMixin + clip_actions: False + network: + - name: net + input: STATES + layers: [512, 512, 256, 128] + activations: elu + output: ONE + + +# Rollout memory +# https://skrl.readthedocs.io/en/latest/api/memories/random.html +memory: + class: RandomMemory + memory_size: -1 # automatically determined (same as agent:rollouts) + + +# MAPPO agent configuration (field names are from MAPPO_DEFAULT_CONFIG) +# https://skrl.readthedocs.io/en/latest/api/multi_agents/mappo.html +agent: + class: MAPPO + rollouts: 16 + learning_epochs: 5 + mini_batches: 4 + discount_factor: 0.99 + lambda: 0.95 + learning_rate: 5.0e-04 + learning_rate_scheduler: KLAdaptiveLR + learning_rate_scheduler_kwargs: + kl_threshold: 0.016 + state_preprocessor: RunningStandardScaler + state_preprocessor_kwargs: null + shared_state_preprocessor: RunningStandardScaler + shared_state_preprocessor_kwargs: null + value_preprocessor: RunningStandardScaler + value_preprocessor_kwargs: null + random_timesteps: 0 + learning_starts: 0 + grad_norm_clip: 1.0 + ratio_clip: 0.2 + value_clip: 0.2 + clip_predicted_values: True + entropy_loss_scale: 0.0 + value_loss_scale: 2.0 + kl_threshold: 0.0 + rewards_shaper_scale: 1.0 + time_limit_bootstrap: False + # logging and checkpoint + experiment: + directory: "mobile_franka_mappo" + experiment_name: "" + write_interval: auto + checkpoint_interval: auto + + +# Sequential trainer +# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html +trainer: + class: SequentialTrainer + timesteps: 36000 + environment_info: log diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/mobile_franka/mobile_franka_marl.py b/source/isaaclab_tasks/isaaclab_tasks/direct/mobile_franka/mobile_franka_marl.py new file mode 100644 index 00000000000..8033ffcf846 --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/mobile_franka/mobile_franka_marl.py @@ -0,0 +1,353 @@ +# 
Copyright (c) 2022-2025, The Isaac Lab Project Developers. +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +# Isaac Lab 2.0.1 +from __future__ import annotations + +import numpy as np +import torch +from collections.abc import Sequence + +import isaaclab.sim as sim_utils +from isaaclab.assets import Articulation, RigidObject +from isaaclab.envs import DirectMARLEnv +from isaaclab.markers import VisualizationMarkers +from isaaclab.sim.spawners.from_files import GroundPlaneCfg, spawn_ground_plane +from isaaclab.utils.math import sample_uniform, saturate + +from .mobile_franka_marl_cfg import MobileFrankaMARLCfg + + +class MobileFrankaEnv(DirectMARLEnv): + cfg: MobileFrankaMARLCfg + + def __init__(self, cfg: MobileFrankaMARLCfg, render_mode: str | None = None, **kwargs): + super().__init__(cfg, render_mode, **kwargs) + + self.action_scale = 7.5 + # self.start_position_noise = 0.0 + # self.start_rotation_noise = 0.0 + # self.num_props = 4 + self.dof_vel_scale = 0.1 + self.dist_reward_scale = 2.0 + self.rot_reward_scale = 0.5 + self.around_handle_reward_scale = 10.0 + self.open_reward_scale = 7.5 + self.finger_dist_reward_scale = 100.0 + self.action_penalty_scale = 0.01 + self.finger_close_reward_scale = 10.0 + + # self.distX_offset = 0.04 + # self.control_frequency = 120.0/2 + # self.dt=1/self.control_frequency + self.num_franka_dofs = self.mobilefranka.num_joints + self._num_actions = 10 + + # buffers for franka targets + self.franka_dof_targets = torch.zeros( + (self.num_envs, self.num_franka_dofs), dtype=torch.float, device=self.device + ) + self.franka_prev_targets = torch.zeros( + (self.num_envs, self.num_franka_dofs), dtype=torch.float, device=self.device + ) + self.franka_curr_targets = torch.zeros( + (self.num_envs, self.num_franka_dofs), dtype=torch.float, device=self.device + ) + + # list of actuated joints + self.actuated_dof_indices = list() + for joint_name in cfg.actuated_joint_names: + self.actuated_dof_indices.append(self.mobilefranka.joint_names.index(joint_name)) + + # list of mobile base joints + self.actuated_mov_indices = list() + for joint_name in cfg.mobile_base_names: + self.actuated_mov_indices.append(self.mobilefranka.joint_names.index(joint_name)) + + # finger bodies + self.finger_bodies = list() + for body_name in self.cfg.finger_body_names: + self.finger_bodies.append(self.mobilefranka.body_names.index(body_name)) + self.finger_bodies.sort() + self.num_finger = len(self.finger_bodies) + + # xy base joints + self.xy_base_indices = list() + for joint_name in cfg.xy_base_names: + self.xy_base_indices.append(self.mobilefranka.joint_names.index(joint_name)) + + # set the ranges for the target randomization + self.x_lim = [-3, 3] + self.y_lim = [-3, 3] + self.z_lim = [0.2, 1.2] + + # joint limits + joint_pos_limits = self.mobilefranka.root_physx_view.get_dof_limits().to(self.device) + self.lower_limits = joint_pos_limits[..., 0] + self.upper_limits = joint_pos_limits[..., 1] + # print("lower_limits", self.lower_limits[1,self.actuated_mov_indices], "upper_limits", self.upper_limits[1,:]) + + self.target_positions = torch.zeros((self.num_envs, 3), dtype=torch.float, device=self.device) + self.target_positions[:, :] = torch.tensor([2.0, 0.0, 0.5], device=self.device) + self.goal_rot = torch.zeros((self.num_envs, 4), dtype=torch.float, device=self.device) + self.goal_rot[:, 0] = 1.0 + + # initialize goal marker + self.goal_markers = VisualizationMarkers(self.cfg.goal_object_cfg) + # Set the default joint positions for the mobile franka + 
self.mobilefranka.data.default_joint_pos[:, :] = torch.tensor( + [0.0, 0.0, 0.0, 0.0, -0.7856, 0.0, -2.356, 0.0, 1.572, 0.7854, 0.035, 0.035], device=self.device + ) # base_x, base_y, base_z, joint1-7, finger1-2 + self.mobilefranka.data.default_joint_vel[:, :] = torch.tensor( + [0.0] * self.num_franka_dofs, device=self.device + ) # base_x, base_y, base_z, joint1-7, finger1-2 (12) + self.default_joint_pos = self.mobilefranka.data.default_joint_pos + self.default_joint_vel = self.mobilefranka.data.default_joint_vel + + def _setup_scene(self): + # add MobileFranka and goal object + self.mobilefranka = Articulation(self.cfg.mobile_franka_cfg) + + # add ground plane + spawn_ground_plane(prim_path="/World/ground", cfg=GroundPlaneCfg()) + # clone and replicate (no need to filter for this environment) + self.scene.clone_environments(copy_from_source=False) + # add articulation to scene - we must register to scene to randomize with EventManager + self.scene.articulations["mobilefranka"] = self.mobilefranka + # self.scene.rigid_objects["target_cube"] = self.target_cube + # add lights + light_cfg = sim_utils.DomeLightCfg(intensity=2000.0, color=(0.75, 0.75, 0.75)) + light_cfg.func("/World/Light", light_cfg) + + def _pre_physics_step(self, actions: dict[str, torch.Tensor]) -> None: + self.actions = actions + + def _apply_action(self) -> None: + # print(f"Action franka shape: {self.actions['franka'].shape}, base shape: {self.actions['base'].shape}") + # joints + self.franka_curr_targets[:, self.actuated_dof_indices] = scale( + self.actions["franka"], + self.lower_limits[:, self.actuated_dof_indices], + self.upper_limits[:, self.actuated_dof_indices], + ) + self.franka_curr_targets[:, self.actuated_dof_indices] = ( + self.cfg.act_moving_average * self.franka_curr_targets[:, self.actuated_dof_indices] + + (1.0 - self.cfg.act_moving_average) * self.franka_prev_targets[:, self.actuated_dof_indices] + ) + self.franka_curr_targets[:, self.actuated_dof_indices] = saturate( + self.franka_curr_targets[:, self.actuated_dof_indices], + self.lower_limits[:, self.actuated_dof_indices], + self.upper_limits[:, self.actuated_dof_indices], + ) + + # Last 2 values for mobile base (x, y position) + self.franka_curr_targets[:, self.actuated_mov_indices] = scale( + self.actions["base"], + self.lower_limits[:, self.actuated_mov_indices], + self.upper_limits[:, self.actuated_mov_indices], + ) + self.franka_curr_targets[:, self.actuated_mov_indices] = ( + self.cfg.act_moving_average * self.franka_curr_targets[:, self.actuated_mov_indices] + + (1.0 - self.cfg.act_moving_average) * self.franka_prev_targets[:, self.actuated_mov_indices] + ) + self.franka_curr_targets[:, self.actuated_mov_indices] = saturate( + self.franka_curr_targets[:, self.actuated_mov_indices], + self.lower_limits[:, self.actuated_mov_indices], + self.upper_limits[:, self.actuated_mov_indices], + ) + + # save current targets + self.franka_curr_targets[:, self.actuated_dof_indices] = self.franka_curr_targets[:, self.actuated_dof_indices] + self.franka_curr_targets[:, self.actuated_mov_indices] = self.franka_curr_targets[:, self.actuated_mov_indices] + + # set targets + self.mobilefranka.set_joint_position_target( + self.franka_curr_targets[:, self.actuated_dof_indices], joint_ids=self.actuated_dof_indices + ) + self.mobilefranka.set_joint_position_target( + self.franka_curr_targets[:, self.actuated_mov_indices], joint_ids=self.actuated_mov_indices + ) + + def _get_observations(self) -> dict[str, torch.Tensor]: + # print("joint position", 
self.mobilefranka.data.joint_pos) + observations = { + "franka": torch.cat( + ( + # -------arm-------- + # DOF positions (12) + unscale(self.joint_pos, self.lower_limits, self.upper_limits), + # DOF velocities (12) + self.dof_vel_scale * self.joint_vel, + # finger positions (3*2) + self.finger_body_pos.view(self.num_envs, self.num_finger * 3), + # actions (7) + self.actions["franka"], + # actions (3) + self.actions["base"], + # positions (3) + self.target_positions, + ), + dim=-1, + ), + "base": torch.cat( + ( + # -------base-------- + # DOF positions (3) + unscale(self.joint_pos, self.lower_limits, self.upper_limits), + # DOF velocities (3) + self.dof_vel_scale * self.joint_vel, + # finger positions (3*2) + self.finger_body_pos.view(self.num_envs, self.num_finger * 3), + # actions (7) + self.actions["franka"], + # actions (3) + self.actions["base"], + # positions (3) + self.target_positions, + ), + dim=-1, + ), + } + return observations + + def _get_states(self) -> torch.Tensor: + states = torch.cat( + ( + # DOF positions (12) + unscale(self.joint_pos, self.lower_limits, self.upper_limits), + # DOF velocities (12) + self.dof_vel_scale * self.joint_vel, + # finger positions (3*2) + self.finger_body_pos.view(self.num_envs, self.num_finger * 3), + # actions (7) + self.actions["franka"], + # actions (3) + self.actions["base"], + # positions (3) + self.target_positions, + ), + dim=-1, + ) + return states + + def _get_rewards(self) -> dict[str, torch.Tensor]: + # Calculate distance from each finger to the target separately + finger_to_target_dists = torch.zeros((self.num_envs, self.num_finger), device=self.device) + + # For each finger, calculate its distance to target + for i in range(self.num_finger): + finger_to_target_dists[:, i] = torch.norm(self.finger_body_pos[:, i] - self.target_positions, p=2, dim=-1) + + # Mean distance across all fingers to target + goal_dist = torch.mean(finger_to_target_dists, dim=1) + + rew_dist = 5 * torch.exp(-self.cfg.dist_reward_scale * goal_dist) + + # log reward components + if "log" not in self.extras: + self.extras["log"] = dict() + self.extras["log"]["dist_reward"] = rew_dist.mean() + self.extras["log"]["dist_goal"] = goal_dist.mean() + + return {"franka": rew_dist, "base": rew_dist} + + def _get_dones(self) -> tuple[dict[str, torch.Tensor], dict[str, torch.Tensor]]: + self.joint_pos = self.mobilefranka.data.joint_pos + out_of_bounds = torch.any(torch.abs(self.joint_pos[:, self.xy_base_indices]) > self.cfg.max_base_pos, dim=1) + time_out = self.episode_length_buf >= self.max_episode_length - 1 + + terminated = {agent: out_of_bounds for agent in self.cfg.possible_agents} + time_outs = {agent: time_out for agent in self.cfg.possible_agents} + return terminated, time_outs + + def _reset_idx(self, env_ids: Sequence[int] | torch.Tensor | None): + if env_ids is None: + env_ids = self.mobilefranka._ALL_INDICES + # reset articulation and rigid body attributes + super()._reset_idx(env_ids) + + # reset goals + self._reset_target_pose(env_ids) + + # reset franka + # delta_max = self.upper_limits[env_ids] - self.default_joint_pos[env_ids] + # delta_min = self.lower_limits[env_ids] - self.default_joint_pos[env_ids] + + # dof_pos_noise = sample_uniform(-1.0, 1.0, (len(env_ids), self.num_franka_dofs), device=self.device) + # rand_delta = delta_min + (delta_max - delta_min) * 0.5 * dof_pos_noise + # dof_pos = self.default_joint_pos[env_ids] + self.cfg.reset_dof_pos_noise * rand_delta + + # dof_vel_noise = sample_uniform(-1.0, 1.0, (len(env_ids), self.num_franka_dofs), 
device=self.device) + # dof_vel = self.default_joint_vel[env_ids] + self.cfg.reset_dof_vel_noise * dof_vel_noise + + # # print("dof_pos", dof_pos[env_ids,0:3]) + # dof_pos[env_ids,0:3]=torch.tensor([0.0, 0.0, 0.0], device=self.device) + # dof_vel[env_ids,0:3]=torch.tensor([0.0, 0.0, 0.0], device=self.device) + + # Reset franka - get default joint positions + dof_pos = self.default_joint_pos[env_ids] + self.franka_prev_targets[env_ids] = dof_pos + self.franka_curr_targets[env_ids] = dof_pos + self.franka_dof_targets[env_ids] = dof_pos + + # Get default root state and modify it + default_root_state = self.mobilefranka.data.default_root_state[env_ids].clone() + + # Add environment origins for proper placement in each env + default_root_state[:, :2] += self.scene.env_origins[env_ids, :2] # Only x,y + + # Important: Set Z position to proper ground contact height + # This depends on your robot's geometry - adjust as needed + # robot_base_height = 0.05 # Height of robot base from ground + # default_root_state[:, 2] = robot_base_height # Set appropriate Z height + + # Write the corrected pose and velocity + self.mobilefranka.write_root_pose_to_sim(default_root_state[:, :7], env_ids) + self.mobilefranka.write_root_velocity_to_sim(default_root_state[:, 7:], env_ids) + + # Write joint states (positions first, then velocities) + self.mobilefranka.write_joint_state_to_sim( + self.default_joint_pos[env_ids], self.default_joint_vel[env_ids], env_ids=env_ids + ) + self.mobilefranka.reset(env_ids) + + # No need for long sleep - robots should be stable on ground + # Compute intermediate values for observation + self._compute_intermediate_values() + + def _reset_target_pose(self, env_ids): + # Reset goal position + rand_pos = sample_uniform(0.0, 1.0, (len(env_ids), 3), device=self.device) + pos = torch.zeros((len(env_ids), 3), device=self.device) + pos[:, 0] = rand_pos[:, 0] * (self.x_lim[1] - self.x_lim[0]) + self.x_lim[0] + pos[:, 1] = rand_pos[:, 1] * (self.y_lim[1] - self.y_lim[0]) + self.y_lim[0] + pos[:, 2] = rand_pos[:, 2] * (self.z_lim[1] - self.z_lim[0]) + self.z_lim[0] + self.target_positions[env_ids] = pos + + # Reset goal rotation + rot = torch.zeros((len(env_ids), 4), dtype=torch.float, device=self.device) + rot[:, 0] = 1.0 + self.goal_rot[env_ids] = rot + goal_pos = self.target_positions + self.scene.env_origins + self.goal_markers.visualize(goal_pos, self.goal_rot) + + def _compute_intermediate_values(self): + self.finger_body_pos = self.mobilefranka.data.body_pos_w[:, self.finger_bodies] + self.finger_body_pos -= self.scene.env_origins.repeat((1, self.num_finger)).reshape( + self.num_envs, self.num_finger, 3 + ) + + self.joint_pos = self.mobilefranka.data.joint_pos + self.joint_vel = self.mobilefranka.data.joint_vel + + +@torch.jit.script +def scale(x, lower, upper): + return 0.5 * (x + 1.0) * (upper - lower) + lower + + +@torch.jit.script +def unscale(x, lower, upper): + return (2.0 * x - upper - lower) / (upper - lower) diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/mobile_franka/mobile_franka_marl_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/direct/mobile_franka/mobile_franka_marl_cfg.py new file mode 100644 index 00000000000..5e37460f456 --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/mobile_franka/mobile_franka_marl_cfg.py @@ -0,0 +1,148 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers. +# All rights reserved.
+# +# SPDX-License-Identifier: BSD-3-Clause + +from isaaclab_assets.robots.mobile_franka import MOBILE_FRANKA_CFG +from isaaclab_assets.robots.ridgeback_franka import RIDGEBACK_FRANKA_PANDA_CFG + +import isaaclab.sim as sim_utils +from isaaclab.assets import ArticulationCfg, RigidObjectCfg +from isaaclab.envs import DirectMARLEnvCfg +from isaaclab.markers import VisualizationMarkersCfg +from isaaclab.scene import InteractiveSceneCfg +from isaaclab.sim import PhysxCfg, SimulationCfg +from isaaclab.sim.spawners.materials.physics_materials_cfg import RigidBodyMaterialCfg + +# from isaaclab.managers import EventTermCfg as EventTerm, SceneEntityCfg +from isaaclab.utils import configclass + + +@configclass +class MobileFrankaMARLCfg(DirectMARLEnvCfg): + # Environment settings + decimation = 2 + episode_length_s = 500 / (120 / 2) # Adjusted for control frequency + possible_agents = ["franka", "base"] + action_spaces = {"franka": 7, "base": 3} + observation_spaces = {"franka": 40, "base": 40} + state_space = -1 + + # Simulation settings + sim: SimulationCfg = SimulationCfg( + dt=1 / 120, + render_interval=decimation, + gravity=(0.0, 0.0, -9.81), + physics_material=RigidBodyMaterialCfg( + static_friction=1.0, + dynamic_friction=1.0, + ), + physx=PhysxCfg( + # solver_position_iteration_count=12, + # solver_velocity_iteration_count=6, + # contact_offset=0.005, + # rest_offset=0.0, + bounce_threshold_velocity=0.2, + # enable_sleeping=True, + # max_depenetration_velocity=1000.0, + ), + ) + + # Robot configuration + mobile_franka_cfg: ArticulationCfg = MOBILE_FRANKA_CFG.replace(prim_path="/World/envs/env_.*/MobileFranka").replace( + init_state=ArticulationCfg.InitialStateCfg( + pos=(0.0, 0.0, 0.0), + # rot=(0.7071068, 0.0, 0.7071068, 0.0), + # rot=(1.0, 0.0, 0.0, 0.0), + joint_pos={".*": 0.0}, + ), + # solver_position_iteration_count=12, + # solver_velocity_iteration_count=1, + # enable_self_collisions=False, + # enable_gyroscopic_forces=True, + ) + + actuated_joint_names = [ + "panda_joint1", + "panda_joint2", + "panda_joint3", + "panda_joint4", + "panda_joint5", + "panda_joint6", + "panda_joint7", + ] + + mobile_base_names = [ + "dummy_base_prismatic_x_joint", + "dummy_base_prismatic_y_joint", + "dummy_base_revolute_z_joint", + ] + + xy_base_names = [ + "dummy_base_prismatic_x_joint", + "dummy_base_prismatic_y_joint", + ] + + finger_joint_names = [ + "panda_finger_joint1", + "panda_finger_joint2", + ] + + finger_body_names = [ + "panda_leftfinger", + # "panda_finger2", + ] + + # object configuration + # target_cube_cfg: RigidObjectCfg = RigidObjectCfg( + # prim_path="/World/envs/env_.*/object", + # spawn=sim_utils.SphereCfg( + # radius=0.1, + # visual_material=sim_utils.PreviewSurfaceCfg(diffuse_color=(1.0, 0.0, 0.0)), + # physics_material=sim_utils.RigidBodyMaterialCfg(static_friction=0.7), + # rigid_props=sim_utils.RigidBodyPropertiesCfg( + # kinematic_enabled=False, + # disable_gravity=False, + # enable_gyroscopic_forces=True, + # solver_position_iteration_count=8, + # solver_velocity_iteration_count=0, + # sleep_threshold=0.005, + # stabilization_threshold=0.0025, + # max_depenetration_velocity=1000.0, + # ), + # collision_props=sim_utils.CollisionPropertiesCfg(), + # mass_props=sim_utils.MassPropertiesCfg(density=500.0), + # ), + # init_state=RigidObjectCfg.InitialStateCfg(pos=(2.0, 0.0, 0.5), rot=(1.0, 0.0, 0.0, 0.0)), + # ) + # goal object + goal_object_cfg: VisualizationMarkersCfg = VisualizationMarkersCfg( + prim_path="/Visuals/goal_marker", + markers={ + "goal": sim_utils.SphereCfg( + 
radius=0.1, + visual_material=sim_utils.PreviewSurfaceCfg(diffuse_color=(1.0, 0.0, 0.0)), + ), + }, + ) + + # Scene settings + scene: InteractiveSceneCfg = InteractiveSceneCfg(num_envs=512, env_spacing=3.0, replicate_physics=True) + + action_scale = 7.5 + dof_velocity_scale = 0.1 + max_base_pos = 3.0 + + # Reward scales + dist_reward_scale = 20 + rot_reward_scale = 0.5 + around_handle_reward_scale = 10.0 + open_reward_scale = 7.5 + finger_dist_reward_scale = 100.0 + action_penalty_scale = 0.01 + finger_close_reward_scale = 10.0 + act_moving_average = 1.0 + # Reset noise + reset_position_noise = 0.0 + reset_dof_pos_noise = 0.0 + reset_dof_vel_noise = 0.0 diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand_over/agents/skrl_mappo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand_over/agents/skrl_mappo_cfg.yaml index f67cc31b249..634c62869c7 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand_over/agents/skrl_mappo_cfg.yaml +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand_over/agents/skrl_mappo_cfg.yaml @@ -1,4 +1,4 @@ -seed: 42 +seed: 500 # Models are instantiated using skrl's model instantiator utility diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand_over/shadow_hand_over_env.py b/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand_over/shadow_hand_over_env.py index 6e01214f015..e313f93a0f9 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand_over/shadow_hand_over_env.py +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand_over/shadow_hand_over_env.py @@ -408,6 +408,17 @@ def _compute_intermediate_values(self): @torch.jit.script def scale(x, lower, upper): + """ + Scales the input tensor x from the range [-1, 1] to the range [lower, upper]. + + Args: + x (torch.Tensor): Input tensor to be scaled. + lower (torch.Tensor): Lower bound of the target range. + upper (torch.Tensor): Upper bound of the target range. + + Returns: + torch.Tensor: Scaled tensor. + """ return 0.5 * (x + 1.0) * (upper - lower) + lower @@ -418,6 +429,18 @@ def unscale(x, lower, upper): @torch.jit.script def randomize_rotation(rand0, rand1, x_unit_tensor, y_unit_tensor): + """ + Randomizes the rotation based on random values and unit tensors. + + Args: + rand0 (torch.Tensor): Random values for the first rotation axis. + rand1 (torch.Tensor): Random values for the second rotation axis. + x_unit_tensor (torch.Tensor): Unit tensor for the x-axis. + y_unit_tensor (torch.Tensor): Unit tensor for the y-axis. + + Returns: + torch.Tensor: The resulting quaternion after applying the random rotations. + """ return quat_mul( quat_from_angle_axis(rand0 * np.pi, x_unit_tensor), quat_from_angle_axis(rand1 * np.pi, y_unit_tensor) ) diff --git a/source/isaaclab_tasks/isaaclab_tasks/utils/parse_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/utils/parse_cfg.py index 93fa7547b1f..262efa2214d 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/utils/parse_cfg.py +++ b/source/isaaclab_tasks/isaaclab_tasks/utils/parse_cfg.py @@ -51,6 +51,7 @@ def load_cfg_from_registry(task_name: str, entry_point_key: str) -> dict | objec Raises: ValueError: If the entry point key is not available in the gym registry for the task. """ + # obtain the configuration entry point cfg_entry_point = gym.spec(task_name).kwargs.get(entry_point_key) # check if entry point exists
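
The cart-double-pendulum change above recasts each agent's reward as R_agent = alpha * P_agent_0 + beta * Delta_P_agent: an individual base term (alive/termination/velocity penalties) plus a cooperative term driven by the shared pole and pendulum state, with the component means also written to extras["log"]. Below is a minimal, self-contained sketch of that composition for the "cart" agent only; the alpha/beta weights and the sample tensors are illustrative values, not taken from the environment code.

import torch

# Illustrative weights and per-environment sample values (assumed for this sketch).
alpha, beta = 1.0, 1.0
eps_alive, eps_terminated, eps_cart_vel = 1.0, -2.0, -0.01

reset_terminated = torch.tensor([0.0, 1.0])  # env 0 alive, env 1 terminated
cart_vel = torch.tensor([0.5, -1.0])         # cart joint velocity per env

# Individual ("base") term for the cart agent, mirroring P_cart_0 in compute_rewards.
P_cart_0 = (
    eps_alive * (1.0 - reset_terminated)
    + eps_terminated * reset_terminated
    + eps_cart_vel * torch.abs(cart_vel)
)

# Cooperative term; in the environment this is computed from the pole state.
Delta_P_cart = torch.tensor([-0.02, -0.10])

# Final per-agent reward combines the individual and cooperative parts.
R_cart = alpha * P_cart_0 + beta * Delta_P_cart
print(R_cart)  # tensor([ 0.9750, -2.1100])

With the trainer option environment_info: log in the skrl configs above, the logged P_cart_0, P_pendulum_0, and Delta_P terms should let the individual and cooperative contributions be tracked separately during training.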