pytorch
diff --git a/benchmarks/test_objectives_benchmarks.py b/benchmarks/test_objectives_benchmarks.py
Lines changed: 172 additions & 16 deletions
diff --git a/torchrl/envs/transforms/transforms.py b/torchrl/envs/transforms/transforms.py
Lines changed: 6 additions & 2 deletions
@@ -9,6 +9,7 @@
 
 from tensordict import TensorDict
 from tensordict.nn import (
+    InteractionType,
     NormalParamExtractor,
     ProbabilisticTensorDictModule as ProbMod,
     ProbabilisticTensorDictSequential as ProbSeq,
@@ -137,7 +138,10 @@ def test_gae_speed(benchmark, gae_fn, gamma_tensor, batches, timesteps):
     )
 
 
-def test_dqn_speed(benchmark, n_obs=8, n_act=4, depth=3, ncells=128, batch=128):
+@pytest.mark.parametrize("compile", [False, True, "reduce-overhead"])
+def test_dqn_speed(
+    benchmark, compile, n_obs=8, n_act=4, depth=3, ncells=128, batch=128
+):
     net = MLP(in_features=n_obs, out_features=n_act, depth=depth, num_cells=ncells)
     action_space = "one-hot"
     mod = QValueActor(net, in_keys=["obs"], action_space=action_space)
@@ -155,10 +159,23 @@ def test_dqn_speed(benchmark, n_obs=8, n_act=4, depth=3, ncells=128, batch=128):
         [batch],
     )
     loss(td)
+
+    if compile:
+        if isinstance(compile, str):
+            loss = torch.compile(loss, mode=compile, fullgraph=True)
+        else:
+            loss = torch.compile(loss, fullgraph=True)
+
+        loss(td)
+        loss(td)
+
     benchmark(loss, td)
 
 
-def test_ddpg_speed(benchmark, n_obs=8, n_act=4, ncells=128, batch=128, n_hidden=64):
+@pytest.mark.parametrize("compile", [False, True, "reduce-overhead"])
+def test_ddpg_speed(
+    benchmark, compile, n_obs=8, n_act=4, ncells=128, batch=128, n_hidden=64
+):
     common = MLP(
         num_cells=ncells,
         in_features=n_obs,
@@ -200,10 +217,23 @@ def test_ddpg_speed(benchmark, n_obs=8, n_act=4, ncells=128, batch=128, n_hidden
     loss = DDPGLoss(actor, value)
 
     loss(td)
+
+    if compile:
+        if isinstance(compile, str):
+            loss = torch.compile(loss, mode=compile, fullgraph=True)
+        else:
+            loss = torch.compile(loss, fullgraph=True)
+
+        loss(td)
+        loss(td)
+
     benchmark(loss, td)
 
 
-def test_sac_speed(benchmark, n_obs=8, n_act=4, ncells=128, batch=128, n_hidden=64):
+@pytest.mark.parametrize("compile", [False, True, "reduce-overhead"])
+def test_sac_speed(
+    benchmark, compile, n_obs=8, n_act=4, ncells=128, batch=128, n_hidden=64
+):
     common = MLP(
         num_cells=ncells,
         in_features=n_obs,
@@ -245,6 +275,7 @@ def test_sac_speed(benchmark, n_obs=8, n_act=4, ncells=128, batch=128, n_hidden=
             in_keys=["loc", "scale"],
             out_keys=["action"],
             distribution_class=TanhNormal,
+            distribution_kwargs={"safe_tanh": False},
         ),
     )
     value_head = Mod(
@@ -256,10 +287,23 @@ def test_sac_speed(benchmark, n_obs=8, n_act=4, ncells=128, batch=128, n_hidden=
     loss = SACLoss(actor, value, action_spec=Unbounded(shape=(n_act,)))
 
     loss(td)
+
+    if compile:
+        if isinstance(compile, str):
+            loss = torch.compile(loss, mode=compile, fullgraph=True)
+        else:
+            loss = torch.compile(loss, fullgraph=True)
+
+        loss(td)
+        loss(td)
+
     benchmark(loss, td)
 
 
-def test_redq_speed(benchmark, n_obs=8, n_act=4, ncells=128, batch=128, n_hidden=64):
+@pytest.mark.parametrize("compile", [False, True, "reduce-overhead"])
+def test_redq_speed(
+    benchmark, compile, n_obs=8, n_act=4, ncells=128, batch=128, n_hidden=64
+):
     common = MLP(
         num_cells=ncells,
         in_features=n_obs,
@@ -313,11 +357,22 @@ def test_redq_speed(benchmark, n_obs=8, n_act=4, ncells=128, batch=128, n_hidden
     loss = REDQLoss(actor, value, action_spec=Unbounded(shape=(n_act,)))
 
     loss(td)
+
+    if compile:
+        if isinstance(compile, str):
+            loss = torch.compile(loss, mode=compile, fullgraph=True)
+        else:
+            loss = torch.compile(loss, fullgraph=True)
+
+        loss(td)
+        loss(td)
+
     benchmark(loss, td)
 
 
+@pytest.mark.parametrize("compile", [False, True, "reduce-overhead"])
 def test_redq_deprec_speed(
-    benchmark, n_obs=8, n_act=4, ncells=128, batch=128, n_hidden=64
+    benchmark, compile, n_obs=8, n_act=4, ncells=128, batch=128, n_hidden=64
 ):
     common = MLP(
         num_cells=ncells,
@@ -372,10 +427,23 @@ def test_redq_deprec_speed(
     loss = REDQLoss_deprecated(actor, value, action_spec=Unbounded(shape=(n_act,)))
 
     loss(td)
+
+    if compile:
+        if isinstance(compile, str):
+            loss = torch.compile(loss, mode=compile, fullgraph=True)
+        else:
+            loss = torch.compile(loss, fullgraph=True)
+
+        loss(td)
+        loss(td)
+
     benchmark(loss, td)
 
 
-def test_td3_speed(benchmark, n_obs=8, n_act=4, ncells=128, batch=128, n_hidden=64):
+@pytest.mark.parametrize("compile", [False, True, "reduce-overhead"])
+def test_td3_speed(
+    benchmark, compile, n_obs=8, n_act=4, ncells=128, batch=128, n_hidden=64
+):
     common = MLP(
         num_cells=ncells,
         in_features=n_obs,
@@ -417,14 +485,23 @@ def test_td3_speed(benchmark, n_obs=8, n_act=4, ncells=128, batch=128, n_hidden=
             in_keys=["loc", "scale"],
             out_keys=["action"],
             distribution_class=TanhNormal,
+            distribution_kwargs={"safe_tanh": False},
             return_log_prob=True,
+            default_interaction_type=InteractionType.DETERMINISTIC,
         ),
     )
     value_head = Mod(
         value, in_keys=["hidden", "action"], out_keys=["state_action_value"]
     )
     value = Seq(common, value_head)
-    value(actor(td))
+    value(actor(td.clone()))
+    if compile:
+        actor_c = torch.compile(actor.get_dist, fullgraph=True)
+        actor_c(td)
+        actor_c = torch.compile(actor, fullgraph=True)
+        actor_c(td)
+        value_c = torch.compile(value, fullgraph=True)
+        value_c(td)
 
     loss = TD3Loss(
         actor,
@@ -433,10 +510,23 @@ def test_td3_speed(benchmark, n_obs=8, n_act=4, ncells=128, batch=128, n_hidden=
     )
 
     loss(td)
+
+    if compile:
+        if isinstance(compile, str):
+            loss = torch.compile(loss, mode=compile, fullgraph=True)
+        else:
+            loss = torch.compile(loss, fullgraph=True)
+
+        loss(td)
+        loss(td)
+
     benchmark.pedantic(loss, args=(td,), rounds=100, iterations=10)
 
 
-def test_cql_speed(benchmark, n_obs=8, n_act=4, ncells=128, batch=128, n_hidden=64):
+@pytest.mark.parametrize("compile", [False, True, "reduce-overhead"])
+def test_cql_speed(
+    benchmark, compile, n_obs=8, n_act=4, ncells=128, batch=128, n_hidden=64
+):
     common = MLP(
         num_cells=ncells,
         in_features=n_obs,
@@ -475,7 +565,10 @@ def test_cql_speed(benchmark, n_obs=8, n_act=4, ncells=128, batch=128, n_hidden=
         Mod(actor_net, in_keys=["hidden"], out_keys=["param"]),
         Mod(NormalParamExtractor(), in_keys=["param"], out_keys=["loc", "scale"]),
         ProbMod(
-            in_keys=["loc", "scale"], out_keys=["action"], distribution_class=TanhNormal
+            in_keys=["loc", "scale"],
+            out_keys=["action"],
+            distribution_class=TanhNormal,
+            distribution_kwargs={"safe_tanh": False},
         ),
     )
     value_head = Mod(
@@ -487,11 +580,22 @@ def test_cql_speed(benchmark, n_obs=8, n_act=4, ncells=128, batch=128, n_hidden=
     loss = CQLLoss(actor, value, action_spec=Unbounded(shape=(n_act,)))
 
     loss(td)
+
+    if compile:
+        if isinstance(compile, str):
+            loss = torch.compile(loss, mode=compile, fullgraph=True)
+        else:
+            loss = torch.compile(loss, fullgraph=True)
+
+        loss(td)
+        loss(td)
+
     benchmark(loss, td)
 
 
+@pytest.mark.parametrize("compile", [False, True, "reduce-overhead"])
 def test_a2c_speed(
-    benchmark, n_obs=8, n_act=4, n_hidden=64, ncells=128, batch=128, T=10
+    benchmark, compile, n_obs=8, n_act=4, n_hidden=64, ncells=128, batch=128, T=10
 ):
     common_net = MLP(
         num_cells=ncells,
@@ -533,7 +637,10 @@ def test_a2c_speed(
         Mod(actor_net, in_keys=["hidden"], out_keys=["param"]),
         Mod(NormalParamExtractor(), in_keys=["param"], out_keys=["loc", "scale"]),
         ProbMod(
-            in_keys=["loc", "scale"], out_keys=["action"], distribution_class=TanhNormal
+            in_keys=["loc", "scale"],
+            out_keys=["action"],
+            distribution_class=TanhNormal,
+            distribution_kwargs={"safe_tanh": False},
         ),
     )
     critic = Seq(common, Mod(value_net, in_keys=["hidden"], out_keys=["state_value"]))
@@ -544,11 +651,22 @@ def test_a2c_speed(
     advantage = GAE(value_network=critic, gamma=0.99, lmbda=0.95, shifted=True)
     advantage(td)
     loss(td)
+
+    if compile:
+        if isinstance(compile, str):
+            loss = torch.compile(loss, mode=compile, fullgraph=True)
+        else:
+            loss = torch.compile(loss, fullgraph=True)
+
+        loss(td)
+        loss(td)
+
     benchmark(loss, td)
 
 
+@pytest.mark.parametrize("compile", [False, True, "reduce-overhead"])
 def test_ppo_speed(
-    benchmark, n_obs=8, n_act=4, n_hidden=64, ncells=128, batch=128, T=10
+    benchmark, compile, n_obs=8, n_act=4, n_hidden=64, ncells=128, batch=128, T=10
 ):
     common_net = MLP(
         num_cells=ncells,
@@ -590,7 +708,10 @@ def test_ppo_speed(
         Mod(actor_net, in_keys=["hidden"], out_keys=["param"]),
         Mod(NormalParamExtractor(), in_keys=["param"], out_keys=["loc", "scale"]),
         ProbMod(
-            in_keys=["loc", "scale"], out_keys=["action"], distribution_class=TanhNormal
+            in_keys=["loc", "scale"],
+            out_keys=["action"],
+            distribution_class=TanhNormal,
+            distribution_kwargs={"safe_tanh": False},
         ),
     )
     critic = Seq(common, Mod(value_net, in_keys=["hidden"], out_keys=["state_value"]))
@@ -601,11 +722,22 @@ def test_ppo_speed(
     advantage = GAE(value_network=critic, gamma=0.99, lmbda=0.95, shifted=True)
     advantage(td)
     loss(td)
+
+    if compile:
+        if isinstance(compile, str):
+            loss = torch.compile(loss, mode=compile, fullgraph=True)
+        else:
+            loss = torch.compile(loss, fullgraph=True)
+
+        loss(td)
+        loss(td)
+
     benchmark(loss, td)
 
 
+@pytest.mark.parametrize("compile", [False, True, "reduce-overhead"])
 def test_reinforce_speed(
-    benchmark, n_obs=8, n_act=4, n_hidden=64, ncells=128, batch=128, T=10
+    benchmark, compile, n_obs=8, n_act=4, n_hidden=64, ncells=128, batch=128, T=10
 ):
     common_net = MLP(
         num_cells=ncells,
@@ -647,7 +779,10 @@ def test_reinforce_speed(
         Mod(actor_net, in_keys=["hidden"], out_keys=["param"]),
         Mod(NormalParamExtractor(), in_keys=["param"], out_keys=["loc", "scale"]),
         ProbMod(
-            in_keys=["loc", "scale"], out_keys=["action"], distribution_class=TanhNormal
+            in_keys=["loc", "scale"],
+            out_keys=["action"],
+            distribution_class=TanhNormal,
+            distribution_kwargs={"safe_tanh": False},
         ),
     )
     critic = Seq(common, Mod(value_net, in_keys=["hidden"], out_keys=["state_value"]))
@@ -658,11 +793,22 @@ def test_reinforce_speed(
     advantage = GAE(value_network=critic, gamma=0.99, lmbda=0.95, shifted=True)
     advantage(td)
     loss(td)
+
+    if compile:
+        if isinstance(compile, str):
+            loss = torch.compile(loss, mode=compile, fullgraph=True)
+        else:
+            loss = torch.compile(loss, fullgraph=True)
+
+        loss(td)
+        loss(td)
+
     benchmark(loss, td)
 
 
+@pytest.mark.parametrize("compile", [False, True, "reduce-overhead"])
 def test_iql_speed(
-    benchmark, n_obs=8, n_act=4, n_hidden=64, ncells=128, batch=128, T=10
+    benchmark, compile, n_obs=8, n_act=4, n_hidden=64, ncells=128, batch=128, T=10
 ):
     common_net = MLP(
         num_cells=ncells,
@@ -723,6 +869,16 @@ def test_iql_speed(
 
     loss = IQLLoss(actor_network=actor, value_network=value, qvalue_network=qvalue)
     loss(td)
+
+    if compile:
+        if isinstance(compile, str):
+            loss = torch.compile(loss, mode=compile, fullgraph=True)
+        else:
+            loss = torch.compile(loss, fullgraph=True)
+
+        loss(td)
+        loss(td)
+
     benchmark(loss, td)
 
 
 
@@ -39,9 +39,13 @@
     unravel_key,
     unravel_key_list,
 )
-from tensordict._C import _unravel_key_to_tuple
 from tensordict.nn import dispatch, TensorDictModuleBase
-from tensordict.utils import expand_as_right, expand_right, NestedKey
+from tensordict.utils import (
+    _unravel_key_to_tuple,
+    expand_as_right,
+    expand_right,
+    NestedKey,
+)
 from torch import nn, Tensor
 from torch.utils._pytree import tree_map