Commit 8edc29c

[Feature] Deactivate vmap in objectives
ghstack-source-id: f37922f
Pull-Request-resolved: #2957
1 parent d882ea2
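
The change threads a `deactivate_vmap` flag through the vmap-based loss modules so that `_vmap_func` (and the direct `torch.vmap` call sites in IQL) can fall back to an eager Python loop. The `_pseudo_vmap` helper that implements the fallback lives in `torchrl/objectives/utils.py` (see the import in the IQL diff below) and is not itself shown on this page; the following is a minimal sketch of the idea, assuming plain-tensor arguments, single-tensor outputs, and vmap's default `out_dims=0` (the real helper also has to handle TensorDict inputs):

    import torch

    def pseudo_vmap(fn, in_dims=0, randomness=None):
        # Loop-based stand-in for torch.vmap. `randomness` is accepted only for
        # signature parity: a plain loop naturally draws independent samples,
        # i.e. it behaves like randomness="different".
        def wrapped(*args):
            # Normalize in_dims to one entry per positional argument.
            dims = in_dims if isinstance(in_dims, tuple) else (in_dims,) * len(args)
            # The mapped size comes from the first argument that is actually mapped.
            n = next(a.shape[d] for a, d in zip(args, dims) if d is not None)
            outs = []
            for i in range(n):
                # Slice mapped args along their mapped dim; pass unmapped args as-is.
                outs.append(fn(*(a if d is None else a.select(d, i)
                                 for a, d in zip(args, dims))))
            return torch.stack(outs)  # mimic vmap's default out_dims=0
        return wrapped

With `in_dims=(None, 0)`, this reproduces what the `_vmap_func(qvalue_network, (None, 0), ...)` calls below compute: one shared input evaluated against a stack of Q-network parameter sets, one ensemble member at a time.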

File tree

11 files changed: +623 -29 lines changed

test/test_cost.py

Lines changed: 489 additions & 3 deletions
Large diffs are not rendered by default.

torchrl/objectives/cql.py

Lines changed: 11 additions & 2 deletions
@@ -92,6 +92,8 @@ class CQLLoss(LossModule):
             ``"none"`` | ``"mean"`` | ``"sum"``. ``"none"``: no reduction will be applied,
             ``"mean"``: the sum of the output will be divided by the number of
             elements in the output, ``"sum"``: the output will be summed. Default: ``"mean"``.
+        deactivate_vmap (bool, optional): whether to deactivate vmap calls and replace them with a plain for loop.
+            Defaults to ``False``.
 
     Examples:
         >>> import torch
@@ -290,6 +292,7 @@ def __init__(
         with_lagrange: bool = False,
         lagrange_thresh: float = 0.0,
         reduction: str = None,
+        deactivate_vmap: bool = False,
     ) -> None:
         self._out_keys = None
         if reduction is None:
@@ -303,6 +306,7 @@ def __init__(
             "actor_network",
             create_target_params=self.delay_actor,
         )
+        self.deactivate_vmap = deactivate_vmap
 
         # Q value
         self.delay_qvalue = delay_qvalue
@@ -376,10 +380,15 @@ def __init__(
 
     def _make_vmap(self):
         self._vmap_qvalue_networkN0 = _vmap_func(
-            self.qvalue_network, (None, 0), randomness=self.vmap_randomness
+            self.qvalue_network,
+            (None, 0),
+            randomness=self.vmap_randomness,
+            pseudo_vmap=self.deactivate_vmap,
         )
         self._vmap_qvalue_network00 = _vmap_func(
-            self.qvalue_network, randomness=self.vmap_randomness
+            self.qvalue_network,
+            randomness=self.vmap_randomness,
+            pseudo_vmap=self.deactivate_vmap,
         )
 
     @property
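
For intuition on the `(None, 0)` pattern, here is a self-contained toy check that the vectorized and looped evaluations agree (`q` is a stand-in, not TorchRL API):

    import torch

    def q(obs, w):                    # toy stand-in for a functional Q-network
        return obs @ w

    obs = torch.randn(4, 3)          # shared input: in_dim None
    ws = torch.randn(2, 3, 1)        # stack of 2 parameter sets: in_dim 0

    vmapped = torch.vmap(q, (None, 0))(obs, ws)      # shape (2, 4, 1)
    looped = torch.stack([q(obs, w) for w in ws])    # same result, eager loop
    assert torch.allclose(vmapped, looped)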

torchrl/objectives/crossq.py

Lines changed: 9 additions & 1 deletion
@@ -92,6 +92,8 @@ class CrossQLoss(LossModule):
             ``"none"`` | ``"mean"`` | ``"sum"``. ``"none"``: no reduction will be applied,
             ``"mean"``: the sum of the output will be divided by the number of
             elements in the output, ``"sum"``: the output will be summed. Default: ``"mean"``.
+        deactivate_vmap (bool, optional): whether to deactivate vmap calls and replace them with a plain for loop.
+            Defaults to ``False``.
 
     Examples:
         >>> import torch
@@ -267,6 +269,7 @@ def __init__(
         priority_key: str = None,
         separate_losses: bool = False,
         reduction: str = None,
+        deactivate_vmap: bool = False,
     ) -> None:
         self._in_keys = None
         self._out_keys = None
@@ -275,6 +278,8 @@ def __init__(
         super().__init__()
         self._set_deprecated_ctor_keys(priority_key=priority_key)
 
+        self.deactivate_vmap = deactivate_vmap
+
         # Actor
         self.convert_to_functional(
             actor_network,
@@ -344,7 +349,10 @@ def __init__(
 
     def _make_vmap(self):
         self._vmap_qnetworkN0 = _vmap_func(
-            self.qvalue_network, (None, 0), randomness=self.vmap_randomness
+            self.qvalue_network,
+            (None, 0),
+            randomness=self.vmap_randomness,
+            pseudo_vmap=self.deactivate_vmap,
         )
 
     @property
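
A note on the `randomness=self.vmap_randomness` argument that recurs in these hunks: `torch.vmap` raises on random ops unless told how to batch them, while the loop fallback has no such restriction (it behaves like `randomness="different"`). A small illustration:

    import torch

    def noisy(x):
        return x + torch.randn(())

    xs = torch.randn(3)
    # torch.vmap(noisy)(xs)  # would raise: default randomness="error"
    same = torch.vmap(noisy, randomness="same")(xs)       # one shared draw
    diff = torch.vmap(noisy, randomness="different")(xs)  # independent draws
    loop = torch.stack([noisy(x) for x in xs])            # loop: independent by nature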

torchrl/objectives/deprecated.py

Lines changed: 8 additions & 1 deletion
@@ -86,6 +86,8 @@ class REDQLoss_deprecated(LossModule):
             ``"none"`` | ``"mean"`` | ``"sum"``. ``"none"``: no reduction will be applied,
             ``"mean"``: the sum of the output will be divided by the number of
             elements in the output, ``"sum"``: the output will be summed. Default: ``"mean"``.
+        deactivate_vmap (bool, optional): whether to deactivate vmap calls and replace them with a plain for loop.
+            Defaults to ``False``.
     """
 
     @dataclass
@@ -164,6 +166,7 @@ def __init__(
         priority_key: str = None,
         separate_losses: bool = False,
         reduction: str = None,
+        deactivate_vmap: bool = False,
     ):
         self._in_keys = None
         self._out_keys = None
@@ -172,6 +175,8 @@ def __init__(
         super().__init__()
         self._set_deprecated_ctor_keys(priority_key=priority_key)
 
+        self.deactivate_vmap = deactivate_vmap
+
         self.convert_to_functional(
             actor_network,
             "actor_network",
@@ -234,7 +239,9 @@ def __init__(
             raise TypeError(_GAMMA_LMBDA_DEPREC_ERROR)
 
     def _make_vmap(self):
-        self._vmap_qvalue_networkN0 = _vmap_func(self.qvalue_network, (None, 0))
+        self._vmap_qvalue_networkN0 = _vmap_func(
+            self.qvalue_network, (None, 0), pseudo_vmap=self.deactivate_vmap
+        )
 
     @property
     def target_entropy(self):

torchrl/objectives/iql.py

Lines changed: 25 additions & 4 deletions
@@ -19,6 +19,7 @@
 from torchrl.objectives.common import LossModule
 from torchrl.objectives.utils import (
     _GAMMA_LMBDA_DEPREC_ERROR,
+    _pseudo_vmap,
     _reduce,
     _vmap_func,
     default_value_kwargs,
@@ -68,6 +69,8 @@ class IQLLoss(LossModule):
             ``"none"`` | ``"mean"`` | ``"sum"``. ``"none"``: no reduction will be applied,
             ``"mean"``: the sum of the output will be divided by the number of
             elements in the output, ``"sum"``: the output will be summed. Default: ``"mean"``.
+        deactivate_vmap (bool, optional): whether to deactivate vmap calls and replace them with a plain for loop.
+            Defaults to ``False``.
 
     Examples:
         >>> import torch
@@ -266,6 +269,7 @@ def __init__(
         priority_key: str = None,
         separate_losses: bool = False,
         reduction: str = None,
+        deactivate_vmap: bool = False,
     ) -> None:
         self._in_keys = None
         self._out_keys = None
@@ -274,6 +278,8 @@ def __init__(
         super().__init__()
         self._set_deprecated_ctor_keys(priority=priority_key)
 
+        self.deactivate_vmap = deactivate_vmap
+
         # IQL parameter
         self.temperature = temperature
         self.expectile = expectile
@@ -323,7 +329,10 @@ def __init__(
 
     def _make_vmap(self):
         self._vmap_qvalue_networkN0 = _vmap_func(
-            self.qvalue_network, (None, 0), randomness=self.vmap_randomness
+            self.qvalue_network,
+            (None, 0),
+            randomness=self.vmap_randomness,
+            pseudo_vmap=self.deactivate_vmap,
         )
 
     @property
@@ -824,7 +833,11 @@ def actor_loss(self, tensordict: TensorDictBase) -> tuple[Tensor, dict]:
         if action.ndim < (state_action_value.ndim - (td_q.ndim - tensordict.ndim)):
             # unsqueeze the action if it lacks on trailing singleton dim
             action = action.unsqueeze(-1)
-        chosen_state_action_value = torch.vmap(
+        if self.deactivate_vmap:
+            vmap = _pseudo_vmap
+        else:
+            vmap = torch.vmap
+        chosen_state_action_value = vmap(
             lambda state_action_value, action: torch.gather(
                 state_action_value, -1, index=action
             ).squeeze(-1),
@@ -883,7 +896,11 @@ def value_loss(self, tensordict: TensorDictBase) -> tuple[Tensor, dict]:
         ):
             # unsqueeze the action if it lacks on trailing singleton dim
             action = action.unsqueeze(-1)
-        chosen_state_action_value = torch.vmap(
+        if self.deactivate_vmap:
+            vmap = _pseudo_vmap
+        else:
+            vmap = torch.vmap
+        chosen_state_action_value = vmap(
             lambda state_action_value, action: torch.gather(
                 state_action_value, -1, index=action
             ).squeeze(-1),
@@ -932,7 +949,11 @@ def qvalue_loss(self, tensordict: TensorDictBase) -> tuple[Tensor, dict]:
         if action.ndim < (state_action_value.ndim - (td_q.ndim - tensordict.ndim)):
             # unsqueeze the action if it lacks on trailing singleton dim
             action = action.unsqueeze(-1)
-        pred_val = torch.vmap(
+        if self.deactivate_vmap:
+            vmap = _pseudo_vmap
+        else:
+            vmap = torch.vmap
+        pred_val = vmap(
             lambda state_action_value, action: torch.gather(
                 state_action_value, -1, index=action
             ).squeeze(-1),
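
The three IQL call sites all wrap the same `gather` in `vmap` to pick each sampled action's value out of the per-ensemble state-action values, and the new branches swap in `_pseudo_vmap` when the flag is set. A toy version of that selection (shapes and in_dims here are illustrative assumptions):

    import torch

    state_action_value = torch.randn(2, 5, 4)  # (ensemble, batch, n_actions)
    action = torch.randint(4, (5, 1))          # (batch, 1) indices, shared across ensemble

    def pick(sav, act):
        return torch.gather(sav, -1, index=act).squeeze(-1)

    vmapped = torch.vmap(pick, (0, None))(state_action_value, action)
    looped = torch.stack([pick(sav, action) for sav in state_action_value])
    assert torch.allclose(vmapped, looped)     # both: (ensemble, batch)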

torchrl/objectives/redq.py

Lines changed: 11 additions & 2 deletions
@@ -86,6 +86,8 @@ class REDQLoss(LossModule):
             ``"none"`` | ``"mean"`` | ``"sum"``. ``"none"``: no reduction will be applied,
             ``"mean"``: the sum of the output will be divided by the number of
             elements in the output, ``"sum"``: the output will be summed. Default: ``"mean"``.
+        deactivate_vmap (bool, optional): whether to deactivate vmap calls and replace them with a plain for loop.
+            Defaults to ``False``.
 
     Examples:
         >>> import torch
@@ -280,6 +282,7 @@ def __init__(
         priority_key: str = None,
         separate_losses: bool = False,
         reduction: str = None,
+        deactivate_vmap: bool = False,
     ):
         if reduction is None:
             reduction = "mean"
@@ -295,6 +298,7 @@ def __init__(
 
         # let's make sure that actor_network has `return_log_prob` set to True
         self.actor_network.return_log_prob = True
+        self.deactivate_vmap = deactivate_vmap
         if separate_losses:
             # we want to make sure there are no duplicates in the params: the
             # params of critic must be refs to actor if they're shared
@@ -351,10 +355,15 @@ def __init__(
 
     def _make_vmap(self):
         self._vmap_qvalue_network00 = _vmap_func(
-            self.qvalue_network, randomness=self.vmap_randomness
+            self.qvalue_network,
+            randomness=self.vmap_randomness,
+            pseudo_vmap=self.deactivate_vmap,
         )
         self._vmap_getdist = _vmap_func(
-            self.actor_network, func="get_dist_params", randomness=self.vmap_randomness
+            self.actor_network,
+            func="get_dist_params",
+            randomness=self.vmap_randomness,
+            pseudo_vmap=self.deactivate_vmap,
         )
 
     @property
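
REDQ also vmaps a named method (`func="get_dist_params"`) rather than the module's `forward`. A hedged sketch of how a vmap-or-loop wrapper can support that dispatch, reusing the `pseudo_vmap` sketch near the top of this page (stand-in names; only part of the real `_vmap_func` signature is visible in this diff):

    import torch

    def vmap_or_loop(module, in_dims=0, func=None, randomness="error",
                     deactivate_vmap=False):
        # Dispatch to a named method when requested, e.g. func="get_dist_params".
        call = module if func is None else getattr(module, func)
        if deactivate_vmap:
            return pseudo_vmap(call, in_dims)  # eager fallback sketched earlier
        return torch.vmap(call, in_dims, randomness=randomness)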

torchrl/objectives/sac.py

Lines changed: 25 additions & 4 deletions
@@ -130,6 +130,8 @@ class SACLoss(LossModule):
             valid, non-terminating next states. If ``True``, it is assumed that the done state can be broadcast to the
             shape of the data and that masking the data results in a valid data structure. Among other things, this may
             not be true in MARL settings or when using RNNs. Defaults to ``False``.
+        deactivate_vmap (bool, optional): whether to deactivate vmap calls and replace them with a plain for loop.
+            Defaults to ``False``.
 
     Examples:
         >>> import torch
@@ -334,6 +336,7 @@ def __init__(
         separate_losses: bool = False,
         reduction: str = None,
         skip_done_states: bool = False,
+        deactivate_vmap: bool = False,
     ) -> None:
         self._in_keys = None
         self._out_keys = None
@@ -344,6 +347,7 @@ def __init__(
 
         # Actor
         self.delay_actor = delay_actor
+        self.deactivate_vmap = deactivate_vmap
         self.convert_to_functional(
             actor_network,
             "actor_network",
@@ -445,11 +449,16 @@ def __init__(
 
     def _make_vmap(self):
         self._vmap_qnetworkN0 = _vmap_func(
-            self.qvalue_network, (None, 0), randomness=self.vmap_randomness
+            self.qvalue_network,
+            (None, 0),
+            randomness=self.vmap_randomness,
+            pseudo_vmap=self.deactivate_vmap,
         )
         if self._version == 1:
             self._vmap_qnetwork00 = _vmap_func(
-                self.qvalue_network, randomness=self.vmap_randomness
+                self.qvalue_network,
+                randomness=self.vmap_randomness,
+                pseudo_vmap=self.deactivate_vmap,
             )
 
     @property
@@ -527,11 +536,13 @@ def make_value_estimator(self, value_type: ValueEstimators = None, **hyperparams
             self._value_estimator = TD1Estimator(
                 **hp,
                 value_network=value_net,
+                deactivate_vmap=self.deactivate_vmap,
             )
         elif value_type is ValueEstimators.TD0:
             self._value_estimator = TD0Estimator(
                 **hp,
                 value_network=value_net,
+                deactivate_vmap=self.deactivate_vmap,
             )
         elif value_type is ValueEstimators.GAE:
             raise NotImplementedError(
@@ -541,6 +552,7 @@ def make_value_estimator(self, value_type: ValueEstimators = None, **hyperparams
             self._value_estimator = TDLambdaEstimator(
                 **hp,
                 value_network=value_net,
+                deactivate_vmap=self.deactivate_vmap,
             )
         else:
             raise NotImplementedError(f"Unknown value type {value_type}")
@@ -673,7 +685,6 @@ def _actor_loss(
             raise RuntimeError(
                 f"Losses shape mismatch: {log_prob.shape} and {min_q_logprob.shape}"
             )
-
         return self._alpha * log_prob - min_q_logprob, {"log_prob": log_prob.detach()}
 
     @property
@@ -922,6 +933,8 @@ class DiscreteSACLoss(LossModule):
             valid, non-terminating next states. If ``True``, it is assumed that the done state can be broadcast to the
             shape of the data and that masking the data results in a valid data structure. Among other things, this may
             not be true in MARL settings or when using RNNs. Defaults to ``False``.
+        deactivate_vmap (bool, optional): whether to deactivate vmap calls and replace them with a plain for loop.
+            Defaults to ``False``.
 
     Examples:
         >>> import torch
@@ -1098,6 +1111,7 @@ def __init__(
         separate_losses: bool = False,
         reduction: str = None,
         skip_done_states: bool = False,
+        deactivate_vmap: bool = False,
     ):
         if reduction is None:
             reduction = "mean"
@@ -1110,6 +1124,7 @@ def __init__(
             "actor_network",
             create_target_params=self.delay_actor,
         )
+        self.deactivate_vmap = deactivate_vmap
         if separate_losses:
             # we want to make sure there are no duplicates in the params: the
             # params of critic must be refs to actor if they're shared
@@ -1184,7 +1199,10 @@ def __init__(
 
     def _make_vmap(self):
         self._vmap_qnetworkN0 = _vmap_func(
-            self.qvalue_network, (None, 0), randomness=self.vmap_randomness
+            self.qvalue_network,
+            (None, 0),
+            randomness=self.vmap_randomness,
+            pseudo_vmap=self.deactivate_vmap,
         )
 
     def _forward_value_estimator_keys(self, **kwargs) -> None:
@@ -1436,11 +1454,13 @@ def make_value_estimator(self, value_type: ValueEstimators = None, **hyperparams
             self._value_estimator = TD1Estimator(
                 **hp,
                 value_network=None,
+                deactivate_vmap=self.deactivate_vmap,
             )
         elif value_type is ValueEstimators.TD0:
             self._value_estimator = TD0Estimator(
                 **hp,
                 value_network=None,
+                deactivate_vmap=self.deactivate_vmap,
             )
         elif value_type is ValueEstimators.GAE:
             raise NotImplementedError(
@@ -1450,6 +1470,7 @@ def make_value_estimator(self, value_type: ValueEstimators = None, **hyperparams
             self._value_estimator = TDLambdaEstimator(
                 **hp,
                 value_network=None,
+                deactivate_vmap=self.deactivate_vmap,
            )
         else:
             raise NotImplementedError(f"Unknown value type {value_type}")
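
Because `SACLoss` and `DiscreteSACLoss` build their value estimators lazily, the flag also has to be forwarded in `make_value_estimator`, which is what the estimator hunks above do. A hedged usage sketch (`actor` and `qvalue` are assumed to be built as in the `SACLoss` docstring examples, which this page truncates):

    from torchrl.objectives import SACLoss, ValueEstimators

    # actor and qvalue assumed defined as in the SACLoss docstring examples
    loss = SACLoss(actor, qvalue, deactivate_vmap=True)  # Q-ensemble runs as a loop
    loss.make_value_estimator(ValueEstimators.TD0)       # estimator inherits the flag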
