From 7b700cfc4d1f3ceaf09cccd95981e06385f7cf08 Mon Sep 17 00:00:00 2001
From: Michael Panchenko <m.panchenko@appliedai.de>
Date: Sat, 10 Aug 2024 16:56:39 +0200
Subject: [PATCH 1/6] Batch, breaking: turn to_torch and to_numpy into instance
 methods

---
 tianshou/data/batch.py | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/tianshou/data/batch.py b/tianshou/data/batch.py
index 4ea99dbcb..60738b255 100644
--- a/tianshou/data/batch.py
+++ b/tianshou/data/batch.py
@@ -720,8 +720,8 @@ def __eq__(self, other: Any) -> bool:
         if not isinstance(other, self.__class__):
             return False
 
-        this_batch_no_torch_tensor: Batch = Batch.to_numpy(self)
-        other_batch_no_torch_tensor: Batch = Batch.to_numpy(other)
+        this_batch_no_torch_tensor = self.to_numpy()
+        other_batch_no_torch_tensor = other.to_numpy()
         # DeepDiff 7.0.1 cannot compare 0-dimensional arrays
         # so, we ensure with this transform that all array values have at least 1 dim
         this_batch_no_torch_tensor.apply_values_transform(
@@ -836,9 +836,8 @@ def __repr__(self) -> str:
             self_str = self.__class__.__name__ + "()"
         return self_str
 
-    @staticmethod
-    def to_numpy(batch: TBatch) -> TBatch:
-        result = deepcopy(batch)
+    def to_numpy(self) -> Self:
+        result = deepcopy(self)
         result.to_numpy_()
         return result
 
@@ -850,13 +849,12 @@ def arr_to_numpy(arr: TArr) -> TArr:
 
         self.apply_values_transform(arr_to_numpy, inplace=True)
 
-    @staticmethod
     def to_torch(
-        batch: TBatch,
+        self,
         dtype: torch.dtype | None = None,
         device: str | int | torch.device = "cpu",
-    ) -> TBatch:
-        result = deepcopy(batch)
+    ) -> Self:
+        result = deepcopy(self)
         result.to_torch_(dtype=dtype, device=device)
         return result
 

From 4680e9a1d2f4f9fc1fe3048fe28ed274cbc54493 Mon Sep 17 00:00:00 2001
From: Michael Panchenko <m.panchenko@appliedai.de>
Date: Sat, 10 Aug 2024 17:12:24 +0200
Subject: [PATCH 2/6] EnvFactoryRegistered: restored backwards compat

---
 test/base/test_batch.py   |  4 ++--
 tianshou/highlevel/env.py | 14 ++++++++++++++
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/test/base/test_batch.py b/test/base/test_batch.py
index 8839fe482..813e5131e 100644
--- a/test/base/test_batch.py
+++ b/test/base/test_batch.py
@@ -669,7 +669,7 @@ class TestBatchConversions:
     @staticmethod
     def test_to_numpy() -> None:
         batch = Batch(a=1, b=torch.arange(5), c={"d": torch.tensor([1, 2, 3])})
-        new_batch: Batch = Batch.to_numpy(batch)
+        new_batch = batch.to_numpy()
         assert id(batch) != id(new_batch)
         assert isinstance(batch.b, torch.Tensor)
         assert isinstance(batch.c.d, torch.Tensor)
@@ -689,7 +689,7 @@ def test_to_numpy_() -> None:
     @staticmethod
     def test_to_torch() -> None:
         batch = Batch(a=1, b=np.arange(5), c={"d": np.array([1, 2, 3])})
-        new_batch: Batch = Batch.to_torch(batch)
+        new_batch = batch.to_torch(batch)
         assert id(batch) != id(new_batch)
         assert isinstance(batch.b, np.ndarray)
         assert isinstance(batch.c.d, np.ndarray)
diff --git a/tianshou/highlevel/env.py b/tianshou/highlevel/env.py
index b69e9c0b8..e61e0ed36 100644
--- a/tianshou/highlevel/env.py
+++ b/tianshou/highlevel/env.py
@@ -8,6 +8,7 @@
 import gymnasium as gym
 import gymnasium.spaces
 from gymnasium import Env
+from sensai.util.pickle import setstate
 from sensai.util.string import ToStringMixin
 
 from tianshou.env import (
@@ -452,6 +453,19 @@ def __init__(
         }
         self.make_kwargs = make_kwargs
 
+    def __setstate__(self, state: dict) -> None:
+        if "seed" in state:
+            if "test_seed" in state or "train_seed" in state:
+                raise RuntimeError(
+                    f"Cannot have both 'seed' and 'test_seed'/'train_seed' in state. "
+                    f"Something went wrong during serialization/deserialization: "
+                    f"{state=}",
+                )
+            state["test_seed"] = state["seed"]
+            state["train_seed"] = state["seed"]
+            del state["seed"]
+        setstate(EnvFactoryRegistered, self, state)
+
     def _create_kwargs(self, mode: EnvMode) -> dict:
         """Adapts the keyword arguments for the given mode.
 

From 3d0cd3b5fc74e59d9ac6bb86dad102ea33e11705 Mon Sep 17 00:00:00 2001
From: Michael Panchenko <m.panchenko@appliedai.de>
Date: Sat, 10 Aug 2024 17:12:32 +0200
Subject: [PATCH 3/6] Changelog

---
 CHANGELOG.md | 300 ++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 203 insertions(+), 97 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7e12b0991..0af64a53f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,133 +2,239 @@
 
 ## Release 1.1.0
 
+### Highlights
+
+This release introduces a new package `evaluation` that integrates best
+practices for running experiments (seeding test and train environmets) and for
+evaluating them using the [rliable](https://github.com/google-research/rliable) library. This should be especially useful
+for algorithm developers for
+comparing performances and creating meaningful visualizations. This functionality is currently in `alpha` state and will be further improved in the next releases. 
+You will need to install tianshou with the extra `eval` to use it.
+
+The creation of multiple experiments with varying random seeds has been greatly facilitated. Moreover,
+the `ExpLauncher` interface has been introduced and implemented with several backeds to 
+support the execution of multiple experiments in parallel. 
+
+An example for this using the high-level interfaces can be
+found [here](examples/mujoco/mujoco_ppo_hl_multi.py), examples that use
+low-level interfaces
+will follow soon.
+this feature
+Apart from that, several important extensions have been added to internal data structures,
+most notably to `Batch`. Batches now implement `__eq__` and can be meaningfully compared.
+Applying operations in a nested fashion has been significantly simplified, and
+checking for NaNs and dropping them is now possible.
+
+One more notable change is that torch `Distribution` objects are now sliced
+when slicing a batch. Previously a Batch with say 10 actions and a dist corresponding to them was sliced to `[:3]`, the `dist` would still correspond to all 10 actions. Now, the dist is also "sliced" to the first 3 actions.
+
 ### Changes/Improvements
-- `evaluation`: New package for repeating the same experiment with multiple seeds and aggregating the results. #1074 #1141 #1183
+
+- `evaluation`: New package for repeating the same experiment with multiple
+  seeds and aggregating the results. #1074 #1141 #1183
 - `data`:
-  - `Batch`:
-    - Add methods `to_dict` and `to_list_of_dicts`. #1063 #1098
-    - Add methods `to_numpy_` and `to_torch_`. #1098, #1117
-    - Add `__eq__` (semantic equality check). #1098
-    - `keys()` deprecated in favor of `get_keys()` (needed to make iteration consistent with naming) #1105.
-    - Major: new methods for applying functions to values, to check for NaNs and drop them, and to set values. #1181
-    - Slicing a batch with a torch distribution now also slices the distribution. #1181
-  - `data.collector`:
-    - `Collector`:
-      - Introduced `BaseCollector` as a base class for all collectors. #1123
-      - Add method `close` #1063
-      - Method `reset` is now more granular (new flags controlling behavior). #1063
-    - `CollectStats`: Add convenience constructor `with_autogenerated_stats`. #1063
+    - `Batch`:
+        - Add methods `to_dict` and `to_list_of_dicts`. #1063 #1098
+        - Add methods `to_numpy_` and `to_torch_`. #1098, #1117
+        - Add `__eq__` (semantic equality check). #1098
+        - `keys()` deprecated in favor of `get_keys()` (needed to make iteration
+          consistent with naming) #1105.
+        - Major: new methods for applying functions to values, to check for NaNs
+          and drop them, and to set values. #1181
+        - Slicing a batch with a torch distribution now also slices the
+          distribution. #1181
+    - `data.collector`:
+        - `Collector`:
+            - Introduced `BaseCollector` as a base class for all collectors.
+              #1123
+            - Add method `close` #1063
+            - Method `reset` is now more granular (new flags controlling
+              behavior). #1063
+        - `CollectStats`: Add convenience
+          constructor `with_autogenerated_stats`. #1063
 - `trainer`:
-  - Trainers can now control whether collectors should be reset prior to training. #1063
-- policy:
-  - introduced attribute `in_training_step` that is controlled by the trainer. #1123
-  - policy automatically set to `eval` mode when collecting and to `train` mode when updating. #1123
-  - Extended interface of `compute_action` to also support array-like inputs #1169
+    - Trainers can now control whether collectors should be reset prior to
+      training. #1063
+- `policy`:
+    - introduced attribute `in_training_step` that is controlled by the trainer.
+      #1123
+    - policy automatically set to `eval` mode when collecting and to `train`
+      mode when updating. #1123
+    - Extended interface of `compute_action` to also support array-like inputs
+      #1169
 - `highlevel`:
-  - `SamplingConfig`:
-    - Add support for `batch_size=None`. #1077 
-    - Add `training_seed` for explicit seeding of training and test environments, the `test_seed` is inferred from `training_seed`. #1074
-  - `experiment`: 
-     - `Experiment` now has a `name` attribute, which can be set using `ExperimentBuilder.with_name` and 
-       which determines the default run name and therefore the persistence subdirectory.
-       It can still be overridden in `Experiment.run()`, the new parameter name being `run_name` rather than
-       `experiment_name` (although the latter will still be interpreted correctly). #1074 #1131
-     - Add class `ExperimentCollection` for the convenient execution of multiple experiment runs #1131
-     - `ExperimentBuilder`: 
-         - Add method `build_seeded_collection` for the sound creation of multiple
-           experiments with varying random seeds #1131
-         - Add method `copy` to facilitate the creation of multiple experiments from a single builder #1131
-  - `env`:
-    - Added new `VectorEnvType` called `SUBPROC_SHARED_MEM_AUTO` and used in for Atari and Mujoco venv creation. #1141
-- Loggers can now restore the logged data into python by using the new `restore_logged_data` method. #1074
-- Wandb logger extended #1183
-- `utils`: 
-  - `net.continuous.Critic`:
-    - Add flag `apply_preprocess_net_to_obs_only` to allow the
-      preprocessing network to be applied to the observations only (without
-      the actions concatenated), which is essential for the case where we want
-      to reuse the actor's preprocessing network #1128
-  - `torch_utils` (new module)
-    - Added context managers `torch_train_mode` and `policy_within_training_step` #1123
-  - `print`
-    - `DataclassPPrintMixin` now supports outputting a string, not just printing the pretty repr. #1141
+    - `SamplingConfig`:
+        - Add support for `batch_size=None`. #1077
+        - Add `training_seed` for explicit seeding of training and test
+          environments, the `test_seed` is inferred from `training_seed`. #1074
+    - `experiment`:
+        - `Experiment` now has a `name` attribute, which can be set
+          using `ExperimentBuilder.with_name` and
+          which determines the default run name and therefore the persistence
+          subdirectory.
+          It can still be overridden in `Experiment.run()`, the new parameter
+          name being `run_name` rather than
+          `experiment_name` (although the latter will still be interpreted
+          correctly). #1074 #1131
+        - Add class `ExperimentCollection` for the convenient execution of
+          multiple experiment runs #1131
+        - `ExperimentBuilder`:
+            - Add method `build_seeded_collection` for the sound creation of
+              multiple
+              experiments with varying random seeds #1131
+            - Add method `copy` to facilitate the creation of multiple
+              experiments from a single builder #1131
+    - `env`:
+        - Added new `VectorEnvType` called `SUBPROC_SHARED_MEM_AUTO` and used it
+          for Atari and Mujoco venv creation. #1141
+- `utils`:
+    - `logger`:
+        - Loggers can now restore the logged data into python by using the
+          new `restore_logged_data` method. #1074
+        - Wandb logger extended #1183
+    - `net.continuous.Critic`:
+        - Add flag `apply_preprocess_net_to_obs_only` to allow the
+          preprocessing network to be applied to the observations only (without
+          the actions concatenated), which is essential for the case where we
+          want
+          to reuse the actor's preprocessing network #1128
+    - `torch_utils` (new module)
+        - Added context managers `torch_train_mode`
+          and `policy_within_training_step` #1123
+    - `print`
+        - `DataclassPPrintMixin` now supports outputting a string, not just
+          printing the pretty repr. #1141
 
 ### Fixes
+
 - `highlevel`:
-  - `CriticFactoryReuseActor`: Enable the Critic flag `apply_preprocess_net_to_obs_only` for continuous critics, 
-    fixing the case where we want to reuse an actor's preprocessing network for the critic (affects usages
-    of the experiment builder method `with_critic_factory_use_actor` with continuous environments) #1128
-  - Policy parameter `action_scaling` value `"default"` was not correctly transformed to a Boolean value for 
-    algorithms SAC, DDPG, TD3 and REDQ. The value `"default"` being truthy caused action scaling to be enabled
-    even for discrete action spaces. #1191
+    - `CriticFactoryReuseActor`: Enable the Critic
+      flag `apply_preprocess_net_to_obs_only` for continuous critics,
+      fixing the case where we want to reuse an actor's preprocessing network
+      for the critic (affects usages
+      of the experiment builder method `with_critic_factory_use_actor` with
+      continuous environments) #1128
+    - Policy parameter `action_scaling` value `"default"` was not correctly
+      transformed to a Boolean value for
+      algorithms SAC, DDPG, TD3 and REDQ. The value `"default"` being truthy
+      caused action scaling to be enabled
+      even for discrete action spaces. #1191
 - `atari_network.DQN`:
-  - Fix constructor input validation #1128
-  - Fix `output_dim` not being set if `features_only`=True and `output_dim_added_layer` is not None #1128
+    - Fix constructor input validation #1128
+    - Fix `output_dim` not being set if `features_only`=True
+      and `output_dim_added_layer` is not None #1128
 - `PPOPolicy`:
-  - Fix `max_batchsize` not being used in `logp_old` computation inside `process_fn` #1168
+    - Fix `max_batchsize` not being used in `logp_old` computation
+      inside `process_fn` #1168
 - Fix `Batch.__eq__` to allow comparing Batches with scalar array values #1185
 
 ### Internal Improvements
-- `Collector`s rely less on state, the few stateful things are stored explicitly instead of through a `.data` attribute. #1063
-- Introduced a first iteration of a naming convention for vars in `Collector`s. #1063
-- Generally improved readability of Collector code and associated tests (still quite some way to go). #1063
+
+- `Collector`s rely less on state, the few stateful things are stored explicitly
+  instead of through a `.data` attribute. #1063
+- Introduced a first iteration of a naming convention for vars in `Collector`s.
+  #1063
+- Generally improved readability of Collector code and associated tests (still
+  quite some way to go). #1063
 - Improved typing for `exploration_noise` and within Collector. #1063
-- Better variable names related to model outputs (logits, dist input etc.). #1032
-- Improved typing for actors and critics, using Tianshou classes like `Actor`, `ActorProb`, etc., 
-instead of just `nn.Module`. #1032
-- Added interfaces for most `Actor` and `Critic` classes to enforce the presence of `forward` methods. #1032
-- Simplified `PGPolicy` forward by unifying the `dist_fn` interface (see associated breaking change). #1032
-- Use `.mode` of distribution instead of relying on knowledge of the distribution type. #1032
+- Better variable names related to model outputs (logits, dist input etc.).
+  #1032
+- Improved typing for actors and critics, using Tianshou classes
+  like `Actor`, `ActorProb`, etc.,
+  instead of just `nn.Module`. #1032
+- Added interfaces for most `Actor` and `Critic` classes to enforce the presence
+  of `forward` methods. #1032
+- Simplified `PGPolicy` forward by unifying the `dist_fn` interface (see
+  associated breaking change). #1032
+- Use `.mode` of distribution instead of relying on knowledge of the
+  distribution type. #1032
 - Exception no longer raised on `len` of empty `Batch`. #1084
 - tests and examples are covered by `mypy`. #1077
 - `NetBase` is more used, stricter typing by making it generic. #1077
-- Use explicit multiprocessing context for creating `Pipe` in `subproc.py`. #1102
+- Use explicit multiprocessing context for creating `Pipe` in `subproc.py`.
+  #1102
 
 ### Breaking Changes
+
 - `data`:
-  - `Collector`:
-    - Removed `.data` attribute. #1063
-    - Collectors no longer reset the environment on initialization. 
-      Instead, the user might have to call `reset` expicitly or pass `reset_before_collect=True` . #1063
-    - Removed `no_grad` argument from `collect` method (was unused in tianshou). #1123
-  - `Batch`:
-    - Fixed `iter(Batch(...)` which now behaves the same way as `Batch(...).__iter__()`. 
-      Can be considered a bugfix. #1063
-    - The methods `to_numpy` and `to_torch` in are not in-place anymore 
-      (use `to_numpy_` or `to_torch_` instead). #1098, #1117
-    - The method `Batch.is_empty` has been removed. Instead, the user can simply check for emptiness of Batch by using `len` on dicts. #1144
-    - Stricter `cat_`, only concatenation of batches with the same structure is allowed. #1181
-- Logging:
-  - `BaseLogger.prepare_dict_for_logging` is now abstract. #1074
-  - Removed deprecated and unused `BasicLogger` (only affects users who subclassed it). #1074
-- VectorEnvs now return an array of info-dicts on reset instead of a list. #1063
-- Changed interface of `dist_fn` in `PGPolicy` and all subclasses to take a single argument in both
-continuous and discrete cases. #1032
+    - `Collector`:
+        - Removed `.data` attribute. #1063
+        - Collectors no longer reset the environment on initialization.
+          Instead, the user might have to call `reset` explicitly or
+          pass `reset_before_collect=True`. #1063
+        - Removed `no_grad` argument from `collect` method (was unused in
+          tianshou). #1123
+    - `Batch`:
+        - Fixed `iter(Batch(...))` which now behaves the same way
+          as `Batch(...).__iter__()`.
+          Can be considered a bugfix. #1063
+        - The methods `to_numpy` and `to_torch` are not in-place anymore
+          (use `to_numpy_` or `to_torch_` instead). #1098, #1117
+        - The method `Batch.is_empty` has been removed. Instead, the user can
+          simply check for emptiness of Batch by using `len` on dicts. #1144
+        - Stricter `cat_`, only concatenation of batches with the same structure
+          is allowed. #1181
+        - `to_torch` and `to_numpy` are no longer static methods.
+          So `Batch.to_numpy(batch)` should be replaced by `batch.to_numpy()`.
+          #1200
 - `utils`:
-  - Modules with code that was copied from sensAI have been replaced by imports from new dependency sensAI-utils:
-     - `tianshou.utils.logging` is replaced with `sensai.util.logging`
-     - `tianshou.utils.string` is replaced with `sensai.util.string`
-     - `tianshou.utils.pickle` is replaced with `sensai.util.pickle`
-  - `utils.net.common.Recurrent` now receives and returns a `RecurrentStateBatch` instead of a dict. #1077
-- `AtariEnvFactory` constructor (in examples, so not really breaking) now requires explicit train and test seeds. #1074
-- `EnvFactoryRegistered` now requires an explicit `test_seed` in the constructor. #1074
+    - `logger`:
+        - `BaseLogger.prepare_dict_for_logging` is now abstract. #1074
+        - Removed deprecated and unused `BasicLogger` (only affects users who
+          subclassed it). #1074
+    - `utils.net`:
+        - `Recurrent` now receives and returns
+          a `RecurrentStateBatch` instead of a dict. #1077
+    - Modules with code that was copied from sensAI have been replaced by
+      imports from new dependency sensAI-utils:
+        - `tianshou.utils.logging` is replaced with `sensai.util.logging`
+        - `tianshou.utils.string` is replaced with `sensai.util.string`
+        - `tianshou.utils.pickle` is replaced with `sensai.util.pickle`
+- `env`:
+    - All VectorEnvs now return a numpy array of info-dicts on reset instead of
+      a list. #1063
+- `policy`:
+    - Changed interface of `dist_fn` in `PGPolicy` and all subclasses to take a
+      single argument in both
+      continuous and discrete cases. #1032
+- `AtariEnvFactory` constructor (in examples, so not really breaking) now
+  requires explicit train and test seeds. #1074
+- `EnvFactoryRegistered` now requires an explicit `test_seed` in the
+  constructor. #1074
 - `highlevel`:
-  - The parameter `dist_fn` has been removed from the parameter objects (`PGParams`, `A2CParams`, `PPOParams`, `NPGParams`, `TRPOParams`).
-    The correct distribution is now determined automatically based on the actor factory being used, avoiding the possibility of 
-    misspecification. Persisted configurations/policies continue to work as expected, but code must not specify the `dist_fn` parameter.
-    #1194 #1195
-
+    - `params`: The parameter `dist_fn` has been removed from the parameter
+      objects (`PGParams`, `A2CParams`, `PPOParams`, `NPGParams`, `TRPOParams`).
+      The correct distribution is now determined automatically based on the
+      actor factory being used, avoiding the possibility of
+      misspecification. Persisted configurations/policies continue to work as
+      expected, but code must not specify the `dist_fn` parameter.
+      #1194 #1195
+    - `env`:
+        - `EnvFactoryRegistered`: parameter `seed` has been replaced by the pair
+          of parameters `train_seed` and `test_seed`.
+          Persisted instances will continue to work correctly.
+          Subclasses such as `AtariEnvFactory` are also affected and now require
+          explicit train and test seeds. #1074
+        - `VectorEnvType`: `SUBPROC_SHARED_MEM` has been replaced
+          by `SUBPROC_SHARED_MEM_DEFAULT`. It is recommended to
+          use `SUBPROC_SHARED_MEM_AUTO` instead. However, persisted configs will
+          continue working. #1141
 
 ### Tests
-- Fixed env seeding it `test_sac_with_il.py` so that the test doesn't fail randomly. #1081
+
+- Fixed env seeding in `test_sac_with_il.py` so that the test doesn't fail
+  randomly. #1081
 - Improved CI triggers and added telemetry (if requested by user) #1177
 - Improved environment used in tests.
 - Improved tests bach equality to check with scalar values #1185
 
 ### Dependencies
-- [DeepDiff](https://github.com/seperman/deepdiff) added to help with diffs of batches in tests. #1098
+
+- [DeepDiff](https://github.com/seperman/deepdiff) added to help with diffs of
+  batches in tests. #1098
 - Bumped black, idna, pillow
 - New extra "eval"
 - Bumped numba to >=60.0.0, permitting installation on python 3.12 # 1177
+- New dependency sensai-utils
 
 Started after v1.0.0

From 56f0c9ee27af3a64cc0ae3f6716919a7785d36f1 Mon Sep 17 00:00:00 2001
From: Michael Panchenko <m.panchenko@appliedai.de>
Date: Sat, 10 Aug 2024 17:16:58 +0200
Subject: [PATCH 4/6] Typing, typo in test

---
 test/base/test_batch.py |  2 +-
 tianshou/data/batch.py  | 12 +++++-------
 2 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/test/base/test_batch.py b/test/base/test_batch.py
index 813e5131e..9fd12b8ff 100644
--- a/test/base/test_batch.py
+++ b/test/base/test_batch.py
@@ -689,7 +689,7 @@ def test_to_numpy_() -> None:
     @staticmethod
     def test_to_torch() -> None:
         batch = Batch(a=1, b=np.arange(5), c={"d": np.array([1, 2, 3])})
-        new_batch = batch.to_torch(batch)
+        new_batch = batch.to_torch()
         assert id(batch) != id(new_batch)
         assert isinstance(batch.b, np.ndarray)
         assert isinstance(batch.c.d, np.ndarray)
diff --git a/tianshou/data/batch.py b/tianshou/data/batch.py
index 577dea7b5..6e7c9cf12 100644
--- a/tianshou/data/batch.py
+++ b/tianshou/data/batch.py
@@ -357,8 +357,7 @@ def __repr__(self) -> str:
     def __eq__(self, other: Any) -> bool:
         raise ProtocolCalledException
 
-    @staticmethod
-    def to_numpy(batch: TBatch) -> TBatch:
+    def to_numpy(self: Self) -> Self:
         """Change all torch.Tensor to numpy.ndarray and return a new Batch."""
         raise ProtocolCalledException
 
@@ -366,12 +365,11 @@ def to_numpy_(self) -> None:
         """Change all torch.Tensor to numpy.ndarray in-place."""
         raise ProtocolCalledException
 
-    @staticmethod
     def to_torch(
-        batch: TBatch,
+        self: Self,
         dtype: torch.dtype | None = None,
         device: str | int | torch.device = "cpu",
-    ) -> TBatch:
+    ) -> Self:
         """Change all numpy.ndarray to torch.Tensor and return a new Batch."""
         raise ProtocolCalledException
 
@@ -835,7 +833,7 @@ def __repr__(self) -> str:
             self_str = self.__class__.__name__ + "()"
         return self_str
 
-    def to_numpy(self) -> Self:
+    def to_numpy(self: Self) -> Self:
         result = deepcopy(self)
         result.to_numpy_()
         return result
@@ -849,7 +847,7 @@ def arr_to_numpy(arr: TArr) -> TArr:
         self.apply_values_transform(arr_to_numpy, inplace=True)
 
     def to_torch(
-        self,
+        self: Self,
         dtype: torch.dtype | None = None,
         device: str | int | torch.device = "cpu",
     ) -> Self:

From 7f8300836fa38d7066f445d2b028d614007d0a27 Mon Sep 17 00:00:00 2001
From: Michael Panchenko <m.panchenko@appliedai.de>
Date: Sat, 10 Aug 2024 17:28:09 +0200
Subject: [PATCH 5/6] Dependencies: use the official rliable repo instead of
 aAI institute fork

The necessary changes were merged there meanwhile
---
 poetry.lock    | 12 ++++++------
 pyproject.toml |  2 +-
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/poetry.lock b/poetry.lock
index cdae2222c..907b74db0 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -5000,7 +5000,7 @@ files = [
 
 [[package]]
 name = "rliable"
-version = "1.0.8"
+version = "1.1.0"
 description = "rliable: Reliable evaluation on reinforcement learning and machine learning benchmarks."
 optional = true
 python-versions = "*"
@@ -5009,16 +5009,16 @@ develop = false
 
 [package.dependencies]
 absl-py = ">=0.9.0"
-arch = "5.3.1"
+arch = ">=5.3.1,<8.0"
 numpy = ">=1.16.4"
 scipy = ">=1.7.0"
 seaborn = ">=0.11.2"
 
 [package.source]
 type = "git"
-url = "https://github.com/aai-institute/rliable.git"
-reference = "HEAD"
-resolved_reference = "c756ac408d15507481166edb252f5b61cf5628ff"
+url = "https://github.com/google-research/rliable.git"
+reference = "1171833f6706b6c25bbf042e2cb185a96fcf2ce6"
+resolved_reference = "1171833f6706b6c25bbf042e2cb185a96fcf2ce6"
 
 [[package]]
 name = "rpds-py"
@@ -6840,4 +6840,4 @@ vizdoom = ["vizdoom"]
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.11"
-content-hash = "200077246f10046fe1d0494977e5565420e0c166ef905a1d22608e84fcfb3459"
+content-hash = "eb478c2c355fda1ed70c5bf12ae9aea6a6051ec5ae3c614cdf353f4bf260fbf4"
diff --git a/pyproject.toml b/pyproject.toml
index 66c0740ab..2b2180e10 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -68,7 +68,7 @@ mujoco-py = { version = ">=2.1,<2.2", optional = true }
 opencv_python = { version = "*", optional = true }
 pybullet = { version = "*", optional = true }
 pygame = { version = ">=2.1.3", optional = true }
-rliable = {optional = true, git = "https://github.com/aai-institute/rliable.git"}
+rliable = {optional = true, git = "https://github.com/google-research/rliable.git", rev = "1171833f6706b6c25bbf042e2cb185a96fcf2ce6"}
 scipy = { version = "*", optional = true }
 shimmy = { version = ">=0.1.0,<1.0", optional = true }
 swig = { version = "4.*", optional = true }

From 007c3ca63caa6abfb4bc659c548aecf3e082e2da Mon Sep 17 00:00:00 2001
From: Michael Panchenko <m.panchenko@appliedai.de>
Date: Sat, 10 Aug 2024 17:36:35 +0200
Subject: [PATCH 6/6] Changelog [ci skip]

---
 CHANGELOG.md | 49 ++++++++++++++++++++++++++++++-------------------
 1 file changed, 30 insertions(+), 19 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0af64a53f..265f85733 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,29 +4,40 @@
 
 ### Highlights
 
+#### Evaluation Package
+
 This release introduces a new package `evaluation` that integrates best
 practices for running experiments (seeding test and train environmets) and for
-evaluating them using the [rliable](https://github.com/google-research/rliable) library. This should be especially useful
-for algorithm developers for
-comparing performances and creating meaningful visualizations. This functionality is currently in `alpha` state and will be further improved in the next releases. 
+evaluating them using the [rliable](https://github.com/google-research/rliable)
+library. This should be especially useful for algorithm developers for comparing
+performances and creating meaningful visualizations. **This functionality is
+currently in alpha state** and will be further improved in the next releases.
 You will need to install tianshou with the extra `eval` to use it.
 
-The creation of multiple experiments with varying random seeds has been greatly facilitated. Moreover,
-the `ExpLauncher` interface has been introduced and implemented with several backeds to 
-support the execution of multiple experiments in parallel. 
-
-An example for this using the high-level interfaces can be
-found [here](examples/mujoco/mujoco_ppo_hl_multi.py), examples that use
-low-level interfaces
-will follow soon.
-this feature
-Apart from that, several important extensions have been added to internal data structures,
-most notably to `Batch`. Batches now implement `__eq__` and can be meaningfully compared.
-Applying operations in a nested fashion has been significantly simplified, and
-checking for NaNs and dropping them is now possible.
-
-One more notable change is that torch `Distribution` objects are now sliced
-when slicing a batch. Previously a Batch with say 10 actions and a dist corresponding to them was sliced to `[:3]`, the `dist` would still correspond to all 10 actions. Now, the dist is also "sliced" to the first 3 actions.
+The creation of multiple experiments with varying random seeds has been greatly
+facilitated. Moreover, the `ExpLauncher` interface has been introduced and
+implemented with several backends to support the execution of multiple
+experiments in parallel.
+
+An example for this using the high-level interfaces can be found
+[here](examples/mujoco/mujoco_ppo_hl_multi.py); examples that use low-level
+interfaces will follow soon.
+
+#### Improvements in Batch
+
+Several important extensions have been added to internal data structures,
+most notably to `Batch`. Batches now implement `__eq__` and can be
+meaningfully compared. Applying operations in a nested fashion has been
+significantly simplified, and checking for NaNs and dropping them is now
+possible.
+
+One more notable change is that torch `Distribution` objects are now sliced when
+slicing a batch. Previously, when a Batch with say 10 actions and a dist
+corresponding to them was sliced to `[:3]`, the `dist` in the result would still
+correspond to all 10 actions. Now, the dist is also "sliced" to be the
+distribution of the first 3 actions.
+
+A detailed list of changes can be found below.
 
 ### Changes/Improvements