From 7b700cfc4d1f3ceaf09cccd95981e06385f7cf08 Mon Sep 17 00:00:00 2001 From: Michael Panchenko Date: Sat, 10 Aug 2024 16:56:39 +0200 Subject: [PATCH 1/6] Batch, breaking: turn to_torch and to_numpy into instance methods --- tianshou/data/batch.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/tianshou/data/batch.py b/tianshou/data/batch.py index 4ea99dbcb..60738b255 100644 --- a/tianshou/data/batch.py +++ b/tianshou/data/batch.py @@ -720,8 +720,8 @@ def __eq__(self, other: Any) -> bool: if not isinstance(other, self.__class__): return False - this_batch_no_torch_tensor: Batch = Batch.to_numpy(self) - other_batch_no_torch_tensor: Batch = Batch.to_numpy(other) + this_batch_no_torch_tensor = self.to_numpy() + other_batch_no_torch_tensor = other.to_numpy() # DeepDiff 7.0.1 cannot compare 0-dimensional arrays # so, we ensure with this transform that all array values have at least 1 dim this_batch_no_torch_tensor.apply_values_transform( @@ -836,9 +836,8 @@ def __repr__(self) -> str: self_str = self.__class__.__name__ + "()" return self_str - @staticmethod - def to_numpy(batch: TBatch) -> TBatch: - result = deepcopy(batch) + def to_numpy(self) -> Self: + result = deepcopy(self) result.to_numpy_() return result @@ -850,13 +849,12 @@ def arr_to_numpy(arr: TArr) -> TArr: self.apply_values_transform(arr_to_numpy, inplace=True) - @staticmethod def to_torch( - batch: TBatch, + self, dtype: torch.dtype | None = None, device: str | int | torch.device = "cpu", - ) -> TBatch: - result = deepcopy(batch) + ) -> Self: + result = deepcopy(self) result.to_torch_(dtype=dtype, device=device) return result From 4680e9a1d2f4f9fc1fe3048fe28ed274cbc54493 Mon Sep 17 00:00:00 2001 From: Michael Panchenko Date: Sat, 10 Aug 2024 17:12:24 +0200 Subject: [PATCH 2/6] EnvFactoryRegistered: restored backwards compat --- test/base/test_batch.py | 4 ++-- tianshou/highlevel/env.py | 14 ++++++++++++++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git 
a/test/base/test_batch.py b/test/base/test_batch.py index 8839fe482..813e5131e 100644 --- a/test/base/test_batch.py +++ b/test/base/test_batch.py @@ -669,7 +669,7 @@ class TestBatchConversions: @staticmethod def test_to_numpy() -> None: batch = Batch(a=1, b=torch.arange(5), c={"d": torch.tensor([1, 2, 3])}) - new_batch: Batch = Batch.to_numpy(batch) + new_batch = batch.to_numpy() assert id(batch) != id(new_batch) assert isinstance(batch.b, torch.Tensor) assert isinstance(batch.c.d, torch.Tensor) @@ -689,7 +689,7 @@ def test_to_numpy_() -> None: @staticmethod def test_to_torch() -> None: batch = Batch(a=1, b=np.arange(5), c={"d": np.array([1, 2, 3])}) - new_batch: Batch = Batch.to_torch(batch) + new_batch = batch.to_torch(batch) assert id(batch) != id(new_batch) assert isinstance(batch.b, np.ndarray) assert isinstance(batch.c.d, np.ndarray) diff --git a/tianshou/highlevel/env.py b/tianshou/highlevel/env.py index b69e9c0b8..e61e0ed36 100644 --- a/tianshou/highlevel/env.py +++ b/tianshou/highlevel/env.py @@ -8,6 +8,7 @@ import gymnasium as gym import gymnasium.spaces from gymnasium import Env +from sensai.util.pickle import setstate from sensai.util.string import ToStringMixin from tianshou.env import ( @@ -452,6 +453,19 @@ def __init__( } self.make_kwargs = make_kwargs + def __setstate__(self, state: dict) -> None: + if "seed" in state: + if "test_seed" in state or "train_seed" in state: + raise RuntimeError( + f"Cannot have both 'seed' and 'test_seed'/'train_seed' in state. " + f"Something went wrong during serialization/deserialization: " + f"{state=}", + ) + state["test_seed"] = state["seed"] + state["train_seed"] = state["seed"] + del state["seed"] + setstate(EnvFactoryRegistered, self, state) + def _create_kwargs(self, mode: EnvMode) -> dict: """Adapts the keyword arguments for the given mode. 
From 3d0cd3b5fc74e59d9ac6bb86dad102ea33e11705 Mon Sep 17 00:00:00 2001 From: Michael Panchenko Date: Sat, 10 Aug 2024 17:12:32 +0200 Subject: [PATCH 3/6] Changelog --- CHANGELOG.md | 300 ++++++++++++++++++++++++++++++++++----------------- 1 file changed, 203 insertions(+), 97 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7e12b0991..0af64a53f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,133 +2,239 @@ ## Release 1.1.0 +### Highlights + +This release introduces a new package `evaluation` that integrates best +practices for running experiments (seeding test and train environmets) and for +evaluating them using the [rliable](https://github.com/google-research/rliable) library. This should be especially useful +for algorithm developers for +comparing performances and creating meaningful visualizations. This functionality is currently in `alpha` state and will be further improved in the next releases. +You will need to install tianshou with the extra `eval` to use it. + +The creation of multiple experiments with varying random seeds has been greatly facilitated. Moreover, +the `ExpLauncher` interface has been introduced and implemented with several backeds to +support the execution of multiple experiments in parallel. + +An example for this using the high-level interfaces can be +found [here](examples/mujoco/mujoco_ppo_hl_multi.py), examples that use +low-level interfaces +will follow soon. +this feature +Apart from that, several important extensions have been added to internal data structures, +most notably to `Batch`. Batches now implement `__eq__` and can be meaningfully compared. +Applying operations in a nested fashion has been significantly simplified, and +checking for NaNs and dropping them is now possible. + +One more notable change is that torch `Distribution` objects are now sliced +when slicing a batch. 
Previously a Batch with say 10 actions and a dist corresponding to them was sliced to `[:3]`, the `dist` would still correspond to all 10 actions. Now, the dist is also "sliced" to the first 3 actions. + ### Changes/Improvements -- `evaluation`: New package for repeating the same experiment with multiple seeds and aggregating the results. #1074 #1141 #1183 + +- `evaluation`: New package for repeating the same experiment with multiple + seeds and aggregating the results. #1074 #1141 #1183 - `data`: - - `Batch`: - - Add methods `to_dict` and `to_list_of_dicts`. #1063 #1098 - - Add methods `to_numpy_` and `to_torch_`. #1098, #1117 - - Add `__eq__` (semantic equality check). #1098 - - `keys()` deprecated in favor of `get_keys()` (needed to make iteration consistent with naming) #1105. - - Major: new methods for applying functions to values, to check for NaNs and drop them, and to set values. #1181 - - Slicing a batch with a torch distribution now also slices the distribution. #1181 - - `data.collector`: - - `Collector`: - - Introduced `BaseCollector` as a base class for all collectors. #1123 - - Add method `close` #1063 - - Method `reset` is now more granular (new flags controlling behavior). #1063 - - `CollectStats`: Add convenience constructor `with_autogenerated_stats`. #1063 + - `Batch`: + - Add methods `to_dict` and `to_list_of_dicts`. #1063 #1098 + - Add methods `to_numpy_` and `to_torch_`. #1098, #1117 + - Add `__eq__` (semantic equality check). #1098 + - `keys()` deprecated in favor of `get_keys()` (needed to make iteration + consistent with naming) #1105. + - Major: new methods for applying functions to values, to check for NaNs + and drop them, and to set values. #1181 + - Slicing a batch with a torch distribution now also slices the + distribution. #1181 + - `data.collector`: + - `Collector`: + - Introduced `BaseCollector` as a base class for all collectors. 
+ #1123 + - Add method `close` #1063 + - Method `reset` is now more granular (new flags controlling + behavior). #1063 + - `CollectStats`: Add convenience + constructor `with_autogenerated_stats`. #1063 - `trainer`: - - Trainers can now control whether collectors should be reset prior to training. #1063 -- policy: - - introduced attribute `in_training_step` that is controlled by the trainer. #1123 - - policy automatically set to `eval` mode when collecting and to `train` mode when updating. #1123 - - Extended interface of `compute_action` to also support array-like inputs #1169 + - Trainers can now control whether collectors should be reset prior to + training. #1063 +- `policy`: + - introduced attribute `in_training_step` that is controlled by the trainer. + #1123 + - policy automatically set to `eval` mode when collecting and to `train` + mode when updating. #1123 + - Extended interface of `compute_action` to also support array-like inputs + #1169 - `highlevel`: - - `SamplingConfig`: - - Add support for `batch_size=None`. #1077 - - Add `training_seed` for explicit seeding of training and test environments, the `test_seed` is inferred from `training_seed`. #1074 - - `experiment`: - - `Experiment` now has a `name` attribute, which can be set using `ExperimentBuilder.with_name` and - which determines the default run name and therefore the persistence subdirectory. - It can still be overridden in `Experiment.run()`, the new parameter name being `run_name` rather than - `experiment_name` (although the latter will still be interpreted correctly). 
#1074 #1131 - - Add class `ExperimentCollection` for the convenient execution of multiple experiment runs #1131 - - `ExperimentBuilder`: - - Add method `build_seeded_collection` for the sound creation of multiple - experiments with varying random seeds #1131 - - Add method `copy` to facilitate the creation of multiple experiments from a single builder #1131 - - `env`: - - Added new `VectorEnvType` called `SUBPROC_SHARED_MEM_AUTO` and used in for Atari and Mujoco venv creation. #1141 -- Loggers can now restore the logged data into python by using the new `restore_logged_data` method. #1074 -- Wandb logger extended #1183 -- `utils`: - - `net.continuous.Critic`: - - Add flag `apply_preprocess_net_to_obs_only` to allow the - preprocessing network to be applied to the observations only (without - the actions concatenated), which is essential for the case where we want - to reuse the actor's preprocessing network #1128 - - `torch_utils` (new module) - - Added context managers `torch_train_mode` and `policy_within_training_step` #1123 - - `print` - - `DataclassPPrintMixin` now supports outputting a string, not just printing the pretty repr. #1141 + - `SamplingConfig`: + - Add support for `batch_size=None`. #1077 + - Add `training_seed` for explicit seeding of training and test + environments, the `test_seed` is inferred from `training_seed`. #1074 + - `experiment`: + - `Experiment` now has a `name` attribute, which can be set + using `ExperimentBuilder.with_name` and + which determines the default run name and therefore the persistence + subdirectory. + It can still be overridden in `Experiment.run()`, the new parameter + name being `run_name` rather than + `experiment_name` (although the latter will still be interpreted + correctly). 
#1074 #1131 + - Add class `ExperimentCollection` for the convenient execution of + multiple experiment runs #1131 + - `ExperimentBuilder`: + - Add method `build_seeded_collection` for the sound creation of + multiple + experiments with varying random seeds #1131 + - Add method `copy` to facilitate the creation of multiple + experiments from a single builder #1131 + - `env`: + - Added new `VectorEnvType` called `SUBPROC_SHARED_MEM_AUTO` and used it + for Atari and Mujoco venv creation. #1141 +- `utils`: + - `logger`: + - Loggers can now restore the logged data into python by using the + new `restore_logged_data` method. #1074 + - Wandb logger extended #1183 + - `net.continuous.Critic`: + - Add flag `apply_preprocess_net_to_obs_only` to allow the + preprocessing network to be applied to the observations only (without + the actions concatenated), which is essential for the case where we + want + to reuse the actor's preprocessing network #1128 + - `torch_utils` (new module) + - Added context managers `torch_train_mode` + and `policy_within_training_step` #1123 + - `print` + - `DataclassPPrintMixin` now supports outputting a string, not just + printing the pretty repr. #1141 ### Fixes + - `highlevel`: - - `CriticFactoryReuseActor`: Enable the Critic flag `apply_preprocess_net_to_obs_only` for continuous critics, - fixing the case where we want to reuse an actor's preprocessing network for the critic (affects usages - of the experiment builder method `with_critic_factory_use_actor` with continuous environments) #1128 - - Policy parameter `action_scaling` value `"default"` was not correctly transformed to a Boolean value for - algorithms SAC, DDPG, TD3 and REDQ. The value `"default"` being truthy caused action scaling to be enabled - even for discrete action spaces.
#1191 + - `CriticFactoryReuseActor`: Enable the Critic + flag `apply_preprocess_net_to_obs_only` for continuous critics, + fixing the case where we want to reuse an actor's preprocessing network + for the critic (affects usages + of the experiment builder method `with_critic_factory_use_actor` with + continuous environments) #1128 + - Policy parameter `action_scaling` value `"default"` was not correctly + transformed to a Boolean value for + algorithms SAC, DDPG, TD3 and REDQ. The value `"default"` being truthy + caused action scaling to be enabled + even for discrete action spaces. #1191 - `atari_network.DQN`: - - Fix constructor input validation #1128 - - Fix `output_dim` not being set if `features_only`=True and `output_dim_added_layer` is not None #1128 + - Fix constructor input validation #1128 + - Fix `output_dim` not being set if `features_only`=True + and `output_dim_added_layer` is not None #1128 - `PPOPolicy`: - - Fix `max_batchsize` not being used in `logp_old` computation inside `process_fn` #1168 + - Fix `max_batchsize` not being used in `logp_old` computation + inside `process_fn` #1168 - Fix `Batch.__eq__` to allow comparing Batches with scalar array values #1185 ### Internal Improvements -- `Collector`s rely less on state, the few stateful things are stored explicitly instead of through a `.data` attribute. #1063 -- Introduced a first iteration of a naming convention for vars in `Collector`s. #1063 -- Generally improved readability of Collector code and associated tests (still quite some way to go). #1063 + +- `Collector`s rely less on state, the few stateful things are stored explicitly + instead of through a `.data` attribute. #1063 +- Introduced a first iteration of a naming convention for vars in `Collector`s. + #1063 +- Generally improved readability of Collector code and associated tests (still + quite some way to go). #1063 - Improved typing for `exploration_noise` and within Collector. 
#1063 -- Better variable names related to model outputs (logits, dist input etc.). #1032 -- Improved typing for actors and critics, using Tianshou classes like `Actor`, `ActorProb`, etc., -instead of just `nn.Module`. #1032 -- Added interfaces for most `Actor` and `Critic` classes to enforce the presence of `forward` methods. #1032 -- Simplified `PGPolicy` forward by unifying the `dist_fn` interface (see associated breaking change). #1032 -- Use `.mode` of distribution instead of relying on knowledge of the distribution type. #1032 +- Better variable names related to model outputs (logits, dist input etc.). + #1032 +- Improved typing for actors and critics, using Tianshou classes + like `Actor`, `ActorProb`, etc., + instead of just `nn.Module`. #1032 +- Added interfaces for most `Actor` and `Critic` classes to enforce the presence + of `forward` methods. #1032 +- Simplified `PGPolicy` forward by unifying the `dist_fn` interface (see + associated breaking change). #1032 +- Use `.mode` of distribution instead of relying on knowledge of the + distribution type. #1032 - Exception no longer raised on `len` of empty `Batch`. #1084 - tests and examples are covered by `mypy`. #1077 - `NetBase` is more used, stricter typing by making it generic. #1077 -- Use explicit multiprocessing context for creating `Pipe` in `subproc.py`. #1102 +- Use explicit multiprocessing context for creating `Pipe` in `subproc.py`. + #1102 ### Breaking Changes + - `data`: - - `Collector`: - - Removed `.data` attribute. #1063 - - Collectors no longer reset the environment on initialization. - Instead, the user might have to call `reset` expicitly or pass `reset_before_collect=True` . #1063 - - Removed `no_grad` argument from `collect` method (was unused in tianshou). #1123 - - `Batch`: - - Fixed `iter(Batch(...)` which now behaves the same way as `Batch(...).__iter__()`. - Can be considered a bugfix. 
#1063 - The methods `to_numpy` and `to_torch` in are not in-place anymore - (use `to_numpy_` or `to_torch_` instead). #1098, #1117 - The method `Batch.is_empty` has been removed. Instead, the user can simply check for emptiness of Batch by using `len` on dicts. #1144 - Stricter `cat_`, only concatenation of batches with the same structure is allowed. #1181 -- Logging: - - `BaseLogger.prepare_dict_for_logging` is now abstract. #1074 - - Removed deprecated and unused `BasicLogger` (only affects users who subclassed it). #1074 -- VectorEnvs now return an array of info-dicts on reset instead of a list. #1063 -- Changed interface of `dist_fn` in `PGPolicy` and all subclasses to take a single argument in both -continuous and discrete cases. #1032 + - `Collector`: + - Removed `.data` attribute. #1063 + - Collectors no longer reset the environment on initialization. + Instead, the user might have to call `reset` explicitly or + pass `reset_before_collect=True`. #1063 + - Removed `no_grad` argument from `collect` method (was unused in + tianshou). #1123 + - `Batch`: + - Fixed `iter(Batch(...))` which now behaves the same way + as `Batch(...).__iter__()`. + Can be considered a bugfix. #1063 + - The methods `to_numpy` and `to_torch` are not in-place anymore + (use `to_numpy_` or `to_torch_` instead). #1098, #1117 + - The method `Batch.is_empty` has been removed. Instead, the user can + simply check for emptiness of Batch by using `len` on dicts. #1144 + - Stricter `cat_`, only concatenation of batches with the same structure + is allowed. #1181 + - `to_torch` and `to_numpy` are no longer static methods. + So `Batch.to_numpy(batch)` should be replaced by `batch.to_numpy()`.
+ #1200 - `utils`: - - Modules with code that was copied from sensAI have been replaced by imports from new dependency sensAI-utils: - - `tianshou.utils.logging` is replaced with `sensai.util.logging` - - `tianshou.utils.string` is replaced with `sensai.util.string` - - `tianshou.utils.pickle` is replaced with `sensai.util.pickle` - - `utils.net.common.Recurrent` now receives and returns a `RecurrentStateBatch` instead of a dict. #1077 -- `AtariEnvFactory` constructor (in examples, so not really breaking) now requires explicit train and test seeds. #1074 -- `EnvFactoryRegistered` now requires an explicit `test_seed` in the constructor. #1074 + - `logger`: + - `BaseLogger.prepare_dict_for_logging` is now abstract. #1074 + - Removed deprecated and unused `BasicLogger` (only affects users who + subclassed it). #1074 + - `utils.net`: + - `Recurrent` now receives and returns + a `RecurrentStateBatch` instead of a dict. #1077 + - Modules with code that was copied from sensAI have been replaced by + imports from new dependency sensAI-utils: + - `tianshou.utils.logging` is replaced with `sensai.util.logging` + - `tianshou.utils.string` is replaced with `sensai.util.string` + - `tianshou.utils.pickle` is replaced with `sensai.util.pickle` +- `env`: + - All VectorEnvs now return a numpy array of info-dicts on reset instead of + a list. #1063 +- `policy`: + - Changed interface of `dist_fn` in `PGPolicy` and all subclasses to take a + single argument in both + continuous and discrete cases. #1032 +- `AtariEnvFactory` constructor (in examples, so not really breaking) now + requires explicit train and test seeds. #1074 +- `EnvFactoryRegistered` now requires an explicit `test_seed` in the + constructor. #1074 - `highlevel`: - - The parameter `dist_fn` has been removed from the parameter objects (`PGParams`, `A2CParams`, `PPOParams`, `NPGParams`, `TRPOParams`). 
- The correct distribution is now determined automatically based on the actor factory being used, avoiding the possibility of - misspecification. Persisted configurations/policies continue to work as expected, but code must not specify the `dist_fn` parameter. - #1194 #1195 - + - `params`: The parameter `dist_fn` has been removed from the parameter + objects (`PGParams`, `A2CParams`, `PPOParams`, `NPGParams`, `TRPOParams`). + The correct distribution is now determined automatically based on the + actor factory being used, avoiding the possibility of + misspecification. Persisted configurations/policies continue to work as + expected, but code must not specify the `dist_fn` parameter. + #1194 #1195 + - `env`: + - `EnvFactoryRegistered`: parameter `seed` has been replaced by the pair + of parameters `train_seed` and `test_seed`. + Persisted instances will continue to work correctly. + Subclasses such as `AtariEnvFactory` are also affected and now require + explicit train and test seeds. #1074 + - `VectorEnvType`: `SUBPROC_SHARED_MEM` has been replaced + by `SUBPROC_SHARED_MEM_DEFAULT`. It is recommended to + use `SUBPROC_SHARED_MEM_AUTO` instead. However, persisted configs will + continue working. #1141 ### Tests -- Fixed env seeding it `test_sac_with_il.py` so that the test doesn't fail randomly. #1081 + +- Fixed env seeding in `test_sac_with_il.py` so that the test doesn't fail + randomly. #1081 - Improved CI triggers and added telemetry (if requested by user) #1177 - Improved environment used in tests. - Improved tests bach equality to check with scalar values #1185 ### Dependencies -- [DeepDiff](https://github.com/seperman/deepdiff) added to help with diffs of batches in tests. #1098 + +- [DeepDiff](https://github.com/seperman/deepdiff) added to help with diffs of + batches in tests.
#1098 - Bumped black, idna, pillow - New extra "eval" - Bumped numba to >=60.0.0, permitting installation on python 3.12 # 1177 +- New dependency sensai-utils Started after v1.0.0 From 56f0c9ee27af3a64cc0ae3f6716919a7785d36f1 Mon Sep 17 00:00:00 2001 From: Michael Panchenko Date: Sat, 10 Aug 2024 17:16:58 +0200 Subject: [PATCH 4/6] Typing, typo in test --- test/base/test_batch.py | 2 +- tianshou/data/batch.py | 12 +++++------- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/test/base/test_batch.py b/test/base/test_batch.py index 813e5131e..9fd12b8ff 100644 --- a/test/base/test_batch.py +++ b/test/base/test_batch.py @@ -689,7 +689,7 @@ def test_to_numpy_() -> None: @staticmethod def test_to_torch() -> None: batch = Batch(a=1, b=np.arange(5), c={"d": np.array([1, 2, 3])}) - new_batch = batch.to_torch(batch) + new_batch = batch.to_torch() assert id(batch) != id(new_batch) assert isinstance(batch.b, np.ndarray) assert isinstance(batch.c.d, np.ndarray) diff --git a/tianshou/data/batch.py b/tianshou/data/batch.py index 577dea7b5..6e7c9cf12 100644 --- a/tianshou/data/batch.py +++ b/tianshou/data/batch.py @@ -357,8 +357,7 @@ def __repr__(self) -> str: def __eq__(self, other: Any) -> bool: raise ProtocolCalledException - @staticmethod - def to_numpy(batch: TBatch) -> TBatch: + def to_numpy(self: Self) -> Self: """Change all torch.Tensor to numpy.ndarray and return a new Batch.""" raise ProtocolCalledException @@ -366,12 +365,11 @@ def to_numpy_(self) -> None: """Change all torch.Tensor to numpy.ndarray in-place.""" raise ProtocolCalledException - @staticmethod def to_torch( - batch: TBatch, + self: Self, dtype: torch.dtype | None = None, device: str | int | torch.device = "cpu", - ) -> TBatch: + ) -> Self: """Change all numpy.ndarray to torch.Tensor and return a new Batch.""" raise ProtocolCalledException @@ -835,7 +833,7 @@ def __repr__(self) -> str: self_str = self.__class__.__name__ + "()" return self_str - def to_numpy(self) -> Self: + def to_numpy(self: 
Self) -> Self: result = deepcopy(self) result.to_numpy_() return result @@ -849,7 +847,7 @@ def arr_to_numpy(arr: TArr) -> TArr: self.apply_values_transform(arr_to_numpy, inplace=True) def to_torch( - self, + self: Self, dtype: torch.dtype | None = None, device: str | int | torch.device = "cpu", ) -> Self: From 7f8300836fa38d7066f445d2b028d614007d0a27 Mon Sep 17 00:00:00 2001 From: Michael Panchenko Date: Sat, 10 Aug 2024 17:28:09 +0200 Subject: [PATCH 5/6] Dependencies: use the official rliable repo instead of aAI institute fork The necessary changes were merged there meanwhile --- poetry.lock | 12 ++++++------ pyproject.toml | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/poetry.lock b/poetry.lock index cdae2222c..907b74db0 100644 --- a/poetry.lock +++ b/poetry.lock @@ -5000,7 +5000,7 @@ files = [ [[package]] name = "rliable" -version = "1.0.8" +version = "1.1.0" description = "rliable: Reliable evaluation on reinforcement learning and machine learning benchmarks." 
optional = true python-versions = "*" @@ -5009,16 +5009,16 @@ develop = false [package.dependencies] absl-py = ">=0.9.0" -arch = "5.3.1" +arch = ">=5.3.1,<8.0" numpy = ">=1.16.4" scipy = ">=1.7.0" seaborn = ">=0.11.2" [package.source] type = "git" -url = "https://github.com/aai-institute/rliable.git" -reference = "HEAD" -resolved_reference = "c756ac408d15507481166edb252f5b61cf5628ff" +url = "https://github.com/google-research/rliable.git" +reference = "1171833f6706b6c25bbf042e2cb185a96fcf2ce6" +resolved_reference = "1171833f6706b6c25bbf042e2cb185a96fcf2ce6" [[package]] name = "rpds-py" @@ -6840,4 +6840,4 @@ vizdoom = ["vizdoom"] [metadata] lock-version = "2.0" python-versions = "^3.11" -content-hash = "200077246f10046fe1d0494977e5565420e0c166ef905a1d22608e84fcfb3459" +content-hash = "eb478c2c355fda1ed70c5bf12ae9aea6a6051ec5ae3c614cdf353f4bf260fbf4" diff --git a/pyproject.toml b/pyproject.toml index 66c0740ab..2b2180e10 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -68,7 +68,7 @@ mujoco-py = { version = ">=2.1,<2.2", optional = true } opencv_python = { version = "*", optional = true } pybullet = { version = "*", optional = true } pygame = { version = ">=2.1.3", optional = true } -rliable = {optional = true, git = "https://github.com/aai-institute/rliable.git"} +rliable = {optional = true, git = "https://github.com/google-research/rliable.git", rev = "1171833f6706b6c25bbf042e2cb185a96fcf2ce6"} scipy = { version = "*", optional = true } shimmy = { version = ">=0.1.0,<1.0", optional = true } swig = { version = "4.*", optional = true } From 007c3ca63caa6abfb4bc659c548aecf3e082e2da Mon Sep 17 00:00:00 2001 From: Michael Panchenko Date: Sat, 10 Aug 2024 17:36:35 +0200 Subject: [PATCH 6/6] Changelog [ci skip] --- CHANGELOG.md | 49 ++++++++++++++++++++++++++++++------------------- 1 file changed, 30 insertions(+), 19 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0af64a53f..265f85733 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,29 +4,40 @@ ### 
Highlights +#### Evaluation Package + This release introduces a new package `evaluation` that integrates best practices for running experiments (seeding test and train environmets) and for -evaluating them using the [rliable](https://github.com/google-research/rliable) library. This should be especially useful -for algorithm developers for -comparing performances and creating meaningful visualizations. This functionality is currently in `alpha` state and will be further improved in the next releases. +evaluating them using the [rliable](https://github.com/google-research/rliable) +library. This should be especially useful for algorithm developers for comparing +performances and creating meaningful visualizations. **This functionality is +currently in alpha state** and will be further improved in the next releases. You will need to install tianshou with the extra `eval` to use it. -The creation of multiple experiments with varying random seeds has been greatly facilitated. Moreover, -the `ExpLauncher` interface has been introduced and implemented with several backeds to -support the execution of multiple experiments in parallel. - -An example for this using the high-level interfaces can be -found [here](examples/mujoco/mujoco_ppo_hl_multi.py), examples that use -low-level interfaces -will follow soon. -this feature -Apart from that, several important extensions have been added to internal data structures, -most notably to `Batch`. Batches now implement `__eq__` and can be meaningfully compared. -Applying operations in a nested fashion has been significantly simplified, and -checking for NaNs and dropping them is now possible. - -One more notable change is that torch `Distribution` objects are now sliced -when slicing a batch. Previously a Batch with say 10 actions and a dist corresponding to them was sliced to `[:3]`, the `dist` would still correspond to all 10 actions. Now, the dist is also "sliced" to the first 3 actions. 
+The creation of multiple experiments with varying random seeds has been greatly +facilitated. Moreover, the `ExpLauncher` interface has been introduced and +implemented with several backends to support the execution of multiple +experiments in parallel. + +An example for this using the high-level interfaces can be found +[here](examples/mujoco/mujoco_ppo_hl_multi.py), examples that use low-level +interfaces will follow soon. + +#### Improvements in Batch + +Apart from that, several important +extensions have been added to internal data structures, most notably to `Batch`. +Batches now implement `__eq__` and can be meaningfully compared. Applying +operations in a nested fashion has been significantly simplified, and checking +for NaNs and dropping them is now possible. + +One more notable change is that torch `Distribution` objects are now sliced when +slicing a batch. Previously, when a Batch with say 10 actions and a dist +corresponding to them was sliced to `[:3]`, the `dist` in the result would still +correspond to all 10 actions. Now, the dist is also "sliced" to be the +distribution of the first 3 actions. + +A detailed list of changes can be found below. ### Changes/Improvements