diff --git a/.github/workflows/linux-tutorials-test.yml b/.github/workflows/linux-tutorials-test.yml index f74a9b3c5..d559302eb 100644 --- a/.github/workflows/linux-tutorials-test.yml +++ b/.github/workflows/linux-tutorials-test.yml @@ -15,9 +15,10 @@ jobs: runs-on: ubuntu-latest strategy: fail-fast: false + matrix: python-version: ['3.8', '3.9', '3.10', '3.11'] - tutorial: [Tianshou, CustomEnvironment, CleanRL, SB3/kaz, SB3/waterworld, SB3/connect_four, SB3/test] # TODO: fix tutorials and add back Ray + tutorial: [Tianshou, CustomEnvironment, CleanRL, SB3/kaz, SB3/waterworld, SB3/test] # TODO: fix tutorials and add back Ray, fix SB3/connect_four tutorial steps: - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} diff --git a/docs/tutorials/sb3/connect_four.md b/docs/tutorials/sb3/connect_four.md index 8b85f8cca..eef34deac 100644 --- a/docs/tutorials/sb3/connect_four.md +++ b/docs/tutorials/sb3/connect_four.md @@ -4,6 +4,13 @@ title: "SB3: Action Masked PPO for Connect Four" # SB3: Action Masked PPO for Connect Four +```{eval-rst} +.. warning:: + + Currently, this tutorial doesn't work with versions of gymnasium>0.29.1. We are looking into fixing it but it might take some time. + +``` + This tutorial shows how to train a agents using Maskable [Proximal Policy Optimization](https://sb3-contrib.readthedocs.io/en/master/modules/ppo_mask.html) (PPO) on the [Connect Four](/environments/classic/chess/) environment ([AEC](/api/aec/)). It creates a custom Wrapper to convert to a [Gymnasium](https://gymnasium.farama.org/)-like environment which is compatible with [SB3 action masking](https://sb3-contrib.readthedocs.io/en/master/modules/ppo_mask.html). diff --git a/tutorials/SB3/connect_four/requirements.txt b/tutorials/SB3/connect_four/requirements.txt index bf7c59673..e8ed650ab 100644 --- a/tutorials/SB3/connect_four/requirements.txt +++ b/tutorials/SB3/connect_four/requirements.txt @@ -1,3 +1,4 @@ pettingzoo[classic]>=1.24.0 stable-baselines3>=2.0.0 sb3-contrib>=2.0.0 +gymnasium<=0.29.1 diff --git a/tutorials/SB3/connect_four/sb3_connect_four_action_mask.py b/tutorials/SB3/connect_four/sb3_connect_four_action_mask.py index 29d623251..e3dc63d34 100644 --- a/tutorials/SB3/connect_four/sb3_connect_four_action_mask.py +++ b/tutorials/SB3/connect_four/sb3_connect_four_action_mask.py @@ -9,6 +9,7 @@ import os import time +import gymnasium as gym from sb3_contrib import MaskablePPO from sb3_contrib.common.maskable.policies import MaskableActorCriticPolicy from sb3_contrib.common.wrappers import ActionMasker @@ -174,6 +175,11 @@ def eval_action_mask(env_fn, num_games=100, render_mode=None, **env_kwargs): if __name__ == "__main__": + if gym.__version__ > "0.29.1": + raise ImportError( + f"This script requires gymnasium version 0.29.1 or lower, but you have version {gym.__version__}." + ) + env_fn = connect_four_v3 env_kwargs = {}