diff --git a/pettingzoo/classic/rlcard_envs/texas_holdem.py b/pettingzoo/classic/rlcard_envs/texas_holdem.py
index 0e5699b11..c4fbeb1bd 100644
--- a/pettingzoo/classic/rlcard_envs/texas_holdem.py
+++ b/pettingzoo/classic/rlcard_envs/texas_holdem.py
@@ -9,7 +9,7 @@
This environment is part of the classic environments. Please read that page first for general information.
-| Import | `from pettingzoo.classic import texas_holdem_v4` |
+| Import | `from pettingzoo.classic import texas_holdem_v5` |
|--------------------|--------------------------------------------------|
| Actions | Discrete |
| Parallel API | Yes |
@@ -25,10 +25,13 @@
## Arguments
``` python
-texas_holdem_v4.env(num_players=2)
+texas_holdem_v5.env(num_players=2)
```
`num_players`: Sets the number of players in the game. Minimum is 2.
+`num_rounds`: Number of rounds (hands) to play before the environment terminates. If this is set, `num_chips` must also be set.
+`num_chips`: Number of chips each agent starts with; must be used together with `num_rounds`.
+
### Observation Space
@@ -69,6 +72,7 @@
### Version History
+* v5: Added multi-round play option (1.24.2)
* v4: Upgrade to RLCard 1.0.3 (1.11.0)
* v3: Fixed bug in arbitrary calls to observe() (1.8.0)
* v2: Bumped RLCard version, bug fixes, legal action mask in observation replaced illegal move list in infos (1.5.0)
@@ -108,17 +112,30 @@ def get_font(path, size):
def env(**kwargs):
+ # optionally chain multiple games, each agent starting with num_chips chips
+ num_rounds = kwargs.pop("num_rounds", False)
+ num_chips = kwargs.pop("num_chips", False)
+ assert bool(num_rounds) == bool(
+ num_chips
+ ), f"If `num_rounds` is used, `num_chips` must also be declared, got {num_rounds=}, {num_chips=}"
+
env = raw_env(**kwargs)
env = wrappers.TerminateIllegalWrapper(env, illegal_reward=-1)
env = wrappers.AssertOutOfBoundsWrapper(env)
env = wrappers.OrderEnforcingWrapper(env)
+
+ if num_rounds:
+ env = wrappers.MultiEpisodeEnv(
+ env, num_episodes=num_rounds, starting_utility=num_chips
+ )
+
return env
class raw_env(RLCardBase, EzPickle):
metadata = {
"render_modes": ["human", "rgb_array"],
- "name": "texas_holdem_v4",
+ "name": "texas_holdem_v5",
"is_parallelizable": False,
"render_fps": 1,
}
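For context, a minimal usage sketch of the new arguments (not part of the diff; it assumes this branch is installed and follows the standard PettingZoo AEC loop):

``` python
from pettingzoo.classic import texas_holdem_v5

# chain 3 hands; each player starts with 10 chips and is terminated once they run out
env = texas_holdem_v5.env(num_players=3, num_rounds=3, num_chips=10)
env.reset(seed=42)

for agent in env.agent_iter():
    observation, reward, termination, truncation, info = env.last()
    if termination or truncation:
        action = None
    else:
        # sample a random legal action using the action mask
        action = env.action_space(agent).sample(observation["action_mask"])
    env.step(action)
env.close()
```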
diff --git a/pettingzoo/classic/rlcard_envs/texas_holdem_no_limit.py b/pettingzoo/classic/rlcard_envs/texas_holdem_no_limit.py
index 59101de69..ba17dd640 100644
--- a/pettingzoo/classic/rlcard_envs/texas_holdem_no_limit.py
+++ b/pettingzoo/classic/rlcard_envs/texas_holdem_no_limit.py
@@ -9,7 +9,7 @@
This environment is part of the classic environments. Please read that page first for general information.
-| Import | `from pettingzoo.classic import texas_holdem_no_limit_v6` |
+| Import | `from pettingzoo.classic import texas_holdem_no_limit_v7` |
|--------------------|-----------------------------------------------------------|
| Actions | Discrete |
| Parallel API | Yes |
@@ -29,7 +29,7 @@
### Arguments
``` python
-texas_holdem_no_limit_v6.env(num_players=2)
+texas_holdem_no_limit_v7.env(num_players=2)
```
`num_players`: Sets the number of players in the game. Minimum is 2.
@@ -79,6 +79,7 @@
### Version History
+* v7: Added multi-round play option (1.24.2)
* v6: Upgrade to RLCard 1.0.5, fixes to the action space as ACPC (1.12.0)
* v5: Upgrade to RLCard 1.0.4, fixes to rewards with greater than 2 players (1.11.1)
* v4: Upgrade to RLCard 1.0.3 (1.11.0)
@@ -121,17 +122,30 @@ def get_font(path, size):
def env(**kwargs):
+ # optionally chain multiple games, each agent starting with num_chips chips
+ num_rounds = kwargs.pop("num_rounds", False)
+ num_chips = kwargs.pop("num_chips", False)
+ assert bool(num_rounds) == bool(
+ num_chips
+ ), f"If `num_rounds` is used, `num_chips` must also be declared, got {num_rounds=}, {num_chips=}"
+
env = raw_env(**kwargs)
env = wrappers.TerminateIllegalWrapper(env, illegal_reward=-1)
env = wrappers.AssertOutOfBoundsWrapper(env)
env = wrappers.OrderEnforcingWrapper(env)
+
+ if num_rounds:
+ env = wrappers.MultiEpisodeEnv(
+ env, num_episodes=num_rounds, starting_utility=num_chips
+ )
+
return env
class raw_env(RLCardBase, EzPickle):
metadata = {
"render_modes": ["human", "rgb_array"],
- "name": "texas_holdem_no_limit_v6",
+ "name": "texas_holdem_no_limit_v7",
"is_parallelizable": False,
"render_fps": 1,
}
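A short sketch of the argument validation added in `env()` above (illustrative, not part of the diff): supplying `num_rounds` without `num_chips` trips the assertion.

``` python
from pettingzoo.classic import texas_holdem_no_limit_v7

try:
    # num_chips is missing, so the assert in env() fires
    texas_holdem_no_limit_v7.env(num_players=2, num_rounds=3)
except AssertionError as err:
    print(err)  # "If `num_rounds` is used, `num_chips` must also be declared, ..."
```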
diff --git a/pettingzoo/classic/texas_holdem_no_limit_v6.py b/pettingzoo/classic/texas_holdem_no_limit_v7.py
similarity index 100%
rename from pettingzoo/classic/texas_holdem_no_limit_v6.py
rename to pettingzoo/classic/texas_holdem_no_limit_v7.py
diff --git a/pettingzoo/classic/texas_holdem_v4.py b/pettingzoo/classic/texas_holdem_v5.py
similarity index 100%
rename from pettingzoo/classic/texas_holdem_v4.py
rename to pettingzoo/classic/texas_holdem_v5.py
diff --git a/pettingzoo/test/api_test.py b/pettingzoo/test/api_test.py
index f8718579c..f0ea19636 100644
--- a/pettingzoo/test/api_test.py
+++ b/pettingzoo/test/api_test.py
@@ -68,8 +68,8 @@ def action_mask():
list and observation_spaces, action_spaces dictionaries."""
env_obs_dicts = [
"leduc_holdem_v4",
- "texas_holdem_no_limit_v6",
- "texas_holdem_v4",
+ "texas_holdem_no_limit_v7",
+ "texas_holdem_v5",
"go_v5",
"chess_v6",
"connect_four_v3",
@@ -89,8 +89,8 @@ def action_mask():
env_all_zeros_obs = ["knights_archers_zombies_v10"]
env_obs_space = [
"leduc_holdem_v4",
- "texas_holdem_no_limit_v6",
- "texas_holdem_v4",
+ "texas_holdem_no_limit_v7",
+ "texas_holdem_v5",
"go_v5",
"hanabi_v5",
"knights_archers_zombies_v10",
diff --git a/pettingzoo/utils/all_modules.py b/pettingzoo/utils/all_modules.py
index e979ed4e6..530d8b2bf 100644
--- a/pettingzoo/utils/all_modules.py
+++ b/pettingzoo/utils/all_modules.py
@@ -37,8 +37,8 @@
hanabi_v5,
leduc_holdem_v4,
rps_v2,
- texas_holdem_no_limit_v6,
- texas_holdem_v4,
+ texas_holdem_no_limit_v7,
+ texas_holdem_v5,
tictactoe_v3,
)
from pettingzoo.mpe import (
@@ -93,8 +93,8 @@
"classic/connect_four_v3": connect_four_v3,
"classic/tictactoe_v3": tictactoe_v3,
"classic/leduc_holdem_v4": leduc_holdem_v4,
- "classic/texas_holdem_v4": texas_holdem_v4,
- "classic/texas_holdem_no_limit_v6": texas_holdem_no_limit_v6,
+ "classic/texas_holdem_v5": texas_holdem_v5,
+ "classic/texas_holdem_no_limit_v7": texas_holdem_no_limit_v7,
"classic/gin_rummy_v4": gin_rummy_v4,
"classic/go_v5": go_v5,
"classic/hanabi_v5": hanabi_v5,
diff --git a/pettingzoo/utils/wrappers/multi_episode_env.py b/pettingzoo/utils/wrappers/multi_episode_env.py
index d924a0c45..c7fcd4bbd 100644
--- a/pettingzoo/utils/wrappers/multi_episode_env.py
+++ b/pettingzoo/utils/wrappers/multi_episode_env.py
@@ -13,14 +13,22 @@ class MultiEpisodeEnv(BaseWrapper):
When there are no more valid agents in the underlying environment, the environment is automatically reset.
After `num_episodes` have been run internally, the environment terminates normally.
The result of this wrapper is that the environment is no longer Markovian around the environment reset.
+
+ When `starting_utility` is set, every agent starts with that amount of utility (think of it as a stack of poker chips).
+ At each step, an agent's reward is added to its utility:
+ negative rewards drain it and positive rewards replenish it.
+ Agents whose utility drops to zero or below are terminated.
"""
- def __init__(self, env: AECEnv, num_episodes: int):
+ def __init__(
+ self, env: AECEnv, num_episodes: int, starting_utility: float | None = None
+ ):
"""__init__.
Args:
env (AECEnv): env
num_episodes (int): num_episodes
+ starting_utility (Optional[float]): utility each agent starts with; agents are terminated once it reaches zero or below
"""
assert isinstance(
env, AECEnv
@@ -28,6 +36,7 @@ def __init__(self, env: AECEnv, num_episodes: int):
super().__init__(env)
self._num_episodes = num_episodes
+ self._starting_utility = starting_utility
def reset(self, seed: int | None = None, options: dict | None = None) -> None:
"""reset.
@@ -39,10 +48,12 @@ def reset(self, seed: int | None = None, options: dict | None = None) -> None:
Returns:
None:
"""
+ super().reset(seed=seed, options=options)
self._episodes_elapsed = 1
self._seed = copy.deepcopy(seed)
self._options = copy.deepcopy(options)
- super().reset(seed=seed, options=options)
+ if self._starting_utility:
+ self._agent_utilities = {a: self._starting_utility for a in self.agents}
def step(self, action: ActionType) -> None:
"""Steps the underlying environment for `num_episodes`.
@@ -59,6 +70,18 @@ def step(self, action: ActionType) -> None:
None:
"""
super().step(action)
+
+ # adjust utilities if this param is enabled
+ if self._starting_utility:
+ for agent in self.agents:
+ self._agent_utilities[agent] = (
+ self._agent_utilities[agent] + self.rewards[agent]
+ )
+
+ if self._agent_utilities[agent] <= 0:
+ self.terminations[agent] = True
+
+ # if there are still live agents, the episode is not over, so return early
if self.agents:
return
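For reference, the wrapper can also be applied directly, mirroring what `env(num_rounds=..., num_chips=...)` builds above (a sketch, not part of the diff; the utility value is illustrative):

``` python
from pettingzoo.classic import texas_holdem_no_limit_v7
from pettingzoo.utils.wrappers import MultiEpisodeEnv

# 5 chained hands; every agent starts with 20 units of utility and is
# terminated once cumulative rewards drive that utility to zero or below
env = texas_holdem_no_limit_v7.env(num_players=3)
env = MultiEpisodeEnv(env, num_episodes=5, starting_utility=20)
env.reset(seed=42)
```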
diff --git a/pettingzoo/utils/wrappers/multi_episode_parallel_env.py b/pettingzoo/utils/wrappers/multi_episode_parallel_env.py
index bbc9c9b60..9a0c83888 100644
--- a/pettingzoo/utils/wrappers/multi_episode_parallel_env.py
+++ b/pettingzoo/utils/wrappers/multi_episode_parallel_env.py
@@ -13,14 +13,22 @@ class MultiEpisodeParallelEnv(BaseParallelWrapper):
When there are no more valid agents in the underlying environment, the environment is automatically reset.
When this happens, the `observation` and `info` returned by `step()` are replaced with that of the reset environment.
The result of this wrapper is that the environment is no longer Markovian around the environment reset.
+
+ When `starting_utility` is set, every agent starts with that amount of utility (think of it as a stack of poker chips).
+ At each step, an agent's reward is added to its utility:
+ negative rewards drain it and positive rewards replenish it.
+ Agents whose utility drops to zero or below are terminated.
"""
- def __init__(self, env: ParallelEnv, num_episodes: int):
+ def __init__(
+ self, env: ParallelEnv, num_episodes: int, starting_utility: float | None = None
+ ):
"""__init__.
Args:
env (AECEnv): the base environment
num_episodes (int): the number of episodes to run the underlying environment
+ starting_utility (Optional[float]): utility each agent starts with; agents are terminated once it reaches zero or below
"""
super().__init__(env)
assert isinstance(
@@ -28,6 +36,7 @@ def __init__(self, env: ParallelEnv, num_episodes: int):
), "MultiEpisodeEnv is only compatible with ParallelEnv environments."
self._num_episodes = num_episodes
+ self._starting_utility = starting_utility
def reset(
self, seed: int | None = None, options: dict | None = None
@@ -46,6 +55,8 @@ def reset(
self._seed = copy.deepcopy(seed)
self._options = copy.deepcopy(options)
self._episodes_elapsed = 1
+ if self._starting_utility:
+ self._agent_utilities = {a: self._starting_utility for a in self.agents}
return obs, info
@@ -77,7 +88,17 @@ def step(
]:
"""
obs, rew, term, trunc, info = super().step(actions)
- term = {agent: False for agent in term}
+
+ # update agent utilities if the option is enabled
+ if self._starting_utility:
+ self._agent_utilities = {
+ a: self._agent_utilities[a] + rew[a] for a in self.agents
+ }
+ # termination now depends only on each agent's remaining utility
+ term = {agent: self._agent_utilities[agent] <= 0 for agent in term}
+ else:
+ term = {agent: False for agent in term}
+
trunc = {agent: False for agent in term}
if self.agents:
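The parallel variant follows the same pattern; a sketch using pistonball (as in `test/wrapper_test.py`), with an illustrative `starting_utility`:

``` python
from pettingzoo.butterfly import pistonball_v6
from pettingzoo.utils.wrappers import MultiEpisodeParallelEnv

env = pistonball_v6.parallel_env()
env = MultiEpisodeParallelEnv(env, num_episodes=2, starting_utility=10)
observations, infos = env.reset(seed=42)

while env.agents:
    # random actions; agents drop out once their utility is exhausted
    actions = {agent: env.action_space(agent).sample() for agent in env.agents}
    observations, rewards, terminations, truncations, infos = env.step(actions)
env.close()
```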
diff --git a/test/all_parameter_combs_test.py b/test/all_parameter_combs_test.py
index cff5a7cc3..63bfc6fe5 100644
--- a/test/all_parameter_combs_test.py
+++ b/test/all_parameter_combs_test.py
@@ -41,8 +41,8 @@
hanabi_v5,
leduc_holdem_v4,
rps_v2,
- texas_holdem_no_limit_v6,
- texas_holdem_v4,
+ texas_holdem_no_limit_v7,
+ texas_holdem_v5,
tictactoe_v3,
)
from pettingzoo.mpe import (
@@ -156,11 +156,21 @@
),
],
["classic/leduc_holdem_v4", leduc_holdem_v4, dict()],
- ["classic/texas_holdem_v4", texas_holdem_v4, dict(num_players=3)],
- ["classic/texas_holdem_v4", texas_holdem_v4, dict(num_players=4)],
- ["classic/texas_holdem_no_limit_v6", texas_holdem_no_limit_v6, dict()],
- ["classic/texas_holdem_no_limit_v6", texas_holdem_no_limit_v6, dict(num_players=3)],
- ["classic/texas_holdem_no_limit_v6", texas_holdem_no_limit_v6, dict(num_players=4)],
+ ["classic/texas_holdem_v5", texas_holdem_v5, dict(num_players=3)],
+ ["classic/texas_holdem_v5", texas_holdem_v5, dict(num_players=4)],
+ [
+ "classic/texas_holdem_v5",
+ texas_holdem_v5,
+ dict(num_players=4, num_rounds=3, num_chips=5),
+ ],
+ ["classic/texas_holdem_no_limit_v7", texas_holdem_no_limit_v7, dict()],
+ ["classic/texas_holdem_no_limit_v7", texas_holdem_no_limit_v7, dict(num_players=3)],
+ ["classic/texas_holdem_no_limit_v7", texas_holdem_no_limit_v7, dict(num_players=4)],
+ [
+ "classic/texas_holdem_no_limit_v7",
+ texas_holdem_no_limit_v7,
+ dict(num_players=4, num_rounds=3, num_chips=5),
+ ],
[
"butterfly/knights_archers_zombies_v10",
knights_archers_zombies_v10,
diff --git a/test/wrapper_test.py b/test/wrapper_test.py
index 650fe328b..80e413925 100644
--- a/test/wrapper_test.py
+++ b/test/wrapper_test.py
@@ -3,7 +3,7 @@
import pytest
from pettingzoo.butterfly import pistonball_v6
-from pettingzoo.classic import texas_holdem_no_limit_v6
+from pettingzoo.classic import texas_holdem_no_limit_v7
from pettingzoo.utils.wrappers import MultiEpisodeEnv, MultiEpisodeParallelEnv
@@ -16,7 +16,7 @@ def test_multi_episode_env_wrapper(num_episodes: int) -> None:
Args:
num_episodes: number of episodes to run the MultiEpisodeEnv
"""
- env = texas_holdem_no_limit_v6.env(num_players=3)
+ env = texas_holdem_no_limit_v7.env(num_players=3)
env = MultiEpisodeEnv(env, num_episodes=num_episodes)
env.reset(seed=42)
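A possible companion test for the new argument, written in the same style as the existing one (the test name and utility value are hypothetical, not part of the diff):

``` python
@pytest.mark.parametrize("num_episodes", [1, 2, 3])
def test_multi_episode_env_wrapper_with_utility(num_episodes: int) -> None:
    """Runs the MultiEpisodeEnv wrapper with starting_utility enabled."""
    env = texas_holdem_no_limit_v7.env(num_players=3)
    env = MultiEpisodeEnv(env, num_episodes=num_episodes, starting_utility=5)
    env.reset(seed=42)
    # ... followed by the same random rollout as test_multi_episode_env_wrapper
```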
diff --git a/tutorials/LangChain/langchain_example.py b/tutorials/LangChain/langchain_example.py
index 5b88bfcc5..05aeb4587 100644
--- a/tutorials/LangChain/langchain_example.py
+++ b/tutorials/LangChain/langchain_example.py
@@ -48,9 +48,9 @@ def tic_tac_toe():
def texas_holdem_no_limit():
- from pettingzoo.classic import texas_holdem_no_limit_v6
+ from pettingzoo.classic import texas_holdem_no_limit_v7
- env = texas_holdem_no_limit_v6.env(num_players=4, render_mode="human")
+ env = texas_holdem_no_limit_v7.env(num_players=4, render_mode="human")
agents = {
name: ActionMaskAgent(name=name, model=ChatOpenAI(temperature=0.2), env=env)
for name in env.possible_agents
diff --git a/tutorials/SB3/test/test_sb3_action_mask.py b/tutorials/SB3/test/test_sb3_action_mask.py
index 3835af393..978b31398 100644
--- a/tutorials/SB3/test/test_sb3_action_mask.py
+++ b/tutorials/SB3/test/test_sb3_action_mask.py
@@ -8,8 +8,8 @@
go_v5,
hanabi_v5,
leduc_holdem_v4,
- texas_holdem_no_limit_v6,
- texas_holdem_v4,
+ texas_holdem_no_limit_v7,
+ texas_holdem_v5,
tictactoe_v3,
)
@@ -22,8 +22,8 @@
# These environments do better than random even after the minimum number of timesteps
EASY_ENVS = [
gin_rummy_v4,
- texas_holdem_no_limit_v6, # texas holdem human rendered game ends instantly, but with random actions it works fine
- texas_holdem_v4,
+ texas_holdem_no_limit_v7, # texas holdem human rendered game ends instantly, but with random actions it works fine
+ texas_holdem_v5,
]
# More difficult environments which will likely take more training time