Farama-Foundation · elliottower · Feb 14, 2024 · Jan 25, 2024 · Jan 26, 2024
diff --git a/docs/code_examples/parallel_rps.py b/docs/code_examples/parallel_rps.py
@@ -1,6 +1,7 @@
 import functools
 
 import gymnasium
+import numpy as np
 from gymnasium.spaces import Discrete
 
 from pettingzoo import ParallelEnv
@@ -9,7 +10,7 @@
 ROCK = 0
 PAPER = 1
 SCISSORS = 2
-NONE = 3
+NO_MOVE = 3
 MOVES = ["ROCK", "PAPER", "SCISSORS", "None"]
 NUM_ITERS = 100
 REWARD_MAP = {
@@ -83,6 +84,7 @@ def __init__(self, render_mode=None):
     @functools.lru_cache(maxsize=None)
     def observation_space(self, agent):
         # gymnasium spaces are defined and documented here: https://gymnasium.farama.org/api/spaces/
+        # Discrete(4) means an integer in range(0, 4)
         return Discrete(4)
 
     # Action space should be defined here.
@@ -128,7 +130,8 @@ def reset(self, seed=None, options=None):
         """
         self.agents = self.possible_agents[:]
         self.num_moves = 0
-        observations = {agent: NONE for agent in self.agents}
+        # the observations should be numpy arrays even if there is only one value
+        observations = {agent: np.array(NO_MOVE) for agent in self.agents}
         infos = {agent: {} for agent in self.agents}
         self.state = observations
 
@@ -161,9 +164,11 @@ def step(self, actions):
         env_truncation = self.num_moves >= NUM_ITERS
         truncations = {agent: env_truncation for agent in self.agents}
 
-        # current observation is just the other player's most recent action
+        # Current observation is just the other player's most recent action
+        # This is converted to a numpy value of type int to match the type
+        # that we declared in observation_space()
         observations = {
-            self.agents[i]: int(actions[self.agents[1 - i]])
+            self.agents[i]: np.array(actions[self.agents[1 - i]], dtype=np.int64)
             for i in range(len(self.agents))
         }
         self.state = observations