diff --git a/pettingzoo/butterfly/pistonball/pistonball.py b/pettingzoo/butterfly/pistonball/pistonball.py index f9304e43e..cd20910fa 100644 --- a/pettingzoo/butterfly/pistonball/pistonball.py +++ b/pettingzoo/butterfly/pistonball/pistonball.py @@ -31,12 +31,11 @@ **Actions**: Every piston can be acted on at each time step. In discrete mode, the action space is 0 to move down by 4 pixels, 1 to stay still, and 2 to move up by 4 pixels. In continuous mode, the value in the range [-1, 1] is proportional to the amount that the pistons are lowered or raised by. Continuous actions are scaled by a factor of 4, so that in both the discrete and continuous action space, the action 1 will move pistons 4 pixels up, and -1 will move pistons 4 pixels down. -**Rewards**: The same reward is provided to each agent based on how much the ball moved left in the last time-step plus a constant time-penalty. Specifically, there are three components to the distance reward. First, the x-distance in pixels travelled by the ball towards -the left-wall in the last time-step (moving right would provide a negative reward). Second, a scaling factor of 100. Third, a division by the distance in pixels between the ball at the start of the time-step and the left-wall. That final division component means moving -one unit left when close to the wall is far more valuable than moving one unit left when far from the wall. There is also a configurable time-penalty (default: -0.1) added to the distance-based reward at each time-step. For example, if the ball does not move in a -time-step, the reward will be -0.1 not 0. This is to incentivize solving the game faster. +**Rewards**: The same reward is provided to each agent based on how much the ball moved left in the last time-step (moving right results in a negative reward) plus a constant time-penalty. The distance component is the percentage of the initial total distance (i.e. at game-start) +to the left-wall travelled in the past timestep. 
For example, if the ball began the game 300 pixels away from the wall, began the time-step 180 pixels away and finished the time-step 175 pixels away, the distance reward would be 100 * 5/300 = 1.7. There is also a configurable +time-penalty (default: -0.1) added to the distance-based reward at each time-step. For example, if the ball does not move in a time-step, the reward will be -0.1 not 0. This is to incentivize solving the game faster. -Pistonball uses the chipmunk physics engine, and are thus the physics are about as realistic as in the game Angry Birds. +Pistonball uses the chipmunk physics engine, so the physics are about as realistic as in the game Angry Birds. Keys *a* and *d* control which piston is selected to move (initially the rightmost piston is selected) and keys *w* and *s* move the piston in the vertical direction. @@ -589,11 +588,12 @@ def render(self): def _get_ball_position(self) -> int: """Return the leftmost x-position of the ball. - If the ball extends beyond the leftmost wall, return the - position of that wall-edge. + That leftmost x-position is generally referred to and treated as the + ball's position in this class. If the ball extends beyond the leftmost + wall, return the position of that wall-edge. """ ball_position = int(self.ball.position[0] - self.ball_radius) - # check if the ball is touching/within the left-most wall. + # Check if the ball is touching/within the left-most wall. clipped_ball_position = max(self.wall_width, ball_position) return clipped_ball_position @@ -632,15 +632,15 @@ def step(self, action): # The negative one is included since the x-axis increases from left-to-right. And, if the x # position decreases we want the reward to be positive, since the ball would have gotten closer # to the left-wall. 
- global_reward = ( + reward = ( -1 * (ball_curr_pos - self.ball_prev_pos) * (100 / self.distance_to_wall_at_game_start) ) if not self.terminate: - global_reward += self.time_penalty + reward += self.time_penalty - self.rewards = {agent: global_reward for agent in self.agents} + self.rewards = {agent: reward for agent in self.agents} self.ball_prev_pos = ball_curr_pos self.frames += 1 else: