import numpy as np
import logging


class Game:
    """Environment wrapper for a 3x3 noughts-and-crosses game.

    Holds the current ``GameState`` and whose turn it is (``1`` or ``-1``),
    and exposes the ``reset`` / ``step`` / ``identities`` entry points.
    """

    def __init__(self):
        self.currentPlayer = 1
        # ``np.int`` was removed from NumPy (1.24); the builtin ``int`` is the
        # exact type it used to alias, so the resulting dtype is unchanged.
        self.gameState = GameState(np.zeros(9, dtype=int), 1)
        self.actionSpace = np.zeros(9, dtype=int)
        self.pieces = {'1': 'X', '0': '-', '-1': 'O'}
        self.grid_shape = (3, 3)
        self.input_shape = (2, 3, 3)
        self.name = 'nodges-and-crosses'
        # Length of the flat two-plane encoding produced by GameState._binary.
        self.state_size = len(self.gameState.binary)
        self.action_size = len(self.actionSpace)

    def reset(self):
        """Start a new game with an empty board and player 1 to move.

        Returns:
            The fresh ``GameState``.
        """
        self.gameState = GameState(np.zeros(9, dtype=int), 1)
        self.currentPlayer = 1
        return self.gameState

    def step(self, action):
        """Play ``action`` (a board index) for the player to move.

        Returns:
            A ``(next_state, value, done, info)`` tuple; ``info`` is always
            ``None``.
        """
        next_state, value, done = self.gameState.takeAction(action)
        self.gameState = next_state
        self.currentPlayer = -self.currentPlayer
        info = None
        return (next_state, value, done, info)

    def identities(self, state, actionValues):
        """Return the list of ``(state, actionValues)`` pairs for ``state``.

        Only the pair passed in is returned; no board symmetries are
        generated.
        """
        return [(state, actionValues)]


class GameState:
    """Immutable-style snapshot of the board plus the player to move.

    ``board`` is a flat length-9 array holding ``1``, ``-1`` or ``0`` (empty),
    indexed row by row. All derived attributes (``binary``, ``id``,
    ``allowedActions``, ``isEndGame``, ``value``, ``score``) are computed once
    in the constructor.
    """

    def __init__(self, board, playerTurn):
        self.board = board
        self.pieces = {'1': 'X', '0': '-', '-1': 'O'}
        # Index triples of the three rows, three columns and two diagonals.
        self.winners = [
            [0, 1, 2],
            [3, 4, 5],
            [6, 7, 8],

            [0, 3, 6],
            [1, 4, 7],
            [2, 5, 8],

            [0, 4, 8],
            [2, 4, 6],
        ]
        self.playerTurn = playerTurn
        self.binary = self._binary()
        self.id = self._convertStateToId()
        self.allowedActions = self._allowedActions()
        self.isEndGame = self._checkForEndGame()
        self.value = self._getValue()
        self.score = self._getScore()

    def _planes(self, first, second):
        """Return the 0/1 occupancy of ``first`` followed by that of ``second``."""
        first_plane = np.zeros(len(self.board), dtype=int)
        first_plane[self.board == first] = 1

        second_plane = np.zeros(len(self.board), dtype=int)
        second_plane[self.board == second] = 1

        return np.append(first_plane, second_plane)

    def _previous_player_won(self):
        """Return True if the player who just moved owns a complete line."""
        target = 3 * -self.playerTurn
        return any(
            self.board[x] + self.board[y] + self.board[z] == target
            for x, y, z in self.winners
        )

    def _allowedActions(self):
        """Return the indices of the empty squares, in ascending order."""
        return [i for i, cell in enumerate(self.board) if cell == 0]

    def _binary(self):
        """Encode the board relative to the player to move.

        Returns:
            A flat integer array of length ``2 * len(board)``: the squares of
            the player to move first, then the opponent's squares.
        """
        return self._planes(self.playerTurn, -self.playerTurn)

    def _convertStateToId(self):
        """Return a string key for the position, independent of whose turn it is.

        The key is the 0/1 occupancy of player ``1`` followed by that of
        player ``-1``, concatenated into one string of digits.
        """
        position = self._planes(1, -1)
        state_id = ''.join(map(str, position))
        return state_id

    def _checkForEndGame(self):
        """Return 1 if the board is full or the previous player has won, else 0."""
        if np.count_nonzero(self.board) == 9:
            return 1
        return 1 if self._previous_player_won() else 0

    def _getValue(self):
        """Return the value triple of this state for the player to move.

        If the previous player completed a line the player to move has lost
        and the result is ``(-1, -1, 1)``; otherwise it is ``(0, 0, 0)``.
        The first element is what ``takeAction`` reports as the value;
        elements 1 and 2 feed ``score`` (presumably current player's and
        opponent's points -- confirm against callers).
        """
        if self._previous_player_won():
            return (-1, -1, 1)
        return (0, 0, 0)

    def _getScore(self):
        """Return elements 1 and 2 of ``self.value`` as a pair."""
        tmp = self.value
        return (tmp[1], tmp[2])

    def takeAction(self, action):
        """Place the current player's piece at ``action`` on a copy of the board.

        The receiver is not modified.

        Returns:
            ``(newState, value, done)`` where ``done`` is 1 when the new
            state is terminal and ``value`` is then ``newState.value[0]``;
            both are 0 otherwise.
        """
        newBoard = np.array(self.board)  # copy, so this state stays intact
        newBoard[action] = self.playerTurn

        newState = GameState(newBoard, -self.playerTurn)

        value = 0
        done = 0

        if newState.isEndGame:
            value = newState.value[0]
            done = 1

        return (newState, value, done)

    def render(self, logger):
        """Log the board through ``logger.info``, one row per call, then a rule."""
        for r in range(3):
            logger.info([self.pieces[str(x)] for x in self.board[3 * r:(3 * r + 3)]])
        logger.info('--------------')