"""Reversi (Othello) environment: a ``Game`` wrapper and immutable-style ``GameState``.

The board is stored as a flat array of 64 cells in row-major order
(``index = row * 8 + col``).  A cell holds ``1`` or ``-1`` for the two
players and ``0`` when empty.
"""
import logging

import numpy as np

_SIDE = 8
_CELLS = _SIDE * _SIDE

# The eight (row step, column step) directions a capture ray can follow.
_DIRECTIONS = (
    (-1, 0), (1, 0), (0, -1), (0, 1),
    (-1, -1), (1, 1), (-1, 1), (1, -1),
)


def _initial_board():
    """Return a fresh flat board holding the four starting discs."""
    board = np.zeros(_CELLS, dtype=int)
    board[[27, 36]] = 1
    board[[28, 35]] = -1
    return board


def _mirror_columns(flat):
    """Return a copy of a flat 64-vector with every board row reversed."""
    return np.asarray(flat).reshape(_SIDE, _SIDE)[:, ::-1].ravel()


def _captured_cells(grid, row, col, player):
    """Return flat indices of the discs ``player`` captures by playing (row, col).

    A direction yields captures when, starting next to (row, col), there is an
    unbroken run of one or more opposing discs that is immediately followed by
    one of ``player``'s own discs.  The target cell itself is not inspected.
    """
    captured = []
    for d_row, d_col in _DIRECTIONS:
        run = []
        r, c = row + d_row, col + d_col
        while 0 <= r < _SIDE and 0 <= c < _SIDE and grid[r, c] == -player:
            run.append(r * _SIDE + c)
            r += d_row
            c += d_col
        # The run only counts if it is closed off by one of our own discs.
        if run and 0 <= r < _SIDE and 0 <= c < _SIDE and grid[r, c] == player:
            captured.extend(run)
    return captured


class Game:
    """Holds the current ``GameState`` and the static description of the game."""

    def __init__(self):
        self.currentPlayer = 1
        self.gameState = GameState(_initial_board(), 1)
        self.actionSpace = np.zeros(_CELLS, dtype=int)
        self.pieces = {'1': 'X', '0': '-', '-1': 'O'}
        self.grid_shape = (8, 8)
        self.input_shape = (2, 8, 8)
        self.name = 'reversi'
        self.state_size = len(self.gameState.binary)
        self.action_size = len(self.actionSpace)

    def reset(self):
        """Restore the starting position with player 1 to move and return it."""
        self.gameState = GameState(_initial_board(), 1)
        self.currentPlayer = 1
        return self.gameState

    def step(self, action):
        """Play ``action`` (flat cell index) for the side to move.

        Returns:
            ``(next_state, value, done, info)`` where ``value`` and ``done``
            come from ``GameState.takeAction`` and ``info`` is always ``None``.
        """
        next_state, value, done = self.gameState.takeAction(action)
        self.gameState = next_state
        self.currentPlayer = -self.currentPlayer
        info = None
        return (next_state, value, done, info)

    def identities(self, state, actionValues):
        """Return ``state`` and its left-right mirror image with matching action values.

        Args:
            state: a ``GameState``.
            actionValues: sequence of 64 values, one per board cell.

        Returns:
            A two-element list of ``(GameState, actionValues)`` pairs: the
            input pair unchanged, then the pair with every row reversed.
        """
        mirrored_state = GameState(_mirror_columns(state.board), state.playerTurn)
        mirrored_values = _mirror_columns(actionValues)
        return [(state, actionValues), (mirrored_state, mirrored_values)]


class GameState():
    """A single Reversi position together with the side to move.

    All derived attributes (``binary``, ``id``, ``allowedActions``,
    ``isEndGame``, ``value``, ``score``) are computed once in ``__init__``.
    """

    def __init__(self, board, playerTurn):
        self.board = board
        self.pieces = {'1': 'X', '0': '-', '-1': 'O'}
        # Kept for interface compatibility; nothing in this module reads it.
        self.winners = [list(range(_CELLS))]
        self.playerTurn = playerTurn
        self.binary = self._binary()
        self.id = self._convertStateToId()
        self.allowedActions = self._allowedActions()
        # isEndGame must be set before value: _getValue reads it.
        self.isEndGame = self._checkForEndGame()
        self.value = self._getValue()
        self.score = self._getScore()

    def _allowedActions(self):
        """Return the flat indices (row-major order) of every legal move.

        A move is legal when the cell is empty and playing there captures at
        least one opposing disc.
        """
        grid = np.reshape(self.board, [_SIDE, _SIDE])
        allowed = []
        for row in range(_SIDE):
            for col in range(_SIDE):
                if grid[row, col] != 0:
                    continue
                if _captured_cells(grid, row, col, self.playerTurn):
                    allowed.append(row * _SIDE + col)
        return allowed

    def _binary(self):
        """Return a 128-vector: side-to-move occupancy followed by opponent occupancy."""
        mine = (self.board == self.playerTurn).astype(int)
        theirs = (self.board == -self.playerTurn).astype(int)
        return np.append(mine, theirs)

    def _convertStateToId(self):
        """Return a 128-character '0'/'1' string: player 1 occupancy, then player -1."""
        first = (self.board == 1).astype(int)
        second = (self.board == -1).astype(int)
        return ''.join(map(str, np.append(first, second)))

    def _checkForEndGame(self):
        """Return 1 if the board is full or the side to move has no legal move, else 0.

        NOTE(review): standard Reversi lets a player with no legal move pass;
        here that situation ends the game. Left unchanged because a pass would
        need an extra action outside the 64-cell action space.
        """
        if np.count_nonzero(self.board) == _CELLS:
            return 1
        if not self.allowedActions:
            return 1
        return 0

    def _getValue(self):
        """Return ``(value, points_for_mover, points_for_opponent)`` for this state.

        The value is from the point of view of the side to move.  At a terminal
        position the result is decided by disc count: more discs wins
        ``(1, 1, -1)``, fewer loses ``(-1, -1, 1)``, equal is a draw
        ``(0, 0, 0)``.  Previously a full board was always scored as a loss for
        the side to move and a win was never reported.

        For non-terminal positions the earlier behaviour is kept:
        ``(-1, -1, 1)`` when the opponent currently holds more discs, otherwise
        ``(0, 0, 0)``.  ``takeAction`` ignores the value unless the game is over.
        """
        mine = int(np.count_nonzero(self.board == self.playerTurn))
        theirs = int(np.count_nonzero(self.board == -self.playerTurn))

        if self.isEndGame and mine > theirs:
            return (1, 1, -1)
        if theirs > mine:
            return (-1, -1, 1)
        return (0, 0, 0)

    def _getScore(self):
        """Return ``(points_for_mover, points_for_opponent)`` taken from ``self.value``."""
        tmp = self.value
        return (tmp[1], tmp[2])

    def fourConnectivity(self, action):
        """Return a list of indices derived from ``action`` using a stride of 7.

        NOTE(review): nothing in this module calls this method and its
        arithmetic assumes a row width of 7 rather than 8; it looks like a
        leftover from another game. Logic is intentionally left untouched.
        """
        toChange = []
        if (action + 1) % 7 != 0:
            toChange.append(action + 1)

        if action % 7 != 0:
            toChange.append(action)

        if not (action >= 35):
            toChange.append(action + 7)

        if not (action <= 6):
            toChange.append(action - 7)

        return toChange

    def takeAction(self, action):
        """Play ``action`` for the side to move and return the resulting state.

        Args:
            action: flat cell index in ``range(64)``.  Legality is not checked.

        Returns:
            ``(newState, value, done)``.  ``done`` is 1 when ``newState`` is
            terminal, in which case ``value`` is ``newState.value[0]`` (from the
            point of view of the player to move in ``newState``); otherwise
            both are 0.
        """
        action = int(action)
        # Integer division: a float row index cannot be used to index the board.
        row, col = divmod(action, _SIDE)

        grid = np.reshape(self.board, [_SIDE, _SIDE])
        flipped = _captured_cells(grid, row, col, self.playerTurn)

        newBoard = np.array(self.board)
        newBoard[action] = self.playerTurn
        if flipped:
            newBoard[flipped] = self.playerTurn

        newState = GameState(newBoard, -self.playerTurn)

        value = 0
        done = 0

        if newState.isEndGame:
            value = newState.value[0]
            done = 1

        return (newState, value, done)

    def render(self, logger):
        """Log the board, one row per record, followed by a separator line."""
        for r in range(_SIDE):
            logger.info([self.pieces[str(x)] for x in self.board[8 * r: (8 * r + 8)]])
        logger.info('--------------')