diff --git a/.gitignore b/.gitignore index 2236a276..27c8aef4 100644 --- a/.gitignore +++ b/.gitignore @@ -1,104 +1,107 @@ --# Byte-compiled / optimized / DLL files - -__pycache__/ - -*.py[cod] - -*$py.class - - - -# C extensions - -*.so - - - -# Distribution / packaging - -.Python - -env/ - -build/ - -develop-eggs/ - -dist/ - -downloads/ - -eggs/ - -.eggs/ - -lib/ - -lib64/ - -parts/ - -sdist/ - -var/ - -wheels/ - -*.egg-info/ - -.installed.cfg - -*.egg - - - -# PyInstaller - -# Usually these files are written by a python script from a template - -# before PyInstaller builds the exe, so as to inject date/other infos into it. - -*.manifest - -*.spec - - - -# Installer logs - -pip-log.txt - -pip-delete-this-directory.txt - - - -# Unit test / coverage reports - -htmlcov/ - -.tox/ - -.coverage - -.coverage.* - -.cache - -nosetests.xml - -coverage.xml - -*.cover - -.hypothesis/ - - - -# Translations - -*.mo - -*.pot - - - -# Django stuff: - -*.log - -local_settings.py - - - -# Flask stuff: - -instance/ - -.webassets-cache - - - -# Scrapy stuff: - -.scrapy - - - -# Sphinx documentation - -docs/_build/ - - - -# PyBuilder - -target/ - - - -# Jupyter Notebook - -.ipynb_checkpoints - - - -# pyenv - -.python-version - - - -# celery beat schedule file - -celerybeat-schedule - - - -# SageMath parsed files - -*.sage.py - - - -# dotenv - -.env - - - -# virtualenv - -.venv - -venv/ - -ENV/ - - - -# Spyder project settings - -.spyderproject - -.spyproject - - - -# Rope project settings - -.ropeproject - - - -# mkdocs documentation - -/site - - - -# mypy - -.mypy_cache/ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +env/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# dotenv +.env + +# virtualenv +.venv +venv/ +ENV/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ + +# Jetbrains preferences dir +.idea/ analysis/ diff --git a/MCTS.py b/MCTS.py index 76b18dac..1a2ccfd4 100644 --- a/MCTS.py +++ b/MCTS.py @@ -5,130 +5,132 @@ from utils import setup_logger import loggers as lg + class Node(): - def __init__(self, state): - self.state = state - self.playerTurn = state.playerTurn - self.id = state.id - self.edges = [] + def __init__(self, state): + self.state = state + self.playerTurn = state.playerTurn + self.id = state.id + self.edges = [] + + def isLeaf(self): + if len(self.edges) > 0: + return False + else: + return True - def isLeaf(self): - if len(self.edges) > 0: - return False - else: - return True class Edge(): - def __init__(self, inNode, outNode, prior, action): - self.id = inNode.state.id + '|' + outNode.state.id - self.inNode = inNode - self.outNode = outNode - self.playerTurn = inNode.state.playerTurn - self.action = action - - self.stats = { - 'N': 0, - 'W': 0, - 'Q': 0, - 'P': prior, - } - + def __init__(self, inNode, outNode, prior, action): + self.id = inNode.state.id + '|' + outNode.state.id + self.inNode = inNode + self.outNode = outNode + self.playerTurn = inNode.state.playerTurn + self.action = action + + self.stats = { + 'N': 0, + 'W': 0, + 'Q': 0, + 'P': prior, + } + class MCTS(): - def __init__(self, root, cpuct): - self.root = root - self.tree = {} - self.cpuct = cpuct - self.addNode(root) - - def __len__(self): - return len(self.tree) + def __init__(self, root, cpuct): + self.root = root + self.tree = {} + self.cpuct = cpuct + self.addNode(root) - def moveToLeaf(self): + def __len__(self): + return len(self.tree) - lg.logger_mcts.info('------MOVING TO LEAF------') + def moveToLeaf(self): - breadcrumbs = [] - currentNode = self.root + lg.logger_mcts.info('------MOVING TO LEAF------') - done = 0 - value = 0 + breadcrumbs = [] + currentNode = self.root - while not currentNode.isLeaf(): + done = 0 + value = 0 - lg.logger_mcts.info('PLAYER TURN...%d', currentNode.state.playerTurn) - - maxQU = -99999 + while not currentNode.isLeaf(): - if currentNode == self.root: - epsilon = config.EPSILON - nu = np.random.dirichlet([config.ALPHA] * len(currentNode.edges)) - else: - epsilon = 0 - nu = [0] * len(currentNode.edges) + lg.logger_mcts.info('PLAYER TURN...%d', currentNode.state.playerTurn) - Nb = 0 - for action, edge in currentNode.edges: - Nb = Nb + edge.stats['N'] + maxQU = -99999 - for idx, (action, edge) in enumerate(currentNode.edges): + if currentNode == self.root: + epsilon = config.EPSILON + nu = np.random.dirichlet([config.ALPHA] * len(currentNode.edges)) + else: + epsilon = 0 + nu = [0] * len(currentNode.edges) - U = self.cpuct * \ - ((1-epsilon) * edge.stats['P'] + epsilon * nu[idx] ) * \ - np.sqrt(Nb) / (1 + edge.stats['N']) - - Q = edge.stats['Q'] + Nb = 0 + for action, edge in currentNode.edges: + Nb = Nb + edge.stats['N'] - lg.logger_mcts.info('action: %d (%d)... N = %d, P = %f, nu = %f, adjP = %f, W = %f, Q = %f, U = %f, Q+U = %f' - , action, action % 7, edge.stats['N'], np.round(edge.stats['P'],6), np.round(nu[idx],6), ((1-epsilon) * edge.stats['P'] + epsilon * nu[idx] ) - , np.round(edge.stats['W'],6), np.round(Q,6), np.round(U,6), np.round(Q+U,6)) + for idx, (action, edge) in enumerate(currentNode.edges): - if Q + U > maxQU: - maxQU = Q + U - simulationAction = action - simulationEdge = edge + U = self.cpuct * \ + ((1 - epsilon) * edge.stats['P'] + epsilon * nu[idx]) * \ + np.sqrt(Nb) / (1 + edge.stats['N']) - lg.logger_mcts.info('action with highest Q + U...%d', simulationAction) + Q = edge.stats['Q'] - newState, value, done = currentNode.state.takeAction(simulationAction) #the value of the newState from the POV of the new playerTurn - currentNode = simulationEdge.outNode - breadcrumbs.append(simulationEdge) + lg.logger_mcts.info( + 'action: %d (%d)... N = %d, P = %f, nu = %f, adjP = %f, W = %f, Q = %f, U = %f, Q+U = %f' + , action, action % 7, edge.stats['N'], np.round(edge.stats['P'], 6), np.round(nu[idx], 6), + ((1 - epsilon) * edge.stats['P'] + epsilon * nu[idx]) + , np.round(edge.stats['W'], 6), np.round(Q, 6), np.round(U, 6), np.round(Q + U, 6)) - lg.logger_mcts.info('DONE...%d', done) + if Q + U > maxQU: + maxQU = Q + U + simulationAction = action + simulationEdge = edge - return currentNode, value, done, breadcrumbs + lg.logger_mcts.info('action with highest Q + U...%d', simulationAction) + newState, value, done = currentNode.state.takeAction( + simulationAction) # the value of the newState from the POV of the new playerTurn + currentNode = simulationEdge.outNode + breadcrumbs.append(simulationEdge) + lg.logger_mcts.info('DONE...%d', done) - def backFill(self, leaf, value, breadcrumbs): - lg.logger_mcts.info('------DOING BACKFILL------') + return currentNode, value, done, breadcrumbs - currentPlayer = leaf.state.playerTurn + def backFill(self, leaf, value, breadcrumbs): + lg.logger_mcts.info('------DOING BACKFILL------') - for edge in breadcrumbs: - playerTurn = edge.playerTurn - if playerTurn == currentPlayer: - direction = 1 - else: - direction = -1 + currentPlayer = leaf.state.playerTurn - edge.stats['N'] = edge.stats['N'] + 1 - edge.stats['W'] = edge.stats['W'] + value * direction - edge.stats['Q'] = edge.stats['W'] / edge.stats['N'] + for edge in breadcrumbs: + playerTurn = edge.playerTurn + if playerTurn == currentPlayer: + direction = 1 + else: + direction = -1 - lg.logger_mcts.info('updating edge with value %f for player %d... N = %d, W = %f, Q = %f' - , value * direction - , playerTurn - , edge.stats['N'] - , edge.stats['W'] - , edge.stats['Q'] - ) + edge.stats['N'] = edge.stats['N'] + 1 + edge.stats['W'] = edge.stats['W'] + value * direction + edge.stats['Q'] = edge.stats['W'] / edge.stats['N'] - edge.outNode.state.render(lg.logger_mcts) + lg.logger_mcts.info('updating edge with value %f for player %d... N = %d, W = %f, Q = %f' + , value * direction + , playerTurn + , edge.stats['N'] + , edge.stats['W'] + , edge.stats['Q'] + ) - def addNode(self, node): - self.tree[node.id] = node + edge.outNode.state.render(lg.logger_mcts) + def addNode(self, node): + self.tree[node.id] = node diff --git a/agent.py b/agent.py index a6e5d01a..df57e001 100644 --- a/agent.py +++ b/agent.py @@ -17,212 +17,205 @@ class User(): - def __init__(self, name, state_size, action_size): - self.name = name - self.state_size = state_size - self.action_size = action_size - - def act(self, state, tau): - action = input('Enter your chosen action: ') - pi = np.zeros(self.action_size) - pi[action] = 1 - value = None - NN_value = None - return (action, pi, value, NN_value) + def __init__(self, name, state_size, action_size): + self.name = name + self.state_size = state_size + self.action_size = action_size + def act(self, state, tau): + action = input('Enter your chosen action: ') + pi = np.zeros(self.action_size) + pi[action] = 1 + value = None + NN_value = None + return (action, pi, value, NN_value) class Agent(): - def __init__(self, name, state_size, action_size, mcts_simulations, cpuct, model): - self.name = name - - self.state_size = state_size - self.action_size = action_size - - self.cpuct = cpuct + def __init__(self, name, state_size, action_size, mcts_simulations, cpuct, model): + self.name = name - self.MCTSsimulations = mcts_simulations - self.model = model + self.state_size = state_size + self.action_size = action_size - self.mcts = None + self.cpuct = cpuct - self.train_overall_loss = [] - self.train_value_loss = [] - self.train_policy_loss = [] - self.val_overall_loss = [] - self.val_value_loss = [] - self.val_policy_loss = [] + self.MCTSsimulations = mcts_simulations + self.model = model - - def simulate(self): + self.mcts = None - lg.logger_mcts.info('ROOT NODE...%s', self.mcts.root.state.id) - self.mcts.root.state.render(lg.logger_mcts) - lg.logger_mcts.info('CURRENT PLAYER...%d', self.mcts.root.state.playerTurn) + self.train_overall_loss = [] + self.train_value_loss = [] + self.train_policy_loss = [] + self.val_overall_loss = [] + self.val_value_loss = [] + self.val_policy_loss = [] - ##### MOVE THE LEAF NODE - leaf, value, done, breadcrumbs = self.mcts.moveToLeaf() - leaf.state.render(lg.logger_mcts) + def simulate(self): - ##### EVALUATE THE LEAF NODE - value, breadcrumbs = self.evaluateLeaf(leaf, value, done, breadcrumbs) + lg.logger_mcts.info('ROOT NODE...%s', self.mcts.root.state.id) + self.mcts.root.state.render(lg.logger_mcts) + lg.logger_mcts.info('CURRENT PLAYER...%d', self.mcts.root.state.playerTurn) - ##### BACKFILL THE VALUE THROUGH THE TREE - self.mcts.backFill(leaf, value, breadcrumbs) + ##### MOVE THE LEAF NODE + leaf, value, done, breadcrumbs = self.mcts.moveToLeaf() + leaf.state.render(lg.logger_mcts) + ##### EVALUATE THE LEAF NODE + value, breadcrumbs = self.evaluateLeaf(leaf, value, done, breadcrumbs) - def act(self, state, tau): + ##### BACKFILL THE VALUE THROUGH THE TREE + self.mcts.backFill(leaf, value, breadcrumbs) - if self.mcts == None or state.id not in self.mcts.tree: - self.buildMCTS(state) - else: - self.changeRootMCTS(state) + def act(self, state, tau): - #### run the simulation - for sim in range(self.MCTSsimulations): - lg.logger_mcts.info('***************************') - lg.logger_mcts.info('****** SIMULATION %d ******', sim + 1) - lg.logger_mcts.info('***************************') - self.simulate() + if self.mcts == None or state.id not in self.mcts.tree: + self.buildMCTS(state) + else: + self.changeRootMCTS(state) - #### get action values - pi, values = self.getAV(1) + #### run the simulation + for sim in range(self.MCTSsimulations): + lg.logger_mcts.info('***************************') + lg.logger_mcts.info('****** SIMULATION %d ******', sim + 1) + lg.logger_mcts.info('***************************') + self.simulate() - ####pick the action - action, value = self.chooseAction(pi, values, tau) + #### get action values + pi, values = self.getAV(1) - nextState, _, _ = state.takeAction(action) + ####pick the action + action, value = self.chooseAction(pi, values, tau) - NN_value = -self.get_preds(nextState)[0] + nextState, _, _ = state.takeAction(action) - lg.logger_mcts.info('ACTION VALUES...%s', pi) - lg.logger_mcts.info('CHOSEN ACTION...%d', action) - lg.logger_mcts.info('MCTS PERCEIVED VALUE...%f', value) - lg.logger_mcts.info('NN PERCEIVED VALUE...%f', NN_value) + NN_value = -self.get_preds(nextState)[0] - return (action, pi, value, NN_value) + lg.logger_mcts.info('ACTION VALUES...%s', pi) + lg.logger_mcts.info('CHOSEN ACTION...%d', action) + lg.logger_mcts.info('MCTS PERCEIVED VALUE...%f', value) + lg.logger_mcts.info('NN PERCEIVED VALUE...%f', NN_value) + return (action, pi, value, NN_value) - def get_preds(self, state): - #predict the leaf - inputToModel = np.array([self.model.convertToModelInput(state)]) + def get_preds(self, state): + # predict the leaf + inputToModel = np.array([self.model.convertToModelInput(state)]) - preds = self.model.predict(inputToModel) - value_array = preds[0] - logits_array = preds[1] - value = value_array[0] + preds = self.model.predict(inputToModel) + value_array = preds[0] + logits_array = preds[1] + value = value_array[0] - logits = logits_array[0] + logits = logits_array[0] - allowedActions = state.allowedActions + allowedActions = state.allowedActions - mask = np.ones(logits.shape,dtype=bool) - mask[allowedActions] = False - logits[mask] = -100 + mask = np.ones(logits.shape, dtype=bool) + mask[allowedActions] = False + logits[mask] = -100 - #SOFTMAX - odds = np.exp(logits) - probs = odds / np.sum(odds) ###put this just before the for? + # SOFTMAX + odds = np.exp(logits) + probs = odds / np.sum(odds) ###put this just before the for? - return ((value, probs, allowedActions)) + return ((value, probs, allowedActions)) + def evaluateLeaf(self, leaf, value, done, breadcrumbs): - def evaluateLeaf(self, leaf, value, done, breadcrumbs): + lg.logger_mcts.info('------EVALUATING LEAF------') - lg.logger_mcts.info('------EVALUATING LEAF------') + if done == 0: - if done == 0: - - value, probs, allowedActions = self.get_preds(leaf.state) - lg.logger_mcts.info('PREDICTED VALUE FOR %d: %f', leaf.state.playerTurn, value) + value, probs, allowedActions = self.get_preds(leaf.state) + lg.logger_mcts.info('PREDICTED VALUE FOR %d: %f', leaf.state.playerTurn, value) - probs = probs[allowedActions] + probs = probs[allowedActions] - for idx, action in enumerate(allowedActions): - newState, _, _ = leaf.state.takeAction(action) - if newState.id not in self.mcts.tree: - node = mc.Node(newState) - self.mcts.addNode(node) - lg.logger_mcts.info('added node...%s...p = %f', node.id, probs[idx]) - else: - node = self.mcts.tree[newState.id] - lg.logger_mcts.info('existing node...%s...', node.id) + for idx, action in enumerate(allowedActions): + newState, _, _ = leaf.state.takeAction(action) + if newState.id not in self.mcts.tree: + node = mc.Node(newState) + self.mcts.addNode(node) + lg.logger_mcts.info('added node...%s...p = %f', node.id, probs[idx]) + else: + node = self.mcts.tree[newState.id] + lg.logger_mcts.info('existing node...%s...', node.id) - newEdge = mc.Edge(leaf, node, probs[idx], action) - leaf.edges.append((action, newEdge)) - - else: - lg.logger_mcts.info('GAME VALUE FOR %d: %f', leaf.playerTurn, value) + newEdge = mc.Edge(leaf, node, probs[idx], action) + leaf.edges.append((action, newEdge)) - return ((value, breadcrumbs)) + else: + lg.logger_mcts.info('GAME VALUE FOR %d: %f', leaf.playerTurn, value) + return ((value, breadcrumbs)) - - def getAV(self, tau): - edges = self.mcts.root.edges - pi = np.zeros(self.action_size, dtype=np.integer) - values = np.zeros(self.action_size, dtype=np.float32) - - for action, edge in edges: - pi[action] = pow(edge.stats['N'], 1/tau) - values[action] = edge.stats['Q'] + def getAV(self, tau): + edges = self.mcts.root.edges + pi = np.zeros(self.action_size, dtype=np.integer) + values = np.zeros(self.action_size, dtype=np.float32) - pi = pi / (np.sum(pi) * 1.0) - return pi, values + for action, edge in edges: + pi[action] = pow(edge.stats['N'], 1 / tau) + values[action] = edge.stats['Q'] - def chooseAction(self, pi, values, tau): - if tau == 0: - actions = np.argwhere(pi == max(pi)) - action = random.choice(actions)[0] - else: - action_idx = np.random.multinomial(1, pi) - action = np.where(action_idx==1)[0][0] + pi = pi / (np.sum(pi) * 1.0) + return pi, values - value = values[action] + def chooseAction(self, pi, values, tau): + if tau == 0: + actions = np.argwhere(pi == max(pi)) + action = random.choice(actions)[0] + else: + action_idx = np.random.multinomial(1, pi) + action = np.where(action_idx == 1)[0][0] - return action, value + value = values[action] - def replay(self, ltmemory): - lg.logger_mcts.info('******RETRAINING MODEL******') + return action, value + def replay(self, ltmemory): + lg.logger_mcts.info('******RETRAINING MODEL******') - for i in range(config.TRAINING_LOOPS): - minibatch = random.sample(ltmemory, min(config.BATCH_SIZE, len(ltmemory))) + for i in range(config.TRAINING_LOOPS): + minibatch = random.sample(ltmemory, min(config.BATCH_SIZE, len(ltmemory))) - training_states = np.array([self.model.convertToModelInput(row['state']) for row in minibatch]) - training_targets = {'value_head': np.array([row['value'] for row in minibatch]) - , 'policy_head': np.array([row['AV'] for row in minibatch])} + training_states = np.array([self.model.convertToModelInput(row['state']) for row in minibatch]) + training_targets = {'value_head': np.array([row['value'] for row in minibatch]) + , 'policy_head': np.array([row['AV'] for row in minibatch])} - fit = self.model.fit(training_states, training_targets, epochs=config.EPOCHS, verbose=1, validation_split=0, batch_size = 32) - lg.logger_mcts.info('NEW LOSS %s', fit.history) + fit = self.model.fit(training_states, training_targets, epochs=config.EPOCHS, verbose=1, validation_split=0, + batch_size=32) + lg.logger_mcts.info('NEW LOSS %s', fit.history) - self.train_overall_loss.append(round(fit.history['loss'][config.EPOCHS - 1],4)) - self.train_value_loss.append(round(fit.history['value_head_loss'][config.EPOCHS - 1],4)) - self.train_policy_loss.append(round(fit.history['policy_head_loss'][config.EPOCHS - 1],4)) + self.train_overall_loss.append(round(fit.history['loss'][config.EPOCHS - 1], 4)) + self.train_value_loss.append(round(fit.history['value_head_loss'][config.EPOCHS - 1], 4)) + self.train_policy_loss.append(round(fit.history['policy_head_loss'][config.EPOCHS - 1], 4)) - plt.plot(self.train_overall_loss, 'k') - plt.plot(self.train_value_loss, 'k:') - plt.plot(self.train_policy_loss, 'k--') + plt.plot(self.train_overall_loss, 'k') + plt.plot(self.train_value_loss, 'k:') + plt.plot(self.train_policy_loss, 'k--') - plt.legend(['train_overall_loss', 'train_value_loss', 'train_policy_loss'], loc='lower left') + plt.legend(['train_overall_loss', 'train_value_loss', 'train_policy_loss'], loc='lower left') - display.clear_output(wait=True) - display.display(pl.gcf()) - pl.gcf().clear() - time.sleep(1.0) + display.clear_output(wait=True) + display.display(pl.gcf()) + pl.gcf().clear() + time.sleep(1.0) - print('\n') - self.model.printWeightAverages() + print('\n') + self.model.printWeightAverages() - def predict(self, inputToModel): - preds = self.model.predict(inputToModel) - return preds + def predict(self, inputToModel): + preds = self.model.predict(inputToModel) + return preds - def buildMCTS(self, state): - lg.logger_mcts.info('****** BUILDING NEW MCTS TREE FOR AGENT %s ******', self.name) - self.root = mc.Node(state) - self.mcts = mc.MCTS(self.root, self.cpuct) + def buildMCTS(self, state): + lg.logger_mcts.info('****** BUILDING NEW MCTS TREE FOR AGENT %s ******', self.name) + self.root = mc.Node(state) + self.mcts = mc.MCTS(self.root, self.cpuct) - def changeRootMCTS(self, state): - lg.logger_mcts.info('****** CHANGING ROOT OF MCTS TREE TO %s FOR AGENT %s ******', state.id, self.name) - self.mcts.root = self.mcts.tree[state.id] \ No newline at end of file + def changeRootMCTS(self, state): + lg.logger_mcts.info('****** CHANGING ROOT OF MCTS TREE TO %s FOR AGENT %s ******', state.id, self.name) + self.mcts.root = self.mcts.tree[state.id] diff --git a/config.py b/config.py index dd2c4c85..17075c0b 100644 --- a/config.py +++ b/config.py @@ -2,12 +2,11 @@ EPISODES = 30 MCTS_SIMS = 50 MEMORY_SIZE = 30000 -TURNS_UNTIL_TAU0 = 10 # turn on which it starts playing deterministically +TURNS_UNTIL_TAU0 = 10 # turn on which it starts playing deterministically CPUCT = 1 EPSILON = 0.2 ALPHA = 0.8 - #### RETRAINING BATCH_SIZE = 256 EPOCHS = 1 @@ -17,14 +16,14 @@ TRAINING_LOOPS = 10 HIDDEN_CNN_LAYERS = [ - {'filters':75, 'kernel_size': (4,4)} - , {'filters':75, 'kernel_size': (4,4)} - , {'filters':75, 'kernel_size': (4,4)} - , {'filters':75, 'kernel_size': (4,4)} - , {'filters':75, 'kernel_size': (4,4)} - , {'filters':75, 'kernel_size': (4,4)} - ] + {'filters': 75, 'kernel_size': (4, 4)} + , {'filters': 75, 'kernel_size': (4, 4)} + , {'filters': 75, 'kernel_size': (4, 4)} + , {'filters': 75, 'kernel_size': (4, 4)} + , {'filters': 75, 'kernel_size': (4, 4)} + , {'filters': 75, 'kernel_size': (4, 4)} +] #### EVALUATION EVAL_EPISODES = 20 -SCORING_THRESHOLD = 1.3 \ No newline at end of file +SCORING_THRESHOLD = 1.3 diff --git a/funcs.py b/funcs.py index ecb82bff..d46bec12 100644 --- a/funcs.py +++ b/funcs.py @@ -10,69 +10,72 @@ import config -def playMatchesBetweenVersions(env, run_version, player1version, player2version, EPISODES, logger, turns_until_tau0, goes_first = 0): - + +def playMatchesBetweenVersions(env, run_version, player1version, player2version, EPISODES, logger, turns_until_tau0, + goes_first=0): if player1version == -1: player1 = User('player1', env.state_size, env.action_size) else: - player1_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, env.input_shape, env.action_size, config.HIDDEN_CNN_LAYERS) + player1_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, env.input_shape, env.action_size, + config.HIDDEN_CNN_LAYERS) if player1version > 0: player1_network = player1_NN.read(env.name, run_version, player1version) - player1_NN.model.set_weights(player1_network.get_weights()) + player1_NN.model.set_weights(player1_network.get_weights()) player1 = Agent('player1', env.state_size, env.action_size, config.MCTS_SIMS, config.CPUCT, player1_NN) if player2version == -1: player2 = User('player2', env.state_size, env.action_size) else: - player2_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, env.input_shape, env.action_size, config.HIDDEN_CNN_LAYERS) - + player2_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, env.input_shape, env.action_size, + config.HIDDEN_CNN_LAYERS) + if player2version > 0: player2_network = player2_NN.read(env.name, run_version, player2version) player2_NN.model.set_weights(player2_network.get_weights()) player2 = Agent('player2', env.state_size, env.action_size, config.MCTS_SIMS, config.CPUCT, player2_NN) - scores, memory, points, sp_scores = playMatches(player1, player2, EPISODES, logger, turns_until_tau0, None, goes_first) + scores, memory, points, sp_scores = playMatches(player1, player2, EPISODES, logger, turns_until_tau0, None, + goes_first) return (scores, memory, points, sp_scores) -def playMatches(player1, player2, EPISODES, logger, turns_until_tau0, memory = None, goes_first = 0): - +def playMatches(player1, player2, EPISODES, logger, turns_until_tau0, memory=None, goes_first=0): env = Game() - scores = {player1.name:0, "drawn": 0, player2.name:0} - sp_scores = {'sp':0, "drawn": 0, 'nsp':0} - points = {player1.name:[], player2.name:[]} + scores = {player1.name: 0, "drawn": 0, player2.name: 0} + sp_scores = {'sp': 0, "drawn": 0, 'nsp': 0} + points = {player1.name: [], player2.name: []} for e in range(EPISODES): logger.info('====================') - logger.info('EPISODE %d OF %d', e+1, EPISODES) + logger.info('EPISODE %d OF %d', e + 1, EPISODES) logger.info('====================') - print (str(e+1) + ' ', end='') + print(str(e + 1) + ' ', end='') state = env.reset() - + done = 0 turn = 0 player1.mcts = None player2.mcts = None if goes_first == 0: - player1Starts = random.randint(0,1) * 2 - 1 + player1Starts = random.randint(0, 1) * 2 - 1 else: player1Starts = goes_first if player1Starts == 1: - players = {1:{"agent": player1, "name":player1.name} - , -1: {"agent": player2, "name":player2.name} - } + players = {1: {"agent": player1, "name": player1.name} + , -1: {"agent": player2, "name": player2.name} + } logger.info(player1.name + ' plays as X') else: - players = {1:{"agent": player2, "name":player2.name} - , -1: {"agent": player1, "name":player1.name} - } + players = {1: {"agent": player2, "name": player2.name} + , -1: {"agent": player1, "name": player1.name} + } logger.info(player2.name + ' plays as X') logger.info('--------------') @@ -80,7 +83,7 @@ def playMatches(player1, player2, EPISODES, logger, turns_until_tau0, memory = N while done == 0: turn = turn + 1 - + #### Run the MCTS algo and return an action if turn < turns_until_tau0: action, pi, MCTS_value, NN_value = players[state.playerTurn]['agent'].act(state, 1) @@ -91,20 +94,21 @@ def playMatches(player1, player2, EPISODES, logger, turns_until_tau0, memory = N ####Commit the move to memory memory.commit_stmemory(env.identities, state, pi) - logger.info('action: %d', action) for r in range(env.grid_shape[0]): - logger.info(['----' if x == 0 else '{0:.2f}'.format(np.round(x,2)) for x in pi[env.grid_shape[1]*r : (env.grid_shape[1]*r + env.grid_shape[1])]]) - logger.info('MCTS perceived value for %s: %f', state.pieces[str(state.playerTurn)] ,np.round(MCTS_value,2)) - logger.info('NN perceived value for %s: %f', state.pieces[str(state.playerTurn)] ,np.round(NN_value,2)) + logger.info(['----' if x == 0 else '{0:.2f}'.format(np.round(x, 2)) for x in + pi[env.grid_shape[1] * r: (env.grid_shape[1] * r + env.grid_shape[1])]]) + logger.info('MCTS perceived value for %s: %f', state.pieces[str(state.playerTurn)], np.round(MCTS_value, 2)) + logger.info('NN perceived value for %s: %f', state.pieces[str(state.playerTurn)], np.round(NN_value, 2)) logger.info('====================') ### Do the action - state, value, done, _ = env.step(action) #the value of the newState from the POV of the new playerTurn i.e. -1 if the previous player played a winning move - + state, value, done, _ = env.step( + action) # the value of the newState from the POV of the new playerTurn i.e. -1 if the previous player played a winning move + env.gameState.render(logger) - if done == 1: + if done == 1: if memory != None: #### If the game is finished, assign the values correctly to the game moves for move in memory.stmemory: @@ -112,13 +116,13 @@ def playMatches(player1, player2, EPISODES, logger, turns_until_tau0, memory = N move['value'] = value else: move['value'] = -value - + memory.commit_ltmemory() - + if value == 1: logger.info('%s WINS!', players[state.playerTurn]['name']) scores[players[state.playerTurn]['name']] = scores[players[state.playerTurn]['name']] + 1 - if state.playerTurn == 1: + if state.playerTurn == 1: sp_scores['sp'] = sp_scores['sp'] + 1 else: sp_scores['nsp'] = sp_scores['nsp'] + 1 @@ -126,8 +130,8 @@ def playMatches(player1, player2, EPISODES, logger, turns_until_tau0, memory = N elif value == -1: logger.info('%s WINS!', players[-state.playerTurn]['name']) scores[players[-state.playerTurn]['name']] = scores[players[-state.playerTurn]['name']] + 1 - - if state.playerTurn == 1: + + if state.playerTurn == 1: sp_scores['nsp'] = sp_scores['nsp'] + 1 else: sp_scores['sp'] = sp_scores['sp'] + 1 diff --git a/game.py b/game.py index 3355a77e..d1758257 100644 --- a/game.py +++ b/game.py @@ -1,228 +1,233 @@ import numpy as np import logging + class Game: - def __init__(self): - self.currentPlayer = 1 - self.gameState = GameState(np.array([0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], dtype=np.int), 1) - self.actionSpace = np.array([0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], dtype=np.int) - self.pieces = {'1':'X', '0': '-', '-1':'O'} - self.grid_shape = (6,7) - self.input_shape = (2,6,7) - self.name = 'connect4' - self.state_size = len(self.gameState.binary) - self.action_size = len(self.actionSpace) - - def reset(self): - self.gameState = GameState(np.array([0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], dtype=np.int), 1) - self.currentPlayer = 1 - return self.gameState - - def step(self, action): - next_state, value, done = self.gameState.takeAction(action) - self.gameState = next_state - self.currentPlayer = -self.currentPlayer - info = None - return ((next_state, value, done, info)) - - def identities(self, state, actionValues): - identities = [(state,actionValues)] - - currentBoard = state.board - currentAV = actionValues - - currentBoard = np.array([ - currentBoard[6], currentBoard[5],currentBoard[4], currentBoard[3], currentBoard[2], currentBoard[1], currentBoard[0] - , currentBoard[13], currentBoard[12],currentBoard[11], currentBoard[10], currentBoard[9], currentBoard[8], currentBoard[7] - , currentBoard[20], currentBoard[19],currentBoard[18], currentBoard[17], currentBoard[16], currentBoard[15], currentBoard[14] - , currentBoard[27], currentBoard[26],currentBoard[25], currentBoard[24], currentBoard[23], currentBoard[22], currentBoard[21] - , currentBoard[34], currentBoard[33],currentBoard[32], currentBoard[31], currentBoard[30], currentBoard[29], currentBoard[28] - , currentBoard[41], currentBoard[40],currentBoard[39], currentBoard[38], currentBoard[37], currentBoard[36], currentBoard[35] - ]) - - currentAV = np.array([ - currentAV[6], currentAV[5],currentAV[4], currentAV[3], currentAV[2], currentAV[1], currentAV[0] - , currentAV[13], currentAV[12],currentAV[11], currentAV[10], currentAV[9], currentAV[8], currentAV[7] - , currentAV[20], currentAV[19],currentAV[18], currentAV[17], currentAV[16], currentAV[15], currentAV[14] - , currentAV[27], currentAV[26],currentAV[25], currentAV[24], currentAV[23], currentAV[22], currentAV[21] - , currentAV[34], currentAV[33],currentAV[32], currentAV[31], currentAV[30], currentAV[29], currentAV[28] - , currentAV[41], currentAV[40],currentAV[39], currentAV[38], currentAV[37], currentAV[36], currentAV[35] - ]) - - identities.append((GameState(currentBoard, state.playerTurn), currentAV)) - - return identities + def __init__(self): + self.currentPlayer = 1 + self.gameState = GameState(np.array( + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0], dtype=np.int), 1) + self.actionSpace = np.array( + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0], dtype=np.int) + self.pieces = {'1': 'X', '0': '-', '-1': 'O'} + self.grid_shape = (6, 7) + self.input_shape = (2, 6, 7) + self.name = 'connect4' + self.state_size = len(self.gameState.binary) + self.action_size = len(self.actionSpace) + + def reset(self): + self.gameState = GameState(np.array( + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0], dtype=np.int), 1) + self.currentPlayer = 1 + return self.gameState + + def step(self, action): + next_state, value, done = self.gameState.takeAction(action) + self.gameState = next_state + self.currentPlayer = -self.currentPlayer + info = None + return ((next_state, value, done, info)) + + def identities(self, state, actionValues): + identities = [(state, actionValues)] + + currentBoard = state.board + currentAV = actionValues + + currentBoard = np.array([ + currentBoard[6], currentBoard[5], currentBoard[4], currentBoard[3], currentBoard[2], currentBoard[1], + currentBoard[0] + , currentBoard[13], currentBoard[12], currentBoard[11], currentBoard[10], currentBoard[9], currentBoard[8], + currentBoard[7] + , currentBoard[20], currentBoard[19], currentBoard[18], currentBoard[17], currentBoard[16], + currentBoard[15], currentBoard[14] + , currentBoard[27], currentBoard[26], currentBoard[25], currentBoard[24], currentBoard[23], + currentBoard[22], currentBoard[21] + , currentBoard[34], currentBoard[33], currentBoard[32], currentBoard[31], currentBoard[30], + currentBoard[29], currentBoard[28] + , currentBoard[41], currentBoard[40], currentBoard[39], currentBoard[38], currentBoard[37], + currentBoard[36], currentBoard[35] + ]) + + currentAV = np.array([ + currentAV[6], currentAV[5], currentAV[4], currentAV[3], currentAV[2], currentAV[1], currentAV[0] + , currentAV[13], currentAV[12], currentAV[11], currentAV[10], currentAV[9], currentAV[8], currentAV[7] + , currentAV[20], currentAV[19], currentAV[18], currentAV[17], currentAV[16], currentAV[15], currentAV[14] + , currentAV[27], currentAV[26], currentAV[25], currentAV[24], currentAV[23], currentAV[22], currentAV[21] + , currentAV[34], currentAV[33], currentAV[32], currentAV[31], currentAV[30], currentAV[29], currentAV[28] + , currentAV[41], currentAV[40], currentAV[39], currentAV[38], currentAV[37], currentAV[36], currentAV[35] + ]) + + identities.append((GameState(currentBoard, state.playerTurn), currentAV)) + + return identities class GameState(): - def __init__(self, board, playerTurn): - self.board = board - self.pieces = {'1':'X', '0': '-', '-1':'O'} - self.winners = [ - [0,1,2,3], - [1,2,3,4], - [2,3,4,5], - [3,4,5,6], - [7,8,9,10], - [8,9,10,11], - [9,10,11,12], - [10,11,12,13], - [14,15,16,17], - [15,16,17,18], - [16,17,18,19], - [17,18,19,20], - [21,22,23,24], - [22,23,24,25], - [23,24,25,26], - [24,25,26,27], - [28,29,30,31], - [29,30,31,32], - [30,31,32,33], - [31,32,33,34], - [35,36,37,38], - [36,37,38,39], - [37,38,39,40], - [38,39,40,41], - - [0,7,14,21], - [7,14,21,28], - [14,21,28,35], - [1,8,15,22], - [8,15,22,29], - [15,22,29,36], - [2,9,16,23], - [9,16,23,30], - [16,23,30,37], - [3,10,17,24], - [10,17,24,31], - [17,24,31,38], - [4,11,18,25], - [11,18,25,32], - [18,25,32,39], - [5,12,19,26], - [12,19,26,33], - [19,26,33,40], - [6,13,20,27], - [13,20,27,34], - [20,27,34,41], - - [3,9,15,21], - [4,10,16,22], - [10,16,22,28], - [5,11,17,23], - [11,17,23,29], - [17,23,29,35], - [6,12,18,24], - [12,18,24,30], - [18,24,30,36], - [13,19,25,31], - [19,25,31,37], - [20,26,32,38], - - [3,11,19,27], - [2,10,18,26], - [10,18,26,34], - [1,9,17,25], - [9,17,25,33], - [17,25,33,41], - [0,8,16,24], - [8,16,24,32], - [16,24,32,40], - [7,15,23,31], - [15,23,31,39], - [14,22,30,38], - ] - self.playerTurn = playerTurn - self.binary = self._binary() - self.id = self._convertStateToId() - self.allowedActions = self._allowedActions() - self.isEndGame = self._checkForEndGame() - self.value = self._getValue() - self.score = self._getScore() - - def _allowedActions(self): - allowed = [] - for i in range(len(self.board)): - if i >= len(self.board) - 7: - if self.board[i]==0: - allowed.append(i) - else: - if self.board[i] == 0 and self.board[i+7] != 0: - allowed.append(i) - - return allowed - - def _binary(self): - - currentplayer_position = np.zeros(len(self.board), dtype=np.int) - currentplayer_position[self.board==self.playerTurn] = 1 - - other_position = np.zeros(len(self.board), dtype=np.int) - other_position[self.board==-self.playerTurn] = 1 - - position = np.append(currentplayer_position,other_position) - - return (position) - - def _convertStateToId(self): - player1_position = np.zeros(len(self.board), dtype=np.int) - player1_position[self.board==1] = 1 - - other_position = np.zeros(len(self.board), dtype=np.int) - other_position[self.board==-1] = 1 - - position = np.append(player1_position,other_position) - - id = ''.join(map(str,position)) - - return id - - def _checkForEndGame(self): - if np.count_nonzero(self.board) == 42: - return 1 - - for x,y,z,a in self.winners: - if (self.board[x] + self.board[y] + self.board[z] + self.board[a] == 4 * -self.playerTurn): - return 1 - return 0 - - - def _getValue(self): - # This is the value of the state for the current player - # i.e. if the previous player played a winning move, you lose - for x,y,z,a in self.winners: - if (self.board[x] + self.board[y] + self.board[z] + self.board[a] == 4 * -self.playerTurn): - return (-1, -1, 1) - return (0, 0, 0) - - - def _getScore(self): - tmp = self.value - return (tmp[1], tmp[2]) - - - - - def takeAction(self, action): - newBoard = np.array(self.board) - newBoard[action]=self.playerTurn - - newState = GameState(newBoard, -self.playerTurn) - - value = 0 - done = 0 - - if newState.isEndGame: - value = newState.value[0] - done = 1 - - return (newState, value, done) - - - - - def render(self, logger): - for r in range(6): - logger.info([self.pieces[str(x)] for x in self.board[7*r : (7*r + 7)]]) - logger.info('--------------') \ No newline at end of file + def __init__(self, board, playerTurn): + self.board = board + self.pieces = {'1': 'X', '0': '-', '-1': 'O'} + self.winners = [ + [0, 1, 2, 3], + [1, 2, 3, 4], + [2, 3, 4, 5], + [3, 4, 5, 6], + [7, 8, 9, 10], + [8, 9, 10, 11], + [9, 10, 11, 12], + [10, 11, 12, 13], + [14, 15, 16, 17], + [15, 16, 17, 18], + [16, 17, 18, 19], + [17, 18, 19, 20], + [21, 22, 23, 24], + [22, 23, 24, 25], + [23, 24, 25, 26], + [24, 25, 26, 27], + [28, 29, 30, 31], + [29, 30, 31, 32], + [30, 31, 32, 33], + [31, 32, 33, 34], + [35, 36, 37, 38], + [36, 37, 38, 39], + [37, 38, 39, 40], + [38, 39, 40, 41], + + [0, 7, 14, 21], + [7, 14, 21, 28], + [14, 21, 28, 35], + [1, 8, 15, 22], + [8, 15, 22, 29], + [15, 22, 29, 36], + [2, 9, 16, 23], + [9, 16, 23, 30], + [16, 23, 30, 37], + [3, 10, 17, 24], + [10, 17, 24, 31], + [17, 24, 31, 38], + [4, 11, 18, 25], + [11, 18, 25, 32], + [18, 25, 32, 39], + [5, 12, 19, 26], + [12, 19, 26, 33], + [19, 26, 33, 40], + [6, 13, 20, 27], + [13, 20, 27, 34], + [20, 27, 34, 41], + + [3, 9, 15, 21], + [4, 10, 16, 22], + [10, 16, 22, 28], + [5, 11, 17, 23], + [11, 17, 23, 29], + [17, 23, 29, 35], + [6, 12, 18, 24], + [12, 18, 24, 30], + [18, 24, 30, 36], + [13, 19, 25, 31], + [19, 25, 31, 37], + [20, 26, 32, 38], + + [3, 11, 19, 27], + [2, 10, 18, 26], + [10, 18, 26, 34], + [1, 9, 17, 25], + [9, 17, 25, 33], + [17, 25, 33, 41], + [0, 8, 16, 24], + [8, 16, 24, 32], + [16, 24, 32, 40], + [7, 15, 23, 31], + [15, 23, 31, 39], + [14, 22, 30, 38], + ] + self.playerTurn = playerTurn + self.binary = self._binary() + self.id = self._convertStateToId() + self.allowedActions = self._allowedActions() + self.isEndGame = self._checkForEndGame() + self.value = self._getValue() + self.score = self._getScore() + + def _allowedActions(self): + allowed = [] + for i in range(len(self.board)): + if i >= len(self.board) - 7: + if self.board[i] == 0: + allowed.append(i) + else: + if self.board[i] == 0 and self.board[i + 7] != 0: + allowed.append(i) + + return allowed + + def _binary(self): + + currentplayer_position = np.zeros(len(self.board), dtype=np.int) + currentplayer_position[self.board == self.playerTurn] = 1 + + other_position = np.zeros(len(self.board), dtype=np.int) + other_position[self.board == -self.playerTurn] = 1 + + position = np.append(currentplayer_position, other_position) + + return (position) + + def _convertStateToId(self): + player1_position = np.zeros(len(self.board), dtype=np.int) + player1_position[self.board == 1] = 1 + + other_position = np.zeros(len(self.board), dtype=np.int) + other_position[self.board == -1] = 1 + + position = np.append(player1_position, other_position) + + id = ''.join(map(str, position)) + + return id + + def _checkForEndGame(self): + if np.count_nonzero(self.board) == 42: + return 1 + + for x, y, z, a in self.winners: + if (self.board[x] + self.board[y] + self.board[z] + self.board[a] == 4 * -self.playerTurn): + return 1 + return 0 + + def _getValue(self): + # This is the value of the state for the current player + # i.e. if the previous player played a winning move, you lose + for x, y, z, a in self.winners: + if (self.board[x] + self.board[y] + self.board[z] + self.board[a] == 4 * -self.playerTurn): + return (-1, -1, 1) + return (0, 0, 0) + + def _getScore(self): + tmp = self.value + return (tmp[1], tmp[2]) + + def takeAction(self, action): + newBoard = np.array(self.board) + newBoard[action] = self.playerTurn + + newState = GameState(newBoard, -self.playerTurn) + + value = 0 + done = 0 + + if newState.isEndGame: + value = newState.value[0] + done = 1 + + return (newState, value, done) + + def render(self, logger): + for r in range(6): + logger.info([self.pieces[str(x)] for x in self.board[7 * r: (7 * r + 7)]]) + logger.info('--------------') diff --git a/games/connect4/game.py b/games/connect4/game.py index e1343fa2..d1758257 100644 --- a/games/connect4/game.py +++ b/games/connect4/game.py @@ -1,231 +1,233 @@ import numpy as np import logging + class Game: - def __init__(self): - self.currentPlayer = 1 - self.gameState = GameState(np.array([0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], dtype=np.int), 1) - self.actionSpace = np.array([0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], dtype=np.int) - self.pieces = {'1':'X', '0': '-', '-1':'O'} - self.grid_shape = (6,7) - self.input_shape = (2,6,7) - self.name = 'connect4' - self.state_size = len(self.gameState.binary) - self.action_size = len(self.actionSpace) - - def reset(self): - self.gameState = GameState(np.array([0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], dtype=np.int), 1) - self.currentPlayer = 1 - return self.gameState - - def step(self, action): - next_state, value, done = self.gameState.takeAction(action) - self.gameState = next_state - self.currentPlayer = -self.currentPlayer - info = None - return ((next_state, value, done, info)) - - def identities(self, state, actionValues): - identities = [(state,actionValues)] - - currentBoard = state.board - currentAV = actionValues - - currentBoard = np.array([ - currentBoard[6], currentBoard[5],currentBoard[4], currentBoard[3], currentBoard[2], currentBoard[1], currentBoard[0] - , currentBoard[13], currentBoard[12],currentBoard[11], currentBoard[10], currentBoard[9], currentBoard[8], currentBoard[7] - , currentBoard[20], currentBoard[19],currentBoard[18], currentBoard[17], currentBoard[16], currentBoard[15], currentBoard[14] - , currentBoard[27], currentBoard[26],currentBoard[25], currentBoard[24], currentBoard[23], currentBoard[22], currentBoard[21] - , currentBoard[34], currentBoard[33],currentBoard[32], currentBoard[31], currentBoard[30], currentBoard[29], currentBoard[28] - , currentBoard[41], currentBoard[40],currentBoard[39], currentBoard[38], currentBoard[37], currentBoard[36], currentBoard[35] - ]) - - currentAV = np.array([ - currentAV[6], currentAV[5],currentAV[4], currentAV[3], currentAV[2], currentAV[1], currentAV[0] - , currentAV[13], currentAV[12],currentAV[11], currentAV[10], currentAV[9], currentAV[8], currentAV[7] - , currentAV[20], currentAV[19],currentAV[18], currentAV[17], currentAV[16], currentAV[15], currentAV[14] - , currentAV[27], currentAV[26],currentAV[25], currentAV[24], currentAV[23], currentAV[22], currentAV[21] - , currentAV[34], currentAV[33],currentAV[32], currentAV[31], currentAV[30], currentAV[29], currentAV[28] - , currentAV[41], currentAV[40],currentAV[39], currentAV[38], currentAV[37], currentAV[36], currentAV[35] - ]) - - identities.append((GameState(currentBoard, state.playerTurn), currentAV)) - - return identities + def __init__(self): + self.currentPlayer = 1 + self.gameState = GameState(np.array( + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0], dtype=np.int), 1) + self.actionSpace = np.array( + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0], dtype=np.int) + self.pieces = {'1': 'X', '0': '-', '-1': 'O'} + self.grid_shape = (6, 7) + self.input_shape = (2, 6, 7) + self.name = 'connect4' + self.state_size = len(self.gameState.binary) + self.action_size = len(self.actionSpace) + + def reset(self): + self.gameState = GameState(np.array( + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0], dtype=np.int), 1) + self.currentPlayer = 1 + return self.gameState + + def step(self, action): + next_state, value, done = self.gameState.takeAction(action) + self.gameState = next_state + self.currentPlayer = -self.currentPlayer + info = None + return ((next_state, value, done, info)) + + def identities(self, state, actionValues): + identities = [(state, actionValues)] + + currentBoard = state.board + currentAV = actionValues + + currentBoard = np.array([ + currentBoard[6], currentBoard[5], currentBoard[4], currentBoard[3], currentBoard[2], currentBoard[1], + currentBoard[0] + , currentBoard[13], currentBoard[12], currentBoard[11], currentBoard[10], currentBoard[9], currentBoard[8], + currentBoard[7] + , currentBoard[20], currentBoard[19], currentBoard[18], currentBoard[17], currentBoard[16], + currentBoard[15], currentBoard[14] + , currentBoard[27], currentBoard[26], currentBoard[25], currentBoard[24], currentBoard[23], + currentBoard[22], currentBoard[21] + , currentBoard[34], currentBoard[33], currentBoard[32], currentBoard[31], currentBoard[30], + currentBoard[29], currentBoard[28] + , currentBoard[41], currentBoard[40], currentBoard[39], currentBoard[38], currentBoard[37], + currentBoard[36], currentBoard[35] + ]) + + currentAV = np.array([ + currentAV[6], currentAV[5], currentAV[4], currentAV[3], currentAV[2], currentAV[1], currentAV[0] + , currentAV[13], currentAV[12], currentAV[11], currentAV[10], currentAV[9], currentAV[8], currentAV[7] + , currentAV[20], currentAV[19], currentAV[18], currentAV[17], currentAV[16], currentAV[15], currentAV[14] + , currentAV[27], currentAV[26], currentAV[25], currentAV[24], currentAV[23], currentAV[22], currentAV[21] + , currentAV[34], currentAV[33], currentAV[32], currentAV[31], currentAV[30], currentAV[29], currentAV[28] + , currentAV[41], currentAV[40], currentAV[39], currentAV[38], currentAV[37], currentAV[36], currentAV[35] + ]) + + identities.append((GameState(currentBoard, state.playerTurn), currentAV)) + + return identities class GameState(): - def __init__(self, board, playerTurn): - self.board = board - self.pieces = {'1':'X', '0': '-', '-1':'O'} - self.winners = [ - [0,1,2,3], - [1,2,3,4], - [2,3,4,5], - [3,4,5,6], - [7,8,9,10], - [8,9,10,11], - [9,10,11,12], - [10,11,12,13], - [14,15,16,17], - [15,16,17,18], - [16,17,18,19], - [17,18,19,20], - [21,22,23,24], - [22,23,24,25], - [23,24,25,26], - [24,25,26,27], - [28,29,30,31], - [29,30,31,32], - [30,31,32,33], - [31,32,33,34], - [35,36,37,38], - [36,37,38,39], - [37,38,39,40], - [38,39,40,41], - - [0,7,14,21], - [7,14,21,28], - [14,21,28,35], - [1,8,15,22], - [8,15,22,29], - [15,22,29,36], - [2,9,16,23], - [9,16,23,30], - [16,23,30,37], - [3,10,17,24], - [10,17,24,31], - [17,24,31,38], - [4,11,18,25], - [11,18,25,32], - [18,25,32,39], - [5,12,19,26], - [12,19,26,33], - [19,26,33,40], - [6,13,20,27], - [13,20,27,34], - [20,27,34,41], - - [3,9,15,21], - [4,10,16,22], - [10,16,22,28], - [5,11,17,23], - [11,17,23,29], - [17,23,29,35], - [6,12,18,24], - [12,18,24,30], - [18,24,30,36], - [13,19,25,31], - [19,25,31,37], - [20,26,32,38], - - [3,11,19,27], - [2,10,18,26], - [10,18,26,34], - [1,9,17,25], - [9,17,25,33], - [17,25,33,41], - [0,8,16,24], - [8,16,24,32], - [16,24,32,40], - [7,15,23,31], - [15,23,31,39], - [14,22,30,38], - ] - self.playerTurn = playerTurn - self.binary = self._binary() - self.id = self._convertStateToId() - self.allowedActions = self._allowedActions() - self.isEndGame = self._checkForEndGame() - self.value = self._getValue() - self.score = self._getScore() - - def _allowedActions(self): - allowed = [] - for i in range(len(self.board)): - if i >= len(self.board) - 7: - if self.board[i]==0: - allowed.append(i) - else: - if self.board[i] == 0 and self.board[i+7] != 0: - allowed.append(i) - - return allowed - - def _binary(self): - - currentplayer_position = np.zeros(len(self.board), dtype=np.int) - currentplayer_position[self.board==self.playerTurn] = 1 - - other_position = np.zeros(len(self.board), dtype=np.int) - other_position[self.board==-self.playerTurn] = 1 - - position = np.append(currentplayer_position,other_position) - - return (position) - - def _convertStateToId(self): - player1_position = np.zeros(len(self.board), dtype=np.int) - player1_position[self.board==1] = 1 - - other_position = np.zeros(len(self.board), dtype=np.int) - other_position[self.board==-1] = 1 - - position = np.append(player1_position,other_position) - - id = ''.join(map(str,position)) - - return id - - def _checkForEndGame(self): - if np.count_nonzero(self.board) == 42: - return 1 - - for x,y,z,a in self.winners: - if (self.board[x] + self.board[y] + self.board[z] + self.board[a] == 4 * -self.playerTurn): - return 1 - return 0 - - - def _getValue(self): - # This is the value of the state for the current player - # i.e. if the previous player played a winning move, you lose - for x,y,z,a in self.winners: - if (self.board[x] + self.board[y] + self.board[z] + self.board[a] == 4 * -self.playerTurn): - return (-1, -1, 1) - return (0, 0, 0) - - - def _getScore(self): - tmp = self.value - return (tmp[1], tmp[2]) - - - - - def takeAction(self, action): - newBoard = np.array(self.board) - newBoard[action]=self.playerTurn - - newState = GameState(newBoard, -self.playerTurn) - - value = 0 - done = 0 - - if newState.isEndGame: - value = newState.value[0] - done = 1 - - return (newState, value, done) - - - - - def render(self, logger): - for r in range(6): - logger.info([self.pieces[str(x)] for x in self.board[7*r : (7*r + 7)]]) - logger.info('--------------') - - - + def __init__(self, board, playerTurn): + self.board = board + self.pieces = {'1': 'X', '0': '-', '-1': 'O'} + self.winners = [ + [0, 1, 2, 3], + [1, 2, 3, 4], + [2, 3, 4, 5], + [3, 4, 5, 6], + [7, 8, 9, 10], + [8, 9, 10, 11], + [9, 10, 11, 12], + [10, 11, 12, 13], + [14, 15, 16, 17], + [15, 16, 17, 18], + [16, 17, 18, 19], + [17, 18, 19, 20], + [21, 22, 23, 24], + [22, 23, 24, 25], + [23, 24, 25, 26], + [24, 25, 26, 27], + [28, 29, 30, 31], + [29, 30, 31, 32], + [30, 31, 32, 33], + [31, 32, 33, 34], + [35, 36, 37, 38], + [36, 37, 38, 39], + [37, 38, 39, 40], + [38, 39, 40, 41], + + [0, 7, 14, 21], + [7, 14, 21, 28], + [14, 21, 28, 35], + [1, 8, 15, 22], + [8, 15, 22, 29], + [15, 22, 29, 36], + [2, 9, 16, 23], + [9, 16, 23, 30], + [16, 23, 30, 37], + [3, 10, 17, 24], + [10, 17, 24, 31], + [17, 24, 31, 38], + [4, 11, 18, 25], + [11, 18, 25, 32], + [18, 25, 32, 39], + [5, 12, 19, 26], + [12, 19, 26, 33], + [19, 26, 33, 40], + [6, 13, 20, 27], + [13, 20, 27, 34], + [20, 27, 34, 41], + + [3, 9, 15, 21], + [4, 10, 16, 22], + [10, 16, 22, 28], + [5, 11, 17, 23], + [11, 17, 23, 29], + [17, 23, 29, 35], + [6, 12, 18, 24], + [12, 18, 24, 30], + [18, 24, 30, 36], + [13, 19, 25, 31], + [19, 25, 31, 37], + [20, 26, 32, 38], + + [3, 11, 19, 27], + [2, 10, 18, 26], + [10, 18, 26, 34], + [1, 9, 17, 25], + [9, 17, 25, 33], + [17, 25, 33, 41], + [0, 8, 16, 24], + [8, 16, 24, 32], + [16, 24, 32, 40], + [7, 15, 23, 31], + [15, 23, 31, 39], + [14, 22, 30, 38], + ] + self.playerTurn = playerTurn + self.binary = self._binary() + self.id = self._convertStateToId() + self.allowedActions = self._allowedActions() + self.isEndGame = self._checkForEndGame() + self.value = self._getValue() + self.score = self._getScore() + + def _allowedActions(self): + allowed = [] + for i in range(len(self.board)): + if i >= len(self.board) - 7: + if self.board[i] == 0: + allowed.append(i) + else: + if self.board[i] == 0 and self.board[i + 7] != 0: + allowed.append(i) + + return allowed + + def _binary(self): + + currentplayer_position = np.zeros(len(self.board), dtype=np.int) + currentplayer_position[self.board == self.playerTurn] = 1 + + other_position = np.zeros(len(self.board), dtype=np.int) + other_position[self.board == -self.playerTurn] = 1 + + position = np.append(currentplayer_position, other_position) + + return (position) + + def _convertStateToId(self): + player1_position = np.zeros(len(self.board), dtype=np.int) + player1_position[self.board == 1] = 1 + + other_position = np.zeros(len(self.board), dtype=np.int) + other_position[self.board == -1] = 1 + + position = np.append(player1_position, other_position) + + id = ''.join(map(str, position)) + + return id + + def _checkForEndGame(self): + if np.count_nonzero(self.board) == 42: + return 1 + + for x, y, z, a in self.winners: + if (self.board[x] + self.board[y] + self.board[z] + self.board[a] == 4 * -self.playerTurn): + return 1 + return 0 + + def _getValue(self): + # This is the value of the state for the current player + # i.e. if the previous player played a winning move, you lose + for x, y, z, a in self.winners: + if (self.board[x] + self.board[y] + self.board[z] + self.board[a] == 4 * -self.playerTurn): + return (-1, -1, 1) + return (0, 0, 0) + + def _getScore(self): + tmp = self.value + return (tmp[1], tmp[2]) + + def takeAction(self, action): + newBoard = np.array(self.board) + newBoard[action] = self.playerTurn + + newState = GameState(newBoard, -self.playerTurn) + + value = 0 + done = 0 + + if newState.isEndGame: + value = newState.value[0] + done = 1 + + return (newState, value, done) + + def render(self, logger): + for r in range(6): + logger.info([self.pieces[str(x)] for x in self.board[7 * r: (7 * r + 7)]]) + logger.info('--------------') diff --git a/games/metasquares/game.py b/games/metasquares/game.py index 7418280f..e4c719ec 100644 --- a/games/metasquares/game.py +++ b/games/metasquares/game.py @@ -1,271 +1,266 @@ import numpy as np import logging -class Game: - - def __init__(self): - self.currentPlayer = 1 - self.gameState = GameState(np.array([0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], dtype=np.int), 1) - self.actionSpace = np.array([0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], dtype=np.int) - self.pieces = {'1':'X', '0': '-', '-1':'O'} - self.grid_shape = (5,5) - self.input_shape = (2,5,5) - self.name = 'metaSquares' - self.state_size = len(self.gameState.binary) - self.action_size = len(self.actionSpace) - - def reset(self): - self.gameState = GameState(np.array([0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], dtype=np.int), 1) - self.currentPlayer = 1 - return self.gameState - - def step(self, action): - next_state, value, done = self.gameState.takeAction(action) - self.gameState = next_state - self.currentPlayer = -self.currentPlayer - info = None - return ((next_state, value, done, info)) - - def identities(self, state, actionValues): - identities = [] - currentBoard = state.board - currentAV = actionValues - - for n in range(5): - currentBoard = np.array([ - currentBoard[20], currentBoard[15],currentBoard[10], currentBoard[5],currentBoard[0] - , currentBoard[21], currentBoard[16],currentBoard[11], currentBoard[6],currentBoard[1] - , currentBoard[22], currentBoard[17],currentBoard[12], currentBoard[7],currentBoard[2] - , currentBoard[23], currentBoard[18],currentBoard[13], currentBoard[8],currentBoard[3] - , currentBoard[24], currentBoard[19],currentBoard[14], currentBoard[9],currentBoard[4] - ]) - - currentAV = np.array([ - currentAV[20], currentAV[15],currentAV[10], currentAV[5],currentAV[0] - , currentAV[21], currentAV[16],currentAV[11], currentAV[6],currentAV[1] - , currentAV[22], currentAV[17],currentAV[12], currentAV[7],currentAV[2] - , currentAV[23], currentAV[18],currentAV[13], currentAV[8],currentAV[3] - , currentAV[24], currentAV[19],currentAV[14], currentAV[9],currentAV[4] - - ]) - - identities.append((GameState(currentBoard, state.playerTurn), currentAV)) - - currentBoard = np.array([ - currentBoard[4], currentBoard[3],currentBoard[2], currentBoard[1],currentBoard[0] - , currentBoard[9], currentBoard[8],currentBoard[7], currentBoard[6],currentBoard[5] - , currentBoard[14], currentBoard[13],currentBoard[12], currentBoard[11],currentBoard[10] - , currentBoard[19], currentBoard[18],currentBoard[17], currentBoard[16],currentBoard[15] - , currentBoard[24], currentBoard[23],currentBoard[22], currentBoard[21],currentBoard[20] - ]) - - currentAV = np.array([ - currentAV[4], currentAV[3],currentAV[2], currentAV[1],currentAV[0] - , currentAV[9], currentAV[8],currentAV[7], currentAV[6],currentAV[5] - , currentAV[14], currentAV[13],currentAV[12], currentAV[11],currentAV[10] - , currentAV[19], currentAV[18],currentAV[17], currentAV[16],currentAV[15] - , currentAV[24], currentAV[23],currentAV[22], currentAV[21],currentAV[20] - - ]) - - for n in range(5): - currentBoard = np.array([ - currentBoard[20], currentBoard[15],currentBoard[10], currentBoard[5],currentBoard[0] - , currentBoard[21], currentBoard[16],currentBoard[11], currentBoard[6],currentBoard[1] - , currentBoard[22], currentBoard[17],currentBoard[12], currentBoard[7],currentBoard[2] - , currentBoard[23], currentBoard[18],currentBoard[13], currentBoard[8],currentBoard[3] - , currentBoard[24], currentBoard[19],currentBoard[14], currentBoard[9],currentBoard[4] - ]) - - currentAV = np.array([ - currentAV[20], currentAV[15],currentAV[10], currentAV[5],currentAV[0] - , currentAV[21], currentAV[16],currentAV[11], currentAV[6],currentAV[1] - , currentAV[22], currentAV[17],currentAV[12], currentAV[7],currentAV[2] - , currentAV[23], currentAV[18],currentAV[13], currentAV[8],currentAV[3] - , currentAV[24], currentAV[19],currentAV[14], currentAV[9],currentAV[4] - - ]) - - identities.append((GameState(currentBoard, state.playerTurn), currentAV)) - - return identities +class Game: + def __init__(self): + self.currentPlayer = 1 + self.gameState = GameState( + np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=np.int), 1) + self.actionSpace = np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + dtype=np.int) + self.pieces = {'1': 'X', '0': '-', '-1': 'O'} + self.grid_shape = (5, 5) + self.input_shape = (2, 5, 5) + self.name = 'metaSquares' + self.state_size = len(self.gameState.binary) + self.action_size = len(self.actionSpace) + + def reset(self): + self.gameState = GameState( + np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=np.int), 1) + self.currentPlayer = 1 + return self.gameState + + def step(self, action): + next_state, value, done = self.gameState.takeAction(action) + self.gameState = next_state + self.currentPlayer = -self.currentPlayer + info = None + return ((next_state, value, done, info)) + + def identities(self, state, actionValues): + identities = [] + currentBoard = state.board + currentAV = actionValues + + for n in range(5): + currentBoard = np.array([ + currentBoard[20], currentBoard[15], currentBoard[10], currentBoard[5], currentBoard[0] + , currentBoard[21], currentBoard[16], currentBoard[11], currentBoard[6], currentBoard[1] + , currentBoard[22], currentBoard[17], currentBoard[12], currentBoard[7], currentBoard[2] + , currentBoard[23], currentBoard[18], currentBoard[13], currentBoard[8], currentBoard[3] + , currentBoard[24], currentBoard[19], currentBoard[14], currentBoard[9], currentBoard[4] + ]) + + currentAV = np.array([ + currentAV[20], currentAV[15], currentAV[10], currentAV[5], currentAV[0] + , currentAV[21], currentAV[16], currentAV[11], currentAV[6], currentAV[1] + , currentAV[22], currentAV[17], currentAV[12], currentAV[7], currentAV[2] + , currentAV[23], currentAV[18], currentAV[13], currentAV[8], currentAV[3] + , currentAV[24], currentAV[19], currentAV[14], currentAV[9], currentAV[4] + + ]) + + identities.append((GameState(currentBoard, state.playerTurn), currentAV)) + + currentBoard = np.array([ + currentBoard[4], currentBoard[3], currentBoard[2], currentBoard[1], currentBoard[0] + , currentBoard[9], currentBoard[8], currentBoard[7], currentBoard[6], currentBoard[5] + , currentBoard[14], currentBoard[13], currentBoard[12], currentBoard[11], currentBoard[10] + , currentBoard[19], currentBoard[18], currentBoard[17], currentBoard[16], currentBoard[15] + , currentBoard[24], currentBoard[23], currentBoard[22], currentBoard[21], currentBoard[20] + ]) + + currentAV = np.array([ + currentAV[4], currentAV[3], currentAV[2], currentAV[1], currentAV[0] + , currentAV[9], currentAV[8], currentAV[7], currentAV[6], currentAV[5] + , currentAV[14], currentAV[13], currentAV[12], currentAV[11], currentAV[10] + , currentAV[19], currentAV[18], currentAV[17], currentAV[16], currentAV[15] + , currentAV[24], currentAV[23], currentAV[22], currentAV[21], currentAV[20] + + ]) + + for n in range(5): + currentBoard = np.array([ + currentBoard[20], currentBoard[15], currentBoard[10], currentBoard[5], currentBoard[0] + , currentBoard[21], currentBoard[16], currentBoard[11], currentBoard[6], currentBoard[1] + , currentBoard[22], currentBoard[17], currentBoard[12], currentBoard[7], currentBoard[2] + , currentBoard[23], currentBoard[18], currentBoard[13], currentBoard[8], currentBoard[3] + , currentBoard[24], currentBoard[19], currentBoard[14], currentBoard[9], currentBoard[4] + ]) + + currentAV = np.array([ + currentAV[20], currentAV[15], currentAV[10], currentAV[5], currentAV[0] + , currentAV[21], currentAV[16], currentAV[11], currentAV[6], currentAV[1] + , currentAV[22], currentAV[17], currentAV[12], currentAV[7], currentAV[2] + , currentAV[23], currentAV[18], currentAV[13], currentAV[8], currentAV[3] + , currentAV[24], currentAV[19], currentAV[14], currentAV[9], currentAV[4] + + ]) + + identities.append((GameState(currentBoard, state.playerTurn), currentAV)) + + return identities class GameState(): - def __init__(self, board, playerTurn): - self.board = board - self.pieces = {'1':'X', '0': '-', '-1':'O'} - self.winners = [ - {'points': 1, 'tiles' : [ - [0,1,5,6] - ,[1,2,6,7] - ,[2,3,7,8] - ,[3,4,8,9] - ,[5,6,10,11] - ,[6,7,11,12] - ,[7,8,12,13] - ,[8,9,13,14] - ,[10,11,15,16] - ,[11,12,16,17] - ,[12,13,17,18] - ,[13,14,18,19] - ,[15,16,20,21] - ,[16,17,21,22] - ,[17,18,22,23] - ,[18,19,23,24] - ]}, - {'points': 2, 'tiles' : [ - [1,5,7,11] - ,[2,6,8,12] - ,[3,7,9,13] - ,[6,10,12,16] - ,[7,11,13,17] - ,[8,12,14,18] - ,[11,15,17,21] - ,[12,16,18,22] - ,[13,17,19,23] - ]}, - {'points': 4, 'tiles' : [ - [0,2,10,12] - ,[1,3,11,13] - ,[2,4,12,14] - ,[5,7,15,17] - ,[6,8,16,18] - ,[7,9,17,19] - ,[10,12,20,22] - ,[11,13,21,23] - ,[12,14,22,24] - ]}, - {'points': 5, 'tiles' : [ - [1,10,8,17] - ,[6,15,13,22] - ,[2,11,9,18] - ,[7,16,14,23] - ,[2,5,13,16] - ,[7,10,18,21] - ,[3,6,14,17] - ,[8,11,19,22] - ]}, - {'points': 8, 'tiles' : [ - [2,10,14,22] - ]}, - {'points': 9, 'tiles' : [ - [0,3,15,18] - ,[1,4,16,19] - ,[5,8,20,23] - ,[6,9,21,24] - ]}, - {'points': 10, 'tiles' : [ - [1,9,23,15] - ,[5,3,19,21] - ]}, - {'points': 16, 'tiles' : [ - [0,4,20,24] - ]}, - ] - self.playerTurn = playerTurn - self.binary = self._binary() - self.id = self._convertStateToId() - self.allowedActions = self._allowedActions() - self.isEndGame = self._checkForEndGame() - self.value = self._getValue() - self.score = self._getScore() - - def _allowedActions(self): - return np.where(self.board == 0)[0] - - def _binary(self): - - currentplayer_position = np.zeros(len(self.board), dtype=np.int) - currentplayer_position[self.board==self.playerTurn] = 1 - - other_position = np.zeros(len(self.board), dtype=np.int) - other_position[self.board==-self.playerTurn] = 1 - - position = np.append(currentplayer_position,other_position) - - return (position) - - def _convertStateToId(self): - player1_position = np.zeros(len(self.board), dtype=np.int) - player1_position[self.board==1] = 1 - - other_position = np.zeros(len(self.board), dtype=np.int) - other_position[self.board==-1] = 1 - - position = np.append(player1_position,other_position) - - id = ''.join(map(str,position)) - - return id - - - - - def _checkForEndGame(self): - if np.count_nonzero(self.board) == 24: - return 1 - return 0 - - def _getValue(self): - currentPlayerPoints = 0 - for squareType in self.winners: - points = squareType['points'] - for tiles in squareType['tiles']: - checkFlag = 0 - tilenum = 0 - while tilenum < 4 and checkFlag == 0: - if self.board[tiles[tilenum]] != self.playerTurn: - checkFlag = 1 - tilenum = tilenum + 1 - if checkFlag == 0: - currentPlayerPoints = currentPlayerPoints + points - - opponentPlayerPoints = 0 - for squareType in self.winners: - points = squareType['points'] - for tiles in squareType['tiles']: - checkFlag = 0 - tilenum = 0 - while tilenum < 4 and checkFlag == 0: - if self.board[tiles[tilenum]] != -self.playerTurn: - checkFlag = 1 - tilenum = tilenum + 1 - if checkFlag == 0: - opponentPlayerPoints = opponentPlayerPoints + points - - if currentPlayerPoints > opponentPlayerPoints: - return (1, currentPlayerPoints, opponentPlayerPoints) - elif currentPlayerPoints < opponentPlayerPoints: - return (-1, currentPlayerPoints, opponentPlayerPoints) - else: - return (0, currentPlayerPoints, opponentPlayerPoints) - - - def _getScore(self): - tmp = self.value - return (tmp[1], tmp[2]) - - def takeAction(self, action): - newBoard = np.array(self.board) - newBoard[action] = self.playerTurn - newState = GameState(newBoard, -self.playerTurn) - - value = 0 - done = 0 - - if newState.isEndGame: - value = newState.value[0] - done = 1 - - return (newState, value, done) - - def render(self, logger): - for r in range(5): - logger.info([self.pieces[str(x)] for x in self.board[5*r : (5*r + 5)]]) - logger.info('--------------') - - - + def __init__(self, board, playerTurn): + self.board = board + self.pieces = {'1': 'X', '0': '-', '-1': 'O'} + self.winners = [ + {'points': 1, 'tiles': [ + [0, 1, 5, 6] + , [1, 2, 6, 7] + , [2, 3, 7, 8] + , [3, 4, 8, 9] + , [5, 6, 10, 11] + , [6, 7, 11, 12] + , [7, 8, 12, 13] + , [8, 9, 13, 14] + , [10, 11, 15, 16] + , [11, 12, 16, 17] + , [12, 13, 17, 18] + , [13, 14, 18, 19] + , [15, 16, 20, 21] + , [16, 17, 21, 22] + , [17, 18, 22, 23] + , [18, 19, 23, 24] + ]}, + {'points': 2, 'tiles': [ + [1, 5, 7, 11] + , [2, 6, 8, 12] + , [3, 7, 9, 13] + , [6, 10, 12, 16] + , [7, 11, 13, 17] + , [8, 12, 14, 18] + , [11, 15, 17, 21] + , [12, 16, 18, 22] + , [13, 17, 19, 23] + ]}, + {'points': 4, 'tiles': [ + [0, 2, 10, 12] + , [1, 3, 11, 13] + , [2, 4, 12, 14] + , [5, 7, 15, 17] + , [6, 8, 16, 18] + , [7, 9, 17, 19] + , [10, 12, 20, 22] + , [11, 13, 21, 23] + , [12, 14, 22, 24] + ]}, + {'points': 5, 'tiles': [ + [1, 10, 8, 17] + , [6, 15, 13, 22] + , [2, 11, 9, 18] + , [7, 16, 14, 23] + , [2, 5, 13, 16] + , [7, 10, 18, 21] + , [3, 6, 14, 17] + , [8, 11, 19, 22] + ]}, + {'points': 8, 'tiles': [ + [2, 10, 14, 22] + ]}, + {'points': 9, 'tiles': [ + [0, 3, 15, 18] + , [1, 4, 16, 19] + , [5, 8, 20, 23] + , [6, 9, 21, 24] + ]}, + {'points': 10, 'tiles': [ + [1, 9, 23, 15] + , [5, 3, 19, 21] + ]}, + {'points': 16, 'tiles': [ + [0, 4, 20, 24] + ]}, + ] + self.playerTurn = playerTurn + self.binary = self._binary() + self.id = self._convertStateToId() + self.allowedActions = self._allowedActions() + self.isEndGame = self._checkForEndGame() + self.value = self._getValue() + self.score = self._getScore() + + def _allowedActions(self): + return np.where(self.board == 0)[0] + + def _binary(self): + + currentplayer_position = np.zeros(len(self.board), dtype=np.int) + currentplayer_position[self.board == self.playerTurn] = 1 + + other_position = np.zeros(len(self.board), dtype=np.int) + other_position[self.board == -self.playerTurn] = 1 + + position = np.append(currentplayer_position, other_position) + + return (position) + + def _convertStateToId(self): + player1_position = np.zeros(len(self.board), dtype=np.int) + player1_position[self.board == 1] = 1 + + other_position = np.zeros(len(self.board), dtype=np.int) + other_position[self.board == -1] = 1 + + position = np.append(player1_position, other_position) + + id = ''.join(map(str, position)) + + return id + + def _checkForEndGame(self): + if np.count_nonzero(self.board) == 24: + return 1 + return 0 + + def _getValue(self): + currentPlayerPoints = 0 + for squareType in self.winners: + points = squareType['points'] + for tiles in squareType['tiles']: + checkFlag = 0 + tilenum = 0 + while tilenum < 4 and checkFlag == 0: + if self.board[tiles[tilenum]] != self.playerTurn: + checkFlag = 1 + tilenum = tilenum + 1 + if checkFlag == 0: + currentPlayerPoints = currentPlayerPoints + points + + opponentPlayerPoints = 0 + for squareType in self.winners: + points = squareType['points'] + for tiles in squareType['tiles']: + checkFlag = 0 + tilenum = 0 + while tilenum < 4 and checkFlag == 0: + if self.board[tiles[tilenum]] != -self.playerTurn: + checkFlag = 1 + tilenum = tilenum + 1 + if checkFlag == 0: + opponentPlayerPoints = opponentPlayerPoints + points + + if currentPlayerPoints > opponentPlayerPoints: + return (1, currentPlayerPoints, opponentPlayerPoints) + elif currentPlayerPoints < opponentPlayerPoints: + return (-1, currentPlayerPoints, opponentPlayerPoints) + else: + return (0, currentPlayerPoints, opponentPlayerPoints) + + def _getScore(self): + tmp = self.value + return (tmp[1], tmp[2]) + + def takeAction(self, action): + newBoard = np.array(self.board) + newBoard[action] = self.playerTurn + newState = GameState(newBoard, -self.playerTurn) + + value = 0 + done = 0 + + if newState.isEndGame: + value = newState.value[0] + done = 1 + + return (newState, value, done) + + def render(self, logger): + for r in range(5): + logger.info([self.pieces[str(x)] for x in self.board[5 * r: (5 * r + 5)]]) + logger.info('--------------') diff --git a/initialise.py b/initialise.py index 88126380..28feb7e1 100644 --- a/initialise.py +++ b/initialise.py @@ -1,3 +1,3 @@ INITIAL_RUN_NUMBER = None INITIAL_MODEL_VERSION = None -INITIAL_MEMORY_VERSION = None +INITIAL_MEMORY_VERSION = None diff --git a/loggers.py b/loggers.py index 901cc3d8..18f7c481 100644 --- a/loggers.py +++ b/loggers.py @@ -1,4 +1,3 @@ - from utils import setup_logger from settings import run_folder @@ -6,12 +5,11 @@ ### WARNING: the mcts log file gets big quite quickly LOGGER_DISABLED = { -'main':False -, 'memory':False -, 'tourney':False -, 'mcts':False -, 'model': False} - + 'main': False + , 'memory': False + , 'tourney': False + , 'mcts': False + , 'model': False} logger_mcts = setup_logger('logger_mcts', run_folder + 'logs/logger_mcts.log') logger_mcts.disabled = LOGGER_DISABLED['mcts'] @@ -27,4 +25,3 @@ logger_model = setup_logger('logger_model', run_folder + 'logs/logger_model.log') logger_model.disabled = LOGGER_DISABLED['model'] - \ No newline at end of file diff --git a/loss.py b/loss.py index 44035040..cef1f3c1 100644 --- a/loss.py +++ b/loss.py @@ -1,19 +1,16 @@ - import tensorflow as tf -def softmax_cross_entropy_with_logits(y_true, y_pred): - - p = y_pred - pi = y_true - zero = tf.zeros(shape = tf.shape(pi), dtype=tf.float32) - where = tf.equal(pi, zero) - - negatives = tf.fill(tf.shape(pi), -100.0) - p = tf.where(where, negatives, p) +def softmax_cross_entropy_with_logits(y_true, y_pred): + p = y_pred + pi = y_true - loss = tf.nn.softmax_cross_entropy_with_logits(labels = pi, logits = p) + zero = tf.zeros(shape=tf.shape(pi), dtype=tf.float32) + where = tf.equal(pi, zero) - return loss + negatives = tf.fill(tf.shape(pi), -100.0) + p = tf.where(where, negatives, p) + loss = tf.nn.softmax_cross_entropy_with_logits(labels=pi, logits=p) + return loss diff --git a/main.py b/main.py index 3a498d7a..bed8c668 100644 --- a/main.py +++ b/main.py @@ -2,13 +2,13 @@ # %matplotlib inline import numpy as np + np.set_printoptions(suppress=True) from shutil import copyfile import random from importlib import reload - from keras.utils import plot_model from game import Game, GameState @@ -23,7 +23,6 @@ import initialise import pickle - lg.logger_main.info('=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*') lg.logger_main.info('=*=*=*=*=*=. NEW LOG =*=*=*=*=*') lg.logger_main.info('=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*') @@ -32,7 +31,8 @@ # If loading an existing neural network, copy the config file to root if initialise.INITIAL_RUN_NUMBER != None: - copyfile(run_archive_folder + env.name + '/run' + str(initialise.INITIAL_RUN_NUMBER).zfill(4) + '/config.py', './config.py') + copyfile(run_archive_folder + env.name + '/run' + str(initialise.INITIAL_RUN_NUMBER).zfill(4) + '/config.py', + './config.py') import config @@ -42,29 +42,33 @@ memory = Memory(config.MEMORY_SIZE) else: print('LOADING MEMORY VERSION ' + str(initialise.INITIAL_MEMORY_VERSION) + '...') - memory = pickle.load( open( run_archive_folder + env.name + '/run' + str(initialise.INITIAL_RUN_NUMBER).zfill(4) + "/memory/memory" + str(initialise.INITIAL_MEMORY_VERSION).zfill(4) + ".p", "rb" ) ) + memory = pickle.load(open( + run_archive_folder + env.name + '/run' + str(initialise.INITIAL_RUN_NUMBER).zfill(4) + "/memory/memory" + str( + initialise.INITIAL_MEMORY_VERSION).zfill(4) + ".p", "rb")) ######## LOAD MODEL IF NECESSARY ######## # create an untrained neural network objects from the config file -current_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, (2,) + env.grid_shape, env.action_size, config.HIDDEN_CNN_LAYERS) -best_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, (2,) + env.grid_shape, env.action_size, config.HIDDEN_CNN_LAYERS) +current_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, (2,) + env.grid_shape, env.action_size, + config.HIDDEN_CNN_LAYERS) +best_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, (2,) + env.grid_shape, env.action_size, + config.HIDDEN_CNN_LAYERS) -#If loading an existing neural netwrok, set the weights from that model +# If loading an existing neural netwrok, set the weights from that model if initialise.INITIAL_MODEL_VERSION != None: - best_player_version = initialise.INITIAL_MODEL_VERSION + best_player_version = initialise.INITIAL_MODEL_VERSION print('LOADING MODEL VERSION ' + str(initialise.INITIAL_MODEL_VERSION) + '...') m_tmp = best_NN.read(env.name, initialise.INITIAL_RUN_NUMBER, best_player_version) current_NN.model.set_weights(m_tmp.get_weights()) best_NN.model.set_weights(m_tmp.get_weights()) -#otherwise just ensure the weights on the two players are the same +# otherwise just ensure the weights on the two players are the same else: best_player_version = 0 best_NN.model.set_weights(current_NN.model.get_weights()) -#copy the config file to the run folder +# copy the config file to the run folder copyfile('./config.py', run_folder + 'config.py') -plot_model(current_NN.model, to_file=run_folder + 'models/model.png', show_shapes = True) +plot_model(current_NN.model, to_file=run_folder + 'models/model.png', show_shapes=True) print('\n') @@ -72,7 +76,7 @@ current_player = Agent('current_player', env.state_size, env.action_size, config.MCTS_SIMS, config.CPUCT, current_NN) best_player = Agent('best_player', env.state_size, env.action_size, config.MCTS_SIMS, config.CPUCT, best_NN) -#user_player = User('player1', env.state_size, env.action_size) +# user_player = User('player1', env.state_size, env.action_size) iteration = 0 while 1: @@ -80,19 +84,20 @@ iteration += 1 reload(lg) reload(config) - + print('ITERATION NUMBER ' + str(iteration)) - + lg.logger_main.info('BEST PLAYER VERSION: %d', best_player_version) print('BEST PLAYER VERSION ' + str(best_player_version)) ######## SELF PLAY ######## print('SELF PLAYING ' + str(config.EPISODES) + ' EPISODES...') - _, memory, _, _ = playMatches(best_player, best_player, config.EPISODES, lg.logger_main, turns_until_tau0 = config.TURNS_UNTIL_TAU0, memory = memory) + _, memory, _, _ = playMatches(best_player, best_player, config.EPISODES, lg.logger_main, + turns_until_tau0=config.TURNS_UNTIL_TAU0, memory=memory) print('\n') - + memory.clear_stmemory() - + if len(memory.ltmemory) >= config.MEMORY_SIZE: ######## RETRAINING ######## @@ -101,14 +106,14 @@ print('') if iteration % 5 == 0: - pickle.dump( memory, open( run_folder + "memory/memory" + str(iteration).zfill(4) + ".p", "wb" ) ) + pickle.dump(memory, open(run_folder + "memory/memory" + str(iteration).zfill(4) + ".p", "wb")) lg.logger_memory.info('====================') lg.logger_memory.info('NEW MEMORIES') lg.logger_memory.info('====================') - + memory_samp = random.sample(memory.ltmemory, min(1000, len(memory.ltmemory))) - + for s in memory_samp: current_value, current_probs, _ = current_player.get_preds(s['state']) best_value, best_probs, _ = best_player.get_preds(s['state']) @@ -116,22 +121,23 @@ lg.logger_memory.info('MCTS VALUE FOR %s: %f', s['playerTurn'], s['value']) lg.logger_memory.info('CUR PRED VALUE FOR %s: %f', s['playerTurn'], current_value) lg.logger_memory.info('BES PRED VALUE FOR %s: %f', s['playerTurn'], best_value) - lg.logger_memory.info('THE MCTS ACTION VALUES: %s', ['%.2f' % elem for elem in s['AV']] ) - lg.logger_memory.info('CUR PRED ACTION VALUES: %s', ['%.2f' % elem for elem in current_probs]) - lg.logger_memory.info('BES PRED ACTION VALUES: %s', ['%.2f' % elem for elem in best_probs]) + lg.logger_memory.info('THE MCTS ACTION VALUES: %s', ['%.2f' % elem for elem in s['AV']]) + lg.logger_memory.info('CUR PRED ACTION VALUES: %s', ['%.2f' % elem for elem in current_probs]) + lg.logger_memory.info('BES PRED ACTION VALUES: %s', ['%.2f' % elem for elem in best_probs]) lg.logger_memory.info('ID: %s', s['state'].id) lg.logger_memory.info('INPUT TO MODEL: %s', current_player.model.convertToModelInput(s['state'])) s['state'].render(lg.logger_memory) - + ######## TOURNAMENT ######## print('TOURNAMENT...') - scores, _, points, sp_scores = playMatches(best_player, current_player, config.EVAL_EPISODES, lg.logger_tourney, turns_until_tau0 = 0, memory = None) + scores, _, points, sp_scores = playMatches(best_player, current_player, config.EVAL_EPISODES, lg.logger_tourney, + turns_until_tau0=0, memory=None) print('\nSCORES') print(scores) print('\nSTARTING PLAYER / NON-STARTING PLAYER SCORES') print(sp_scores) - #print(points) + # print(points) print('\n\n') diff --git a/memory.py b/memory.py index 856e0f53..5905f704 100644 --- a/memory.py +++ b/memory.py @@ -3,27 +3,27 @@ import config + class Memory: - def __init__(self, MEMORY_SIZE): - self.MEMORY_SIZE = config.MEMORY_SIZE - self.ltmemory = deque(maxlen=config.MEMORY_SIZE) - self.stmemory = deque(maxlen=config.MEMORY_SIZE) + def __init__(self, MEMORY_SIZE): + self.MEMORY_SIZE = config.MEMORY_SIZE + self.ltmemory = deque(maxlen=config.MEMORY_SIZE) + self.stmemory = deque(maxlen=config.MEMORY_SIZE) - def commit_stmemory(self, identities, state, actionValues): - for r in identities(state, actionValues): - self.stmemory.append({ - 'board': r[0].board - , 'state': r[0] - , 'id': r[0].id - , 'AV': r[1] - , 'playerTurn': r[0].playerTurn - }) + def commit_stmemory(self, identities, state, actionValues): + for r in identities(state, actionValues): + self.stmemory.append({ + 'board': r[0].board + , 'state': r[0] + , 'id': r[0].id + , 'AV': r[1] + , 'playerTurn': r[0].playerTurn + }) - def commit_ltmemory(self): - for i in self.stmemory: - self.ltmemory.append(i) - self.clear_stmemory() + def commit_ltmemory(self): + for i in self.stmemory: + self.ltmemory.append(i) + self.clear_stmemory() - def clear_stmemory(self): - self.stmemory = deque(maxlen=config.MEMORY_SIZE) - \ No newline at end of file + def clear_stmemory(self): + self.stmemory = deque(maxlen=config.MEMORY_SIZE) diff --git a/model.py b/model.py index a98b27e8..d98bcd79 100644 --- a/model.py +++ b/model.py @@ -19,227 +19,229 @@ from settings import run_folder, run_archive_folder + class Gen_Model(): - def __init__(self, reg_const, learning_rate, input_dim, output_dim): - self.reg_const = reg_const - self.learning_rate = learning_rate - self.input_dim = input_dim - self.output_dim = output_dim - - def predict(self, x): - return self.model.predict(x) - - def fit(self, states, targets, epochs, verbose, validation_split, batch_size): - return self.model.fit(states, targets, epochs=epochs, verbose=verbose, validation_split = validation_split, batch_size = batch_size) - - def write(self, game, version): - self.model.save(run_folder + 'models/version' + "{0:0>4}".format(version) + '.h5') - - def read(self, game, run_number, version): - return load_model( run_archive_folder + game + '/run' + str(run_number).zfill(4) + "/models/version" + "{0:0>4}".format(version) + '.h5', custom_objects={'softmax_cross_entropy_with_logits': softmax_cross_entropy_with_logits}) - - def printWeightAverages(self): - layers = self.model.layers - for i, l in enumerate(layers): - try: - x = l.get_weights()[0] - lg.logger_model.info('WEIGHT LAYER %d: ABSAV = %f, SD =%f, ABSMAX =%f, ABSMIN =%f', i, np.mean(np.abs(x)), np.std(x), np.max(np.abs(x)), np.min(np.abs(x))) - except: - pass - lg.logger_model.info('------------------') - for i, l in enumerate(layers): - try: - x = l.get_weights()[1] - lg.logger_model.info('BIAS LAYER %d: ABSAV = %f, SD =%f, ABSMAX =%f, ABSMIN =%f', i, np.mean(np.abs(x)), np.std(x), np.max(np.abs(x)), np.min(np.abs(x))) - except: - pass - lg.logger_model.info('******************') - - - def viewLayers(self): - layers = self.model.layers - for i, l in enumerate(layers): - x = l.get_weights() - print('LAYER ' + str(i)) - - try: - weights = x[0] - s = weights.shape - - fig = plt.figure(figsize=(s[2], s[3])) # width, height in inches - channel = 0 - filter = 0 - for i in range(s[2] * s[3]): - - sub = fig.add_subplot(s[3], s[2], i + 1) - sub.imshow(weights[:,:,channel,filter], cmap='coolwarm', clim=(-1, 1),aspect="auto") - channel = (channel + 1) % s[2] - filter = (filter + 1) % s[3] - - except: - - try: - fig = plt.figure(figsize=(3, len(x))) # width, height in inches - for i in range(len(x)): - sub = fig.add_subplot(len(x), 1, i + 1) - if i == 0: - clim = (0,2) - else: - clim = (0, 2) - sub.imshow([x[i]], cmap='coolwarm', clim=clim,aspect="auto") - - plt.show() - - except: - try: - fig = plt.figure(figsize=(3, 3)) # width, height in inches - sub = fig.add_subplot(1, 1, 1) - sub.imshow(x[0], cmap='coolwarm', clim=(-1, 1),aspect="auto") - - plt.show() - - except: - pass - - plt.show() - - lg.logger_model.info('------------------') + def __init__(self, reg_const, learning_rate, input_dim, output_dim): + self.reg_const = reg_const + self.learning_rate = learning_rate + self.input_dim = input_dim + self.output_dim = output_dim + + def predict(self, x): + return self.model.predict(x) + + def fit(self, states, targets, epochs, verbose, validation_split, batch_size): + return self.model.fit(states, targets, epochs=epochs, verbose=verbose, validation_split=validation_split, + batch_size=batch_size) + + def write(self, game, version): + self.model.save(run_folder + 'models/version' + "{0:0>4}".format(version) + '.h5') + + def read(self, game, run_number, version): + return load_model( + run_archive_folder + game + '/run' + str(run_number).zfill(4) + "/models/version" + "{0:0>4}".format( + version) + '.h5', + custom_objects={'softmax_cross_entropy_with_logits': softmax_cross_entropy_with_logits}) + + def printWeightAverages(self): + layers = self.model.layers + for i, l in enumerate(layers): + try: + x = l.get_weights()[0] + lg.logger_model.info('WEIGHT LAYER %d: ABSAV = %f, SD =%f, ABSMAX =%f, ABSMIN =%f', i, + np.mean(np.abs(x)), np.std(x), np.max(np.abs(x)), np.min(np.abs(x))) + except: + pass + lg.logger_model.info('------------------') + for i, l in enumerate(layers): + try: + x = l.get_weights()[1] + lg.logger_model.info('BIAS LAYER %d: ABSAV = %f, SD =%f, ABSMAX =%f, ABSMIN =%f', i, np.mean(np.abs(x)), + np.std(x), np.max(np.abs(x)), np.min(np.abs(x))) + except: + pass + lg.logger_model.info('******************') + + def viewLayers(self): + layers = self.model.layers + for i, l in enumerate(layers): + x = l.get_weights() + print('LAYER ' + str(i)) + + try: + weights = x[0] + s = weights.shape + + fig = plt.figure(figsize=(s[2], s[3])) # width, height in inches + channel = 0 + filter = 0 + for i in range(s[2] * s[3]): + sub = fig.add_subplot(s[3], s[2], i + 1) + sub.imshow(weights[:, :, channel, filter], cmap='coolwarm', clim=(-1, 1), aspect="auto") + channel = (channel + 1) % s[2] + filter = (filter + 1) % s[3] + + except: + + try: + fig = plt.figure(figsize=(3, len(x))) # width, height in inches + for i in range(len(x)): + sub = fig.add_subplot(len(x), 1, i + 1) + if i == 0: + clim = (0, 2) + else: + clim = (0, 2) + sub.imshow([x[i]], cmap='coolwarm', clim=clim, aspect="auto") + + plt.show() + + except: + try: + fig = plt.figure(figsize=(3, 3)) # width, height in inches + sub = fig.add_subplot(1, 1, 1) + sub.imshow(x[0], cmap='coolwarm', clim=(-1, 1), aspect="auto") + + plt.show() + + except: + pass + + plt.show() + + lg.logger_model.info('------------------') class Residual_CNN(Gen_Model): - def __init__(self, reg_const, learning_rate, input_dim, output_dim, hidden_layers): - Gen_Model.__init__(self, reg_const, learning_rate, input_dim, output_dim) - self.hidden_layers = hidden_layers - self.num_layers = len(hidden_layers) - self.model = self._build_model() - - def residual_layer(self, input_block, filters, kernel_size): - - x = self.conv_layer(input_block, filters, kernel_size) - - x = Conv2D( - filters = filters - , kernel_size = kernel_size - , data_format="channels_first" - , padding = 'same' - , use_bias=False - , activation='linear' - , kernel_regularizer = regularizers.l2(self.reg_const) - )(x) - - x = BatchNormalization(axis=1)(x) - - x = add([input_block, x]) - - x = LeakyReLU()(x) + def __init__(self, reg_const, learning_rate, input_dim, output_dim, hidden_layers): + Gen_Model.__init__(self, reg_const, learning_rate, input_dim, output_dim) + self.hidden_layers = hidden_layers + self.num_layers = len(hidden_layers) + self.model = self._build_model() + + def residual_layer(self, input_block, filters, kernel_size): + + x = self.conv_layer(input_block, filters, kernel_size) - return (x) + x = Conv2D( + filters=filters + , kernel_size=kernel_size + , data_format="channels_first" + , padding='same' + , use_bias=False + , activation='linear' + , kernel_regularizer=regularizers.l2(self.reg_const) + )(x) - def conv_layer(self, x, filters, kernel_size): + x = BatchNormalization(axis=1)(x) - x = Conv2D( - filters = filters - , kernel_size = kernel_size - , data_format="channels_first" - , padding = 'same' - , use_bias=False - , activation='linear' - , kernel_regularizer = regularizers.l2(self.reg_const) - )(x) + x = add([input_block, x]) - x = BatchNormalization(axis=1)(x) - x = LeakyReLU()(x) + x = LeakyReLU()(x) - return (x) + return (x) - def value_head(self, x): + def conv_layer(self, x, filters, kernel_size): - x = Conv2D( - filters = 1 - , kernel_size = (1,1) - , data_format="channels_first" - , padding = 'same' - , use_bias=False - , activation='linear' - , kernel_regularizer = regularizers.l2(self.reg_const) - )(x) + x = Conv2D( + filters=filters + , kernel_size=kernel_size + , data_format="channels_first" + , padding='same' + , use_bias=False + , activation='linear' + , kernel_regularizer=regularizers.l2(self.reg_const) + )(x) + x = BatchNormalization(axis=1)(x) + x = LeakyReLU()(x) - x = BatchNormalization(axis=1)(x) - x = LeakyReLU()(x) + return (x) - x = Flatten()(x) + def value_head(self, x): - x = Dense( - 20 - , use_bias=False - , activation='linear' - , kernel_regularizer=regularizers.l2(self.reg_const) - )(x) + x = Conv2D( + filters=1 + , kernel_size=(1, 1) + , data_format="channels_first" + , padding='same' + , use_bias=False + , activation='linear' + , kernel_regularizer=regularizers.l2(self.reg_const) + )(x) - x = LeakyReLU()(x) + x = BatchNormalization(axis=1)(x) + x = LeakyReLU()(x) - x = Dense( - 1 - , use_bias=False - , activation='tanh' - , kernel_regularizer=regularizers.l2(self.reg_const) - , name = 'value_head' - )(x) + x = Flatten()(x) + x = Dense( + 20 + , use_bias=False + , activation='linear' + , kernel_regularizer=regularizers.l2(self.reg_const) + )(x) + x = LeakyReLU()(x) - return (x) + x = Dense( + 1 + , use_bias=False + , activation='tanh' + , kernel_regularizer=regularizers.l2(self.reg_const) + , name='value_head' + )(x) - def policy_head(self, x): + return (x) - x = Conv2D( - filters = 2 - , kernel_size = (1,1) - , data_format="channels_first" - , padding = 'same' - , use_bias=False - , activation='linear' - , kernel_regularizer = regularizers.l2(self.reg_const) - )(x) + def policy_head(self, x): - x = BatchNormalization(axis=1)(x) - x = LeakyReLU()(x) + x = Conv2D( + filters=2 + , kernel_size=(1, 1) + , data_format="channels_first" + , padding='same' + , use_bias=False + , activation='linear' + , kernel_regularizer=regularizers.l2(self.reg_const) + )(x) - x = Flatten()(x) + x = BatchNormalization(axis=1)(x) + x = LeakyReLU()(x) - x = Dense( - self.output_dim - , use_bias=False - , activation='linear' - , kernel_regularizer=regularizers.l2(self.reg_const) - , name = 'policy_head' - )(x) + x = Flatten()(x) - return (x) + x = Dense( + self.output_dim + , use_bias=False + , activation='linear' + , kernel_regularizer=regularizers.l2(self.reg_const) + , name='policy_head' + )(x) - def _build_model(self): + return (x) - main_input = Input(shape = self.input_dim, name = 'main_input') + def _build_model(self): - x = self.conv_layer(main_input, self.hidden_layers[0]['filters'], self.hidden_layers[0]['kernel_size']) + main_input = Input(shape=self.input_dim, name='main_input') - if len(self.hidden_layers) > 1: - for h in self.hidden_layers[1:]: - x = self.residual_layer(x, h['filters'], h['kernel_size']) + x = self.conv_layer(main_input, self.hidden_layers[0]['filters'], self.hidden_layers[0]['kernel_size']) - vh = self.value_head(x) - ph = self.policy_head(x) + if len(self.hidden_layers) > 1: + for h in self.hidden_layers[1:]: + x = self.residual_layer(x, h['filters'], h['kernel_size']) - model = Model(inputs=[main_input], outputs=[vh, ph]) - model.compile(loss={'value_head': 'mean_squared_error', 'policy_head': softmax_cross_entropy_with_logits}, - optimizer=SGD(lr=self.learning_rate, momentum = config.MOMENTUM), - loss_weights={'value_head': 0.5, 'policy_head': 0.5} - ) + vh = self.value_head(x) + ph = self.policy_head(x) - return model + model = Model(inputs=[main_input], outputs=[vh, ph]) + model.compile(loss={'value_head': 'mean_squared_error', 'policy_head': softmax_cross_entropy_with_logits}, + optimizer=SGD(lr=self.learning_rate, momentum=config.MOMENTUM), + loss_weights={'value_head': 0.5, 'policy_head': 0.5} + ) - def convertToModelInput(self, state): - inputToModel = state.binary #np.append(state.binary, [(state.playerTurn + 1)/2] * self.input_dim[1] * self.input_dim[2]) - inputToModel = np.reshape(inputToModel, self.input_dim) - return (inputToModel) + return model + + def convertToModelInput(self, state): + inputToModel = state.binary # np.append(state.binary, [(state.playerTurn + 1)/2] * self.input_dim[1] * self.input_dim[2]) + inputToModel = np.reshape(inputToModel, self.input_dim) + return (inputToModel) diff --git a/run.ipynb b/run.ipynb index d8f42f46..68e47482 100644 --- a/run.ipynb +++ b/run.ipynb @@ -90,7 +90,6 @@ { "ename": "KeyboardInterrupt", "evalue": "", - "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", @@ -102,7 +101,8 @@ "\u001b[0;32m~/Git/ADSP/Public/AppliedDataSciencePartners/DeepReinforcementLearning/app/game.py\u001b[0m in \u001b[0;36mtakeAction\u001b[0;34m(self, action)\u001b[0m\n\u001b[1;32m 209\u001b[0m \u001b[0mnewBoard\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0maction\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mplayerTurn\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 210\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 211\u001b[0;31m \u001b[0mnewState\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mGameState\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnewBoard\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m-\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mplayerTurn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 212\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 213\u001b[0m \u001b[0mvalue\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/Git/ADSP/Public/AppliedDataSciencePartners/DeepReinforcementLearning/app/game.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, board, playerTurn)\u001b[0m\n\u001b[1;32m 121\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;36m20\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m26\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m32\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m38\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 122\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 123\u001b[0;31m \u001b[0;34m[\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m11\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m19\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m27\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 124\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m10\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m18\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m26\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 125\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;36m10\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m18\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m26\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m34\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mKeyboardInterrupt\u001b[0m: " - ] + ], + "output_type": "error" } ], "source": [ @@ -258,7 +258,6 @@ { "ename": "KeyboardInterrupt", "evalue": "", - "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", @@ -279,7 +278,8 @@ "\u001b[0;32m~/.virtualenvs/py3_deepreinforcement/lib/python3.6/site-packages/tensorflow/python/client/session.py\u001b[0m in \u001b[0;36m_do_call\u001b[0;34m(self, fn, *args)\u001b[0m\n\u001b[1;32m 1359\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_do_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfn\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1360\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1361\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mfn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1362\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0merrors\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mOpError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1363\u001b[0m \u001b[0mmessage\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcompat\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mas_text\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0me\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmessage\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/.virtualenvs/py3_deepreinforcement/lib/python3.6/site-packages/tensorflow/python/client/session.py\u001b[0m in \u001b[0;36m_run_fn\u001b[0;34m(session, feed_dict, fetch_list, target_list, options, run_metadata)\u001b[0m\n\u001b[1;32m 1338\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1339\u001b[0m return tf_session.TF_Run(session, options, feed_dict, fetch_list,\n\u001b[0;32m-> 1340\u001b[0;31m target_list, status, run_metadata)\n\u001b[0m\u001b[1;32m 1341\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1342\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_prun_fn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msession\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhandle\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfeed_dict\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfetch_list\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mKeyboardInterrupt\u001b[0m: " - ] + ], + "output_type": "error" } ], "source": [ @@ -306,7 +306,6 @@ { "ename": "ValueError", "evalue": "cannot reshape array of size 84 into shape (2,5,5)", - "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", @@ -316,7 +315,8 @@ "\u001b[0;32m~/.virtualenvs/py3_deepreinforcement/lib/python3.6/site-packages/numpy/core/fromnumeric.py\u001b[0m in \u001b[0;36mreshape\u001b[0;34m(a, newshape, order)\u001b[0m\n\u001b[1;32m 255\u001b[0m [5, 6]])\n\u001b[1;32m 256\u001b[0m \"\"\"\n\u001b[0;32m--> 257\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0m_wrapfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'reshape'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnewshape\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0morder\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0morder\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 258\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 259\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/.virtualenvs/py3_deepreinforcement/lib/python3.6/site-packages/numpy/core/fromnumeric.py\u001b[0m in \u001b[0;36m_wrapfunc\u001b[0;34m(obj, method, *args, **kwds)\u001b[0m\n\u001b[1;32m 50\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_wrapfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mobj\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmethod\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 51\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 52\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mobj\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmethod\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 53\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 54\u001b[0m \u001b[0;31m# An AttributeError occurs if the object does not have\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mValueError\u001b[0m: cannot reshape array of size 84 into shape (2,5,5)" - ] + ], + "output_type": "error" } ], "source": [ @@ -372,7 +372,6 @@ { "ename": "KeyboardInterrupt", "evalue": "", - "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", @@ -403,7 +402,8 @@ "\u001b[0;32m~/.virtualenvs/py3_deepreinforcement/lib/python3.6/site-packages/matplotlib/ticker.py\u001b[0m in \u001b[0;36mset_locs\u001b[0;34m(self, locs)\u001b[0m\n\u001b[1;32m 664\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_compute_offset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 665\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_set_orderOfMagnitude\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0md\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 666\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_set_format\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvmin\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvmax\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 667\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 668\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_compute_offset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/.virtualenvs/py3_deepreinforcement/lib/python3.6/site-packages/matplotlib/ticker.py\u001b[0m in \u001b[0;36m_set_format\u001b[0;34m(self, vmin, vmax)\u001b[0m\n\u001b[1;32m 741\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 742\u001b[0m \u001b[0m_locs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlocs\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 743\u001b[0;31m \u001b[0mlocs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0masarray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m_locs\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0moffset\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m/\u001b[0m \u001b[0;36m10.\u001b[0m \u001b[0;34m**\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0morderOfMagnitude\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 744\u001b[0m \u001b[0mloc_range\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mptp\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlocs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 745\u001b[0m \u001b[0;31m# Curvilinear coordinates can yield two identical points.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mKeyboardInterrupt\u001b[0m: " - ] + ], + "output_type": "error" } ], "source": [ diff --git a/run/config.py b/run/config.py index dd2c4c85..17075c0b 100644 --- a/run/config.py +++ b/run/config.py @@ -2,12 +2,11 @@ EPISODES = 30 MCTS_SIMS = 50 MEMORY_SIZE = 30000 -TURNS_UNTIL_TAU0 = 10 # turn on which it starts playing deterministically +TURNS_UNTIL_TAU0 = 10 # turn on which it starts playing deterministically CPUCT = 1 EPSILON = 0.2 ALPHA = 0.8 - #### RETRAINING BATCH_SIZE = 256 EPOCHS = 1 @@ -17,14 +16,14 @@ TRAINING_LOOPS = 10 HIDDEN_CNN_LAYERS = [ - {'filters':75, 'kernel_size': (4,4)} - , {'filters':75, 'kernel_size': (4,4)} - , {'filters':75, 'kernel_size': (4,4)} - , {'filters':75, 'kernel_size': (4,4)} - , {'filters':75, 'kernel_size': (4,4)} - , {'filters':75, 'kernel_size': (4,4)} - ] + {'filters': 75, 'kernel_size': (4, 4)} + , {'filters': 75, 'kernel_size': (4, 4)} + , {'filters': 75, 'kernel_size': (4, 4)} + , {'filters': 75, 'kernel_size': (4, 4)} + , {'filters': 75, 'kernel_size': (4, 4)} + , {'filters': 75, 'kernel_size': (4, 4)} +] #### EVALUATION EVAL_EPISODES = 20 -SCORING_THRESHOLD = 1.3 \ No newline at end of file +SCORING_THRESHOLD = 1.3 diff --git a/settings.py b/settings.py index 73c70aa3..1a94a43d 100644 --- a/settings.py +++ b/settings.py @@ -1,2 +1,2 @@ run_folder = './run/' -run_archive_folder = './run_archive/' \ No newline at end of file +run_archive_folder = './run_archive/' diff --git a/utils.py b/utils.py index 67c54919..737b040e 100644 --- a/utils.py +++ b/utils.py @@ -1,11 +1,10 @@ - import logging -def setup_logger(name, log_file, level=logging.INFO): +def setup_logger(name, log_file, level=logging.INFO): formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s') - handler = logging.FileHandler(log_file) + handler = logging.FileHandler(log_file) handler.setFormatter(formatter) logger = logging.getLogger(name)