From ab38d70ab324caa5d5dd60d055c07df4d9f48526 Mon Sep 17 00:00:00 2001
From: Joseph Greene <jgreenemi@gmail.com>
Date: Fri, 26 Oct 2018 23:21:00 -0700
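Subject: [PATCH] Fix .gitignore entries that weren't being parsed. Fix
 inconsistent indenting and whitespace.

The .gitignore entries had been committed with stray diff-style "-"
prefixes, so they did not match the intended paths; strip the prefixes
and add the JetBrains .idea/ directory. In the Python sources, replace
tab indentation with four spaces, normalize spacing around operators
and commas, and add missing end-of-file newlines.
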
---
 .gitignore                | 205 +++++++--------
 MCTS.py                   | 186 +++++++-------
 agent.py                  | 297 +++++++++++-----------
 config.py                 |  19 +-
 funcs.py                  |  74 +++---
 game.py                   | 445 ++++++++++++++++----------------
 games/connect4/game.py    | 448 ++++++++++++++++----------------
 games/metasquares/game.py | 521 +++++++++++++++++++-------------------
 initialise.py             |   2 +-
 loggers.py                |  13 +-
 loss.py                   |  21 +-
 main.py                   |  58 +++--
 memory.py                 |  40 +--
 model.py                  | 382 ++++++++++++++--------------
 run.ipynb                 |  16 +-
 run/config.py             |  19 +-
 settings.py               |   2 +-
 utils.py                  |   5 +-
 18 files changed, 1378 insertions(+), 1375 deletions(-)

diff --git a/.gitignore b/.gitignore
index 2236a276..27c8aef4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,104 +1,107 @@
--# Byte-compiled / optimized / DLL files
- -__pycache__/
- -*.py[cod]
- -*$py.class
- -
- -# C extensions
- -*.so
- -
- -# Distribution / packaging
- -.Python
- -env/
- -build/
- -develop-eggs/
- -dist/
- -downloads/
- -eggs/
- -.eggs/
- -lib/
- -lib64/
- -parts/
- -sdist/
- -var/
- -wheels/
- -*.egg-info/
- -.installed.cfg
- -*.egg
- -
- -# PyInstaller
- -#  Usually these files are written by a python script from a template
- -#  before PyInstaller builds the exe, so as to inject date/other infos into it.
- -*.manifest
- -*.spec
- -
- -# Installer logs
- -pip-log.txt
- -pip-delete-this-directory.txt
- -
- -# Unit test / coverage reports
- -htmlcov/
- -.tox/
- -.coverage
- -.coverage.*
- -.cache
- -nosetests.xml
- -coverage.xml
- -*.cover
- -.hypothesis/
- -
- -# Translations
- -*.mo
- -*.pot
- -
- -# Django stuff:
- -*.log
- -local_settings.py
- -
- -# Flask stuff:
- -instance/
- -.webassets-cache
- -
- -# Scrapy stuff:
- -.scrapy
- -
- -# Sphinx documentation
- -docs/_build/
- -
- -# PyBuilder
- -target/
- -
- -# Jupyter Notebook
- -.ipynb_checkpoints
- -
- -# pyenv
- -.python-version
- -
- -# celery beat schedule file
- -celerybeat-schedule
- -
- -# SageMath parsed files
- -*.sage.py
- -
- -# dotenv
- -.env
- -
- -# virtualenv
- -.venv
- -venv/
- -ENV/
- -
- -# Spyder project settings
- -.spyderproject
- -.spyproject
- -
- -# Rope project settings
- -.ropeproject
- -
- -# mkdocs documentation
- -/site
- -
- -# mypy
- -.mypy_cache/
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+env/
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+.hypothesis/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# pyenv
+.python-version
+
+# celery beat schedule file
+celerybeat-schedule
+
+# SageMath parsed files
+*.sage.py
+
+# dotenv
+.env
+
+# virtualenv
+.venv
+venv/
+ENV/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+
+# Jetbrains preferences dir
+.idea/
 
 
 analysis/
diff --git a/MCTS.py b/MCTS.py
index 76b18dac..1a2ccfd4 100644
--- a/MCTS.py
+++ b/MCTS.py
@@ -5,130 +5,132 @@
 from utils import setup_logger
 import loggers as lg
 
+
 class Node():
 
-	def __init__(self, state):
-		self.state = state
-		self.playerTurn = state.playerTurn
-		self.id = state.id
-		self.edges = []
+    def __init__(self, state):
+        self.state = state
+        self.playerTurn = state.playerTurn
+        self.id = state.id
+        self.edges = []
+
+    def isLeaf(self):
+        if len(self.edges) > 0:
+            return False
+        else:
+            return True
 
-	def isLeaf(self):
-		if len(self.edges) > 0:
-			return False
-		else:
-			return True
 
 class Edge():
 
-	def __init__(self, inNode, outNode, prior, action):
-		self.id = inNode.state.id + '|' + outNode.state.id
-		self.inNode = inNode
-		self.outNode = outNode
-		self.playerTurn = inNode.state.playerTurn
-		self.action = action
-
-		self.stats =  {
-					'N': 0,
-					'W': 0,
-					'Q': 0,
-					'P': prior,
-				}
-				
+    def __init__(self, inNode, outNode, prior, action):
+        self.id = inNode.state.id + '|' + outNode.state.id
+        self.inNode = inNode
+        self.outNode = outNode
+        self.playerTurn = inNode.state.playerTurn
+        self.action = action
+
+        self.stats = {
+            'N': 0,
+            'W': 0,
+            'Q': 0,
+            'P': prior,
+        }
+
 
 class MCTS():
 
-	def __init__(self, root, cpuct):
-		self.root = root
-		self.tree = {}
-		self.cpuct = cpuct
-		self.addNode(root)
-	
-	def __len__(self):
-		return len(self.tree)
+    def __init__(self, root, cpuct):
+        self.root = root
+        self.tree = {}
+        self.cpuct = cpuct
+        self.addNode(root)
 
-	def moveToLeaf(self):
+    def __len__(self):
+        return len(self.tree)
 
-		lg.logger_mcts.info('------MOVING TO LEAF------')
+    def moveToLeaf(self):
 
-		breadcrumbs = []
-		currentNode = self.root
+        lg.logger_mcts.info('------MOVING TO LEAF------')
 
-		done = 0
-		value = 0
+        breadcrumbs = []
+        currentNode = self.root
 
-		while not currentNode.isLeaf():
+        done = 0
+        value = 0
 
-			lg.logger_mcts.info('PLAYER TURN...%d', currentNode.state.playerTurn)
-		
-			maxQU = -99999
+        while not currentNode.isLeaf():
 
-			if currentNode == self.root:
-				epsilon = config.EPSILON
-				nu = np.random.dirichlet([config.ALPHA] * len(currentNode.edges))
-			else:
-				epsilon = 0
-				nu = [0] * len(currentNode.edges)
+            lg.logger_mcts.info('PLAYER TURN...%d', currentNode.state.playerTurn)
 
-			Nb = 0
-			for action, edge in currentNode.edges:
-				Nb = Nb + edge.stats['N']
+            maxQU = -99999
 
-			for idx, (action, edge) in enumerate(currentNode.edges):
+            if currentNode == self.root:
+                epsilon = config.EPSILON
+                nu = np.random.dirichlet([config.ALPHA] * len(currentNode.edges))
+            else:
+                epsilon = 0
+                nu = [0] * len(currentNode.edges)
 
-				U = self.cpuct * \
-					((1-epsilon) * edge.stats['P'] + epsilon * nu[idx] )  * \
-					np.sqrt(Nb) / (1 + edge.stats['N'])
-					
-				Q = edge.stats['Q']
+            Nb = 0
+            for action, edge in currentNode.edges:
+                Nb = Nb + edge.stats['N']
 
-				lg.logger_mcts.info('action: %d (%d)... N = %d, P = %f, nu = %f, adjP = %f, W = %f, Q = %f, U = %f, Q+U = %f'
-					, action, action % 7, edge.stats['N'], np.round(edge.stats['P'],6), np.round(nu[idx],6), ((1-epsilon) * edge.stats['P'] + epsilon * nu[idx] )
-					, np.round(edge.stats['W'],6), np.round(Q,6), np.round(U,6), np.round(Q+U,6))
+            for idx, (action, edge) in enumerate(currentNode.edges):
 
-				if Q + U > maxQU:
-					maxQU = Q + U
-					simulationAction = action
-					simulationEdge = edge
+                U = self.cpuct * \
+                    ((1 - epsilon) * edge.stats['P'] + epsilon * nu[idx]) * \
+                    np.sqrt(Nb) / (1 + edge.stats['N'])
 
-			lg.logger_mcts.info('action with highest Q + U...%d', simulationAction)
+                Q = edge.stats['Q']
 
-			newState, value, done = currentNode.state.takeAction(simulationAction) #the value of the newState from the POV of the new playerTurn
-			currentNode = simulationEdge.outNode
-			breadcrumbs.append(simulationEdge)
+                lg.logger_mcts.info(
+                    'action: %d (%d)... N = %d, P = %f, nu = %f, adjP = %f, W = %f, Q = %f, U = %f, Q+U = %f'
+                    , action, action % 7, edge.stats['N'], np.round(edge.stats['P'], 6), np.round(nu[idx], 6),
+                    ((1 - epsilon) * edge.stats['P'] + epsilon * nu[idx])
+                    , np.round(edge.stats['W'], 6), np.round(Q, 6), np.round(U, 6), np.round(Q + U, 6))
 
-		lg.logger_mcts.info('DONE...%d', done)
+                if Q + U > maxQU:
+                    maxQU = Q + U
+                    simulationAction = action
+                    simulationEdge = edge
 
-		return currentNode, value, done, breadcrumbs
+            lg.logger_mcts.info('action with highest Q + U...%d', simulationAction)
 
+            newState, value, done = currentNode.state.takeAction(
+                simulationAction)  # the value of the newState from the POV of the new playerTurn
+            currentNode = simulationEdge.outNode
+            breadcrumbs.append(simulationEdge)
 
+        lg.logger_mcts.info('DONE...%d', done)
 
-	def backFill(self, leaf, value, breadcrumbs):
-		lg.logger_mcts.info('------DOING BACKFILL------')
+        return currentNode, value, done, breadcrumbs
 
-		currentPlayer = leaf.state.playerTurn
+    def backFill(self, leaf, value, breadcrumbs):
+        lg.logger_mcts.info('------DOING BACKFILL------')
 
-		for edge in breadcrumbs:
-			playerTurn = edge.playerTurn
-			if playerTurn == currentPlayer:
-				direction = 1
-			else:
-				direction = -1
+        currentPlayer = leaf.state.playerTurn
 
-			edge.stats['N'] = edge.stats['N'] + 1
-			edge.stats['W'] = edge.stats['W'] + value * direction
-			edge.stats['Q'] = edge.stats['W'] / edge.stats['N']
+        for edge in breadcrumbs:
+            playerTurn = edge.playerTurn
+            if playerTurn == currentPlayer:
+                direction = 1
+            else:
+                direction = -1
 
-			lg.logger_mcts.info('updating edge with value %f for player %d... N = %d, W = %f, Q = %f'
-				, value * direction
-				, playerTurn
-				, edge.stats['N']
-				, edge.stats['W']
-				, edge.stats['Q']
-				)
+            edge.stats['N'] = edge.stats['N'] + 1
+            edge.stats['W'] = edge.stats['W'] + value * direction
+            edge.stats['Q'] = edge.stats['W'] / edge.stats['N']
 
-			edge.outNode.state.render(lg.logger_mcts)
+            lg.logger_mcts.info('updating edge with value %f for player %d... N = %d, W = %f, Q = %f'
+                                , value * direction
+                                , playerTurn
+                                , edge.stats['N']
+                                , edge.stats['W']
+                                , edge.stats['Q']
+                                )
 
-	def addNode(self, node):
-		self.tree[node.id] = node
+            edge.outNode.state.render(lg.logger_mcts)
 
+    def addNode(self, node):
+        self.tree[node.id] = node
diff --git a/agent.py b/agent.py
index a6e5d01a..df57e001 100644
--- a/agent.py
+++ b/agent.py
@@ -17,212 +17,205 @@
 
 
 class User():
-	def __init__(self, name, state_size, action_size):
-		self.name = name
-		self.state_size = state_size
-		self.action_size = action_size
-
-	def act(self, state, tau):
-		action = input('Enter your chosen action: ')
-		pi = np.zeros(self.action_size)
-		pi[action] = 1
-		value = None
-		NN_value = None
-		return (action, pi, value, NN_value)
+    def __init__(self, name, state_size, action_size):
+        self.name = name
+        self.state_size = state_size
+        self.action_size = action_size
 
+    def act(self, state, tau):
+        action = input('Enter your chosen action: ')
+        pi = np.zeros(self.action_size)
+        pi[action] = 1
+        value = None
+        NN_value = None
+        return (action, pi, value, NN_value)
 
 
 class Agent():
-	def __init__(self, name, state_size, action_size, mcts_simulations, cpuct, model):
-		self.name = name
-
-		self.state_size = state_size
-		self.action_size = action_size
-
-		self.cpuct = cpuct
+    def __init__(self, name, state_size, action_size, mcts_simulations, cpuct, model):
+        self.name = name
 
-		self.MCTSsimulations = mcts_simulations
-		self.model = model
+        self.state_size = state_size
+        self.action_size = action_size
 
-		self.mcts = None
+        self.cpuct = cpuct
 
-		self.train_overall_loss = []
-		self.train_value_loss = []
-		self.train_policy_loss = []
-		self.val_overall_loss = []
-		self.val_value_loss = []
-		self.val_policy_loss = []
+        self.MCTSsimulations = mcts_simulations
+        self.model = model
 
-	
-	def simulate(self):
+        self.mcts = None
 
-		lg.logger_mcts.info('ROOT NODE...%s', self.mcts.root.state.id)
-		self.mcts.root.state.render(lg.logger_mcts)
-		lg.logger_mcts.info('CURRENT PLAYER...%d', self.mcts.root.state.playerTurn)
+        self.train_overall_loss = []
+        self.train_value_loss = []
+        self.train_policy_loss = []
+        self.val_overall_loss = []
+        self.val_value_loss = []
+        self.val_policy_loss = []
 
-		##### MOVE THE LEAF NODE
-		leaf, value, done, breadcrumbs = self.mcts.moveToLeaf()
-		leaf.state.render(lg.logger_mcts)
+    def simulate(self):
 
-		##### EVALUATE THE LEAF NODE
-		value, breadcrumbs = self.evaluateLeaf(leaf, value, done, breadcrumbs)
+        lg.logger_mcts.info('ROOT NODE...%s', self.mcts.root.state.id)
+        self.mcts.root.state.render(lg.logger_mcts)
+        lg.logger_mcts.info('CURRENT PLAYER...%d', self.mcts.root.state.playerTurn)
 
-		##### BACKFILL THE VALUE THROUGH THE TREE
-		self.mcts.backFill(leaf, value, breadcrumbs)
+        ##### MOVE THE LEAF NODE
+        leaf, value, done, breadcrumbs = self.mcts.moveToLeaf()
+        leaf.state.render(lg.logger_mcts)
 
+        ##### EVALUATE THE LEAF NODE
+        value, breadcrumbs = self.evaluateLeaf(leaf, value, done, breadcrumbs)
 
-	def act(self, state, tau):
+        ##### BACKFILL THE VALUE THROUGH THE TREE
+        self.mcts.backFill(leaf, value, breadcrumbs)
 
-		if self.mcts == None or state.id not in self.mcts.tree:
-			self.buildMCTS(state)
-		else:
-			self.changeRootMCTS(state)
+    def act(self, state, tau):
 
-		#### run the simulation
-		for sim in range(self.MCTSsimulations):
-			lg.logger_mcts.info('***************************')
-			lg.logger_mcts.info('****** SIMULATION %d ******', sim + 1)
-			lg.logger_mcts.info('***************************')
-			self.simulate()
+        if self.mcts == None or state.id not in self.mcts.tree:
+            self.buildMCTS(state)
+        else:
+            self.changeRootMCTS(state)
 
-		#### get action values
-		pi, values = self.getAV(1)
+        #### run the simulation
+        for sim in range(self.MCTSsimulations):
+            lg.logger_mcts.info('***************************')
+            lg.logger_mcts.info('****** SIMULATION %d ******', sim + 1)
+            lg.logger_mcts.info('***************************')
+            self.simulate()
 
-		####pick the action
-		action, value = self.chooseAction(pi, values, tau)
+        #### get action values
+        pi, values = self.getAV(1)
 
-		nextState, _, _ = state.takeAction(action)
+        ####pick the action
+        action, value = self.chooseAction(pi, values, tau)
 
-		NN_value = -self.get_preds(nextState)[0]
+        nextState, _, _ = state.takeAction(action)
 
-		lg.logger_mcts.info('ACTION VALUES...%s', pi)
-		lg.logger_mcts.info('CHOSEN ACTION...%d', action)
-		lg.logger_mcts.info('MCTS PERCEIVED VALUE...%f', value)
-		lg.logger_mcts.info('NN PERCEIVED VALUE...%f', NN_value)
+        NN_value = -self.get_preds(nextState)[0]
 
-		return (action, pi, value, NN_value)
+        lg.logger_mcts.info('ACTION VALUES...%s', pi)
+        lg.logger_mcts.info('CHOSEN ACTION...%d', action)
+        lg.logger_mcts.info('MCTS PERCEIVED VALUE...%f', value)
+        lg.logger_mcts.info('NN PERCEIVED VALUE...%f', NN_value)
 
+        return (action, pi, value, NN_value)
 
-	def get_preds(self, state):
-		#predict the leaf
-		inputToModel = np.array([self.model.convertToModelInput(state)])
+    def get_preds(self, state):
+        # predict the leaf
+        inputToModel = np.array([self.model.convertToModelInput(state)])
 
-		preds = self.model.predict(inputToModel)
-		value_array = preds[0]
-		logits_array = preds[1]
-		value = value_array[0]
+        preds = self.model.predict(inputToModel)
+        value_array = preds[0]
+        logits_array = preds[1]
+        value = value_array[0]
 
-		logits = logits_array[0]
+        logits = logits_array[0]
 
-		allowedActions = state.allowedActions
+        allowedActions = state.allowedActions
 
-		mask = np.ones(logits.shape,dtype=bool)
-		mask[allowedActions] = False
-		logits[mask] = -100
+        mask = np.ones(logits.shape, dtype=bool)
+        mask[allowedActions] = False
+        logits[mask] = -100
 
-		#SOFTMAX
-		odds = np.exp(logits)
-		probs = odds / np.sum(odds) ###put this just before the for?
+        # SOFTMAX
+        odds = np.exp(logits)
+        probs = odds / np.sum(odds)  ###put this just before the for?
 
-		return ((value, probs, allowedActions))
+        return ((value, probs, allowedActions))
 
+    def evaluateLeaf(self, leaf, value, done, breadcrumbs):
 
-	def evaluateLeaf(self, leaf, value, done, breadcrumbs):
+        lg.logger_mcts.info('------EVALUATING LEAF------')
 
-		lg.logger_mcts.info('------EVALUATING LEAF------')
+        if done == 0:
 
-		if done == 0:
-	
-			value, probs, allowedActions = self.get_preds(leaf.state)
-			lg.logger_mcts.info('PREDICTED VALUE FOR %d: %f', leaf.state.playerTurn, value)
+            value, probs, allowedActions = self.get_preds(leaf.state)
+            lg.logger_mcts.info('PREDICTED VALUE FOR %d: %f', leaf.state.playerTurn, value)
 
-			probs = probs[allowedActions]
+            probs = probs[allowedActions]
 
-			for idx, action in enumerate(allowedActions):
-				newState, _, _ = leaf.state.takeAction(action)
-				if newState.id not in self.mcts.tree:
-					node = mc.Node(newState)
-					self.mcts.addNode(node)
-					lg.logger_mcts.info('added node...%s...p = %f', node.id, probs[idx])
-				else:
-					node = self.mcts.tree[newState.id]
-					lg.logger_mcts.info('existing node...%s...', node.id)
+            for idx, action in enumerate(allowedActions):
+                newState, _, _ = leaf.state.takeAction(action)
+                if newState.id not in self.mcts.tree:
+                    node = mc.Node(newState)
+                    self.mcts.addNode(node)
+                    lg.logger_mcts.info('added node...%s...p = %f', node.id, probs[idx])
+                else:
+                    node = self.mcts.tree[newState.id]
+                    lg.logger_mcts.info('existing node...%s...', node.id)
 
-				newEdge = mc.Edge(leaf, node, probs[idx], action)
-				leaf.edges.append((action, newEdge))
-				
-		else:
-			lg.logger_mcts.info('GAME VALUE FOR %d: %f', leaf.playerTurn, value)
+                newEdge = mc.Edge(leaf, node, probs[idx], action)
+                leaf.edges.append((action, newEdge))
 
-		return ((value, breadcrumbs))
+        else:
+            lg.logger_mcts.info('GAME VALUE FOR %d: %f', leaf.playerTurn, value)
 
+        return ((value, breadcrumbs))
 
-		
-	def getAV(self, tau):
-		edges = self.mcts.root.edges
-		pi = np.zeros(self.action_size, dtype=np.integer)
-		values = np.zeros(self.action_size, dtype=np.float32)
-		
-		for action, edge in edges:
-			pi[action] = pow(edge.stats['N'], 1/tau)
-			values[action] = edge.stats['Q']
+    def getAV(self, tau):
+        edges = self.mcts.root.edges
+        pi = np.zeros(self.action_size, dtype=np.integer)
+        values = np.zeros(self.action_size, dtype=np.float32)
 
-		pi = pi / (np.sum(pi) * 1.0)
-		return pi, values
+        for action, edge in edges:
+            pi[action] = pow(edge.stats['N'], 1 / tau)
+            values[action] = edge.stats['Q']
 
-	def chooseAction(self, pi, values, tau):
-		if tau == 0:
-			actions = np.argwhere(pi == max(pi))
-			action = random.choice(actions)[0]
-		else:
-			action_idx = np.random.multinomial(1, pi)
-			action = np.where(action_idx==1)[0][0]
+        pi = pi / (np.sum(pi) * 1.0)
+        return pi, values
 
-		value = values[action]
+    def chooseAction(self, pi, values, tau):
+        if tau == 0:
+            actions = np.argwhere(pi == max(pi))
+            action = random.choice(actions)[0]
+        else:
+            action_idx = np.random.multinomial(1, pi)
+            action = np.where(action_idx == 1)[0][0]
 
-		return action, value
+        value = values[action]
 
-	def replay(self, ltmemory):
-		lg.logger_mcts.info('******RETRAINING MODEL******')
+        return action, value
 
+    def replay(self, ltmemory):
+        lg.logger_mcts.info('******RETRAINING MODEL******')
 
-		for i in range(config.TRAINING_LOOPS):
-			minibatch = random.sample(ltmemory, min(config.BATCH_SIZE, len(ltmemory)))
+        for i in range(config.TRAINING_LOOPS):
+            minibatch = random.sample(ltmemory, min(config.BATCH_SIZE, len(ltmemory)))
 
-			training_states = np.array([self.model.convertToModelInput(row['state']) for row in minibatch])
-			training_targets = {'value_head': np.array([row['value'] for row in minibatch])
-								, 'policy_head': np.array([row['AV'] for row in minibatch])} 
+            training_states = np.array([self.model.convertToModelInput(row['state']) for row in minibatch])
+            training_targets = {'value_head': np.array([row['value'] for row in minibatch])
+                , 'policy_head': np.array([row['AV'] for row in minibatch])}
 
-			fit = self.model.fit(training_states, training_targets, epochs=config.EPOCHS, verbose=1, validation_split=0, batch_size = 32)
-			lg.logger_mcts.info('NEW LOSS %s', fit.history)
+            fit = self.model.fit(training_states, training_targets, epochs=config.EPOCHS, verbose=1, validation_split=0,
+                                 batch_size=32)
+            lg.logger_mcts.info('NEW LOSS %s', fit.history)
 
-			self.train_overall_loss.append(round(fit.history['loss'][config.EPOCHS - 1],4))
-			self.train_value_loss.append(round(fit.history['value_head_loss'][config.EPOCHS - 1],4)) 
-			self.train_policy_loss.append(round(fit.history['policy_head_loss'][config.EPOCHS - 1],4)) 
+            self.train_overall_loss.append(round(fit.history['loss'][config.EPOCHS - 1], 4))
+            self.train_value_loss.append(round(fit.history['value_head_loss'][config.EPOCHS - 1], 4))
+            self.train_policy_loss.append(round(fit.history['policy_head_loss'][config.EPOCHS - 1], 4))
 
-		plt.plot(self.train_overall_loss, 'k')
-		plt.plot(self.train_value_loss, 'k:')
-		plt.plot(self.train_policy_loss, 'k--')
+        plt.plot(self.train_overall_loss, 'k')
+        plt.plot(self.train_value_loss, 'k:')
+        plt.plot(self.train_policy_loss, 'k--')
 
-		plt.legend(['train_overall_loss', 'train_value_loss', 'train_policy_loss'], loc='lower left')
+        plt.legend(['train_overall_loss', 'train_value_loss', 'train_policy_loss'], loc='lower left')
 
-		display.clear_output(wait=True)
-		display.display(pl.gcf())
-		pl.gcf().clear()
-		time.sleep(1.0)
+        display.clear_output(wait=True)
+        display.display(pl.gcf())
+        pl.gcf().clear()
+        time.sleep(1.0)
 
-		print('\n')
-		self.model.printWeightAverages()
+        print('\n')
+        self.model.printWeightAverages()
 
-	def predict(self, inputToModel):
-		preds = self.model.predict(inputToModel)
-		return preds
+    def predict(self, inputToModel):
+        preds = self.model.predict(inputToModel)
+        return preds
 
-	def buildMCTS(self, state):
-		lg.logger_mcts.info('****** BUILDING NEW MCTS TREE FOR AGENT %s ******', self.name)
-		self.root = mc.Node(state)
-		self.mcts = mc.MCTS(self.root, self.cpuct)
+    def buildMCTS(self, state):
+        lg.logger_mcts.info('****** BUILDING NEW MCTS TREE FOR AGENT %s ******', self.name)
+        self.root = mc.Node(state)
+        self.mcts = mc.MCTS(self.root, self.cpuct)
 
-	def changeRootMCTS(self, state):
-		lg.logger_mcts.info('****** CHANGING ROOT OF MCTS TREE TO %s FOR AGENT %s ******', state.id, self.name)
-		self.mcts.root = self.mcts.tree[state.id]
\ No newline at end of file
+    def changeRootMCTS(self, state):
+        lg.logger_mcts.info('****** CHANGING ROOT OF MCTS TREE TO %s FOR AGENT %s ******', state.id, self.name)
+        self.mcts.root = self.mcts.tree[state.id]
diff --git a/config.py b/config.py
index dd2c4c85..17075c0b 100644
--- a/config.py
+++ b/config.py
@@ -2,12 +2,11 @@
 EPISODES = 30
 MCTS_SIMS = 50
 MEMORY_SIZE = 30000
-TURNS_UNTIL_TAU0 = 10 # turn on which it starts playing deterministically
+TURNS_UNTIL_TAU0 = 10  # turn on which it starts playing deterministically
 CPUCT = 1
 EPSILON = 0.2
 ALPHA = 0.8
 
-
 #### RETRAINING
 BATCH_SIZE = 256
 EPOCHS = 1
@@ -17,14 +16,14 @@
 TRAINING_LOOPS = 10
 
 HIDDEN_CNN_LAYERS = [
-	{'filters':75, 'kernel_size': (4,4)}
-	 , {'filters':75, 'kernel_size': (4,4)}
-	 , {'filters':75, 'kernel_size': (4,4)}
-	 , {'filters':75, 'kernel_size': (4,4)}
-	 , {'filters':75, 'kernel_size': (4,4)}
-	 , {'filters':75, 'kernel_size': (4,4)}
-	]
+    {'filters': 75, 'kernel_size': (4, 4)}
+    , {'filters': 75, 'kernel_size': (4, 4)}
+    , {'filters': 75, 'kernel_size': (4, 4)}
+    , {'filters': 75, 'kernel_size': (4, 4)}
+    , {'filters': 75, 'kernel_size': (4, 4)}
+    , {'filters': 75, 'kernel_size': (4, 4)}
+]
 
 #### EVALUATION
 EVAL_EPISODES = 20
-SCORING_THRESHOLD = 1.3
\ No newline at end of file
+SCORING_THRESHOLD = 1.3
diff --git a/funcs.py b/funcs.py
index ecb82bff..d46bec12 100644
--- a/funcs.py
+++ b/funcs.py
@@ -10,69 +10,72 @@
 
 import config
 
-def playMatchesBetweenVersions(env, run_version, player1version, player2version, EPISODES, logger, turns_until_tau0, goes_first = 0):
-    
+
+def playMatchesBetweenVersions(env, run_version, player1version, player2version, EPISODES, logger, turns_until_tau0,
+                               goes_first=0):
     if player1version == -1:
         player1 = User('player1', env.state_size, env.action_size)
     else:
-        player1_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, env.input_shape,   env.action_size, config.HIDDEN_CNN_LAYERS)
+        player1_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, env.input_shape, env.action_size,
+                                  config.HIDDEN_CNN_LAYERS)
 
         if player1version > 0:
             player1_network = player1_NN.read(env.name, run_version, player1version)
-            player1_NN.model.set_weights(player1_network.get_weights())   
+            player1_NN.model.set_weights(player1_network.get_weights())
         player1 = Agent('player1', env.state_size, env.action_size, config.MCTS_SIMS, config.CPUCT, player1_NN)
 
     if player2version == -1:
         player2 = User('player2', env.state_size, env.action_size)
     else:
-        player2_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, env.input_shape,   env.action_size, config.HIDDEN_CNN_LAYERS)
-        
+        player2_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, env.input_shape, env.action_size,
+                                  config.HIDDEN_CNN_LAYERS)
+
         if player2version > 0:
             player2_network = player2_NN.read(env.name, run_version, player2version)
             player2_NN.model.set_weights(player2_network.get_weights())
         player2 = Agent('player2', env.state_size, env.action_size, config.MCTS_SIMS, config.CPUCT, player2_NN)
 
-    scores, memory, points, sp_scores = playMatches(player1, player2, EPISODES, logger, turns_until_tau0, None, goes_first)
+    scores, memory, points, sp_scores = playMatches(player1, player2, EPISODES, logger, turns_until_tau0, None,
+                                                    goes_first)
 
     return (scores, memory, points, sp_scores)
 
 
-def playMatches(player1, player2, EPISODES, logger, turns_until_tau0, memory = None, goes_first = 0):
-
+def playMatches(player1, player2, EPISODES, logger, turns_until_tau0, memory=None, goes_first=0):
     env = Game()
-    scores = {player1.name:0, "drawn": 0, player2.name:0}
-    sp_scores = {'sp':0, "drawn": 0, 'nsp':0}
-    points = {player1.name:[], player2.name:[]}
+    scores = {player1.name: 0, "drawn": 0, player2.name: 0}
+    sp_scores = {'sp': 0, "drawn": 0, 'nsp': 0}
+    points = {player1.name: [], player2.name: []}
 
     for e in range(EPISODES):
 
         logger.info('====================')
-        logger.info('EPISODE %d OF %d', e+1, EPISODES)
+        logger.info('EPISODE %d OF %d', e + 1, EPISODES)
         logger.info('====================')
 
-        print (str(e+1) + ' ', end='')
+        print(str(e + 1) + ' ', end='')
 
         state = env.reset()
-        
+
         done = 0
         turn = 0
         player1.mcts = None
         player2.mcts = None
 
         if goes_first == 0:
-            player1Starts = random.randint(0,1) * 2 - 1
+            player1Starts = random.randint(0, 1) * 2 - 1
         else:
             player1Starts = goes_first
 
         if player1Starts == 1:
-            players = {1:{"agent": player1, "name":player1.name}
-                    , -1: {"agent": player2, "name":player2.name}
-                    }
+            players = {1: {"agent": player1, "name": player1.name}
+                , -1: {"agent": player2, "name": player2.name}
+                       }
             logger.info(player1.name + ' plays as X')
         else:
-            players = {1:{"agent": player2, "name":player2.name}
-                    , -1: {"agent": player1, "name":player1.name}
-                    }
+            players = {1: {"agent": player2, "name": player2.name}
+                , -1: {"agent": player1, "name": player1.name}
+                       }
             logger.info(player2.name + ' plays as X')
             logger.info('--------------')
 
@@ -80,7 +83,7 @@ def playMatches(player1, player2, EPISODES, logger, turns_until_tau0, memory = N
 
         while done == 0:
             turn = turn + 1
-    
+
             #### Run the MCTS algo and return an action
             if turn < turns_until_tau0:
                 action, pi, MCTS_value, NN_value = players[state.playerTurn]['agent'].act(state, 1)
@@ -91,20 +94,21 @@ def playMatches(player1, player2, EPISODES, logger, turns_until_tau0, memory = N
                 ####Commit the move to memory
                 memory.commit_stmemory(env.identities, state, pi)
 
-
             logger.info('action: %d', action)
             for r in range(env.grid_shape[0]):
-                logger.info(['----' if x == 0 else '{0:.2f}'.format(np.round(x,2)) for x in pi[env.grid_shape[1]*r : (env.grid_shape[1]*r + env.grid_shape[1])]])
-            logger.info('MCTS perceived value for %s: %f', state.pieces[str(state.playerTurn)] ,np.round(MCTS_value,2))
-            logger.info('NN perceived value for %s: %f', state.pieces[str(state.playerTurn)] ,np.round(NN_value,2))
+                logger.info(['----' if x == 0 else '{0:.2f}'.format(np.round(x, 2)) for x in
+                             pi[env.grid_shape[1] * r: (env.grid_shape[1] * r + env.grid_shape[1])]])
+            logger.info('MCTS perceived value for %s: %f', state.pieces[str(state.playerTurn)], np.round(MCTS_value, 2))
+            logger.info('NN perceived value for %s: %f', state.pieces[str(state.playerTurn)], np.round(NN_value, 2))
             logger.info('====================')
 
             ### Do the action
-            state, value, done, _ = env.step(action) #the value of the newState from the POV of the new playerTurn i.e. -1 if the previous player played a winning move
-            
+            state, value, done, _ = env.step(
+                action)  # the value of the newState from the POV of the new playerTurn i.e. -1 if the previous player played a winning move
+
             env.gameState.render(logger)
 
-            if done == 1: 
+            if done == 1:
                 if memory != None:
                     #### If the game is finished, assign the values correctly to the game moves
                     for move in memory.stmemory:
@@ -112,13 +116,13 @@ def playMatches(player1, player2, EPISODES, logger, turns_until_tau0, memory = N
                             move['value'] = value
                         else:
                             move['value'] = -value
-                         
+
                     memory.commit_ltmemory()
-             
+
                 if value == 1:
                     logger.info('%s WINS!', players[state.playerTurn]['name'])
                     scores[players[state.playerTurn]['name']] = scores[players[state.playerTurn]['name']] + 1
-                    if state.playerTurn == 1: 
+                    if state.playerTurn == 1:
                         sp_scores['sp'] = sp_scores['sp'] + 1
                     else:
                         sp_scores['nsp'] = sp_scores['nsp'] + 1
@@ -126,8 +130,8 @@ def playMatches(player1, player2, EPISODES, logger, turns_until_tau0, memory = N
                 elif value == -1:
                     logger.info('%s WINS!', players[-state.playerTurn]['name'])
                     scores[players[-state.playerTurn]['name']] = scores[players[-state.playerTurn]['name']] + 1
-               
-                    if state.playerTurn == 1: 
+
+                    if state.playerTurn == 1:
                         sp_scores['nsp'] = sp_scores['nsp'] + 1
                     else:
                         sp_scores['sp'] = sp_scores['sp'] + 1
diff --git a/game.py b/game.py
index 3355a77e..d1758257 100644
--- a/game.py
+++ b/game.py
@@ -1,228 +1,233 @@
 import numpy as np
 import logging
 
+
 class Game:
 
-	def __init__(self):		
-		self.currentPlayer = 1
-		self.gameState = GameState(np.array([0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], dtype=np.int), 1)
-		self.actionSpace = np.array([0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], dtype=np.int)
-		self.pieces = {'1':'X', '0': '-', '-1':'O'}
-		self.grid_shape = (6,7)
-		self.input_shape = (2,6,7)
-		self.name = 'connect4'
-		self.state_size = len(self.gameState.binary)
-		self.action_size = len(self.actionSpace)
-
-	def reset(self):
-		self.gameState = GameState(np.array([0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], dtype=np.int), 1)
-		self.currentPlayer = 1
-		return self.gameState
-
-	def step(self, action):
-		next_state, value, done = self.gameState.takeAction(action)
-		self.gameState = next_state
-		self.currentPlayer = -self.currentPlayer
-		info = None
-		return ((next_state, value, done, info))
-
-	def identities(self, state, actionValues):
-		identities = [(state,actionValues)]
-
-		currentBoard = state.board
-		currentAV = actionValues
-
-		currentBoard = np.array([
-			  currentBoard[6], currentBoard[5],currentBoard[4], currentBoard[3], currentBoard[2], currentBoard[1], currentBoard[0]
-			, currentBoard[13], currentBoard[12],currentBoard[11], currentBoard[10], currentBoard[9], currentBoard[8], currentBoard[7]
-			, currentBoard[20], currentBoard[19],currentBoard[18], currentBoard[17], currentBoard[16], currentBoard[15], currentBoard[14]
-			, currentBoard[27], currentBoard[26],currentBoard[25], currentBoard[24], currentBoard[23], currentBoard[22], currentBoard[21]
-			, currentBoard[34], currentBoard[33],currentBoard[32], currentBoard[31], currentBoard[30], currentBoard[29], currentBoard[28]
-			, currentBoard[41], currentBoard[40],currentBoard[39], currentBoard[38], currentBoard[37], currentBoard[36], currentBoard[35]
-			])
-
-		currentAV = np.array([
-			currentAV[6], currentAV[5],currentAV[4], currentAV[3], currentAV[2], currentAV[1], currentAV[0]
-			, currentAV[13], currentAV[12],currentAV[11], currentAV[10], currentAV[9], currentAV[8], currentAV[7]
-			, currentAV[20], currentAV[19],currentAV[18], currentAV[17], currentAV[16], currentAV[15], currentAV[14]
-			, currentAV[27], currentAV[26],currentAV[25], currentAV[24], currentAV[23], currentAV[22], currentAV[21]
-			, currentAV[34], currentAV[33],currentAV[32], currentAV[31], currentAV[30], currentAV[29], currentAV[28]
-			, currentAV[41], currentAV[40],currentAV[39], currentAV[38], currentAV[37], currentAV[36], currentAV[35]
-					])
-
-		identities.append((GameState(currentBoard, state.playerTurn), currentAV))
-
-		return identities
+    def __init__(self):
+        self.currentPlayer = 1
+        self.gameState = GameState(np.array(
+            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+             0, 0, 0, 0, 0, 0], dtype=np.int), 1)
+        self.actionSpace = np.array(
+            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+             0, 0, 0, 0, 0, 0], dtype=np.int)
+        self.pieces = {'1': 'X', '0': '-', '-1': 'O'}
+        self.grid_shape = (6, 7)
+        self.input_shape = (2, 6, 7)
+        self.name = 'connect4'
+        self.state_size = len(self.gameState.binary)
+        self.action_size = len(self.actionSpace)
+
+    def reset(self):
+        self.gameState = GameState(np.array(
+            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+             0, 0, 0, 0, 0, 0], dtype=np.int), 1)
+        self.currentPlayer = 1
+        return self.gameState
+
+    def step(self, action):
+        next_state, value, done = self.gameState.takeAction(action)
+        self.gameState = next_state
+        self.currentPlayer = -self.currentPlayer
+        info = None
+        return ((next_state, value, done, info))
+
+    def identities(self, state, actionValues):
+        identities = [(state, actionValues)]
+
+        currentBoard = state.board
+        currentAV = actionValues
+
+        currentBoard = np.array([
+            currentBoard[6], currentBoard[5], currentBoard[4], currentBoard[3], currentBoard[2], currentBoard[1],
+            currentBoard[0]
+            , currentBoard[13], currentBoard[12], currentBoard[11], currentBoard[10], currentBoard[9], currentBoard[8],
+            currentBoard[7]
+            , currentBoard[20], currentBoard[19], currentBoard[18], currentBoard[17], currentBoard[16],
+            currentBoard[15], currentBoard[14]
+            , currentBoard[27], currentBoard[26], currentBoard[25], currentBoard[24], currentBoard[23],
+            currentBoard[22], currentBoard[21]
+            , currentBoard[34], currentBoard[33], currentBoard[32], currentBoard[31], currentBoard[30],
+            currentBoard[29], currentBoard[28]
+            , currentBoard[41], currentBoard[40], currentBoard[39], currentBoard[38], currentBoard[37],
+            currentBoard[36], currentBoard[35]
+        ])
+
+        currentAV = np.array([
+            currentAV[6], currentAV[5], currentAV[4], currentAV[3], currentAV[2], currentAV[1], currentAV[0]
+            , currentAV[13], currentAV[12], currentAV[11], currentAV[10], currentAV[9], currentAV[8], currentAV[7]
+            , currentAV[20], currentAV[19], currentAV[18], currentAV[17], currentAV[16], currentAV[15], currentAV[14]
+            , currentAV[27], currentAV[26], currentAV[25], currentAV[24], currentAV[23], currentAV[22], currentAV[21]
+            , currentAV[34], currentAV[33], currentAV[32], currentAV[31], currentAV[30], currentAV[29], currentAV[28]
+            , currentAV[41], currentAV[40], currentAV[39], currentAV[38], currentAV[37], currentAV[36], currentAV[35]
+        ])
+
+        identities.append((GameState(currentBoard, state.playerTurn), currentAV))
+
+        return identities
 
 
 class GameState():
-	def __init__(self, board, playerTurn):
-		self.board = board
-		self.pieces = {'1':'X', '0': '-', '-1':'O'}
-		self.winners = [
-			[0,1,2,3],
-			[1,2,3,4],
-			[2,3,4,5],
-			[3,4,5,6],
-			[7,8,9,10],
-			[8,9,10,11],
-			[9,10,11,12],
-			[10,11,12,13],
-			[14,15,16,17],
-			[15,16,17,18],
-			[16,17,18,19],
-			[17,18,19,20],
-			[21,22,23,24],
-			[22,23,24,25],
-			[23,24,25,26],
-			[24,25,26,27],
-			[28,29,30,31],
-			[29,30,31,32],
-			[30,31,32,33],
-			[31,32,33,34],
-			[35,36,37,38],
-			[36,37,38,39],
-			[37,38,39,40],
-			[38,39,40,41],
-
-			[0,7,14,21],
-			[7,14,21,28],
-			[14,21,28,35],
-			[1,8,15,22],
-			[8,15,22,29],
-			[15,22,29,36],
-			[2,9,16,23],
-			[9,16,23,30],
-			[16,23,30,37],
-			[3,10,17,24],
-			[10,17,24,31],
-			[17,24,31,38],
-			[4,11,18,25],
-			[11,18,25,32],
-			[18,25,32,39],
-			[5,12,19,26],
-			[12,19,26,33],
-			[19,26,33,40],
-			[6,13,20,27],
-			[13,20,27,34],
-			[20,27,34,41],
-
-			[3,9,15,21],
-			[4,10,16,22],
-			[10,16,22,28],
-			[5,11,17,23],
-			[11,17,23,29],
-			[17,23,29,35],
-			[6,12,18,24],
-			[12,18,24,30],
-			[18,24,30,36],
-			[13,19,25,31],
-			[19,25,31,37],
-			[20,26,32,38],
-
-			[3,11,19,27],
-			[2,10,18,26],
-			[10,18,26,34],
-			[1,9,17,25],
-			[9,17,25,33],
-			[17,25,33,41],
-			[0,8,16,24],
-			[8,16,24,32],
-			[16,24,32,40],
-			[7,15,23,31],
-			[15,23,31,39],
-			[14,22,30,38],
-			]
-		self.playerTurn = playerTurn
-		self.binary = self._binary()
-		self.id = self._convertStateToId()
-		self.allowedActions = self._allowedActions()
-		self.isEndGame = self._checkForEndGame()
-		self.value = self._getValue()
-		self.score = self._getScore()
-
-	def _allowedActions(self):
-		allowed = []
-		for i in range(len(self.board)):
-			if i >= len(self.board) - 7:
-				if self.board[i]==0:
-					allowed.append(i)
-			else:
-				if self.board[i] == 0 and self.board[i+7] != 0:
-					allowed.append(i)
-
-		return allowed
-
-	def _binary(self):
-
-		currentplayer_position = np.zeros(len(self.board), dtype=np.int)
-		currentplayer_position[self.board==self.playerTurn] = 1
-
-		other_position = np.zeros(len(self.board), dtype=np.int)
-		other_position[self.board==-self.playerTurn] = 1
-
-		position = np.append(currentplayer_position,other_position)
-
-		return (position)
-
-	def _convertStateToId(self):
-		player1_position = np.zeros(len(self.board), dtype=np.int)
-		player1_position[self.board==1] = 1
-
-		other_position = np.zeros(len(self.board), dtype=np.int)
-		other_position[self.board==-1] = 1
-
-		position = np.append(player1_position,other_position)
-
-		id = ''.join(map(str,position))
-
-		return id
-
-	def _checkForEndGame(self):
-		if np.count_nonzero(self.board) == 42:
-			return 1
-
-		for x,y,z,a in self.winners:
-			if (self.board[x] + self.board[y] + self.board[z] + self.board[a] == 4 * -self.playerTurn):
-				return 1
-		return 0
-
-
-	def _getValue(self):
-		# This is the value of the state for the current player
-		# i.e. if the previous player played a winning move, you lose
-		for x,y,z,a in self.winners:
-			if (self.board[x] + self.board[y] + self.board[z] + self.board[a] == 4 * -self.playerTurn):
-				return (-1, -1, 1)
-		return (0, 0, 0)
-
-
-	def _getScore(self):
-		tmp = self.value
-		return (tmp[1], tmp[2])
-
-
-
-
-	def takeAction(self, action):
-		newBoard = np.array(self.board)
-		newBoard[action]=self.playerTurn
-		
-		newState = GameState(newBoard, -self.playerTurn)
-
-		value = 0
-		done = 0
-
-		if newState.isEndGame:
-			value = newState.value[0]
-			done = 1
-
-		return (newState, value, done) 
-
-
-
-
-	def render(self, logger):
-		for r in range(6):
-			logger.info([self.pieces[str(x)] for x in self.board[7*r : (7*r + 7)]])
-		logger.info('--------------')
\ No newline at end of file
+    def __init__(self, board, playerTurn):
+        self.board = board
+        self.pieces = {'1': 'X', '0': '-', '-1': 'O'}
+        self.winners = [
+            [0, 1, 2, 3],
+            [1, 2, 3, 4],
+            [2, 3, 4, 5],
+            [3, 4, 5, 6],
+            [7, 8, 9, 10],
+            [8, 9, 10, 11],
+            [9, 10, 11, 12],
+            [10, 11, 12, 13],
+            [14, 15, 16, 17],
+            [15, 16, 17, 18],
+            [16, 17, 18, 19],
+            [17, 18, 19, 20],
+            [21, 22, 23, 24],
+            [22, 23, 24, 25],
+            [23, 24, 25, 26],
+            [24, 25, 26, 27],
+            [28, 29, 30, 31],
+            [29, 30, 31, 32],
+            [30, 31, 32, 33],
+            [31, 32, 33, 34],
+            [35, 36, 37, 38],
+            [36, 37, 38, 39],
+            [37, 38, 39, 40],
+            [38, 39, 40, 41],
+
+            [0, 7, 14, 21],
+            [7, 14, 21, 28],
+            [14, 21, 28, 35],
+            [1, 8, 15, 22],
+            [8, 15, 22, 29],
+            [15, 22, 29, 36],
+            [2, 9, 16, 23],
+            [9, 16, 23, 30],
+            [16, 23, 30, 37],
+            [3, 10, 17, 24],
+            [10, 17, 24, 31],
+            [17, 24, 31, 38],
+            [4, 11, 18, 25],
+            [11, 18, 25, 32],
+            [18, 25, 32, 39],
+            [5, 12, 19, 26],
+            [12, 19, 26, 33],
+            [19, 26, 33, 40],
+            [6, 13, 20, 27],
+            [13, 20, 27, 34],
+            [20, 27, 34, 41],
+
+            [3, 9, 15, 21],
+            [4, 10, 16, 22],
+            [10, 16, 22, 28],
+            [5, 11, 17, 23],
+            [11, 17, 23, 29],
+            [17, 23, 29, 35],
+            [6, 12, 18, 24],
+            [12, 18, 24, 30],
+            [18, 24, 30, 36],
+            [13, 19, 25, 31],
+            [19, 25, 31, 37],
+            [20, 26, 32, 38],
+
+            [3, 11, 19, 27],
+            [2, 10, 18, 26],
+            [10, 18, 26, 34],
+            [1, 9, 17, 25],
+            [9, 17, 25, 33],
+            [17, 25, 33, 41],
+            [0, 8, 16, 24],
+            [8, 16, 24, 32],
+            [16, 24, 32, 40],
+            [7, 15, 23, 31],
+            [15, 23, 31, 39],
+            [14, 22, 30, 38],
+        ]
+        self.playerTurn = playerTurn
+        self.binary = self._binary()
+        self.id = self._convertStateToId()
+        self.allowedActions = self._allowedActions()
+        self.isEndGame = self._checkForEndGame()
+        self.value = self._getValue()
+        self.score = self._getScore()
+
+    def _allowedActions(self):
+        allowed = []
+        for i in range(len(self.board)):
+            if i >= len(self.board) - 7:
+                if self.board[i] == 0:
+                    allowed.append(i)
+            else:
+                if self.board[i] == 0 and self.board[i + 7] != 0:
+                    allowed.append(i)
+
+        return allowed
+
+    def _binary(self):
+
+        currentplayer_position = np.zeros(len(self.board), dtype=np.int)
+        currentplayer_position[self.board == self.playerTurn] = 1
+
+        other_position = np.zeros(len(self.board), dtype=np.int)
+        other_position[self.board == -self.playerTurn] = 1
+
+        position = np.append(currentplayer_position, other_position)
+
+        return (position)
+
+    def _convertStateToId(self):
+        player1_position = np.zeros(len(self.board), dtype=np.int)
+        player1_position[self.board == 1] = 1
+
+        other_position = np.zeros(len(self.board), dtype=np.int)
+        other_position[self.board == -1] = 1
+
+        position = np.append(player1_position, other_position)
+
+        id = ''.join(map(str, position))
+
+        return id
+
+    def _checkForEndGame(self):
+        if np.count_nonzero(self.board) == 42:
+            return 1
+
+        for x, y, z, a in self.winners:
+            if (self.board[x] + self.board[y] + self.board[z] + self.board[a] == 4 * -self.playerTurn):
+                return 1
+        return 0
+
+    def _getValue(self):
+        # This is the value of the state for the current player
+        # i.e. if the previous player played a winning move, you lose
+        for x, y, z, a in self.winners:
+            if (self.board[x] + self.board[y] + self.board[z] + self.board[a] == 4 * -self.playerTurn):
+                return (-1, -1, 1)
+        return (0, 0, 0)
+
+    def _getScore(self):
+        tmp = self.value
+        return (tmp[1], tmp[2])
+
+    def takeAction(self, action):
+        newBoard = np.array(self.board)
+        newBoard[action] = self.playerTurn
+
+        newState = GameState(newBoard, -self.playerTurn)
+
+        value = 0
+        done = 0
+
+        if newState.isEndGame:
+            value = newState.value[0]
+            done = 1
+
+        return (newState, value, done)
+
+    def render(self, logger):
+        for r in range(6):
+            logger.info([self.pieces[str(x)] for x in self.board[7 * r: (7 * r + 7)]])
+        logger.info('--------------')
diff --git a/games/connect4/game.py b/games/connect4/game.py
index e1343fa2..d1758257 100644
--- a/games/connect4/game.py
+++ b/games/connect4/game.py
@@ -1,231 +1,233 @@
 import numpy as np
 import logging
 
+
 class Game:
 
-	def __init__(self):		
-		self.currentPlayer = 1
-		self.gameState = GameState(np.array([0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], dtype=np.int), 1)
-		self.actionSpace = np.array([0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], dtype=np.int)
-		self.pieces = {'1':'X', '0': '-', '-1':'O'}
-		self.grid_shape = (6,7)
-		self.input_shape = (2,6,7)
-		self.name = 'connect4'
-		self.state_size = len(self.gameState.binary)
-		self.action_size = len(self.actionSpace)
-
-	def reset(self):
-		self.gameState = GameState(np.array([0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], dtype=np.int), 1)
-		self.currentPlayer = 1
-		return self.gameState
-
-	def step(self, action):
-		next_state, value, done = self.gameState.takeAction(action)
-		self.gameState = next_state
-		self.currentPlayer = -self.currentPlayer
-		info = None
-		return ((next_state, value, done, info))
-
-	def identities(self, state, actionValues):
-		identities = [(state,actionValues)]
-
-		currentBoard = state.board
-		currentAV = actionValues
-
-		currentBoard = np.array([
-			  currentBoard[6], currentBoard[5],currentBoard[4], currentBoard[3], currentBoard[2], currentBoard[1], currentBoard[0]
-			, currentBoard[13], currentBoard[12],currentBoard[11], currentBoard[10], currentBoard[9], currentBoard[8], currentBoard[7]
-			, currentBoard[20], currentBoard[19],currentBoard[18], currentBoard[17], currentBoard[16], currentBoard[15], currentBoard[14]
-			, currentBoard[27], currentBoard[26],currentBoard[25], currentBoard[24], currentBoard[23], currentBoard[22], currentBoard[21]
-			, currentBoard[34], currentBoard[33],currentBoard[32], currentBoard[31], currentBoard[30], currentBoard[29], currentBoard[28]
-			, currentBoard[41], currentBoard[40],currentBoard[39], currentBoard[38], currentBoard[37], currentBoard[36], currentBoard[35]
-			])
-
-		currentAV = np.array([
-			currentAV[6], currentAV[5],currentAV[4], currentAV[3], currentAV[2], currentAV[1], currentAV[0]
-			, currentAV[13], currentAV[12],currentAV[11], currentAV[10], currentAV[9], currentAV[8], currentAV[7]
-			, currentAV[20], currentAV[19],currentAV[18], currentAV[17], currentAV[16], currentAV[15], currentAV[14]
-			, currentAV[27], currentAV[26],currentAV[25], currentAV[24], currentAV[23], currentAV[22], currentAV[21]
-			, currentAV[34], currentAV[33],currentAV[32], currentAV[31], currentAV[30], currentAV[29], currentAV[28]
-			, currentAV[41], currentAV[40],currentAV[39], currentAV[38], currentAV[37], currentAV[36], currentAV[35]
-					])
-
-		identities.append((GameState(currentBoard, state.playerTurn), currentAV))
-
-		return identities
+    def __init__(self):
+        self.currentPlayer = 1
+        self.gameState = GameState(np.array(
+            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+             0, 0, 0, 0, 0, 0], dtype=np.int), 1)
+        self.actionSpace = np.array(
+            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+             0, 0, 0, 0, 0, 0], dtype=np.int)
+        self.pieces = {'1': 'X', '0': '-', '-1': 'O'}
+        self.grid_shape = (6, 7)
+        self.input_shape = (2, 6, 7)
+        self.name = 'connect4'
+        self.state_size = len(self.gameState.binary)
+        self.action_size = len(self.actionSpace)
+
+    def reset(self):
+        self.gameState = GameState(np.array(
+            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+             0, 0, 0, 0, 0, 0], dtype=np.int), 1)
+        self.currentPlayer = 1
+        return self.gameState
+
+    def step(self, action):
+        next_state, value, done = self.gameState.takeAction(action)
+        self.gameState = next_state
+        self.currentPlayer = -self.currentPlayer
+        info = None
+        return ((next_state, value, done, info))
+
+    def identities(self, state, actionValues):
+        identities = [(state, actionValues)]
+
+        currentBoard = state.board
+        currentAV = actionValues
+
+        currentBoard = np.array([
+            currentBoard[6], currentBoard[5], currentBoard[4], currentBoard[3], currentBoard[2], currentBoard[1],
+            currentBoard[0]
+            , currentBoard[13], currentBoard[12], currentBoard[11], currentBoard[10], currentBoard[9], currentBoard[8],
+            currentBoard[7]
+            , currentBoard[20], currentBoard[19], currentBoard[18], currentBoard[17], currentBoard[16],
+            currentBoard[15], currentBoard[14]
+            , currentBoard[27], currentBoard[26], currentBoard[25], currentBoard[24], currentBoard[23],
+            currentBoard[22], currentBoard[21]
+            , currentBoard[34], currentBoard[33], currentBoard[32], currentBoard[31], currentBoard[30],
+            currentBoard[29], currentBoard[28]
+            , currentBoard[41], currentBoard[40], currentBoard[39], currentBoard[38], currentBoard[37],
+            currentBoard[36], currentBoard[35]
+        ])
+
+        currentAV = np.array([
+            currentAV[6], currentAV[5], currentAV[4], currentAV[3], currentAV[2], currentAV[1], currentAV[0]
+            , currentAV[13], currentAV[12], currentAV[11], currentAV[10], currentAV[9], currentAV[8], currentAV[7]
+            , currentAV[20], currentAV[19], currentAV[18], currentAV[17], currentAV[16], currentAV[15], currentAV[14]
+            , currentAV[27], currentAV[26], currentAV[25], currentAV[24], currentAV[23], currentAV[22], currentAV[21]
+            , currentAV[34], currentAV[33], currentAV[32], currentAV[31], currentAV[30], currentAV[29], currentAV[28]
+            , currentAV[41], currentAV[40], currentAV[39], currentAV[38], currentAV[37], currentAV[36], currentAV[35]
+        ])
+
+        identities.append((GameState(currentBoard, state.playerTurn), currentAV))
+
+        return identities
 
 
 class GameState():
-	def __init__(self, board, playerTurn):
-		self.board = board
-		self.pieces = {'1':'X', '0': '-', '-1':'O'}
-		self.winners = [
-			[0,1,2,3],
-			[1,2,3,4],
-			[2,3,4,5],
-			[3,4,5,6],
-			[7,8,9,10],
-			[8,9,10,11],
-			[9,10,11,12],
-			[10,11,12,13],
-			[14,15,16,17],
-			[15,16,17,18],
-			[16,17,18,19],
-			[17,18,19,20],
-			[21,22,23,24],
-			[22,23,24,25],
-			[23,24,25,26],
-			[24,25,26,27],
-			[28,29,30,31],
-			[29,30,31,32],
-			[30,31,32,33],
-			[31,32,33,34],
-			[35,36,37,38],
-			[36,37,38,39],
-			[37,38,39,40],
-			[38,39,40,41],
-
-			[0,7,14,21],
-			[7,14,21,28],
-			[14,21,28,35],
-			[1,8,15,22],
-			[8,15,22,29],
-			[15,22,29,36],
-			[2,9,16,23],
-			[9,16,23,30],
-			[16,23,30,37],
-			[3,10,17,24],
-			[10,17,24,31],
-			[17,24,31,38],
-			[4,11,18,25],
-			[11,18,25,32],
-			[18,25,32,39],
-			[5,12,19,26],
-			[12,19,26,33],
-			[19,26,33,40],
-			[6,13,20,27],
-			[13,20,27,34],
-			[20,27,34,41],
-
-			[3,9,15,21],
-			[4,10,16,22],
-			[10,16,22,28],
-			[5,11,17,23],
-			[11,17,23,29],
-			[17,23,29,35],
-			[6,12,18,24],
-			[12,18,24,30],
-			[18,24,30,36],
-			[13,19,25,31],
-			[19,25,31,37],
-			[20,26,32,38],
-
-			[3,11,19,27],
-			[2,10,18,26],
-			[10,18,26,34],
-			[1,9,17,25],
-			[9,17,25,33],
-			[17,25,33,41],
-			[0,8,16,24],
-			[8,16,24,32],
-			[16,24,32,40],
-			[7,15,23,31],
-			[15,23,31,39],
-			[14,22,30,38],
-			]
-		self.playerTurn = playerTurn
-		self.binary = self._binary()
-		self.id = self._convertStateToId()
-		self.allowedActions = self._allowedActions()
-		self.isEndGame = self._checkForEndGame()
-		self.value = self._getValue()
-		self.score = self._getScore()
-
-	def _allowedActions(self):
-		allowed = []
-		for i in range(len(self.board)):
-			if i >= len(self.board) - 7:
-				if self.board[i]==0:
-					allowed.append(i)
-			else:
-				if self.board[i] == 0 and self.board[i+7] != 0:
-					allowed.append(i)
-
-		return allowed
-
-	def _binary(self):
-
-		currentplayer_position = np.zeros(len(self.board), dtype=np.int)
-		currentplayer_position[self.board==self.playerTurn] = 1
-
-		other_position = np.zeros(len(self.board), dtype=np.int)
-		other_position[self.board==-self.playerTurn] = 1
-
-		position = np.append(currentplayer_position,other_position)
-
-		return (position)
-
-	def _convertStateToId(self):
-		player1_position = np.zeros(len(self.board), dtype=np.int)
-		player1_position[self.board==1] = 1
-
-		other_position = np.zeros(len(self.board), dtype=np.int)
-		other_position[self.board==-1] = 1
-
-		position = np.append(player1_position,other_position)
-
-		id = ''.join(map(str,position))
-
-		return id
-
-	def _checkForEndGame(self):
-		if np.count_nonzero(self.board) == 42:
-			return 1
-
-		for x,y,z,a in self.winners:
-			if (self.board[x] + self.board[y] + self.board[z] + self.board[a] == 4 * -self.playerTurn):
-				return 1
-		return 0
-
-
-	def _getValue(self):
-		# This is the value of the state for the current player
-		# i.e. if the previous player played a winning move, you lose
-		for x,y,z,a in self.winners:
-			if (self.board[x] + self.board[y] + self.board[z] + self.board[a] == 4 * -self.playerTurn):
-				return (-1, -1, 1)
-		return (0, 0, 0)
-
-
-	def _getScore(self):
-		tmp = self.value
-		return (tmp[1], tmp[2])
-
-
-
-
-	def takeAction(self, action):
-		newBoard = np.array(self.board)
-		newBoard[action]=self.playerTurn
-		
-		newState = GameState(newBoard, -self.playerTurn)
-
-		value = 0
-		done = 0
-
-		if newState.isEndGame:
-			value = newState.value[0]
-			done = 1
-
-		return (newState, value, done) 
-
-
-
-
-	def render(self, logger):
-		for r in range(6):
-			logger.info([self.pieces[str(x)] for x in self.board[7*r : (7*r + 7)]])
-		logger.info('--------------')
-
-
-
+    def __init__(self, board, playerTurn):
+        self.board = board
+        self.pieces = {'1': 'X', '0': '-', '-1': 'O'}
+        self.winners = [
+            [0, 1, 2, 3],
+            [1, 2, 3, 4],
+            [2, 3, 4, 5],
+            [3, 4, 5, 6],
+            [7, 8, 9, 10],
+            [8, 9, 10, 11],
+            [9, 10, 11, 12],
+            [10, 11, 12, 13],
+            [14, 15, 16, 17],
+            [15, 16, 17, 18],
+            [16, 17, 18, 19],
+            [17, 18, 19, 20],
+            [21, 22, 23, 24],
+            [22, 23, 24, 25],
+            [23, 24, 25, 26],
+            [24, 25, 26, 27],
+            [28, 29, 30, 31],
+            [29, 30, 31, 32],
+            [30, 31, 32, 33],
+            [31, 32, 33, 34],
+            [35, 36, 37, 38],
+            [36, 37, 38, 39],
+            [37, 38, 39, 40],
+            [38, 39, 40, 41],
+
+            [0, 7, 14, 21],
+            [7, 14, 21, 28],
+            [14, 21, 28, 35],
+            [1, 8, 15, 22],
+            [8, 15, 22, 29],
+            [15, 22, 29, 36],
+            [2, 9, 16, 23],
+            [9, 16, 23, 30],
+            [16, 23, 30, 37],
+            [3, 10, 17, 24],
+            [10, 17, 24, 31],
+            [17, 24, 31, 38],
+            [4, 11, 18, 25],
+            [11, 18, 25, 32],
+            [18, 25, 32, 39],
+            [5, 12, 19, 26],
+            [12, 19, 26, 33],
+            [19, 26, 33, 40],
+            [6, 13, 20, 27],
+            [13, 20, 27, 34],
+            [20, 27, 34, 41],
+
+            [3, 9, 15, 21],
+            [4, 10, 16, 22],
+            [10, 16, 22, 28],
+            [5, 11, 17, 23],
+            [11, 17, 23, 29],
+            [17, 23, 29, 35],
+            [6, 12, 18, 24],
+            [12, 18, 24, 30],
+            [18, 24, 30, 36],
+            [13, 19, 25, 31],
+            [19, 25, 31, 37],
+            [20, 26, 32, 38],
+
+            [3, 11, 19, 27],
+            [2, 10, 18, 26],
+            [10, 18, 26, 34],
+            [1, 9, 17, 25],
+            [9, 17, 25, 33],
+            [17, 25, 33, 41],
+            [0, 8, 16, 24],
+            [8, 16, 24, 32],
+            [16, 24, 32, 40],
+            [7, 15, 23, 31],
+            [15, 23, 31, 39],
+            [14, 22, 30, 38],
+        ]
+        self.playerTurn = playerTurn
+        self.binary = self._binary()
+        self.id = self._convertStateToId()
+        self.allowedActions = self._allowedActions()
+        self.isEndGame = self._checkForEndGame()
+        self.value = self._getValue()
+        self.score = self._getScore()
+
+    def _allowedActions(self):
+        allowed = []
+        for i in range(len(self.board)):
+            if i >= len(self.board) - 7:
+                if self.board[i] == 0:
+                    allowed.append(i)
+            else:
+                if self.board[i] == 0 and self.board[i + 7] != 0:
+                    allowed.append(i)
+
+        return allowed
+
+    def _binary(self):
+
+        currentplayer_position = np.zeros(len(self.board), dtype=np.int)
+        currentplayer_position[self.board == self.playerTurn] = 1
+
+        other_position = np.zeros(len(self.board), dtype=np.int)
+        other_position[self.board == -self.playerTurn] = 1
+
+        position = np.append(currentplayer_position, other_position)
+
+        return (position)
+
+    def _convertStateToId(self):
+        player1_position = np.zeros(len(self.board), dtype=np.int)
+        player1_position[self.board == 1] = 1
+
+        other_position = np.zeros(len(self.board), dtype=np.int)
+        other_position[self.board == -1] = 1
+
+        position = np.append(player1_position, other_position)
+
+        id = ''.join(map(str, position))
+
+        return id
+
+    def _checkForEndGame(self):
+        if np.count_nonzero(self.board) == 42:
+            return 1
+
+        for x, y, z, a in self.winners:
+            if (self.board[x] + self.board[y] + self.board[z] + self.board[a] == 4 * -self.playerTurn):
+                return 1
+        return 0
+
+    def _getValue(self):
+        # This is the value of the state for the current player
+        # i.e. if the previous player played a winning move, you lose
+        for x, y, z, a in self.winners:
+            if (self.board[x] + self.board[y] + self.board[z] + self.board[a] == 4 * -self.playerTurn):
+                return (-1, -1, 1)
+        return (0, 0, 0)
+
+    def _getScore(self):
+        tmp = self.value
+        return (tmp[1], tmp[2])
+
+    def takeAction(self, action):
+        newBoard = np.array(self.board)
+        newBoard[action] = self.playerTurn
+
+        newState = GameState(newBoard, -self.playerTurn)
+
+        value = 0
+        done = 0
+
+        if newState.isEndGame:
+            value = newState.value[0]
+            done = 1
+
+        return (newState, value, done)
+
+    def render(self, logger):
+        for r in range(6):
+            logger.info([self.pieces[str(x)] for x in self.board[7 * r: (7 * r + 7)]])
+        logger.info('--------------')
diff --git a/games/metasquares/game.py b/games/metasquares/game.py
index 7418280f..e4c719ec 100644
--- a/games/metasquares/game.py
+++ b/games/metasquares/game.py
@@ -1,271 +1,266 @@
 import numpy as np
 import logging
 
-class Game:
-
-	def __init__(self):		
-		self.currentPlayer = 1
-		self.gameState = GameState(np.array([0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], dtype=np.int), 1)
-		self.actionSpace = np.array([0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], dtype=np.int)
-		self.pieces = {'1':'X', '0': '-', '-1':'O'}
-		self.grid_shape = (5,5)
-		self.input_shape = (2,5,5)
-		self.name = 'metaSquares'
-		self.state_size = len(self.gameState.binary)
-		self.action_size = len(self.actionSpace)
-
-	def reset(self):
-		self.gameState = GameState(np.array([0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], dtype=np.int), 1)
-		self.currentPlayer = 1
-		return self.gameState
-
-	def step(self, action):
-		next_state, value, done = self.gameState.takeAction(action)
-		self.gameState = next_state
-		self.currentPlayer = -self.currentPlayer
-		info = None
-		return ((next_state, value, done, info))
-
-	def identities(self, state, actionValues):
-		identities = []
-		currentBoard = state.board
-		currentAV = actionValues
-
-		for n in range(5):
-			currentBoard = np.array([
-						  currentBoard[20], currentBoard[15],currentBoard[10], currentBoard[5],currentBoard[0]
-						, currentBoard[21], currentBoard[16],currentBoard[11], currentBoard[6],currentBoard[1]
-						, currentBoard[22], currentBoard[17],currentBoard[12], currentBoard[7],currentBoard[2]
-						, currentBoard[23], currentBoard[18],currentBoard[13], currentBoard[8],currentBoard[3]
-						, currentBoard[24], currentBoard[19],currentBoard[14], currentBoard[9],currentBoard[4]
-						])
-
-			currentAV = np.array([
-						  currentAV[20], currentAV[15],currentAV[10], currentAV[5],currentAV[0]
-						, currentAV[21], currentAV[16],currentAV[11], currentAV[6],currentAV[1]
-						, currentAV[22], currentAV[17],currentAV[12], currentAV[7],currentAV[2]
-						, currentAV[23], currentAV[18],currentAV[13], currentAV[8],currentAV[3]
-						, currentAV[24], currentAV[19],currentAV[14], currentAV[9],currentAV[4]
-
-						])
-			
-			identities.append((GameState(currentBoard, state.playerTurn), currentAV))
-
-		currentBoard = np.array([
-					  currentBoard[4], currentBoard[3],currentBoard[2], currentBoard[1],currentBoard[0]
-					, currentBoard[9], currentBoard[8],currentBoard[7], currentBoard[6],currentBoard[5]
-					, currentBoard[14], currentBoard[13],currentBoard[12], currentBoard[11],currentBoard[10]
-					, currentBoard[19], currentBoard[18],currentBoard[17], currentBoard[16],currentBoard[15]
-					, currentBoard[24], currentBoard[23],currentBoard[22], currentBoard[21],currentBoard[20]
-					])
-
-		currentAV = np.array([
-					  currentAV[4], currentAV[3],currentAV[2], currentAV[1],currentAV[0]
-					, currentAV[9], currentAV[8],currentAV[7], currentAV[6],currentAV[5]
-					, currentAV[14], currentAV[13],currentAV[12], currentAV[11],currentAV[10]
-					, currentAV[19], currentAV[18],currentAV[17], currentAV[16],currentAV[15]
-					, currentAV[24], currentAV[23],currentAV[22], currentAV[21],currentAV[20]
-
-					])
-
-		for n in range(5):
-			currentBoard = np.array([
-						  currentBoard[20], currentBoard[15],currentBoard[10], currentBoard[5],currentBoard[0]
-						, currentBoard[21], currentBoard[16],currentBoard[11], currentBoard[6],currentBoard[1]
-						, currentBoard[22], currentBoard[17],currentBoard[12], currentBoard[7],currentBoard[2]
-						, currentBoard[23], currentBoard[18],currentBoard[13], currentBoard[8],currentBoard[3]
-						, currentBoard[24], currentBoard[19],currentBoard[14], currentBoard[9],currentBoard[4]
-						])
-
-			currentAV = np.array([
-						  currentAV[20], currentAV[15],currentAV[10], currentAV[5],currentAV[0]
-						, currentAV[21], currentAV[16],currentAV[11], currentAV[6],currentAV[1]
-						, currentAV[22], currentAV[17],currentAV[12], currentAV[7],currentAV[2]
-						, currentAV[23], currentAV[18],currentAV[13], currentAV[8],currentAV[3]
-						, currentAV[24], currentAV[19],currentAV[14], currentAV[9],currentAV[4]
-
-						])
-
-			identities.append((GameState(currentBoard, state.playerTurn), currentAV))
-
-		return identities
 
+class Game:
 
+    def __init__(self):
+        self.currentPlayer = 1
+        self.gameState = GameState(
+            np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=np.int), 1)
+        self.actionSpace = np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+                                    dtype=np.int)
+        self.pieces = {'1': 'X', '0': '-', '-1': 'O'}
+        self.grid_shape = (5, 5)
+        self.input_shape = (2, 5, 5)
+        self.name = 'metaSquares'
+        self.state_size = len(self.gameState.binary)
+        self.action_size = len(self.actionSpace)
+
+    def reset(self):
+        self.gameState = GameState(
+            np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=np.int), 1)
+        self.currentPlayer = 1
+        return self.gameState
+
+    def step(self, action):
+        next_state, value, done = self.gameState.takeAction(action)
+        self.gameState = next_state
+        self.currentPlayer = -self.currentPlayer
+        info = None
+        return ((next_state, value, done, info))
+
+    def identities(self, state, actionValues):
+        identities = []
+        currentBoard = state.board
+        currentAV = actionValues
+
+        for n in range(5):
+            currentBoard = np.array([
+                currentBoard[20], currentBoard[15], currentBoard[10], currentBoard[5], currentBoard[0]
+                , currentBoard[21], currentBoard[16], currentBoard[11], currentBoard[6], currentBoard[1]
+                , currentBoard[22], currentBoard[17], currentBoard[12], currentBoard[7], currentBoard[2]
+                , currentBoard[23], currentBoard[18], currentBoard[13], currentBoard[8], currentBoard[3]
+                , currentBoard[24], currentBoard[19], currentBoard[14], currentBoard[9], currentBoard[4]
+            ])
+
+            currentAV = np.array([
+                currentAV[20], currentAV[15], currentAV[10], currentAV[5], currentAV[0]
+                , currentAV[21], currentAV[16], currentAV[11], currentAV[6], currentAV[1]
+                , currentAV[22], currentAV[17], currentAV[12], currentAV[7], currentAV[2]
+                , currentAV[23], currentAV[18], currentAV[13], currentAV[8], currentAV[3]
+                , currentAV[24], currentAV[19], currentAV[14], currentAV[9], currentAV[4]
+
+            ])
+
+            identities.append((GameState(currentBoard, state.playerTurn), currentAV))
+
+        currentBoard = np.array([
+            currentBoard[4], currentBoard[3], currentBoard[2], currentBoard[1], currentBoard[0]
+            , currentBoard[9], currentBoard[8], currentBoard[7], currentBoard[6], currentBoard[5]
+            , currentBoard[14], currentBoard[13], currentBoard[12], currentBoard[11], currentBoard[10]
+            , currentBoard[19], currentBoard[18], currentBoard[17], currentBoard[16], currentBoard[15]
+            , currentBoard[24], currentBoard[23], currentBoard[22], currentBoard[21], currentBoard[20]
+        ])
+
+        currentAV = np.array([
+            currentAV[4], currentAV[3], currentAV[2], currentAV[1], currentAV[0]
+            , currentAV[9], currentAV[8], currentAV[7], currentAV[6], currentAV[5]
+            , currentAV[14], currentAV[13], currentAV[12], currentAV[11], currentAV[10]
+            , currentAV[19], currentAV[18], currentAV[17], currentAV[16], currentAV[15]
+            , currentAV[24], currentAV[23], currentAV[22], currentAV[21], currentAV[20]
+
+        ])
+
+        for n in range(5):
+            currentBoard = np.array([
+                currentBoard[20], currentBoard[15], currentBoard[10], currentBoard[5], currentBoard[0]
+                , currentBoard[21], currentBoard[16], currentBoard[11], currentBoard[6], currentBoard[1]
+                , currentBoard[22], currentBoard[17], currentBoard[12], currentBoard[7], currentBoard[2]
+                , currentBoard[23], currentBoard[18], currentBoard[13], currentBoard[8], currentBoard[3]
+                , currentBoard[24], currentBoard[19], currentBoard[14], currentBoard[9], currentBoard[4]
+            ])
+
+            currentAV = np.array([
+                currentAV[20], currentAV[15], currentAV[10], currentAV[5], currentAV[0]
+                , currentAV[21], currentAV[16], currentAV[11], currentAV[6], currentAV[1]
+                , currentAV[22], currentAV[17], currentAV[12], currentAV[7], currentAV[2]
+                , currentAV[23], currentAV[18], currentAV[13], currentAV[8], currentAV[3]
+                , currentAV[24], currentAV[19], currentAV[14], currentAV[9], currentAV[4]
+
+            ])
+
+            identities.append((GameState(currentBoard, state.playerTurn), currentAV))
+
+        return identities
 
 
 class GameState():
-	def __init__(self, board, playerTurn):
-		self.board = board
-		self.pieces = {'1':'X', '0': '-', '-1':'O'}
-		self.winners = [
-			{'points': 1, 'tiles' : [
-			[0,1,5,6]
-			,[1,2,6,7]
-			,[2,3,7,8]
-			,[3,4,8,9]
-			,[5,6,10,11]
-			,[6,7,11,12]
-			,[7,8,12,13]
-			,[8,9,13,14]
-			,[10,11,15,16]
-			,[11,12,16,17]
-			,[12,13,17,18]
-			,[13,14,18,19]
-			,[15,16,20,21]
-			,[16,17,21,22]
-			,[17,18,22,23]
-			,[18,19,23,24]
-			]},
-			{'points': 2, 'tiles' : [
-			[1,5,7,11]
-			,[2,6,8,12]
-			,[3,7,9,13]
-			,[6,10,12,16]
-			,[7,11,13,17]
-			,[8,12,14,18]
-			,[11,15,17,21]
-			,[12,16,18,22]
-			,[13,17,19,23]
-			]},
-			{'points': 4, 'tiles' : [
-			[0,2,10,12]
-			,[1,3,11,13]
-			,[2,4,12,14]
-			,[5,7,15,17]
-			,[6,8,16,18]
-			,[7,9,17,19]
-			,[10,12,20,22]
-			,[11,13,21,23]
-			,[12,14,22,24]
-			]},
-			{'points': 5, 'tiles' : [
-			[1,10,8,17]
-			,[6,15,13,22]
-			,[2,11,9,18]
-			,[7,16,14,23]
-			,[2,5,13,16]
-			,[7,10,18,21]
-			,[3,6,14,17]
-			,[8,11,19,22]
-			]},
-			{'points': 8, 'tiles' : [
-			[2,10,14,22]
-			]},
-			{'points': 9, 'tiles' : [
-			[0,3,15,18]
-			,[1,4,16,19]
-			,[5,8,20,23]
-			,[6,9,21,24]
-			]},
-			{'points': 10, 'tiles' : [
-			[1,9,23,15]
-			,[5,3,19,21]
-			]},
-			{'points': 16, 'tiles' : [
-			[0,4,20,24]
-			]},
-			]
-		self.playerTurn = playerTurn
-		self.binary = self._binary()
-		self.id = self._convertStateToId()
-		self.allowedActions = self._allowedActions()
-		self.isEndGame = self._checkForEndGame()
-		self.value = self._getValue()
-		self.score = self._getScore()
-
-	def _allowedActions(self):
-		return np.where(self.board == 0)[0]
-
-	def _binary(self):
-
-		currentplayer_position = np.zeros(len(self.board), dtype=np.int)
-		currentplayer_position[self.board==self.playerTurn] = 1
-
-		other_position = np.zeros(len(self.board), dtype=np.int)
-		other_position[self.board==-self.playerTurn] = 1
-
-		position = np.append(currentplayer_position,other_position)
-
-		return (position)
-
-	def _convertStateToId(self):
-		player1_position = np.zeros(len(self.board), dtype=np.int)
-		player1_position[self.board==1] = 1
-
-		other_position = np.zeros(len(self.board), dtype=np.int)
-		other_position[self.board==-1] = 1
-
-		position = np.append(player1_position,other_position)
-
-		id = ''.join(map(str,position))
-
-		return id
-
-
-
-
-	def _checkForEndGame(self):
-		if np.count_nonzero(self.board) == 24:
-			return 1
-		return 0
-
-	def _getValue(self):
-		currentPlayerPoints = 0
-		for squareType in self.winners:
-			points = squareType['points']
-			for tiles in squareType['tiles']:
-				checkFlag = 0
-				tilenum = 0
-				while tilenum < 4 and checkFlag == 0:
-					if self.board[tiles[tilenum]] != self.playerTurn:
-						checkFlag = 1
-					tilenum = tilenum + 1
-				if checkFlag == 0:
-					currentPlayerPoints = currentPlayerPoints + points
-					
-		opponentPlayerPoints = 0
-		for squareType in self.winners:
-			points = squareType['points']
-			for tiles in squareType['tiles']:
-				checkFlag = 0
-				tilenum = 0
-				while tilenum < 4 and checkFlag == 0:
-					if self.board[tiles[tilenum]] != -self.playerTurn:
-						checkFlag = 1
-					tilenum = tilenum + 1
-				if checkFlag == 0:
-					opponentPlayerPoints = opponentPlayerPoints + points
-
-		if currentPlayerPoints > opponentPlayerPoints:
-			return (1, currentPlayerPoints, opponentPlayerPoints)
-		elif currentPlayerPoints < opponentPlayerPoints:
-			return (-1, currentPlayerPoints, opponentPlayerPoints)
-		else:
-			return (0, currentPlayerPoints, opponentPlayerPoints)
-
-
-	def _getScore(self):
-		tmp = self.value
-		return (tmp[1], tmp[2])
-
-	def takeAction(self, action):
-		newBoard = np.array(self.board)
-		newBoard[action] = self.playerTurn
-		newState = GameState(newBoard, -self.playerTurn)
-
-		value = 0
-		done = 0
-
-		if newState.isEndGame:
-			value = newState.value[0]
-			done = 1
-
-		return (newState, value, done) 
-
-	def render(self, logger):
-		for r in range(5):
-			logger.info([self.pieces[str(x)] for x in self.board[5*r : (5*r + 5)]])
-		logger.info('--------------')
-
-
-
+    def __init__(self, board, playerTurn):
+        self.board = board
+        self.pieces = {'1': 'X', '0': '-', '-1': 'O'}
+        self.winners = [
+            {'points': 1, 'tiles': [
+                [0, 1, 5, 6]
+                , [1, 2, 6, 7]
+                , [2, 3, 7, 8]
+                , [3, 4, 8, 9]
+                , [5, 6, 10, 11]
+                , [6, 7, 11, 12]
+                , [7, 8, 12, 13]
+                , [8, 9, 13, 14]
+                , [10, 11, 15, 16]
+                , [11, 12, 16, 17]
+                , [12, 13, 17, 18]
+                , [13, 14, 18, 19]
+                , [15, 16, 20, 21]
+                , [16, 17, 21, 22]
+                , [17, 18, 22, 23]
+                , [18, 19, 23, 24]
+            ]},
+            {'points': 2, 'tiles': [
+                [1, 5, 7, 11]
+                , [2, 6, 8, 12]
+                , [3, 7, 9, 13]
+                , [6, 10, 12, 16]
+                , [7, 11, 13, 17]
+                , [8, 12, 14, 18]
+                , [11, 15, 17, 21]
+                , [12, 16, 18, 22]
+                , [13, 17, 19, 23]
+            ]},
+            {'points': 4, 'tiles': [
+                [0, 2, 10, 12]
+                , [1, 3, 11, 13]
+                , [2, 4, 12, 14]
+                , [5, 7, 15, 17]
+                , [6, 8, 16, 18]
+                , [7, 9, 17, 19]
+                , [10, 12, 20, 22]
+                , [11, 13, 21, 23]
+                , [12, 14, 22, 24]
+            ]},
+            {'points': 5, 'tiles': [
+                [1, 10, 8, 17]
+                , [6, 15, 13, 22]
+                , [2, 11, 9, 18]
+                , [7, 16, 14, 23]
+                , [2, 5, 13, 16]
+                , [7, 10, 18, 21]
+                , [3, 6, 14, 17]
+                , [8, 11, 19, 22]
+            ]},
+            {'points': 8, 'tiles': [
+                [2, 10, 14, 22]
+            ]},
+            {'points': 9, 'tiles': [
+                [0, 3, 15, 18]
+                , [1, 4, 16, 19]
+                , [5, 8, 20, 23]
+                , [6, 9, 21, 24]
+            ]},
+            {'points': 10, 'tiles': [
+                [1, 9, 23, 15]
+                , [5, 3, 19, 21]
+            ]},
+            {'points': 16, 'tiles': [
+                [0, 4, 20, 24]
+            ]},
+        ]
+        self.playerTurn = playerTurn
+        self.binary = self._binary()
+        self.id = self._convertStateToId()
+        self.allowedActions = self._allowedActions()
+        self.isEndGame = self._checkForEndGame()
+        self.value = self._getValue()
+        self.score = self._getScore()
+
+    def _allowedActions(self):
+        return np.where(self.board == 0)[0]
+
+    def _binary(self):
+
+        currentplayer_position = np.zeros(len(self.board), dtype=np.int)
+        currentplayer_position[self.board == self.playerTurn] = 1
+
+        other_position = np.zeros(len(self.board), dtype=np.int)
+        other_position[self.board == -self.playerTurn] = 1
+
+        position = np.append(currentplayer_position, other_position)
+
+        return (position)
+
+    def _convertStateToId(self):
+        player1_position = np.zeros(len(self.board), dtype=np.int)
+        player1_position[self.board == 1] = 1
+
+        other_position = np.zeros(len(self.board), dtype=np.int)
+        other_position[self.board == -1] = 1
+
+        position = np.append(player1_position, other_position)
+
+        id = ''.join(map(str, position))
+
+        return id
+
+    def _checkForEndGame(self):
+        if np.count_nonzero(self.board) == 24:
+            return 1
+        return 0
+
+    def _getValue(self):
+        currentPlayerPoints = 0
+        for squareType in self.winners:
+            points = squareType['points']
+            for tiles in squareType['tiles']:
+                checkFlag = 0
+                tilenum = 0
+                while tilenum < 4 and checkFlag == 0:
+                    if self.board[tiles[tilenum]] != self.playerTurn:
+                        checkFlag = 1
+                    tilenum = tilenum + 1
+                if checkFlag == 0:
+                    currentPlayerPoints = currentPlayerPoints + points
+
+        opponentPlayerPoints = 0
+        for squareType in self.winners:
+            points = squareType['points']
+            for tiles in squareType['tiles']:
+                checkFlag = 0
+                tilenum = 0
+                while tilenum < 4 and checkFlag == 0:
+                    if self.board[tiles[tilenum]] != -self.playerTurn:
+                        checkFlag = 1
+                    tilenum = tilenum + 1
+                if checkFlag == 0:
+                    opponentPlayerPoints = opponentPlayerPoints + points
+
+        if currentPlayerPoints > opponentPlayerPoints:
+            return (1, currentPlayerPoints, opponentPlayerPoints)
+        elif currentPlayerPoints < opponentPlayerPoints:
+            return (-1, currentPlayerPoints, opponentPlayerPoints)
+        else:
+            return (0, currentPlayerPoints, opponentPlayerPoints)
+
+    def _getScore(self):
+        tmp = self.value
+        return (tmp[1], tmp[2])
+
+    def takeAction(self, action):
+        newBoard = np.array(self.board)
+        newBoard[action] = self.playerTurn
+        newState = GameState(newBoard, -self.playerTurn)
+
+        value = 0
+        done = 0
+
+        if newState.isEndGame:
+            value = newState.value[0]
+            done = 1
+
+        return (newState, value, done)
+
+    def render(self, logger):
+        for r in range(5):
+            logger.info([self.pieces[str(x)] for x in self.board[5 * r: (5 * r + 5)]])
+        logger.info('--------------')
diff --git a/initialise.py b/initialise.py
index 88126380..28feb7e1 100644
--- a/initialise.py
+++ b/initialise.py
@@ -1,3 +1,3 @@
 INITIAL_RUN_NUMBER = None
 INITIAL_MODEL_VERSION = None
-INITIAL_MEMORY_VERSION =  None
+INITIAL_MEMORY_VERSION = None
diff --git a/loggers.py b/loggers.py
index 901cc3d8..18f7c481 100644
--- a/loggers.py
+++ b/loggers.py
@@ -1,4 +1,3 @@
-
 from utils import setup_logger
 from settings import run_folder
 
@@ -6,12 +5,11 @@
 ### WARNING: the mcts log file gets big quite quickly
 
 LOGGER_DISABLED = {
-'main':False
-, 'memory':False
-, 'tourney':False
-, 'mcts':False
-, 'model': False}
-
+    'main': False
+    , 'memory': False
+    , 'tourney': False
+    , 'mcts': False
+    , 'model': False}
 
 logger_mcts = setup_logger('logger_mcts', run_folder + 'logs/logger_mcts.log')
 logger_mcts.disabled = LOGGER_DISABLED['mcts']
@@ -27,4 +25,3 @@
 
 logger_model = setup_logger('logger_model', run_folder + 'logs/logger_model.log')
 logger_model.disabled = LOGGER_DISABLED['model']
- 
\ No newline at end of file
diff --git a/loss.py b/loss.py
index 44035040..cef1f3c1 100644
--- a/loss.py
+++ b/loss.py
@@ -1,19 +1,16 @@
-
 import tensorflow as tf
 
-def softmax_cross_entropy_with_logits(y_true, y_pred):
-
-	p = y_pred
-	pi = y_true
 
-	zero = tf.zeros(shape = tf.shape(pi), dtype=tf.float32)
-	where = tf.equal(pi, zero)
-
-	negatives = tf.fill(tf.shape(pi), -100.0) 
-	p = tf.where(where, negatives, p)
+def softmax_cross_entropy_with_logits(y_true, y_pred):
+    p = y_pred
+    pi = y_true
 
-	loss = tf.nn.softmax_cross_entropy_with_logits(labels = pi, logits = p)
+    zero = tf.zeros(shape=tf.shape(pi), dtype=tf.float32)
+    where = tf.equal(pi, zero)
 
-	return loss
+    negatives = tf.fill(tf.shape(pi), -100.0)
+    p = tf.where(where, negatives, p)
 
+    loss = tf.nn.softmax_cross_entropy_with_logits(labels=pi, logits=p)
 
+    return loss
diff --git a/main.py b/main.py
index 3a498d7a..bed8c668 100644
--- a/main.py
+++ b/main.py
@@ -2,13 +2,13 @@
 # %matplotlib inline
 
 import numpy as np
+
 np.set_printoptions(suppress=True)
 
 from shutil import copyfile
 import random
 from importlib import reload
 
-
 from keras.utils import plot_model
 
 from game import Game, GameState
@@ -23,7 +23,6 @@
 import initialise
 import pickle
 
-
 lg.logger_main.info('=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*')
 lg.logger_main.info('=*=*=*=*=*=.      NEW LOG      =*=*=*=*=*')
 lg.logger_main.info('=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*')
@@ -32,7 +31,8 @@
 
 # If loading an existing neural network, copy the config file to root
 if initialise.INITIAL_RUN_NUMBER != None:
-    copyfile(run_archive_folder  + env.name + '/run' + str(initialise.INITIAL_RUN_NUMBER).zfill(4) + '/config.py', './config.py')
+    copyfile(run_archive_folder + env.name + '/run' + str(initialise.INITIAL_RUN_NUMBER).zfill(4) + '/config.py',
+             './config.py')
 
 import config
 
@@ -42,29 +42,33 @@
     memory = Memory(config.MEMORY_SIZE)
 else:
     print('LOADING MEMORY VERSION ' + str(initialise.INITIAL_MEMORY_VERSION) + '...')
-    memory = pickle.load( open( run_archive_folder + env.name + '/run' + str(initialise.INITIAL_RUN_NUMBER).zfill(4) + "/memory/memory" + str(initialise.INITIAL_MEMORY_VERSION).zfill(4) + ".p",   "rb" ) )
+    memory = pickle.load(open(
+        run_archive_folder + env.name + '/run' + str(initialise.INITIAL_RUN_NUMBER).zfill(4) + "/memory/memory" + str(
+            initialise.INITIAL_MEMORY_VERSION).zfill(4) + ".p", "rb"))
 
 ######## LOAD MODEL IF NECESSARY ########
 
 # create an untrained neural network objects from the config file
-current_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, (2,) + env.grid_shape,   env.action_size, config.HIDDEN_CNN_LAYERS)
-best_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, (2,) +  env.grid_shape,   env.action_size, config.HIDDEN_CNN_LAYERS)
+current_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, (2,) + env.grid_shape, env.action_size,
+                          config.HIDDEN_CNN_LAYERS)
+best_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, (2,) + env.grid_shape, env.action_size,
+                       config.HIDDEN_CNN_LAYERS)
 
-#If loading an existing neural netwrok, set the weights from that model
+# If loading an existing neural network, set the weights from that model
 if initialise.INITIAL_MODEL_VERSION != None:
-    best_player_version  = initialise.INITIAL_MODEL_VERSION
+    best_player_version = initialise.INITIAL_MODEL_VERSION
     print('LOADING MODEL VERSION ' + str(initialise.INITIAL_MODEL_VERSION) + '...')
     m_tmp = best_NN.read(env.name, initialise.INITIAL_RUN_NUMBER, best_player_version)
     current_NN.model.set_weights(m_tmp.get_weights())
     best_NN.model.set_weights(m_tmp.get_weights())
-#otherwise just ensure the weights on the two players are the same
+# otherwise just ensure the weights on the two players are the same
 else:
     best_player_version = 0
     best_NN.model.set_weights(current_NN.model.get_weights())
 
-#copy the config file to the run folder
+# copy the config file to the run folder
 copyfile('./config.py', run_folder + 'config.py')
-plot_model(current_NN.model, to_file=run_folder + 'models/model.png', show_shapes = True)
+plot_model(current_NN.model, to_file=run_folder + 'models/model.png', show_shapes=True)
 
 print('\n')
 
@@ -72,7 +76,7 @@
 
 current_player = Agent('current_player', env.state_size, env.action_size, config.MCTS_SIMS, config.CPUCT, current_NN)
 best_player = Agent('best_player', env.state_size, env.action_size, config.MCTS_SIMS, config.CPUCT, best_NN)
-#user_player = User('player1', env.state_size, env.action_size)
+# user_player = User('player1', env.state_size, env.action_size)
 iteration = 0
 
 while 1:
@@ -80,19 +84,20 @@
     iteration += 1
     reload(lg)
     reload(config)
-    
+
     print('ITERATION NUMBER ' + str(iteration))
-    
+
     lg.logger_main.info('BEST PLAYER VERSION: %d', best_player_version)
     print('BEST PLAYER VERSION ' + str(best_player_version))
 
     ######## SELF PLAY ########
     print('SELF PLAYING ' + str(config.EPISODES) + ' EPISODES...')
-    _, memory, _, _ = playMatches(best_player, best_player, config.EPISODES, lg.logger_main, turns_until_tau0 = config.TURNS_UNTIL_TAU0, memory = memory)
+    _, memory, _, _ = playMatches(best_player, best_player, config.EPISODES, lg.logger_main,
+                                  turns_until_tau0=config.TURNS_UNTIL_TAU0, memory=memory)
     print('\n')
-    
+
     memory.clear_stmemory()
-    
+
     if len(memory.ltmemory) >= config.MEMORY_SIZE:
 
         ######## RETRAINING ########
@@ -101,14 +106,14 @@
         print('')
 
         if iteration % 5 == 0:
-            pickle.dump( memory, open( run_folder + "memory/memory" + str(iteration).zfill(4) + ".p", "wb" ) )
+            pickle.dump(memory, open(run_folder + "memory/memory" + str(iteration).zfill(4) + ".p", "wb"))
 
         lg.logger_memory.info('====================')
         lg.logger_memory.info('NEW MEMORIES')
         lg.logger_memory.info('====================')
-        
+
         memory_samp = random.sample(memory.ltmemory, min(1000, len(memory.ltmemory)))
-        
+
         for s in memory_samp:
             current_value, current_probs, _ = current_player.get_preds(s['state'])
             best_value, best_probs, _ = best_player.get_preds(s['state'])
@@ -116,22 +121,23 @@
             lg.logger_memory.info('MCTS VALUE FOR %s: %f', s['playerTurn'], s['value'])
             lg.logger_memory.info('CUR PRED VALUE FOR %s: %f', s['playerTurn'], current_value)
             lg.logger_memory.info('BES PRED VALUE FOR %s: %f', s['playerTurn'], best_value)
-            lg.logger_memory.info('THE MCTS ACTION VALUES: %s', ['%.2f' % elem for elem in s['AV']]  )
-            lg.logger_memory.info('CUR PRED ACTION VALUES: %s', ['%.2f' % elem for elem in  current_probs])
-            lg.logger_memory.info('BES PRED ACTION VALUES: %s', ['%.2f' % elem for elem in  best_probs])
+            lg.logger_memory.info('THE MCTS ACTION VALUES: %s', ['%.2f' % elem for elem in s['AV']])
+            lg.logger_memory.info('CUR PRED ACTION VALUES: %s', ['%.2f' % elem for elem in current_probs])
+            lg.logger_memory.info('BES PRED ACTION VALUES: %s', ['%.2f' % elem for elem in best_probs])
             lg.logger_memory.info('ID: %s', s['state'].id)
             lg.logger_memory.info('INPUT TO MODEL: %s', current_player.model.convertToModelInput(s['state']))
 
             s['state'].render(lg.logger_memory)
-            
+
         ######## TOURNAMENT ########
         print('TOURNAMENT...')
-        scores, _, points, sp_scores = playMatches(best_player, current_player, config.EVAL_EPISODES, lg.logger_tourney, turns_until_tau0 = 0, memory = None)
+        scores, _, points, sp_scores = playMatches(best_player, current_player, config.EVAL_EPISODES, lg.logger_tourney,
+                                                   turns_until_tau0=0, memory=None)
         print('\nSCORES')
         print(scores)
         print('\nSTARTING PLAYER / NON-STARTING PLAYER SCORES')
         print(sp_scores)
-        #print(points)
+        # print(points)
 
         print('\n\n')
 
diff --git a/memory.py b/memory.py
index 856e0f53..5905f704 100644
--- a/memory.py
+++ b/memory.py
@@ -3,27 +3,27 @@
 
 import config
 
+
 class Memory:
-	def __init__(self, MEMORY_SIZE):
-		self.MEMORY_SIZE = config.MEMORY_SIZE
-		self.ltmemory = deque(maxlen=config.MEMORY_SIZE)
-		self.stmemory = deque(maxlen=config.MEMORY_SIZE)
+    def __init__(self, MEMORY_SIZE):
+        self.MEMORY_SIZE = config.MEMORY_SIZE
+        self.ltmemory = deque(maxlen=config.MEMORY_SIZE)
+        self.stmemory = deque(maxlen=config.MEMORY_SIZE)
 
-	def commit_stmemory(self, identities, state, actionValues):
-		for r in identities(state, actionValues):
-			self.stmemory.append({
-				'board': r[0].board
-				, 'state': r[0]
-				, 'id': r[0].id
-				, 'AV': r[1]
-				, 'playerTurn': r[0].playerTurn
-				})
+    def commit_stmemory(self, identities, state, actionValues):
+        for r in identities(state, actionValues):
+            self.stmemory.append({
+                'board': r[0].board
+                , 'state': r[0]
+                , 'id': r[0].id
+                , 'AV': r[1]
+                , 'playerTurn': r[0].playerTurn
+            })
 
-	def commit_ltmemory(self):
-		for i in self.stmemory:
-			self.ltmemory.append(i)
-		self.clear_stmemory()
+    def commit_ltmemory(self):
+        for i in self.stmemory:
+            self.ltmemory.append(i)
+        self.clear_stmemory()
 
-	def clear_stmemory(self):
-		self.stmemory = deque(maxlen=config.MEMORY_SIZE)
-		
\ No newline at end of file
+    def clear_stmemory(self):
+        self.stmemory = deque(maxlen=config.MEMORY_SIZE)
diff --git a/model.py b/model.py
index a98b27e8..d98bcd79 100644
--- a/model.py
+++ b/model.py
@@ -19,227 +19,229 @@
 
 from settings import run_folder, run_archive_folder
 
+
 class Gen_Model():
-	def __init__(self, reg_const, learning_rate, input_dim, output_dim):
-		self.reg_const = reg_const
-		self.learning_rate = learning_rate
-		self.input_dim = input_dim
-		self.output_dim = output_dim
-
-	def predict(self, x):
-		return self.model.predict(x)
-
-	def fit(self, states, targets, epochs, verbose, validation_split, batch_size):
-		return self.model.fit(states, targets, epochs=epochs, verbose=verbose, validation_split = validation_split, batch_size = batch_size)
-
-	def write(self, game, version):
-		self.model.save(run_folder + 'models/version' + "{0:0>4}".format(version) + '.h5')
-
-	def read(self, game, run_number, version):
-		return load_model( run_archive_folder + game + '/run' + str(run_number).zfill(4) + "/models/version" + "{0:0>4}".format(version) + '.h5', custom_objects={'softmax_cross_entropy_with_logits': softmax_cross_entropy_with_logits})
-
-	def printWeightAverages(self):
-		layers = self.model.layers
-		for i, l in enumerate(layers):
-			try:
-				x = l.get_weights()[0]
-				lg.logger_model.info('WEIGHT LAYER %d: ABSAV = %f, SD =%f, ABSMAX =%f, ABSMIN =%f', i, np.mean(np.abs(x)), np.std(x), np.max(np.abs(x)), np.min(np.abs(x)))
-			except:
-				pass
-		lg.logger_model.info('------------------')
-		for i, l in enumerate(layers):
-			try:
-				x = l.get_weights()[1]
-				lg.logger_model.info('BIAS LAYER %d: ABSAV = %f, SD =%f, ABSMAX =%f, ABSMIN =%f', i, np.mean(np.abs(x)), np.std(x), np.max(np.abs(x)), np.min(np.abs(x)))
-			except:
-				pass
-		lg.logger_model.info('******************')
-
-
-	def viewLayers(self):
-		layers = self.model.layers
-		for i, l in enumerate(layers):
-			x = l.get_weights()
-			print('LAYER ' + str(i))
-
-			try:
-				weights = x[0]
-				s = weights.shape
-
-				fig = plt.figure(figsize=(s[2], s[3]))  # width, height in inches
-				channel = 0
-				filter = 0
-				for i in range(s[2] * s[3]):
-
-					sub = fig.add_subplot(s[3], s[2], i + 1)
-					sub.imshow(weights[:,:,channel,filter], cmap='coolwarm', clim=(-1, 1),aspect="auto")
-					channel = (channel + 1) % s[2]
-					filter = (filter + 1) % s[3]
-
-			except:
-	
-				try:
-					fig = plt.figure(figsize=(3, len(x)))  # width, height in inches
-					for i in range(len(x)):
-						sub = fig.add_subplot(len(x), 1, i + 1)
-						if i == 0:
-							clim = (0,2)
-						else:
-							clim = (0, 2)
-						sub.imshow([x[i]], cmap='coolwarm', clim=clim,aspect="auto")
-						
-					plt.show()
-
-				except:
-					try:
-						fig = plt.figure(figsize=(3, 3))  # width, height in inches
-						sub = fig.add_subplot(1, 1, 1)
-						sub.imshow(x[0], cmap='coolwarm', clim=(-1, 1),aspect="auto")
-						
-						plt.show()
-
-					except:
-						pass
-
-			plt.show()
-				
-		lg.logger_model.info('------------------')
+    def __init__(self, reg_const, learning_rate, input_dim, output_dim):
+        self.reg_const = reg_const
+        self.learning_rate = learning_rate
+        self.input_dim = input_dim
+        self.output_dim = output_dim
+
+    def predict(self, x):
+        return self.model.predict(x)
+
+    def fit(self, states, targets, epochs, verbose, validation_split, batch_size):
+        return self.model.fit(states, targets, epochs=epochs, verbose=verbose, validation_split=validation_split,
+                              batch_size=batch_size)
+
+    def write(self, game, version):
+        self.model.save(run_folder + 'models/version' + "{0:0>4}".format(version) + '.h5')
+
+    def read(self, game, run_number, version):
+        return load_model(
+            run_archive_folder + game + '/run' + str(run_number).zfill(4) + "/models/version" + "{0:0>4}".format(
+                version) + '.h5',
+            custom_objects={'softmax_cross_entropy_with_logits': softmax_cross_entropy_with_logits})
+
+    def printWeightAverages(self):
+        layers = self.model.layers
+        for i, l in enumerate(layers):
+            try:
+                x = l.get_weights()[0]
+                lg.logger_model.info('WEIGHT LAYER %d: ABSAV = %f, SD =%f, ABSMAX =%f, ABSMIN =%f', i,
+                                     np.mean(np.abs(x)), np.std(x), np.max(np.abs(x)), np.min(np.abs(x)))
+            except:
+                pass
+        lg.logger_model.info('------------------')
+        for i, l in enumerate(layers):
+            try:
+                x = l.get_weights()[1]
+                lg.logger_model.info('BIAS LAYER %d: ABSAV = %f, SD =%f, ABSMAX =%f, ABSMIN =%f', i, np.mean(np.abs(x)),
+                                     np.std(x), np.max(np.abs(x)), np.min(np.abs(x)))
+            except:
+                pass
+        lg.logger_model.info('******************')
+
+    def viewLayers(self):
+        layers = self.model.layers
+        for i, l in enumerate(layers):
+            x = l.get_weights()
+            print('LAYER ' + str(i))
+
+            try:
+                weights = x[0]
+                s = weights.shape
+
+                fig = plt.figure(figsize=(s[2], s[3]))  # width, height in inches
+                channel = 0
+                filter = 0
+                for i in range(s[2] * s[3]):
+                    sub = fig.add_subplot(s[3], s[2], i + 1)
+                    sub.imshow(weights[:, :, channel, filter], cmap='coolwarm', clim=(-1, 1), aspect="auto")
+                    channel = (channel + 1) % s[2]
+                    filter = (filter + 1) % s[3]
+
+            except:
+
+                try:
+                    fig = plt.figure(figsize=(3, len(x)))  # width, height in inches
+                    for i in range(len(x)):
+                        sub = fig.add_subplot(len(x), 1, i + 1)
+                        if i == 0:
+                            clim = (0, 2)
+                        else:
+                            clim = (0, 2)
+                        sub.imshow([x[i]], cmap='coolwarm', clim=clim, aspect="auto")
+
+                    plt.show()
+
+                except:
+                    try:
+                        fig = plt.figure(figsize=(3, 3))  # width, height in inches
+                        sub = fig.add_subplot(1, 1, 1)
+                        sub.imshow(x[0], cmap='coolwarm', clim=(-1, 1), aspect="auto")
+
+                        plt.show()
+
+                    except:
+                        pass
+
+            plt.show()
+
+        lg.logger_model.info('------------------')
 
 
 class Residual_CNN(Gen_Model):
-	def __init__(self, reg_const, learning_rate, input_dim,  output_dim, hidden_layers):
-		Gen_Model.__init__(self, reg_const, learning_rate, input_dim, output_dim)
-		self.hidden_layers = hidden_layers
-		self.num_layers = len(hidden_layers)
-		self.model = self._build_model()
-
-	def residual_layer(self, input_block, filters, kernel_size):
-
-		x = self.conv_layer(input_block, filters, kernel_size)
-
-		x = Conv2D(
-		filters = filters
-		, kernel_size = kernel_size
-		, data_format="channels_first"
-		, padding = 'same'
-		, use_bias=False
-		, activation='linear'
-		, kernel_regularizer = regularizers.l2(self.reg_const)
-		)(x)
-
-		x = BatchNormalization(axis=1)(x)
-
-		x = add([input_block, x])
-
-		x = LeakyReLU()(x)
+    def __init__(self, reg_const, learning_rate, input_dim, output_dim, hidden_layers):
+        Gen_Model.__init__(self, reg_const, learning_rate, input_dim, output_dim)
+        self.hidden_layers = hidden_layers
+        self.num_layers = len(hidden_layers)
+        self.model = self._build_model()
+
+    def residual_layer(self, input_block, filters, kernel_size):
+
+        x = self.conv_layer(input_block, filters, kernel_size)
 
-		return (x)
+        x = Conv2D(
+            filters=filters
+            , kernel_size=kernel_size
+            , data_format="channels_first"
+            , padding='same'
+            , use_bias=False
+            , activation='linear'
+            , kernel_regularizer=regularizers.l2(self.reg_const)
+        )(x)
 
-	def conv_layer(self, x, filters, kernel_size):
+        x = BatchNormalization(axis=1)(x)
 
-		x = Conv2D(
-		filters = filters
-		, kernel_size = kernel_size
-		, data_format="channels_first"
-		, padding = 'same'
-		, use_bias=False
-		, activation='linear'
-		, kernel_regularizer = regularizers.l2(self.reg_const)
-		)(x)
+        x = add([input_block, x])
 
-		x = BatchNormalization(axis=1)(x)
-		x = LeakyReLU()(x)
+        x = LeakyReLU()(x)
 
-		return (x)
+        return (x)
 
-	def value_head(self, x):
+    def conv_layer(self, x, filters, kernel_size):
 
-		x = Conv2D(
-		filters = 1
-		, kernel_size = (1,1)
-		, data_format="channels_first"
-		, padding = 'same'
-		, use_bias=False
-		, activation='linear'
-		, kernel_regularizer = regularizers.l2(self.reg_const)
-		)(x)
+        x = Conv2D(
+            filters=filters
+            , kernel_size=kernel_size
+            , data_format="channels_first"
+            , padding='same'
+            , use_bias=False
+            , activation='linear'
+            , kernel_regularizer=regularizers.l2(self.reg_const)
+        )(x)
 
+        x = BatchNormalization(axis=1)(x)
+        x = LeakyReLU()(x)
 
-		x = BatchNormalization(axis=1)(x)
-		x = LeakyReLU()(x)
+        return (x)
 
-		x = Flatten()(x)
+    def value_head(self, x):
 
-		x = Dense(
-			20
-			, use_bias=False
-			, activation='linear'
-			, kernel_regularizer=regularizers.l2(self.reg_const)
-			)(x)
+        x = Conv2D(
+            filters=1
+            , kernel_size=(1, 1)
+            , data_format="channels_first"
+            , padding='same'
+            , use_bias=False
+            , activation='linear'
+            , kernel_regularizer=regularizers.l2(self.reg_const)
+        )(x)
 
-		x = LeakyReLU()(x)
+        x = BatchNormalization(axis=1)(x)
+        x = LeakyReLU()(x)
 
-		x = Dense(
-			1
-			, use_bias=False
-			, activation='tanh'
-			, kernel_regularizer=regularizers.l2(self.reg_const)
-			, name = 'value_head'
-			)(x)
+        x = Flatten()(x)
 
+        x = Dense(
+            20
+            , use_bias=False
+            , activation='linear'
+            , kernel_regularizer=regularizers.l2(self.reg_const)
+        )(x)
 
+        x = LeakyReLU()(x)
 
-		return (x)
+        x = Dense(
+            1
+            , use_bias=False
+            , activation='tanh'
+            , kernel_regularizer=regularizers.l2(self.reg_const)
+            , name='value_head'
+        )(x)
 
-	def policy_head(self, x):
+        return (x)
 
-		x = Conv2D(
-		filters = 2
-		, kernel_size = (1,1)
-		, data_format="channels_first"
-		, padding = 'same'
-		, use_bias=False
-		, activation='linear'
-		, kernel_regularizer = regularizers.l2(self.reg_const)
-		)(x)
+    def policy_head(self, x):
 
-		x = BatchNormalization(axis=1)(x)
-		x = LeakyReLU()(x)
+        x = Conv2D(
+            filters=2
+            , kernel_size=(1, 1)
+            , data_format="channels_first"
+            , padding='same'
+            , use_bias=False
+            , activation='linear'
+            , kernel_regularizer=regularizers.l2(self.reg_const)
+        )(x)
 
-		x = Flatten()(x)
+        x = BatchNormalization(axis=1)(x)
+        x = LeakyReLU()(x)
 
-		x = Dense(
-			self.output_dim
-			, use_bias=False
-			, activation='linear'
-			, kernel_regularizer=regularizers.l2(self.reg_const)
-			, name = 'policy_head'
-			)(x)
+        x = Flatten()(x)
 
-		return (x)
+        x = Dense(
+            self.output_dim
+            , use_bias=False
+            , activation='linear'
+            , kernel_regularizer=regularizers.l2(self.reg_const)
+            , name='policy_head'
+        )(x)
 
-	def _build_model(self):
+        return (x)
 
-		main_input = Input(shape = self.input_dim, name = 'main_input')
+    def _build_model(self):
 
-		x = self.conv_layer(main_input, self.hidden_layers[0]['filters'], self.hidden_layers[0]['kernel_size'])
+        main_input = Input(shape=self.input_dim, name='main_input')
 
-		if len(self.hidden_layers) > 1:
-			for h in self.hidden_layers[1:]:
-				x = self.residual_layer(x, h['filters'], h['kernel_size'])
+        x = self.conv_layer(main_input, self.hidden_layers[0]['filters'], self.hidden_layers[0]['kernel_size'])
 
-		vh = self.value_head(x)
-		ph = self.policy_head(x)
+        if len(self.hidden_layers) > 1:
+            for h in self.hidden_layers[1:]:
+                x = self.residual_layer(x, h['filters'], h['kernel_size'])
 
-		model = Model(inputs=[main_input], outputs=[vh, ph])
-		model.compile(loss={'value_head': 'mean_squared_error', 'policy_head': softmax_cross_entropy_with_logits},
-			optimizer=SGD(lr=self.learning_rate, momentum = config.MOMENTUM),	
-			loss_weights={'value_head': 0.5, 'policy_head': 0.5}	
-			)
+        vh = self.value_head(x)
+        ph = self.policy_head(x)
 
-		return model
+        model = Model(inputs=[main_input], outputs=[vh, ph])
+        model.compile(loss={'value_head': 'mean_squared_error', 'policy_head': softmax_cross_entropy_with_logits},
+                      optimizer=SGD(lr=self.learning_rate, momentum=config.MOMENTUM),
+                      loss_weights={'value_head': 0.5, 'policy_head': 0.5}
+                      )
 
-	def convertToModelInput(self, state):
-		inputToModel =  state.binary #np.append(state.binary, [(state.playerTurn + 1)/2] * self.input_dim[1] * self.input_dim[2])
-		inputToModel = np.reshape(inputToModel, self.input_dim) 
-		return (inputToModel)
+        return model
+
+    def convertToModelInput(self, state):
+        inputToModel = state.binary  # np.append(state.binary, [(state.playerTurn + 1)/2] * self.input_dim[1] * self.input_dim[2])
+        inputToModel = np.reshape(inputToModel, self.input_dim)
+        return (inputToModel)
diff --git a/run.ipynb b/run.ipynb
index d8f42f46..68e47482 100644
--- a/run.ipynb
+++ b/run.ipynb
@@ -90,7 +90,6 @@
     {
      "ename": "KeyboardInterrupt",
      "evalue": "",
-     "output_type": "error",
      "traceback": [
       "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
       "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
@@ -102,7 +101,8 @@
       "\u001b[0;32m~/Git/ADSP/Public/AppliedDataSciencePartners/DeepReinforcementLearning/app/game.py\u001b[0m in \u001b[0;36mtakeAction\u001b[0;34m(self, action)\u001b[0m\n\u001b[1;32m    209\u001b[0m                 \u001b[0mnewBoard\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0maction\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mplayerTurn\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    210\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 211\u001b[0;31m                 \u001b[0mnewState\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mGameState\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnewBoard\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m-\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mplayerTurn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    212\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    213\u001b[0m                 \u001b[0mvalue\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
       "\u001b[0;32m~/Git/ADSP/Public/AppliedDataSciencePartners/DeepReinforcementLearning/app/game.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, board, playerTurn)\u001b[0m\n\u001b[1;32m    121\u001b[0m                         \u001b[0;34m[\u001b[0m\u001b[0;36m20\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m26\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m32\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m38\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    122\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 123\u001b[0;31m                         \u001b[0;34m[\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m11\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m19\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m27\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    124\u001b[0m                         \u001b[0;34m[\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m10\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m18\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m26\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    125\u001b[0m                         \u001b[0;34m[\u001b[0m\u001b[0;36m10\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m18\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m26\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m34\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
       "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
-     ]
+     ],
+     "output_type": "error"
     }
    ],
    "source": [
@@ -258,7 +258,6 @@
     {
      "ename": "KeyboardInterrupt",
      "evalue": "",
-     "output_type": "error",
      "traceback": [
       "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
       "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
@@ -279,7 +278,8 @@
       "\u001b[0;32m~/.virtualenvs/py3_deepreinforcement/lib/python3.6/site-packages/tensorflow/python/client/session.py\u001b[0m in \u001b[0;36m_do_call\u001b[0;34m(self, fn, *args)\u001b[0m\n\u001b[1;32m   1359\u001b[0m   \u001b[0;32mdef\u001b[0m \u001b[0m_do_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfn\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1360\u001b[0m     \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1361\u001b[0;31m       \u001b[0;32mreturn\u001b[0m \u001b[0mfn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1362\u001b[0m     \u001b[0;32mexcept\u001b[0m \u001b[0merrors\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mOpError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1363\u001b[0m       \u001b[0mmessage\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcompat\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mas_text\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0me\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmessage\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
       "\u001b[0;32m~/.virtualenvs/py3_deepreinforcement/lib/python3.6/site-packages/tensorflow/python/client/session.py\u001b[0m in \u001b[0;36m_run_fn\u001b[0;34m(session, feed_dict, fetch_list, target_list, options, run_metadata)\u001b[0m\n\u001b[1;32m   1338\u001b[0m         \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1339\u001b[0m           return tf_session.TF_Run(session, options, feed_dict, fetch_list,\n\u001b[0;32m-> 1340\u001b[0;31m                                    target_list, status, run_metadata)\n\u001b[0m\u001b[1;32m   1341\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1342\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0m_prun_fn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msession\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhandle\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfeed_dict\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfetch_list\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
       "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
-     ]
+     ],
+     "output_type": "error"
     }
    ],
    "source": [
@@ -306,7 +306,6 @@
     {
      "ename": "ValueError",
      "evalue": "cannot reshape array of size 84 into shape (2,5,5)",
-     "output_type": "error",
      "traceback": [
       "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
       "\u001b[0;31mValueError\u001b[0m                                Traceback (most recent call last)",
@@ -316,7 +315,8 @@
       "\u001b[0;32m~/.virtualenvs/py3_deepreinforcement/lib/python3.6/site-packages/numpy/core/fromnumeric.py\u001b[0m in \u001b[0;36mreshape\u001b[0;34m(a, newshape, order)\u001b[0m\n\u001b[1;32m    255\u001b[0m            [5, 6]])\n\u001b[1;32m    256\u001b[0m     \"\"\"\n\u001b[0;32m--> 257\u001b[0;31m     \u001b[0;32mreturn\u001b[0m \u001b[0m_wrapfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'reshape'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnewshape\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0morder\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0morder\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    258\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    259\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
       "\u001b[0;32m~/.virtualenvs/py3_deepreinforcement/lib/python3.6/site-packages/numpy/core/fromnumeric.py\u001b[0m in \u001b[0;36m_wrapfunc\u001b[0;34m(obj, method, *args, **kwds)\u001b[0m\n\u001b[1;32m     50\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_wrapfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mobj\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmethod\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     51\u001b[0m     \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 52\u001b[0;31m         \u001b[0;32mreturn\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mobj\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmethod\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     53\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     54\u001b[0m     \u001b[0;31m# An AttributeError occurs if the object does not have\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
       "\u001b[0;31mValueError\u001b[0m: cannot reshape array of size 84 into shape (2,5,5)"
-     ]
+     ],
+     "output_type": "error"
     }
    ],
    "source": [
@@ -372,7 +372,6 @@
     {
      "ename": "KeyboardInterrupt",
      "evalue": "",
-     "output_type": "error",
      "traceback": [
       "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
       "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
@@ -403,7 +402,8 @@
       "\u001b[0;32m~/.virtualenvs/py3_deepreinforcement/lib/python3.6/site-packages/matplotlib/ticker.py\u001b[0m in \u001b[0;36mset_locs\u001b[0;34m(self, locs)\u001b[0m\n\u001b[1;32m    664\u001b[0m                 \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_compute_offset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    665\u001b[0m             \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_set_orderOfMagnitude\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0md\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 666\u001b[0;31m             \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_set_format\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvmin\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvmax\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    667\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    668\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0m_compute_offset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
       "\u001b[0;32m~/.virtualenvs/py3_deepreinforcement/lib/python3.6/site-packages/matplotlib/ticker.py\u001b[0m in \u001b[0;36m_set_format\u001b[0;34m(self, vmin, vmax)\u001b[0m\n\u001b[1;32m    741\u001b[0m         \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    742\u001b[0m             \u001b[0m_locs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlocs\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 743\u001b[0;31m         \u001b[0mlocs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0masarray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m_locs\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0moffset\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m/\u001b[0m \u001b[0;36m10.\u001b[0m \u001b[0;34m**\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0morderOfMagnitude\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    744\u001b[0m         \u001b[0mloc_range\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mptp\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlocs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    745\u001b[0m         \u001b[0;31m# Curvilinear coordinates can yield two identical points.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
       "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
-     ]
+     ],
+     "output_type": "error"
     }
    ],
    "source": [
diff --git a/run/config.py b/run/config.py
index dd2c4c85..17075c0b 100644
--- a/run/config.py
+++ b/run/config.py
@@ -2,12 +2,11 @@
 EPISODES = 30
 MCTS_SIMS = 50
 MEMORY_SIZE = 30000
-TURNS_UNTIL_TAU0 = 10 # turn on which it starts playing deterministically
+TURNS_UNTIL_TAU0 = 10  # turn on which it starts playing deterministically
 CPUCT = 1
 EPSILON = 0.2
 ALPHA = 0.8
 
-
 #### RETRAINING
 BATCH_SIZE = 256
 EPOCHS = 1
@@ -17,14 +16,14 @@
 TRAINING_LOOPS = 10
 
 HIDDEN_CNN_LAYERS = [
-	{'filters':75, 'kernel_size': (4,4)}
-	 , {'filters':75, 'kernel_size': (4,4)}
-	 , {'filters':75, 'kernel_size': (4,4)}
-	 , {'filters':75, 'kernel_size': (4,4)}
-	 , {'filters':75, 'kernel_size': (4,4)}
-	 , {'filters':75, 'kernel_size': (4,4)}
-	]
+    {'filters': 75, 'kernel_size': (4, 4)}
+    , {'filters': 75, 'kernel_size': (4, 4)}
+    , {'filters': 75, 'kernel_size': (4, 4)}
+    , {'filters': 75, 'kernel_size': (4, 4)}
+    , {'filters': 75, 'kernel_size': (4, 4)}
+    , {'filters': 75, 'kernel_size': (4, 4)}
+]
 
 #### EVALUATION
 EVAL_EPISODES = 20
-SCORING_THRESHOLD = 1.3
\ No newline at end of file
+SCORING_THRESHOLD = 1.3
diff --git a/settings.py b/settings.py
index 73c70aa3..1a94a43d 100644
--- a/settings.py
+++ b/settings.py
@@ -1,2 +1,2 @@
 run_folder = './run/'
-run_archive_folder = './run_archive/'
\ No newline at end of file
+run_archive_folder = './run_archive/'
diff --git a/utils.py b/utils.py
index 67c54919..737b040e 100644
--- a/utils.py
+++ b/utils.py
@@ -1,11 +1,10 @@
-
 import logging
 
-def setup_logger(name, log_file, level=logging.INFO):
 
+def setup_logger(name, log_file, level=logging.INFO):
     formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
 
-    handler = logging.FileHandler(log_file)        
+    handler = logging.FileHandler(log_file)
     handler.setFormatter(formatter)
 
     logger = logging.getLogger(name)