# funcs.py
import numpy as np
import random
import loggers as lg
from game import Game, GameState
from model import Residual_CNN
from agent import Agent, User
import config
def _build_player(name, version, env, run_version):
    """Create the participant called *name* for the requested model version.

    Args:
        name: Name given to the created player object.
        version: ``-1`` builds a ``User`` (presumably an interactive player;
            confirm in ``agent``). Any other value builds an ``Agent`` around
            a freshly constructed ``Residual_CNN``; only when ``version > 0``
            are saved parameters read and loaded into that network.
        env: Object providing ``state_size``, ``action_size``, ``input_shape``
            and ``name``.
        run_version: Run identifier passed to ``Residual_CNN.read``.

    Returns:
        A ``User`` or an ``Agent``.
    """
    if version == -1:
        return User(name, env.state_size, env.action_size)

    network = Residual_CNN(config.REG_CONST, env.input_shape, env.action_size, config.HIDDEN_CNN_LAYERS)
    if version > 0:
        # Version 0 (or any value below -1) keeps the network exactly as
        # constructed; only positive versions are restored from storage.
        saved_state = network.read(env.name, run_version, version)
        network.load_state_dict(saved_state)
    return Agent(name, env.state_size, env.action_size, config.MCTS_SIMS, config.CPUCT, network)


def playMatchesBetweenVersions(env, run_version, player1version, player2version, EPISODES, logger, turns_until_tau0, goes_first = 0):
    """Build two players from version numbers and play a series of matches.

    Args:
        env: Object providing ``state_size``, ``action_size``, ``input_shape``
            and ``name``; used only to construct the players.
        run_version: Run identifier used when reading saved network parameters.
        player1version: Version for ``'player1'``; ``-1`` selects a ``User``,
            a positive value loads that saved network version, anything else
            uses a newly constructed network.
        player2version: Same as ``player1version``, for ``'player2'``.
        EPISODES: Number of games to play.
        logger: Logger forwarded to ``playMatches``.
        turns_until_tau0: Forwarded to ``playMatches``.
        goes_first: Forwarded to ``playMatches`` (``0`` means a random choice
            each game).

    Returns:
        The ``(scores, memory, points, sp_scores)`` tuple produced by
        ``playMatches``; no memory object is supplied, so ``memory`` is
        ``None``.
    """
    player1 = _build_player('player1', player1version, env, run_version)
    player2 = _build_player('player2', player2version, env, run_version)

    scores, memory, points, sp_scores = playMatches(player1, player2, EPISODES, logger, turns_until_tau0, None, goes_first)
    return (scores, memory, points, sp_scores)
def playMatches(player1, player2, EPISODES, logger, turns_until_tau0, memory = None, goes_first = 0):
    """Play ``EPISODES`` games between two players on a fresh ``Game``.

    Args:
        player1: Player object exposing ``name``, an assignable ``mcts``
            attribute and ``act(state, tau)`` returning
            ``(action, pi, MCTS_value, NN_value)``.
        player2: Same interface as ``player1``.
        EPISODES: Number of games to play.
        logger: Logger receiving the per-episode and per-turn trace.
        turns_until_tau0: Turns numbered strictly below this value call
            ``act`` with ``tau=1``; all later turns use ``tau=0``.
        memory: Optional memory object. When given, every position is passed
            to ``commit_stmemory`` and, at the end of each game, the entries
            in ``stmemory`` receive their final value before
            ``commit_ltmemory`` is called.
        goes_first: ``0`` picks the seats at random each game; ``1`` puts
            ``player1`` in seat ``1``; any other value puts ``player2`` there.

    Returns:
        A tuple ``(scores, memory, points, sp_scores)`` where ``scores`` counts
        wins per player name plus ``"drawn"``, ``points`` collects the final
        ``state.score`` entries per player name, and ``sp_scores`` counts wins
        by seat: ``'sp'`` for seat ``1``, ``'nsp'`` for seat ``-1`` (presumably
        "starting player" / "non-starting player"; confirm against ``Game``).
    """
    env = Game()
    scores = {player1.name: 0, "drawn": 0, player2.name: 0}
    sp_scores = {'sp': 0, "drawn": 0, 'nsp': 0}
    points = {player1.name: [], player2.name: []}

    for e in range(EPISODES):
        logger.info('====================')
        logger.info('EPISODE %d OF %d', e+1, EPISODES)
        logger.info('====================')

        # Lightweight progress indicator on stdout.
        print(str(e+1) + ' ', end='')

        state = env.reset()
        done = 0
        turn = 0

        # Each game starts with a fresh search tree for both players.
        player1.mcts = None
        player2.mcts = None

        if goes_first == 0:
            player1Starts = random.randint(0, 1) * 2 - 1  # -1 or 1
        else:
            player1Starts = goes_first

        # Seat 1 is the one logged as "X"; seat -1 is the other side.
        if player1Starts == 1:
            seat_one, seat_two = player1, player2
        else:
            seat_one, seat_two = player2, player1
        players = {
            1: {"agent": seat_one, "name": seat_one.name},
            -1: {"agent": seat_two, "name": seat_two.name},
        }
        logger.info(seat_one.name + ' plays as X')
        logger.info('--------------')

        env.gameState.render(logger)

        while done == 0:
            turn = turn + 1
            mover = state.playerTurn

            #### Run the MCTS algo and return an action
            tau = 1 if turn < turns_until_tau0 else 0
            action, pi, MCTS_value, NN_value = players[mover]['agent'].act(state, tau)

            # Identity test: ``!=`` could be intercepted by a custom __eq__/__ne__.
            if memory is not None:
                #### Commit the move to memory
                memory.commit_stmemory(env.identities, state, pi)

            logger.info('action: %d', action)

            # Log pi one board row at a time; exact zeros are shown as '----'.
            row_count = env.grid_shape[0]
            row_width = env.grid_shape[1]
            for r in range(row_count):
                row = pi[row_width*r : (row_width*r + row_width)]
                logger.info(['----' if x == 0 else '{0:.2f}'.format(np.round(x, 2)) for x in row])

            piece = state.pieces[str(mover)]
            logger.info('MCTS perceived value for %s: %f', piece, np.round(MCTS_value, 2))
            logger.info('NN perceived value for %s: %f', piece, np.round(NN_value, 2))
            logger.info('====================')

            ### Do the action
            # The value is that of the new state from the POV of the new
            # playerTurn, i.e. -1 if the previous player played a winning move.
            state, value, done, _ = env.step(action)

            env.gameState.render(logger)

            if done == 1:
                to_move = state.playerTurn

                if memory is not None:
                    #### If the game is finished, assign the values correctly to the game moves
                    for move in memory.stmemory:
                        if move['playerTurn'] == to_move:
                            move['value'] = value
                        else:
                            move['value'] = -value
                    memory.commit_ltmemory()

                if value == 1 or value == -1:
                    # value is relative to the side to move in the final
                    # state: 1 means that side won, -1 means the other did.
                    winner_seat = to_move if value == 1 else -to_move
                    winner_name = players[winner_seat]['name']
                    logger.info('%s WINS!', winner_name)
                    scores[winner_name] = scores[winner_name] + 1
                    seat_key = 'sp' if winner_seat == 1 else 'nsp'
                    sp_scores[seat_key] = sp_scores[seat_key] + 1
                else:
                    logger.info('DRAW...')
                    scores['drawn'] = scores['drawn'] + 1
                    sp_scores['drawn'] = sp_scores['drawn'] + 1

                # score[0] is credited to the side to move in the final state,
                # score[1] to its opponent.
                pts = state.score
                points[players[to_move]['name']].append(pts[0])
                points[players[-to_move]['name']].append(pts[1])

    return (scores, memory, points, sp_scores)