-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmetrics.py
70 lines (53 loc) · 2.35 KB
/
metrics.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
from abc import ABCMeta, abstractmethod
from humblerl import Callback
from algos.board_games import ELOScoreboard
class Tournament(Callback):
    """Calculates winning rates of player one (wannabe) and player two (best) and draws.

    Args:
        update_threshold (float): If current player win count divided by number of games
            (draws don't count) is greater than this threshold, then current player is
            better than opponent. (Default: 0.5)

    Note:
        This is supposed to be used with board games.
    """

    def __init__(self, update_threshold=.5):
        self.threshold = update_threshold
        # Start with zeroed counters so `metrics`, `results` and `compare` are usable even
        # before the first `on_loop_start` call (previously this raised AttributeError).
        self.wannabe, self.best, self.draws = 0, 0, 0

    def on_loop_start(self):
        """Zero all counters at the beginning of every loop."""
        self.reset()

    def on_step_taken(self, step, transition, info):
        """Update win/loss/draw counters when a game has finished.

        Args:
            step: Unused here.
            transition: Taken transition; only `is_terminal`, `state[1]` and `reward`
                are read.
            info: Unused here.
        """
        if not transition.is_terminal:
            return

        # NOTE: Because players have fixed player id, and reward is returned from perspective
        #       of current player, we transform it into perspective of player one and check
        #       who wins.
        # NOTE(review): assumes `state[1]` is the player id with opposite signs for the two
        #               players (e.g. +1 / -1) - confirm against the environment.
        player = transition.state[1]
        reward = player * transition.reward

        if reward == 0:
            self.draws += 1
        elif reward > 0:
            self.wannabe += 1
        else:
            self.best += 1

    def reset(self):
        """Set wannabe wins, best wins and draws counters back to zero."""
        self.wannabe, self.best, self.draws = 0, 0, 0

    @property
    def metrics(self):
        """dict: Current counters under "wannabe", "best" and "draws" keys."""
        return {"wannabe": self.wannabe, "best": self.best, "draws": self.draws}

    @property
    def results(self):
        """tuple: Current counters as (wannabe wins, best wins, draws)."""
        return self.wannabe, self.best, self.draws

    def compare(self, other_score):
        """Compare two agents, one that you are and the other one.

        Args:
            other_score (float): Other agent score.

        Return:
            float: Current agent score.
            bool: If current agent is better than other agent.
        """
        wins, losses, draws = self.results

        # Update ELO rating, use best player ELO as current player ELO
        # NOTE: We update it this way as we don't need exact ELO values, we just need to see
        #       how much if at all has current player improved.
        #       Decision based on: https://github.com/gcp/leela-zero/issues/354
        current_score, _ = ELOScoreboard.calculate_update(
            other_score, other_score, wins, losses, draws)

        # `wins > 0` guard also protects against division by zero when no decisive game
        # (neither win nor loss) was played; draws are excluded from the win rate.
        is_better = wins > 0 and float(wins) / (wins + losses) > self.threshold
        return current_score, is_better