Skip to content

Remaining MTD algorithms & Introducing Monte Carlo Tree Search #13

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions easyAI/AI/DUAL.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,8 @@ def __call__(self,game):
scoring = self.scoring if self.scoring else (
lambda g: g.scoring() ) # horrible hack

first = -self.win_score #essence of DUAL algorithm
next = (lambda lowerbound, upperbound, bestValue: bestValue + 1)
first = (lambda game, tt: -self.win_score) #essence of DUAL algorithm
next = (lambda lowerbound, upperbound, bestValue, bound: bestValue + 1)

self.alpha = mtd(game,
first, next,
Expand Down
24 changes: 16 additions & 8 deletions easyAI/AI/DictTT.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,21 +13,22 @@ def __init__(self, num_buckets=1024, own_hash = None):
self.dict = []
for i in range(num_buckets):
self.dict.append((None, None))
self.keys = dict()
#self.keys = dict()
self.hash = hash
if own_hash != None:
own_hash.modulo = len(self.dict)
self.hash = own_hash.get_hash
self.num_collisions = 0
self.num_calls = 0
self.num_lookups = 0

def hash_key(self, key):
    """
    Turn ``key`` into a bucket index for ``self.dict``.

    The key is hashed with ``self.hash`` and reduced to the range
    ``0 .. len(self.dict) - 1``. Every call is counted in
    ``self.num_calls``.
    """
    self.num_calls += 1
    # Modulo is used instead of ``& (len(self.dict) - 1)``: the bit mask
    # only reaches every bucket when the bucket count is a power of two,
    # while ``num_buckets`` can be any size.
    return self.hash(key) % len(self.dict)

def get_slot(self, key, default=None):
"""
Expand All @@ -44,7 +45,8 @@ def get_slot(self, key, default=None):
def get(self, key, default=None):
"""
Gets the value for the given key, or the default.
"""
"""
self.num_lookups += 1
i, k, v = self.get_slot(key, default=default)
return v

Expand All @@ -59,10 +61,10 @@ def set(self, key, value):

self.dict[slot] = (key, value)

if self.keys.__contains__(key):
self.keys[key] = self.keys[key] + 1
else:
self.keys[key] = 1
#if self.keys.__contains__(key):
# self.keys[key] = self.keys[key] + 1
#else:
# self.keys[key] = 1

def delete(self, key):
"""
Expand Down Expand Up @@ -97,4 +99,10 @@ def __iter__(self):

def __contains__(self, key):
    """Return True when ``key`` is recorded in ``self.keys``."""
    # NOTE(review): the ``self.keys`` bookkeeping appears to be commented
    # out in ``__init__`` and ``set`` by this change; if it is removed this
    # lookup raises AttributeError -- confirm and switch to a slot lookup.
    return key in self.keys


def print_stats(self):
    """Write the table's usage counters to standard output."""
    print('-' * 10)
    print('Statistics of custom dictionary:')
    counters = (
        ('Calls of hash: ', self.num_calls),
        ('Collisions: ', self.num_collisions),
        ('Num lookups: ', self.num_lookups),
    )
    for label, count in counters:
        print(label, count)
5 changes: 3 additions & 2 deletions easyAI/AI/HashTT.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ def before(self, key):
Returns initial value of hash.
It's also the place where you can initialize some auxiliary variables
"""
return 0
return 1

def after(self, key, hash):
"""
Expand All @@ -25,7 +25,8 @@ def get_hash(self, key, depth = 0):
"""
Recursively computes a hash
"""
ret_hash = self.before(key)
if depth == 0:
ret_hash = self.before(key)
if type(key) is int:
return self.hash_int(key)
if type(key) is str and len(key) <= 1:
Expand Down
2 changes: 1 addition & 1 deletion easyAI/AI/Hashes.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def before(self, key):
return 0
def join(self, one, two):
one = (one << 4) + two;
self.g = one & 0xf0000000L;
self.g = one & 0xf0000000;

if self.g != 0:
one ^= self.g >> 24
Expand Down
130 changes: 130 additions & 0 deletions easyAI/AI/MCTS.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
#contributed by mrfesol (Tomasz Wesolowski)

import random
from math import sqrt, log

class MCTS:
    """
    This implements the Monte Carlo Tree Search algorithm.
    More information at: http://mcts.ai/index.html
    The following example shows
    how to set up the AI and play a Connect Four game:

    >>> from easyAI import Human_Player, AI_Player
    >>> AI = MCTS(iterations=5000)
    >>> game = ConnectFour([AI_Player(AI), Human_Player()])
    >>> game.play()

    Parameters
    -----------

    iterations:
      How many iterations (select, expand, rollout, backpropagate)
      the algorithm performs per move.
      Larger value = more accurate result.

    winscore:
      The largest score of the game. Scores are divided by it before
      being backpropagated.

    depth:
      How many moves in advance should the AI think ?
      (2 moves = 1 complete turn). Stored as ``max_depth``.

    expand_factor:
      Defines how willing the algorithm is to explore rarely visited
      nodes. Usually between 0.3 and 1.0.

    scoring:
      A function f(game)-> score. If no scoring is provided
      and the game object has a ``scoring`` method it will be used.
      The scoring function MUST return values in [0, winscore].

    """

    def __init__(self, iterations = 5000, winscore=100, depth = 20, expand_factor=0.3, scoring=None):
        self.scoring = scoring
        self.iterations = iterations
        self.winscore = winscore
        self.max_depth = depth
        self.expand_factor = expand_factor

    def __call__(self, game):
        """
        Returns the AI's best move given the current state of the game,
        i.e. the move of the most visited child of the root.
        Returns None when no child was ever expanded (the game offers no
        move, or ``iterations`` is 0).
        """
        rootnode = MCTSNode(state = game)

        scoring = self.scoring if self.scoring else (
                       lambda g: g.scoring() ) # horrible hack

        # float() keeps the ratio fractional under Python 2, where
        # int / int would truncate.
        winscore = float(self.winscore)

        for _ in range(self.iterations):
            node = rootnode
            state = game.copy()
            depth = 0

            # Select: walk down while the node is fully expanded
            while not node.untried and node.children:
                node = node.select_child(self.expand_factor)
                state.make_move(node.move)
                state.switch_player()
                depth += 1

            # Expand: try one move that has not been tried from this node
            if node.untried:
                move = random.choice(node.untried)
                state.make_move(move)
                state.switch_player()
                node = node.add_child(move, state)
                # the expansion ply counts toward the depth limit like
                # every other move played on ``state``
                depth += 1

            # Rollout: random playout, asking for the moves once per ply
            while depth < self.max_depth:
                moves = state.possible_moves()
                if not moves:
                    break
                state.make_move(random.choice(moves))
                state.switch_player()
                depth += 1

            # Backpropagate, flipping the score at every level
            # NOTE(review): scoring(state) is evaluated for whichever
            # player is to move after the rollout; confirm this matches
            # the perspective of ``node`` when the rollout length is odd.
            score = 1 - max(0, scoring(state) / winscore)
            while node is not None:
                node.update(score)
                node = node.parent
                score = 1 - score

        if not rootnode.children:
            return None
        # reversed() keeps the original tie-break: among equally visited
        # children the one created last wins.
        best = max(reversed(rootnode.children), key = lambda c: c.visits)
        return best.move

class MCTSNode:
    """
    One node of the Monte Carlo search tree.

    Attributes
    -----------

    move:
      The move that leads from the parent to this node (None for the root).

    parent:
      The parent node, or None for the root.

    children:
      Child nodes created so far by ``add_child``.

    wins, visits:
      Accumulated result and number of updates, both kept as floats.

    untried:
      Moves of ``state`` for which no child exists yet.

    last_player:
      ``state.nopponent`` at the time the node was created.
    """

    def __init__(self, move = None, parent = None, state = None):
        self.move = move
        self.parent = parent
        self.children = []
        self.wins = 0.0
        self.visits = 0.0
        # Copy the list: add_child() removes entries from it and must not
        # alter a list the game object may still be holding.
        self.untried = list(state.possible_moves())
        self.last_player = state.nopponent

    def formula(self):
        """Average result of this node; 0.0 while it is unvisited."""
        if not self.visits:
            return 0.0
        return self.wins/self.visits

    def formula_exp(self, expand_factor = 0.3):
        """
        Exploration term of UCB1 for this node.

        Returns 0.0 for the root (no parent to compare against) and
        infinity for an unvisited node, instead of raising.
        """
        if self.parent is None:
            return 0.0
        if not self.visits:
            return float('inf')
        return expand_factor*sqrt(2*log(self.parent.visits)/self.visits)

    def select_child(self, expand_factor):
        """
        Use the UCB1 formula to select a child node.
        Every child must have been visited at least once.
        """
        def ucb1(c):
            return c.wins/c.visits + \
                   expand_factor*sqrt(2*log(self.visits)/c.visits)
        # reversed() makes ties resolve to the child added last, as a
        # stable ascending sort followed by [-1] would.
        return max(reversed(self.children), key = ucb1)

    def add_child(self, m, s):
        """Create the child reached by move ``m`` (state ``s``) and return it."""
        n = MCTSNode(move = m, parent = self, state = s)
        self.untried.remove(m)
        self.children.append(n)
        return n

    def update(self, result):
        """Record one more visit with the given result."""
        self.visits += 1
        self.wins += result

    def __repr__(self):
        return "[P: " + str(self.last_player) + " M:" + str(self.move) + \
               " W/V:" + str(self.wins) + "/" + str(self.visits) + " F: " + \
               str(self.formula()) + " F_exp: " + str(self.formula_exp()) + "]"
73 changes: 73 additions & 0 deletions easyAI/AI/MTDbi.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
#contributed by mrfesol (Tomasz Wesolowski)

from easyAI.AI.MTdriver import mtd

class MTDbi:
    """
    This implements the MTD-bi algorithm. The following example shows
    how to set up the AI and play a Connect Four game:

    >>> from easyAI import Human_Player, AI_Player, MTDbi
    >>> AI = MTDbi(7)
    >>> game = ConnectFour([AI_Player(AI),Human_Player()])
    >>> game.play()

    Parameters
    -----------

    depth:
      How many moves in advance should the AI think ?
      (2 moves = 1 complete turn)

    scoring:
      A function f(game)-> score. If no scoring is provided
      and the game object has a ``scoring`` method it will be used.
      Can be None if the game that the AI will be given has a
      ``scoring`` method.

    win_score:
      Score LARGER than the largest score of game, but smaller than inf.
      Stored on the instance; this class does not pass it to the driver.

    tt:
      A transposition table (a table storing game states and moves)

    Notes
    -----

    The score of a given game is given by

    >>> scoring(current_game) - 0.01*sign*current_depth

    for instance if a loss is -100 points, then losing after 4 moves
    will score -99.96 points but losing after 8 moves will be -99.92
    points. Thus, the AI will choose the move that leads to defeat in
    8 turns, which makes it more difficult for the (human) opponent.
    This will not always work if a ``win_score`` argument is provided.

    """

    def __init__(self, depth, scoring=None, win_score=100000, tt=None):
        self.scoring = scoring
        self.depth = depth
        self.tt = tt
        self.win_score= win_score

    @staticmethod
    def _first_guess(game, tt):
        """Initial guess handed to the driver: always 0."""
        return 0

    @staticmethod
    def _next_guess(lowerbound, upperbound, bestValue, bound):
        """Next guess: the midpoint of the current bounds (bisection)."""
        return (lowerbound + upperbound)/2

    def __call__(self,game):
        """
        Returns the AI's best move given the current state of the game.
        The value returned by the driver is kept in ``self.alpha``.
        """

        scoring = self.scoring if self.scoring else (
                       lambda g: g.scoring() ) # horrible hack

        # essence of the MTDbi algorithm: start at 0, then bisect
        self.alpha = mtd(game,
                         MTDbi._first_guess, MTDbi._next_guess,
                         self.depth,
                         scoring,
                         self.tt)

        return game.ai_move
83 changes: 83 additions & 0 deletions easyAI/AI/MTDf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
#contributed by mrfesol (Tomasz Wesolowski)

from easyAI.AI.MTdriver import mtd

class MTDf:
    """
    This implements the MTD-f algorithm. The following example shows
    how to set up the AI and play a Connect Four game:

    >>> from easyAI import Human_Player, AI_Player, MTDf
    >>> AI = MTDf(7)
    >>> game = ConnectFour([AI_Player(AI),Human_Player()])
    >>> game.play()

    Parameters
    -----------

    depth:
      How many moves in advance should the AI think ?
      (2 moves = 1 complete turn)

    scoring:
      A function f(game)-> score. If no scoring is provided
      and the game object has a ``scoring`` method it will be used.
      Can be None if the game that the AI will be given has a
      ``scoring`` method.

    win_score:
      Score LARGER than the largest score of game, but smaller than inf.
      Stored on the instance; this class does not pass it to the driver.

    tt:
      A transposition table (a table storing game states and moves).
      When given, its ``lookup`` method supplies the first guess.

    Notes
    -----

    The score of a given game is given by

    >>> scoring(current_game) - 0.01*sign*current_depth

    for instance if a loss is -100 points, then losing after 4 moves
    will score -99.96 points but losing after 8 moves will be -99.92
    points. Thus, the AI will choose the move that leads to defeat in
    8 turns, which makes it more difficult for the (human) opponent.
    This will not always work if a ``win_score`` argument is provided.

    """

    def __init__(self, depth, scoring=None, win_score=100000, tt=None):
        self.scoring = scoring
        self.depth = depth
        self.tt = tt
        self.win_score= win_score

    @staticmethod
    def first(game, tt):
        """
        Initial guess for the driver: the midpoint of the bounds stored
        in ``tt`` for ``game``, or 0 when there is no table or no entry.
        """
        if tt is None:
            return 0
        lookup = tt.lookup(game)
        # identity test: never invokes the entry's own __eq__
        if lookup is None:
            return 0
        lowerbound, upperbound = lookup['lowerbound'], lookup['upperbound']
        return (lowerbound+upperbound)/2

    @staticmethod
    def _next_guess(lowerbound, upperbound, bestValue, bound):
        """Next guess: ``bestValue``, bumped by 1 when it did not fall below ``bound``."""
        return bestValue if bestValue < bound else bestValue + 1

    def __call__(self,game):
        """
        Returns the AI's best move given the current state of the game.
        The value returned by the driver is kept in ``self.alpha``.
        """

        scoring = self.scoring if self.scoring else (
                       lambda g: g.scoring() ) # horrible hack

        # essence of the MTDf algorithm: seed the search from the table
        self.alpha = mtd(game,
                         MTDf.first, MTDf._next_guess,
                         self.depth,
                         scoring,
                         self.tt)

        return game.ai_move
Loading