import math
import random


class GameNode(object):
    """One node of a Monte Carlo Tree Search over game states.

    The wrapped ``state`` object is only used through these members:

    * ``state.gameOver`` -- read as a truthy/falsy attribute.
    * ``state.getPossibleMoves()`` -- iterable of moves.
    * ``state.playMove(move)`` -- its return value is used as the next
      state, so it must return the successor state.
    * ``state.result(final_state)`` -- a signed number; only its sign is
      used by ``backPropagate``.

    Attributes:
        state: the game state this node represents.
        parent: the parent ``GameNode``, or ``None`` for the root.
        hits: number of simulations propagated here with a positive value.
        misses: number of simulations propagated here with a negative value.
        totalTrials: number of simulations propagated through this node.
        children: list of child nodes, or ``None`` until first expanded.
    """

    # Exploration weight of the UCB1 term (chosen empirically).
    EXPLORATION = math.sqrt(2)

    def __init__(self, state, parent):
        super(GameNode, self).__init__()
        self.state = state
        self.parent = parent

        self.hits = 0
        self.misses = 0
        self.totalTrials = 0

        # Expanded lazily by chooseChild().
        self.children = None

    def backPropagate(self, simulation):
        """Record a simulation outcome here and in every ancestor.

        A positive ``simulation`` counts as a hit, a negative one as a
        miss, and zero only increments ``totalTrials``.  The sign is
        flipped on each step up the tree.
        """
        if simulation > 0:
            self.hits += 1
        elif simulation < 0:
            self.misses += 1
        self.totalTrials += 1

        if self.parent:
            self.parent.backPropagate(-simulation)

    def childPotential(self, child):
        """Return the UCB1 score of ``child`` as seen from this node.

        The child's ``misses`` are used as the win count because outcomes
        are negated between a node and its parent.  A child that has never
        been tried scores ``inf`` so it is always preferred.
        """
        trials = child.totalTrials
        if trials == 0:
            return float('inf')

        # float() keeps true division under Python 2 as well.
        win_rate = float(child.misses) / trials
        parent_trials = self.totalTrials
        log_term = math.log(parent_trials) if parent_trials > 0 else 0.0

        return win_rate + self.EXPLORATION * math.sqrt(log_term / trials)

    def runSimulation(self):
        """Run one random playout from this node and back-propagate it."""
        self.backPropagate(self.simulate())

    def simulate(self):
        """Play uniformly random moves until the game is over.

        Returns:
            ``self.state.result(final_state)`` for the state reached.
        """
        state = self.state

        while not state.gameOver:
            moves = state.getPossibleMoves()
            state = state.playMove(random.choice(moves))

        return self.state.result(state)

    def getChildren(self):
        """Build one child node per possible move from this node's state."""
        # The parent must be this node (not its state) so that
        # backPropagate() can walk up the tree.
        return [
            GameNode(self.state.playMove(move), self)
            for move in self.state.getPossibleMoves()
        ]

    def chooseChild(self):
        """Perform one MCTS iteration starting at this node.

        Expands the node on first use, then either simulates from this
        node (no children), simulates a random unexplored child, or
        descends into the child with the highest potential.
        """
        # getattr() also covers instances created without __init__.
        if getattr(self, 'children', None) is None:
            self.children = self.getChildren()

        if not self.children:
            self.runSimulation()
            return

        unexplored = [
            node for node in self.children if node.totalTrials == 0
        ]
        if unexplored:
            random.choice(unexplored).runSimulation()
            return

        # max() keeps the first child on ties, like a strict '>' scan.
        best = max(self.children, key=self.childPotential)
        best.chooseChild()