# Copyright 2018 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Tests for mcts.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import copy import tensorflow as tf # pylint: disable=g-bad-import-order import coords import go from mcts import MCTSNode import numpy as np import utils_test tf.logging.set_verbosity(tf.logging.ERROR) ALMOST_DONE_BOARD = utils_test.load_board(''' .XO.XO.OO X.XXOOOO. XXXXXOOOO XXXXXOOOO .XXXXOOO. XXXXXOOOO .XXXXOOO. XXXXXOOOO XXXXOOOOO ''') TEST_POSITION = go.Position( utils_test.BOARD_SIZE, board=ALMOST_DONE_BOARD, n=105, komi=2.5, caps=(1, 4), ko=None, recent=(go.PlayerMove(go.BLACK, (0, 1)), go.PlayerMove(go.WHITE, (0, 8))), to_play=go.BLACK ) SEND_TWO_RETURN_ONE = go.Position( utils_test.BOARD_SIZE, board=ALMOST_DONE_BOARD, n=75, komi=0.5, caps=(0, 0), ko=None, recent=( go.PlayerMove(go.BLACK, (0, 1)), go.PlayerMove(go.WHITE, (0, 8)), go.PlayerMove(go.BLACK, (1, 0))), to_play=go.WHITE ) MAX_DEPTH = (utils_test.BOARD_SIZE ** 2) * 1.4 class TestMctsNodes(utils_test.MiniGoUnitTest): def test_action_flipping(self): np.random.seed(1) probs = np.array([.02] * ( utils_test.BOARD_SIZE * utils_test.BOARD_SIZE + 1)) probs += np.random.random( [utils_test.BOARD_SIZE * utils_test.BOARD_SIZE + 1]) * 0.001 black_root = MCTSNode( utils_test.BOARD_SIZE, go.Position(utils_test.BOARD_SIZE)) white_root = MCTSNode(utils_test.BOARD_SIZE, go.Position( utils_test.BOARD_SIZE, to_play=go.WHITE)) black_root.select_leaf().incorporate_results(probs, 0, black_root) white_root.select_leaf().incorporate_results(probs, 0, white_root) # No matter who is to play, when we know nothing else, the priors # should be respected, and the same move should be picked black_leaf = black_root.select_leaf() white_leaf = white_root.select_leaf() self.assertEqual(black_leaf.fmove, white_leaf.fmove) self.assertEqualNPArray( black_root.child_action_score, white_root.child_action_score) def test_select_leaf(self): flattened = coords.to_flat(utils_test.BOARD_SIZE, coords.from_kgs( utils_test.BOARD_SIZE, 'D9')) probs = np.array([.02] * ( utils_test.BOARD_SIZE * utils_test.BOARD_SIZE + 1)) probs[flattened] = 0.4 root = MCTSNode(utils_test.BOARD_SIZE, SEND_TWO_RETURN_ONE) root.select_leaf().incorporate_results(probs, 0, root) self.assertEqual(root.position.to_play, go.WHITE) self.assertEqual(root.select_leaf(), root.children[flattened]) def test_backup_incorporate_results(self): probs = np.array([.02] * ( utils_test.BOARD_SIZE * utils_test.BOARD_SIZE + 1)) root = MCTSNode(utils_test.BOARD_SIZE, SEND_TWO_RETURN_ONE) root.select_leaf().incorporate_results(probs, 0, root) leaf = root.select_leaf() leaf.incorporate_results(probs, -1, root) # white wins! # Root was visited twice: first at the root, then at this child. self.assertEqual(root.N, 2) # Root has 0 as a prior and two visits with value 0, -1 self.assertAlmostEqual(root.Q, -1/3) # average of 0, 0, -1 # Leaf should have one visit self.assertEqual(root.child_N[leaf.fmove], 1) self.assertEqual(leaf.N, 1) # And that leaf's value had its parent's Q (0) as a prior, so the Q # should now be the average of 0, -1 self.assertAlmostEqual(root.child_Q[leaf.fmove], -0.5) self.assertAlmostEqual(leaf.Q, -0.5) # We're assuming that select_leaf() returns a leaf like: # root # \ # leaf # \ # leaf2 # which happens in this test because root is W to play and leaf was a W win. self.assertEqual(root.position.to_play, go.WHITE) leaf2 = root.select_leaf() leaf2.incorporate_results(probs, -0.2, root) # another white semi-win self.assertEqual(root.N, 3) # average of 0, 0, -1, -0.2 self.assertAlmostEqual(root.Q, -0.3) self.assertEqual(leaf.N, 2) self.assertEqual(leaf2.N, 1) # average of 0, -1, -0.2 self.assertAlmostEqual(leaf.Q, root.child_Q[leaf.fmove]) self.assertAlmostEqual(leaf.Q, -0.4) # average of -1, -0.2 self.assertAlmostEqual(leaf.child_Q[leaf2.fmove], -0.6) self.assertAlmostEqual(leaf2.Q, -0.6) def test_do_not_explore_past_finish(self): probs = np.array([0.02] * ( utils_test.BOARD_SIZE * utils_test.BOARD_SIZE + 1), dtype=np.float32) root = MCTSNode(utils_test.BOARD_SIZE, go.Position(utils_test.BOARD_SIZE)) root.select_leaf().incorporate_results(probs, 0, root) first_pass = root.maybe_add_child( coords.to_flat(utils_test.BOARD_SIZE, None)) first_pass.incorporate_results(probs, 0, root) second_pass = first_pass.maybe_add_child( coords.to_flat(utils_test.BOARD_SIZE, None)) with self.assertRaises(AssertionError): second_pass.incorporate_results(probs, 0, root) node_to_explore = second_pass.select_leaf() # should just stop exploring at the end position. self.assertEqual(node_to_explore, second_pass) def test_add_child(self): root = MCTSNode(utils_test.BOARD_SIZE, go.Position(utils_test.BOARD_SIZE)) child = root.maybe_add_child(17) self.assertIn(17, root.children) self.assertEqual(child.parent, root) self.assertEqual(child.fmove, 17) def test_add_child_idempotency(self): root = MCTSNode(utils_test.BOARD_SIZE, go.Position(utils_test.BOARD_SIZE)) child = root.maybe_add_child(17) current_children = copy.copy(root.children) child2 = root.maybe_add_child(17) self.assertEqual(child, child2) self.assertEqual(current_children, root.children) def test_never_select_illegal_moves(self): probs = np.array([0.02] * ( utils_test.BOARD_SIZE * utils_test.BOARD_SIZE + 1)) # let's say the NN were to accidentally put a high weight on an illegal move probs[1] = 0.99 root = MCTSNode(utils_test.BOARD_SIZE, SEND_TWO_RETURN_ONE) root.incorporate_results(probs, 0, root) # and let's say the root were visited a lot of times, which pumps up the # action score for unvisited moves... root.N = 100000 root.child_N[root.position.all_legal_moves()] = 10000 # this should not throw an error... leaf = root.select_leaf() # the returned leaf should not be the illegal move self.assertNotEqual(leaf.fmove, 1) # and even after injecting noise, we should still not select an illegal move for _ in range(10): root.inject_noise() leaf = root.select_leaf() self.assertNotEqual(leaf.fmove, 1) def test_dont_pick_unexpanded_child(self): probs = np.array([0.001] * ( utils_test.BOARD_SIZE * utils_test.BOARD_SIZE + 1)) # make one move really likely so that tree search goes down that path twice # even with a virtual loss probs[17] = 0.999 root = MCTSNode(utils_test.BOARD_SIZE, go.Position(utils_test.BOARD_SIZE)) root.incorporate_results(probs, 0, root) leaf1 = root.select_leaf() self.assertEqual(leaf1.fmove, 17) leaf1.add_virtual_loss(up_to=root) # the second select_leaf pick should return the same thing, since the child # hasn't yet been sent to neural net for eval + result incorporation leaf2 = root.select_leaf() self.assertIs(leaf1, leaf2) if __name__ == '__main__': tf.test.main()