# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Play a self-play match with a given DualNet model."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import random
import sys
import time

import coords
from gtp_wrapper import MCTSPlayer


def play(board_size, network, readouts, resign_threshold, simultaneous_leaves,
         verbosity=0):
  """Plays out a self-play match.

  Args:
    board_size: the go board size
    network: the DualNet model
    readouts: the number of readouts in MCTS
    resign_threshold: the threshold to resign at in the match
    simultaneous_leaves: the number of simultaneous leaves in MCTS
    verbosity: the verbosity of the self-play match

  Returns:
    the final position
    the n x 362 tensor of floats representing the mcts search probabilities
    the n-ary tensor of floats representing the original value-net estimate
      where n is the number of moves in the game.
  """
  player = MCTSPlayer(board_size, network, resign_threshold=resign_threshold,
                      verbosity=verbosity, num_parallel=simultaneous_leaves)
  # Disable resign in 5% of games
  if random.random() < 0.05:
    player.resign_threshold = -1.0

  player.initialize_game()

  # Must run this once at the start, so that noise injection actually
  # affects the first move of the game.
  first_node = player.root.select_leaf()
  prob, val = network.run(first_node.position)
  first_node.incorporate_results(prob, val, first_node)

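  # Main self-play loop: each iteration injects Dirichlet noise at the root,
  # runs `readouts` additional MCTS readouts, then either resigns or plays the
  # selected move, until the game ends.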
  while True:
    start = time.time()
    player.root.inject_noise()
    current_readouts = player.root.N
    # we want to do "X additional readouts", rather than "up to X readouts".
    while player.root.N < current_readouts + readouts:
      player.tree_search()

    if verbosity >= 3:
      print(player.root.position)
      print(player.root.describe())

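    # End the game by resignation if the player's estimated value falls below
    # the resign threshold; otherwise commit the chosen move and stop once the
    # resulting position is terminal.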
    if player.should_resign():
      player.set_result(-1 * player.root.position.to_play, was_resign=True)
      break
    move = player.pick_move()
    player.play_move(move)
    if player.root.is_done():
      player.set_result(player.root.position.result(), was_resign=False)
      break

    if (verbosity >= 2) or (
        verbosity >= 1 and player.root.position.n % 10 == 9):
      print("Q: {:.5f}".format(player.root.Q))
      dur = time.time() - start
      print("%d: %d readouts, %.3f s/100. (%.2f sec)" % (
          player.root.position.n, readouts, dur / readouts * 100.0, dur))
    if verbosity >= 3:
      print("Played >>",
            coords.to_kgs(coords.from_flat(player.root.fmove)))

  if verbosity >= 2:
    print("%s: %.3f" % (player.result_string, player.root.Q), file=sys.stderr)
    print(player.root.position,
          player.root.position.score(), file=sys.stderr)

  return player
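

# A minimal usage sketch (hypothetical: the DualNetRunner name and its
# constructor arguments are assumptions about how the trained model is
# loaded, not part of this module):
#
#   network = dualnet.DualNetRunner(save_file)
#   player = play(board_size=19, network=network, readouts=800,
#                 resign_threshold=-0.9, simultaneous_leaves=8, verbosity=1)
#   # player.root.position holds the final board; player.result_string holds
#   # the game outcome.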