strategies.py 9.75 KB
Newer Older
Yanhui Liang's avatar
Yanhui Liang committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""The strategy to play each move with MCTS."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import random
import sys
import time

import coords
import go
from mcts import MCTSNode
import numpy as np
import sgf_wrapper


def time_recommendation(move_num, seconds_per_move=5, time_limit=15*60,
                        decay_factor=0.98):
34
  """Compute the time can be used."""
Yanhui Liang's avatar
Yanhui Liang committed
35

36
37
38
39
40
41
42
  # Given current move number and "desired" seconds per move,
  # return how much time should actually be used. To be used specifically
  # for CGOS time controls, which are absolute 15 minute time.

  # The strategy is to spend the maximum time possible using seconds_per_move,
  # and then switch to an exponentially decaying time usage, calibrated so that
  # we have enough time for an infinite number of moves.
Yanhui Liang's avatar
Yanhui Liang committed
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71

  # divide by two since you only play half the moves in a game.
  player_move_num = move_num / 2

  # sum of geometric series maxes out at endgame_time seconds.
  endgame_time = seconds_per_move / (1 - decay_factor)

  if endgame_time > time_limit:
    # there is so little main time that we're already in "endgame" mode.
    base_time = time_limit * (1 - decay_factor)
    return base_time * decay_factor ** player_move_num

  # leave over endgame_time seconds for the end, and play at seconds_per_move
  # for as long as possible
  core_time = time_limit - endgame_time
  core_moves = core_time / seconds_per_move

  if player_move_num < core_moves:
    return seconds_per_move
  else:
    return seconds_per_move * decay_factor ** (player_move_num - core_moves)


def _get_temperature_cutoff(board_size):
  # When to do deterministic move selection.  ~30 moves on a 19x19, ~8 on 9x9
  return int((board_size * board_size) / 12)


class MCTSPlayerMixin(object):
72

Yanhui Liang's avatar
Yanhui Liang committed
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
  # If 'simulations_per_move' is nonzero, it will perform that many reads
  # before playing. Otherwise, it uses 'seconds_per_move' of wall time'
  def __init__(self, board_size, network, seconds_per_move=5,
               simulations_per_move=0, resign_threshold=-0.90,
               verbosity=0, two_player_mode=False, num_parallel=8):
    self.board_size = board_size
    self.network = network
    self.seconds_per_move = seconds_per_move
    self.simulations_per_move = simulations_per_move
    self.verbosity = verbosity
    self.two_player_mode = two_player_mode
    if two_player_mode:
      self.temp_threshold = -1
    else:
      self.temp_threshold = _get_temperature_cutoff(board_size)
    self.num_parallel = num_parallel
    self.qs = []
    self.comments = []
    self.searches_pi = []
    self.root = None
    self.result = 0
    self.result_string = None
    self.resign_threshold = -abs(resign_threshold)

  def initialize_game(self, position=None):
    if position is None:
      position = go.Position(self.board_size)
    self.root = MCTSNode(self.board_size, position)
    self.result = 0
    self.result_string = None
    self.comments = []
    self.searches_pi = []
    self.qs = []

  def suggest_move(self, position):
108
109
110
    """ Used for playing a single game."""
    # For parallel play, use initialize_move, select_leaf,
    # incorporate_results, and pick_move
Yanhui Liang's avatar
Yanhui Liang committed
111
112
113
114
115
116
117
118
119
120
121

    start = time.time()

    if self.simulations_per_move == 0:
      while time.time() - start < self.seconds_per_move:
        self.tree_search()
    else:
      current_readouts = self.root.N
      while self.root.N < current_readouts + self.simulations_per_move:
        self.tree_search()
      if self.verbosity > 0:
122
        print('%d: Searched %d times in %s seconds\n\n' % (
Yanhui Liang's avatar
Yanhui Liang committed
123
124
125
126
127
128
129
130
131
132
133
134
135
            position.n, self.simulations_per_move, time.time() - start),
              file=sys.stderr)

    # print some stats on anything with probability > 1%
    if self.verbosity > 2:
      print(self.root.describe(), file=sys.stderr)
      print('\n\n', file=sys.stderr)
    if self.verbosity > 3:
      print(self.root.position, file=sys.stderr)

    return self.pick_move()

  def play_move(self, c):
136
137
138
139
140
141
142
    """Play a move."""

    # Notable side effects:
    #   - finalizes the probability distribution according to
    #   this roots visit counts into the class' running tally, `searches_pi`
    #   - Makes the node associated with this move the root, for future
    #   `inject_noise` calls.
Yanhui Liang's avatar
Yanhui Liang committed
143
144
145
146
147
148
149
150
151
152
153
154
155
156
    if not self.two_player_mode:
      self.searches_pi.append(
          self.root.children_as_pi(self.root.position.n < self.temp_threshold))
    self.qs.append(self.root.Q)  # Save our resulting Q.
    self.comments.append(self.root.describe())
    self.root = self.root.maybe_add_child(coords.to_flat(self.board_size, c))
    self.position = self.root.position  # for showboard
    del self.root.parent.children
    return True  # GTP requires positive result.

  def pick_move(self):
    """Picks a move to play, based on MCTS readout statistics.

    Highest N is most robust indicator. In the early stage of the game, pick
157
158
    a move weighted by visit count; later on, pick the absolute max.
    """
Yanhui Liang's avatar
Yanhui Liang committed
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
    if self.root.position.n > self.temp_threshold:
      fcoord = np.argmax(self.root.child_N)
    else:
      cdf = self.root.child_N.cumsum()
      cdf /= cdf[-1]
      selection = random.random()
      fcoord = cdf.searchsorted(selection)
      assert self.root.child_N[fcoord] != 0
    return coords.from_flat(self.board_size, fcoord)

  def tree_search(self, num_parallel=None):
    if num_parallel is None:
      num_parallel = self.num_parallel
    leaves = []
    failsafe = 0
    while len(leaves) < num_parallel and failsafe < num_parallel * 2:
      failsafe += 1
      leaf = self.root.select_leaf()
      if self.verbosity >= 4:
        print(self.show_path_to_root(leaf))
      # if game is over, override the value estimate with the true score
      if leaf.is_done():
        value = 1 if leaf.position.score() > 0 else -1
        leaf.backup_value(value, up_to=self.root)
        continue
      leaf.add_virtual_loss(up_to=self.root)
      leaves.append(leaf)
    if leaves:
      move_probs, values = self.network.run_many(
          [leaf.position for leaf in leaves])
      for leaf, move_prob, value in zip(leaves, move_probs, values):
        leaf.revert_virtual_loss(up_to=self.root)
        leaf.incorporate_results(move_prob, value, up_to=self.root)

  def show_path_to_root(self, node):
194
    max_depth = (self.board_size ** 2) * 1.4  # 505 moves for 19x19, 113 for 9x9
Yanhui Liang's avatar
Yanhui Liang committed
195
196
    pos = node.position
    diff = node.position.n - self.root.position.n
197
    if pos.recent is None:
Yanhui Liang's avatar
Yanhui Liang committed
198
199
200
      return

    def fmt(move):
201
      return '{}-{}'.format('b' if move.color == 1 else 'w',
Yanhui Liang's avatar
Yanhui Liang committed
202
                            coords.to_kgs(self.board_size, move.move))
203
204
205
    path = ' '.join(fmt(move) for move in pos.recent[-diff:])
    if node.position.n >= max_depth:
      path += ' (depth cutoff reached) %0.1f' % node.position.score()
Yanhui Liang's avatar
Yanhui Liang committed
206
    elif node.position.is_game_over():
207
      path += ' (game over) %0.1f' % node.position.score()
Yanhui Liang's avatar
Yanhui Liang committed
208
209
210
211
212
213
214
215
216
217
218
219
    return path

  def should_resign(self):
    """Returns true if the player resigned.

    No further moves should be played.
    """
    return self.root.Q_perspective < self.resign_threshold

  def set_result(self, winner, was_resign):
    self.result = winner
    if was_resign:
220
      string = 'B+R' if winner == go.BLACK else 'W+R'
Yanhui Liang's avatar
Yanhui Liang committed
221
222
223
224
225
226
227
228
229
    else:
      string = self.root.position.result_string()
    self.result_string = string

  def to_sgf(self, use_comments=True):
    assert self.result_string is not None
    pos = self.root.position
    if use_comments:
      comments = self.comments or ['No comments.']
230
      comments[0] = ('Resign Threshold: %0.3f\n' %
Yanhui Liang's avatar
Yanhui Liang committed
231
232
233
234
235
                     self.resign_threshold) + comments[0]
    else:
      comments = []
    return sgf_wrapper.make_sgf(
        self.board_size, pos.recent, self.result_string,
236
237
        white_name=os.path.basename(self.network.save_file) or 'Unknown',
        black_name=os.path.basename(self.network.save_file) or 'Unknown',
Yanhui Liang's avatar
Yanhui Liang committed
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
        comments=comments)

  def is_done(self):
    return self.result != 0 or self.root.is_done()

  def extract_data(self):
    assert len(self.searches_pi) == self.root.position.n
    assert self.result != 0
    for pwc, pi in zip(go.replay_position(
        self.board_size, self.root.position, self.result), self.searches_pi):
      yield pwc.position, pi, pwc.result

  def chat(self, msg_type, sender, text):
    default_response = (
        "Supported commands are 'winrate', 'nextplay', 'fortune', and 'help'.")
    if self.root is None or self.root.position.n == 0:
      return "I'm not playing right now.  " + default_response

    if 'winrate' in text.lower():
      wr = (abs(self.root.Q) + 1.0) / 2.0
258
259
      color = 'Black' if self.root.Q > 0 else 'White'
      return '{:s} {:.2f}%'.format(color, wr * 100.0)
Yanhui Liang's avatar
Yanhui Liang committed
260
261
262
263
264
265
266
267
268
269
270
271
    elif 'nextplay' in text.lower():
      return "I'm thinking... " + self.root.most_visited_path()
    elif 'fortune' in text.lower():
      return "You're feeling lucky!"
    elif 'help' in text.lower():
      return "I can't help much with go -- try ladders!  Otherwise: {}".format(
          default_response)
    else:
      return default_response


class CGOSPlayerMixin(MCTSPlayerMixin):
272

Yanhui Liang's avatar
Yanhui Liang committed
273
274
275
  def suggest_move(self, position):
    self.seconds_per_move = time_recommendation(position.n)
    return super().suggest_move(position)