# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Train MiniGo with several iterations of RL learning.

One iteration of RL learning consists of bootstrap, selfplay, gather and train:
  bootstrap: Initialize a random model
  selfplay: Play games with the latest model to produce data used for training
  gather: Group games played with the same model into larger files of
    TF examples.
  train: Train a new model with the selfplay results from the most recent
    N generations.
After training, validation can be performed on the holdout data.
Given two models, evaluation can be applied to choose a stronger model.
The training pipeline consists of multiple RL learning iterations to achieve
better models.
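
Example usage (all flags are defined at the bottom of this file):
  python minigo.py --base_dir=/tmp/minigo/ --board_size=9 --test
Pass --validation to validate the latest model on the holdout data,
--evaluation to compare the latest model against the best model so far, and
--selfplay to run self-play only.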
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse
import os
import random
import socket
import sys
import time

import tensorflow as tf  # pylint: disable=g-bad-import-order

import dualnet
import evaluation
import go
import model_params
import preprocessing
import selfplay_mcts
import utils

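# Suffix of the TFRecord files written by selfplay; the '.zz' extension
# indicates zlib-compressed records.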
_TF_RECORD_SUFFIX = '.tfrecord.zz'


def _ensure_dir_exists(directory):
  """Check if directory exists. If not, create it.

  Args:
    directory: A given directory
  """
  if not os.path.isdir(directory):
    tf.gfile.MakeDirs(directory)


def bootstrap(estimator_model_dir, trained_models_dir, params):
  """Initialize the model with random weights.

  Args:
    estimator_model_dir: tf.estimator model directory.
    trained_models_dir: Directory to save the trained models; the first
      bootstrapped generation is exported here.
    params: A MiniGoParams instance of hyperparameters for the model.
  """
  bootstrap_name = utils.generate_model_name(0)
  _ensure_dir_exists(trained_models_dir)
  bootstrap_model_path = os.path.join(trained_models_dir, bootstrap_name)
  _ensure_dir_exists(estimator_model_dir)

  print('Bootstrapping with working dir {}\n Model 0 exported to {}'.format(
      estimator_model_dir, bootstrap_model_path))
  dualnet.bootstrap(estimator_model_dir, params)
  dualnet.export_model(estimator_model_dir, bootstrap_model_path)


def selfplay(selfplay_dirs, selfplay_model, params):
  """Perform selfplay with a specific model.

  Args:
    selfplay_dirs: A dict to specify the directories used in selfplay.
      selfplay_dirs = {
          'output_dir': output_dir,
          'holdout_dir': holdout_dir,
          'clean_sgf': clean_sgf,
          'full_sgf': full_sgf
      }
    selfplay_model: The actual Dualnet runner for selfplay.
    params: A MiniGoParams instance of hyperparameters for the model.
  """
  with utils.logged_timer('Playing game'):
    player = selfplay_mcts.play(
        params.board_size, selfplay_model, params.selfplay_readouts,
        params.selfplay_resign_threshold, params.simultaneous_leaves,
        params.selfplay_verbose)

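  # Name output files by timestamp and hostname so that games generated by
  # concurrent selfplay workers do not collide.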
  output_name = '{}-{}'.format(int(time.time()), socket.gethostname())

  def _write_sgf_data(dir_sgf, use_comments):
    with tf.gfile.GFile(
        os.path.join(dir_sgf, '{}.sgf'.format(output_name)), 'w') as f:
      f.write(player.to_sgf(use_comments=use_comments))

  _write_sgf_data(selfplay_dirs['clean_sgf'], use_comments=False)
  _write_sgf_data(selfplay_dirs['full_sgf'], use_comments=True)

  game_data = player.extract_data()
  tf_examples = preprocessing.make_dataset_from_selfplay(game_data, params)

  # Hold out a fraction (params.holdout_pct) of games for validation.
  if random.random() < params.holdout_pct:
    fname = os.path.join(
        selfplay_dirs['holdout_dir'], output_name + _TF_RECORD_SUFFIX)
  else:
    fname = os.path.join(
        selfplay_dirs['output_dir'], output_name + _TF_RECORD_SUFFIX)

  preprocessing.write_tf_examples(fname, tf_examples)


def gather(selfplay_dir, training_chunk_dir, params):
  """Gather selfplay data into large training chunk.

  Args:
    selfplay_dir: Where to look for games. Set as 'base_dir/data/selfplay/'.
    training_chunk_dir: where to put collected games. Set as
      'base_dir/data/training_chunks/'.
    params: A MiniGoParams instance of hyperparameters for the model.
  """
  # Check the selfplay data from the most recent params.gather_generation
  # models.
  _ensure_dir_exists(training_chunk_dir)
  sorted_model_dirs = sorted(tf.gfile.ListDirectory(selfplay_dir))
  models = [model_dir.strip('/')
            for model_dir in sorted_model_dirs[-params.gather_generation:]]

  with utils.logged_timer('Finding existing tfrecords...'):
    model_gamedata = {
        model: tf.gfile.Glob(
            os.path.join(selfplay_dir, model, '*'+_TF_RECORD_SUFFIX))
        for model in models
    }
  print('Found {} models'.format(len(models)))
  for model_name, record_files in sorted(model_gamedata.items()):
    print('    {}: {} files'.format(model_name, len(record_files)))

  meta_file = os.path.join(training_chunk_dir, 'meta.txt')
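  # meta.txt records which selfplay files have already been gathered, so
  # repeated runs of gather() skip games that were processed earlier.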
  try:
    with tf.gfile.GFile(meta_file, 'r') as f:
      already_processed = set(f.read().split())
  except tf.errors.NotFoundError:
    already_processed = set()

  num_already_processed = len(already_processed)

  for model_name, record_files in sorted(model_gamedata.items()):
    if set(record_files) <= already_processed:
      continue
    print('Gathering files from {}:'.format(model_name))
    tf_examples = preprocessing.shuffle_tf_examples(
        params.shuffle_buffer_size, params.examples_per_chunk, record_files)
    # Write each shuffled batch of examples out as one training chunk file.
    for i, example_batch in enumerate(tf_examples):
      output_record = os.path.join(
          training_chunk_dir,
          ('{}-{}' + _TF_RECORD_SUFFIX).format(model_name, i))
      preprocessing.write_tf_examples(
          output_record, example_batch, serialize=False)
    already_processed.update(record_files)

  print('Processed {} new files'.format(
      len(already_processed) - num_already_processed))
  with tf.gfile.GFile(meta_file, 'w') as f:
    f.write('\n'.join(sorted(already_processed)))


def train(trained_models_dir, estimator_model_dir, training_chunk_dir,
          generation, params):
  """Train the latest model from gathered data.

  Args:
    trained_models_dir: Where to export the completed generation.
    estimator_model_dir: tf.estimator model directory.
    training_chunk_dir: Directory where gathered training chunks are.
    generation: Which generation you are training.
    params: A MiniGoParams instance of hyperparameters for the model.
  """
  new_model_name = utils.generate_model_name(generation)
  print('New model will be {}'.format(new_model_name))
  new_model = os.path.join(trained_models_dir, new_model_name)

  print('Training on gathered game data...')
  tf_records = sorted(
      tf.gfile.Glob(os.path.join(training_chunk_dir, '*'+_TF_RECORD_SUFFIX)))
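  # params.train_window_size is measured in examples, so dividing by
  # params.examples_per_chunk gives the number of most recent chunks to keep.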
  tf_records = tf_records[
      -(params.train_window_size // params.examples_per_chunk):]

  print('Training from: {} to {}'.format(tf_records[0], tf_records[-1]))
  with utils.logged_timer('Training'):
    dualnet.train(estimator_model_dir, tf_records, generation, params)
    dualnet.export_model(estimator_model_dir, new_model)


def validate(trained_models_dir, holdout_dir, estimator_model_dir, params):
  """Validate the latest model on the holdout dataset.

  Args:
    trained_models_dir: Directory where the completed generations/models are.
    holdout_dir: Directory where the holdout data are.
    estimator_model_dir: tf.estimator model directory.
    params: A MiniGoParams instance of hyperparameters for the model.
  """
  model_num, _ = utils.get_latest_model(trained_models_dir)

  # Get the holdout game data
  nums_names = utils.get_models(trained_models_dir)

  # Model N was trained on games up through model N-1, so the validation set
  # should only contain data from models through N-1 as well; hence the
  # (num_name[0] < model_num) filter below.
  models = [num_name for num_name in nums_names if num_name[0] < model_num]

  # pair is a tuple of (model_num, model_name), like (13, 000013-modelname)
  holdout_dirs = [os.path.join(holdout_dir, pair[1])
                  for pair in models[-params.holdout_generation:]]
  tf_records = []
  with utils.logged_timer('Building lists of holdout files'):
    for record_dir in holdout_dirs:
      if os.path.exists(record_dir):  # make sure holdout dir exists
        tf_records.extend(
            tf.gfile.Glob(os.path.join(record_dir, '*'+_TF_RECORD_SUFFIX)))

  if not tf_records:
    print('No holdout dataset for validation! '
          'Please check your holdout directory: {}'.format(holdout_dir))
    return

  print('Validating on {} holdout tf_records.'.format(len(tf_records)))
  first_tf_record = os.path.basename(tf_records[0])
  last_tf_record = os.path.basename(tf_records[-1])
  with utils.logged_timer('Validating from {} to {}'.format(
      first_tf_record, last_tf_record)):
    dualnet.validate(estimator_model_dir, tf_records, params)


def evaluate(black_model_name, black_net, white_model_name, white_net,
             evaluate_dir, params):
  """Evaluate with two models.

  Two DualNetRunners play as black and white in a series of Go games. The
  model that wins by a margin of 55% is declared the winner.

  Args:
    black_model_name: The name of the model playing black.
    black_net: The DualNetRunner model for black.
    white_model_name: The name of the model playing white.
    white_net: The DualNetRunner model for white.
    evaluate_dir: Where to write the evaluation results. Set as
      'base_dir/sgf/evaluate/'.
    params: A MiniGoParams instance of hyperparameters for the model.

  Returns:
    The model name of the winner.

  Raises:
    ValueError: If neither `WHITE` nor `BLACK` is returned.
  """
  with utils.logged_timer('{} games'.format(params.eval_games)):
    winner = evaluation.play_match(
        params, black_net, white_net, params.eval_games,
        params.eval_readouts, evaluate_dir, params.eval_verbose)

  if winner != go.WHITE_NAME and winner != go.BLACK_NAME:
    raise ValueError('Winner should be either White or Black!')

  return black_model_name if winner == go.BLACK_NAME else white_model_name


def _set_params(flags):
  """Set hyperparameters from board size.

  Args:
    flags: Flags parsed by argparse.

  Returns:
    A MiniGoParams instance of hyperparameters.
  """
  params = model_params.MiniGoParams()
  k = utils.round_power_of_two(flags.board_size ** 2 / 3)
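  # Assuming round_power_of_two rounds to the nearest power of two, a 9x9
  # board gives k = 32 (from 81/3 = 27) and a 19x19 board gives k = 128
  # (from 361/3 = ~120).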
  params.num_filters = k  # Number of filters in the convolution layer
  params.fc_width = 2 * k  # Width of each fully connected layer
  params.num_shared_layers = flags.board_size  # Number of shared trunk layers
  params.board_size = flags.board_size  # Board size

  # How many positions can fit on a graphics card: 256 for 9x9 boards,
  # 16 or 32 for 19x19 boards.
  if flags.batch_size is None:
    if flags.board_size == 9:
      params.batch_size = 256
    else:
      params.batch_size = 32
  else:
    params.batch_size = flags.batch_size

  return params


def _prepare_selfplay(
    model_name, trained_models_dir, selfplay_dir, holdout_dir, sgf_dir, params):
  """Set directories and load the network for selfplay.

  Args:
    model_name: The name of the model for self-play.
    trained_models_dir: Directory where the completed generations/models are.
    selfplay_dir: Where to write the games. Set as 'base_dir/data/selfplay/'.
    holdout_dir: Where to write the holdout data. Set as
      'base_dir/data/holdout/'.
    sgf_dir: Where to write the sgf (Smart Game Format) files. Set as
      'base_dir/sgf/'.
    params: A MiniGoParams instance of hyperparameters for the model.

  Returns:
    The directories and network model for selfplay.
  """
  # Set paths for the model with 'model_name'
  model_path = os.path.join(trained_models_dir, model_name)
  output_dir = os.path.join(selfplay_dir, model_name)
  holdout_dir = os.path.join(holdout_dir, model_name)
  # clean_sgf holds SGF files written without comments;
  # full_sgf holds SGF files written with comments.
  clean_sgf = os.path.join(sgf_dir, model_name, 'clean')
  full_sgf = os.path.join(sgf_dir, model_name, 'full')

  _ensure_dir_exists(output_dir)
  _ensure_dir_exists(holdout_dir)
  _ensure_dir_exists(clean_sgf)
  _ensure_dir_exists(full_sgf)
  selfplay_dirs = {
      'output_dir': output_dir,
      'holdout_dir': holdout_dir,
      'clean_sgf': clean_sgf,
      'full_sgf': full_sgf
  }
  # cache the network model for self-play
  with utils.logged_timer('Loading weights from {} ... '.format(model_path)):
    network = dualnet.DualNetRunner(model_path, params)
  return selfplay_dirs, network


def run_selfplay(selfplay_model, selfplay_games, dirs, params):
  """Run selfplay to generate training data.

  Args:
    selfplay_model: The model name for selfplay.
    selfplay_games: The number of selfplay games.
    dirs: A MiniGoDirectory instance of directories used in each step.
    params: A MiniGoParams instance of hyperparameters for the model.
  """
  selfplay_dirs, network = _prepare_selfplay(
      selfplay_model, dirs.trained_models_dir, dirs.selfplay_dir,
      dirs.holdout_dir, dirs.sgf_dir, params)

  print('Self-play with model: {}'.format(selfplay_model))
  for _ in range(selfplay_games):
    selfplay(selfplay_dirs, network, params)


def main(_):
  """Run the reinforcement learning loop."""
  tf.logging.set_verbosity(tf.logging.INFO)

  params = _set_params(FLAGS)

  # A dummy model for debugging/testing purposes, with fewer games and
  # iterations.
  if FLAGS.test:
    params = model_params.DummyMiniGoParams()
    base_dir = FLAGS.base_dir + str(FLAGS.board_size) + '_size_dummy/'
  else:
    # Set directories for models and datasets
    base_dir = FLAGS.base_dir + str(FLAGS.board_size) + '_size/'

  dirs = utils.MiniGoDirectory(base_dir)

  # Run selfplay only if user specifies the argument.
  if FLAGS.selfplay:
    selfplay_model_name = FLAGS.selfplay_model_name or utils.get_latest_model(
        dirs.trained_models_dir)[1]
    max_games = FLAGS.selfplay_max_games or params.max_games_per_generation
    run_selfplay(selfplay_model_name, max_games, dirs, params)
    return

  # Run the RL pipeline
  # If no models have been trained, start from the bootstrap model.

  if not os.path.isdir(dirs.trained_models_dir):
    print('No trained model exists! Starting from bootstrap...')
    print('Creating random initial weights...')
    bootstrap(dirs.estimator_model_dir, dirs.trained_models_dir, params)
  else:
    print('A MiniGo base directory has been found!')
    print('Starting from the last checkpoint...')

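  # Each RL iteration: run selfplay with the best model so far, gather the
  # new games into training chunks, train the next generation, then
  # optionally validate it and evaluate it against the current best model.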
  _, best_model_so_far = utils.get_latest_model(dirs.trained_models_dir)
  for rl_iter in range(params.max_iters_per_pipeline):
    print('RL_iteration: {}'.format(rl_iter))
    # Self-play with the best model to generate training data
    run_selfplay(
        best_model_so_far, params.max_games_per_generation, dirs, params)

    # Gather selfplay data for training.
    print('Gathering game output...')
    gather(dirs.selfplay_dir, dirs.training_chunk_dir, params)

    # Train the next generation model.
    model_num, _ = utils.get_latest_model(dirs.trained_models_dir)
    print('Training on gathered game data...')
    train(dirs.trained_models_dir, dirs.estimator_model_dir,
          dirs.training_chunk_dir, model_num + 1, params)

    # Validate the latest model if needed.
    if FLAGS.validation:
      print('Validating on the holdout game data...')
      validate(dirs.trained_models_dir, dirs.holdout_dir,
               dirs.estimator_model_dir, params)

    _, current_model = utils.get_latest_model(dirs.trained_models_dir)

    if FLAGS.evaluation:  # Perform evaluation if needed
      print('Evaluating models {} and {}...'.format(
          best_model_so_far, current_model))
      black_model = os.path.join(dirs.trained_models_dir, best_model_so_far)
      white_model = os.path.join(dirs.trained_models_dir, current_model)
      _ensure_dir_exists(dirs.evaluate_dir)
      with utils.logged_timer('Loading weights'):
        black_net = dualnet.DualNetRunner(black_model, params)
        white_net = dualnet.DualNetRunner(white_model, params)

      best_model_so_far = evaluate(
          best_model_so_far, black_net, current_model, white_net,
          dirs.evaluate_dir, params)
      print('Winner of evaluation: {}!'.format(best_model_so_far))
    else:
      best_model_so_far = current_model


if __name__ == '__main__':
  parser = argparse.ArgumentParser()
  # Flags to run the RL pipeline
  parser.add_argument(
      '--base_dir',
      type=str,
      default='/tmp/minigo/',
      metavar='BD',
      help='Base directory for the MiniGo models and datasets.')
  parser.add_argument(
      '--board_size',
      type=int,
      default=9,
      metavar='N',
      choices=[9, 19],
      help='Go board size. The default size is 9.')
  parser.add_argument(
      '--batch_size',
      type=int,
      default=None,
      metavar='BS',
      help='Batch size for training. If not set, defaults to 256 for board '
           'size 9 and 32 for board size 19.')
  # Test the pipeline with a dummy model
  parser.add_argument(
      '--test',
      action='store_true',
      help='A boolean to test the RL pipeline with a dummy model.')
  # Run RL pipeline with the validation step
  parser.add_argument(
      '--validation',
      action='store_true',
      help='A boolean to specify validation in the RL pipeline.')
  # Run RL pipeline with the evaluation step
  parser.add_argument(
      '--evaluation',
      action='store_true',
      help='A boolean to specify evaluation in the RL pipeline.')

  # Flags for running self-play only
  parser.add_argument(
      '--selfplay',
      action='store_true',
      help='A boolean to run self-play only.')
  parser.add_argument(
      '--selfplay_model_name',
      type=str,
      default=None,
      metavar='SM',
      help='The model used for self-play only.')
  parser.add_argument(
      '--selfplay_max_games',
      type=int,
      default=None,
      metavar='SMG',
      help='The number of selfplay games to generate when running '
           'self-play only.')

  FLAGS, unparsed = parser.parse_known_args()
  tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)