model_params.py 3.57 KB
Newer Older
Yanhui Liang's avatar
Yanhui Liang committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Defines MiniGo parameters."""


class MiniGoParams(object):
  """Default hyperparameters and pipeline settings for MiniGo.

  Every setting is a plain class attribute, so a variant configuration can be
  created by subclassing and overriding only the values that differ.
  """

  # ---------------------------------------------------------------------------
  # Go
  # ---------------------------------------------------------------------------
  # Size of the Go board.
  board_size = 9

  # ---------------------------------------------------------------------------
  # RL pipeline
  # ---------------------------------------------------------------------------
  # Number of games per selfplay generation.
  max_games_per_generation = 10
  # Number of RL iterations in one pipeline.
  max_iters_per_pipeline = 2

  # ---------------------------------------------------------------------------
  # Preprocessing
  # ---------------------------------------------------------------------------
  # Shuffle buffer size. It determines how far an example could end up from
  # where it started; together with the interleave parameters in preprocessing
  # it gives an approximation of uniform sampling. Smaller numbers can be used
  # for aggregation or validation.
  # NOTE(review): a default of 4M has been cited for training, while the value
  # set here is 2M -- confirm which one is intended.
  shuffle_buffer_size = 2 * 1000 * 1000

  # ---------------------------------------------------------------------------
  # dual_net
  # ---------------------------------------------------------------------------
  # How many positions to look at per generation.
  # Per AlphaGo Zero (AGZ), 2048 minibatch * 1k = 2M positions/generation.
  examples_per_generation = 2 * 1000 * 1000

  # Regularization strength.
  l2_strength = 1e-4
  # Momentum used in SGD.
  momentum = 0.9

  # Kernel size of the conv and res blocks, taken from the AGZ paper.
  kernel_size = [3, 3]

  # ---------------------------------------------------------------------------
  # Selfplay
  # ---------------------------------------------------------------------------
  # How many simulations to run per move.
  selfplay_readouts = 100
  # Verbosity: >=2 will print debug info, >=3 will print boards.
  selfplay_verbose = 1
  # Absolute value of the threshold to resign at.
  selfplay_resign_threshold = 0.95

  # Number of simultaneous leaves in MCTS.
  simultaneous_leaves = 8

  # How many games to hold out for validation.
  holdout_pct = 0.05
  # How many recent generations/models to use for holdout data.
  holdout_generation = 50

  # ---------------------------------------------------------------------------
  # Gather
  # ---------------------------------------------------------------------------
  # How many recent generations/models to use for gathered data.
  gather_generation = 50

  # How many positions to aggregate per 'chunk'.
  examples_per_chunk = 10 * 1000
  # How many positions to draw from for the training window.
  # AGZ used the most recent 500k games; assuming 250 moves/game that is 125M.
  train_window_size = 500 * 1000 * 250

  # ---------------------------------------------------------------------------
  # Evaluation
  # ---------------------------------------------------------------------------
  # Number of games to play in evaluation.
  eval_games = 50
  # How many readouts to make per move in evaluation.
  eval_readouts = 100
  # How verbose the players should be in evaluation.
  eval_verbose = 1
  # Winner needs to win by a margin of 55%.
  eval_win_rate = 0.55


class DummyMiniGoParams(MiniGoParams):
  """Scaled-down MiniGo parameters for a dummy model.

  Settings that are not listed here are inherited from MiniGoParams.
  """

  # Network
  # Number of filters in the convolution layer.
  num_filters = 8
  # Width of each fully connected layer.
  fc_width = 16
  # Number of shared trunk layers.
  num_shared_layers = 1

  # Training
  batch_size = 16
  examples_per_generation = 64
  shuffle_buffer_size = 1000

  # RL pipeline
  max_games_per_generation = 2
  max_iters_per_pipeline = 1

  # Selfplay
  selfplay_readouts = 10

  # Evaluation
  # Number of games to play in evaluation.
  eval_games = 10
  # How many readouts to make per move in evaluation.
  eval_readouts = 10
  # How verbose the players should be in evaluation.
  eval_verbose = 1


class DummyValidationParams(DummyMiniGoParams):
  """Dummy-model parameters used for validation testing.

  Identical to DummyMiniGoParams except for holdout_pct. MiniGoParams is still
  an ancestor through DummyMiniGoParams, so it does not need to be listed as
  an explicit base; the method resolution order is the same either way.
  """
  # Set holdout percent as 1 for validation testing purpose.
  # NOTE(review): presumably a fraction, i.e. 1 holds out every game -- confirm
  # against the code that consumes holdout_pct.
  holdout_pct = 1