# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the 'License');
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an 'AS IS' BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Misc for Transformer."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

# pylint: disable=g-bad-import-order
from absl import flags
import tensorflow as tf

from official.nlp.transformer import model_params
from official.utils.flags import core as flags_core
from official.utils.misc import keras_utils

FLAGS = flags.FLAGS

# Maps the --param_set flag value to a predefined hyperparameter bundle.
# Values are plain dicts defined in model_params; callers should .copy()
# before mutating (see get_model_params).
PARAMS_MAP = {
    'tiny': model_params.TINY_PARAMS,
    'base': model_params.BASE_PARAMS,
    'big': model_params.BIG_PARAMS,
}


def get_model_params(param_set, num_gpus):
  """Gets predefined model params.

  Args:
    param_set: String name of the parameter set ('tiny', 'base', or 'big').
    num_gpus: Number of GPUs used for training; when greater than 1, the
      multi-GPU variants of the 'big'/'base' parameter sets are used.

  Returns:
    A copy of the selected params dict, so callers may mutate it freely.

  Raises:
    ValueError: If num_gpus > 1 and param_set has no multi-GPU variant.
    KeyError: If param_set is not a key of PARAMS_MAP (single-GPU path).
  """
  if num_gpus > 1:
    if param_set == 'big':
      return model_params.BIG_MULTI_GPU_PARAMS.copy()
    elif param_set == 'base':
      return model_params.BASE_MULTI_GPU_PARAMS.copy()
    else:
      raise ValueError('Not valid params: param_set={} num_gpus={}'.format(
          param_set, num_gpus))

  return PARAMS_MAP[param_set].copy()


def define_transformer_flags():
  """Add flags and flag validators for running transformer_main.

  Registers base/performance/benchmark/device flags from flags_core plus
  transformer-specific flags, sets data_dir/model_dir defaults, and installs
  validators tying --bleu_source, --bleu_ref, and --vocab_file together.
  Must be called before absl flag parsing.
  """
  # Add common flags (data_dir, model_dir, etc.).
  flags_core.define_base(num_gpu=True, distribution_strategy=True)
  flags_core.define_performance(
      num_parallel_calls=True,
      inter_op=False,
      intra_op=False,
      synthetic_data=True,
      max_train_steps=False,
      dtype=True,
      loss_scale=True,
      all_reduce_alg=True,
      num_packs=True,
      tf_gpu_thread_mode=True,
      datasets_num_private_threads=True,
      enable_xla=True,
      fp16_implementation=True
  )

  flags_core.define_benchmark()
  flags_core.define_device(tpu=True)

  flags.DEFINE_integer(
      name='train_steps', short_name='ts', default=300000,
      help=flags_core.help_wrap('The number of steps used to train.'))
  flags.DEFINE_integer(
      name='steps_between_evals', short_name='sbe', default=5000,
      help=flags_core.help_wrap(
          'The Number of training steps to run between evaluations. This is '
          'used if --train_steps is defined.'))
  flags.DEFINE_boolean(
      name='enable_time_history', default=True,
      help='Whether to enable TimeHistory callback.')
  flags.DEFINE_boolean(
      name='enable_tensorboard', default=False,
      help='Whether to enable Tensorboard callback.')
  flags.DEFINE_boolean(
      name='enable_metrics_in_training', default=False,
      help='Whether to enable metrics during training.')
  flags.DEFINE_boolean(
      name='enable_mlir_bridge',
      default=False,
      help='Whether to enable the TF to XLA bridge.')
  # Set flags from the flags_core module as 'key flags' so they're listed when
  # the '-h' flag is used. Without this line, the flags defined above are
  # only shown in the full `--helpful` help text.
  flags.adopt_module_key_flags(flags_core)

  # Add transformer-specific flags
  flags.DEFINE_enum(
      name='param_set', short_name='mp', default='big',
      enum_values=PARAMS_MAP.keys(),
      help=flags_core.help_wrap(
          'Parameter set to use when creating and training the model. The '
          'parameters define the input shape (batch size and max length), '
          'model configuration (size of embedding, # of hidden layers, etc.), '
          'and various other settings. The big parameter set increases the '
          'default batch size, embedding/hidden size, and filter size. For a '
          'complete list of parameters, please see model/model_params.py.'))

  flags.DEFINE_bool(
      name='static_batch', short_name='sb', default=False,
      help=flags_core.help_wrap(
          'Whether the batches in the dataset should have static shapes. In '
          'general, this setting should be False. Dynamic shapes allow the '
          'inputs to be grouped so that the number of padding tokens is '
          'minimized, and helps model training. In cases where the input shape '
          'must be static (e.g. running on TPU), this setting will be ignored '
          'and static batching will always be used.'))
  flags.DEFINE_integer(
      name='max_length', short_name='ml', default=256,
      help=flags_core.help_wrap(
          'Max sentence length for Transformer. Default is 256. Note: Usually '
          'it is more effective to use a smaller max length if static_batch is '
          'enabled, e.g. 64.'))

  # Flags for training with steps (may be used for debugging)
  flags.DEFINE_integer(
      name='validation_steps', short_name='vs', default=64,
      help=flags_core.help_wrap('The number of steps used in validation.'))

  # BLEU score computation
  flags.DEFINE_string(
      name='bleu_source', short_name='bls', default=None,
      help=flags_core.help_wrap(
          'Path to source file containing text translate when calculating the '
          'official BLEU score. Both --bleu_source and --bleu_ref must be set. '
          ))
  flags.DEFINE_string(
      name='bleu_ref', short_name='blr', default=None,
      help=flags_core.help_wrap(
          'Path to source file containing text translate when calculating the '
          'official BLEU score. Both --bleu_source and --bleu_ref must be set. '
          ))
  flags.DEFINE_string(
      name='vocab_file', short_name='vf', default=None,
      help=flags_core.help_wrap(
          'Path to subtoken vocabulary file. If data_download.py was used to '
          'download and encode the training data, look in the data_dir to find '
          'the vocab file.'))
  flags.DEFINE_string(
      name='mode', default='train',
      help=flags_core.help_wrap('mode: train, eval, or predict'))
  flags.DEFINE_bool(
      name='use_ctl',
      default=False,
      help=flags_core.help_wrap(
          'Whether the model runs with custom training loop.'))
  flags.DEFINE_integer(
      name='decode_batch_size',
      default=32,
      help=flags_core.help_wrap(
          'Global batch size used for Transformer autoregressive decoding on '
          'TPU.'))
  flags.DEFINE_integer(
      name='decode_max_length',
      default=97,
      help=flags_core.help_wrap(
          'Max sequence length of the decode/eval data. This is used by '
          'Transformer autoregressive decoding on TPU to have minimum '
          'paddings.'))
  flags.DEFINE_bool(
      name='padded_decode',
      default=False,
      help=flags_core.help_wrap(
          'Whether the autoregressive decoding runs with input data padded to '
          'the decode_max_length. For TPU/XLA-GPU runs, this flag has to be '
          'set due the static shape requirement. Although CPU/GPU could also '
          'use padded_decode, it has not been tested. In addition, this method '
          'will introduce unnecessary overheads which grow quadratically with '
          'the max sequence length.'))
  flags.DEFINE_bool(
      name='enable_checkpointing',
      default=True,
      help=flags_core.help_wrap(
          'Whether to do checkpointing during training. When running under '
          'benchmark harness, we will avoid checkpointing.'))

  flags_core.set_defaults(data_dir='/tmp/translate_ende',
                          model_dir='/tmp/transformer_model',
                          batch_size=None)

  # pylint: disable=unused-variable
  @flags.multi_flags_validator(
      ['bleu_source', 'bleu_ref'],
      message='Both or neither --bleu_source and --bleu_ref must be defined.')
  def _check_bleu_files(flags_dict):
    # True (valid) iff both are set or both are unset.
    return (flags_dict['bleu_source'] is None) == (
        flags_dict['bleu_ref'] is None)

  @flags.multi_flags_validator(
      ['bleu_source', 'bleu_ref', 'vocab_file'],
      message='--vocab_file must be defined if --bleu_source and --bleu_ref '
              'are defined.')
  def _check_bleu_vocab_file(flags_dict):
    # Vocab file is only required when BLEU computation is requested.
    if flags_dict['bleu_source'] and flags_dict['bleu_ref']:
      return flags_dict['vocab_file'] is not None
    return True
  # pylint: enable=unused-variable

def get_callbacks():
  """Returns common callbacks.

  Builds the callback list from FLAGS: a keras_utils.TimeHistory callback
  when --enable_time_history is set (logging to model_dir only when
  TensorBoard is also enabled), and a TensorBoard callback when
  --enable_tensorboard is set.

  Returns:
    A list of Keras callbacks (possibly empty).
  """
  callbacks = []
  if FLAGS.enable_time_history:
    time_callback = keras_utils.TimeHistory(
        FLAGS.batch_size,
        FLAGS.log_steps,
        FLAGS.model_dir if FLAGS.enable_tensorboard else None)
    callbacks.append(time_callback)

  if FLAGS.enable_tensorboard:
    tensorboard_callback = tf.keras.callbacks.TensorBoard(
        log_dir=FLAGS.model_dir)
    callbacks.append(tensorboard_callback)

  return callbacks


def update_stats(history, stats, callbacks):
  """Normalizes and updates dictionary of stats.

  Mutates `stats` in place: records the final training loss from `history`
  and, if a TimeHistory callback is present, the step timestamp log, train
  finish time, and average examples/second.

  Args:
    history: Results of the training step (a Keras History object, or a
      falsy value to skip loss extraction).
    stats: Dict with pre-existing training stats.
    callbacks: a list of callbacks which might include a time history callback
      used during keras.fit.
  """
  if history and history.history:
    train_hist = history.history
    # Gets final loss from training.
    stats['loss'] = float(train_hist['loss'][-1])

  if not callbacks:
    return

  # Look for the time history callback which was used during keras.fit
  for callback in callbacks:
    if isinstance(callback, keras_utils.TimeHistory):
      timestamp_log = callback.timestamp_log
      stats['step_timestamp_log'] = timestamp_log
      stats['train_finish_time'] = callback.train_finish_time
      # Needs at least two timestamps to compute a rate over an interval.
      if len(timestamp_log) > 1:
        stats['avg_exp_per_second'] = (
            callback.batch_size * callback.log_steps *
            (len(callback.timestamp_log)-1) /
            (timestamp_log[-1].timestamp - timestamp_log[0].timestamp))