# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Misc for Transformer."""

# pylint: disable=g-bad-import-order
from absl import flags
import tensorflow as tf

from official.nlp.transformer import model_params
from official.utils.flags import core as flags_core
from official.utils.misc import keras_utils

FLAGS = flags.FLAGS
27
28

PARAMS_MAP = {
Toby Boyd's avatar
Toby Boyd committed
29
30
31
    'tiny': model_params.TINY_PARAMS,
    'base': model_params.BASE_PARAMS,
    'big': model_params.BIG_PARAMS,
32
33
34
35
36
37
}


def get_model_params(param_set, num_gpus):
  """Gets predefined model params."""
  if num_gpus > 1:
Toby Boyd's avatar
Toby Boyd committed
38
    if param_set == 'big':
39
      return model_params.BIG_MULTI_GPU_PARAMS.copy()
Toby Boyd's avatar
Toby Boyd committed
40
    elif param_set == 'base':
41
42
      return model_params.BASE_MULTI_GPU_PARAMS.copy()
    else:
Toby Boyd's avatar
Toby Boyd committed
43
      raise ValueError('Not valid params: param_set={} num_gpus={}'.format(
44
45
46
47
48
49
50
          param_set, num_gpus))

  return PARAMS_MAP[param_set].copy()


def define_transformer_flags():
  """Add flags and flag validators for running transformer_main."""
51
  # Add common flags (data_dir, model_dir, etc.).
52
  flags_core.define_base(num_gpu=True, distribution_strategy=True)
53
54
55
56
57
58
  flags_core.define_performance(
      num_parallel_calls=True,
      inter_op=False,
      intra_op=False,
      synthetic_data=True,
      max_train_steps=False,
59
60
      dtype=True,
      loss_scale=True,
Toby Boyd's avatar
Toby Boyd committed
61
      all_reduce_alg=True,
62
63
64
      num_packs=True,
      tf_gpu_thread_mode=True,
      datasets_num_private_threads=True,
65
      enable_xla=True,
Hongkun Yu's avatar
Hongkun Yu committed
66
      fp16_implementation=True)
Toby Boyd's avatar
Toby Boyd committed
67

68
69
70
  flags_core.define_benchmark()
  flags_core.define_device(tpu=True)

Toby Boyd's avatar
Toby Boyd committed
71
  flags.DEFINE_integer(
Hongkun Yu's avatar
Hongkun Yu committed
72
73
74
      name='train_steps',
      short_name='ts',
      default=300000,
Toby Boyd's avatar
Toby Boyd committed
75
76
      help=flags_core.help_wrap('The number of steps used to train.'))
  flags.DEFINE_integer(
Hongkun Yu's avatar
Hongkun Yu committed
77
78
79
      name='steps_between_evals',
      short_name='sbe',
      default=5000,
Toby Boyd's avatar
Toby Boyd committed
80
81
82
      help=flags_core.help_wrap(
          'The Number of training steps to run between evaluations. This is '
          'used if --train_steps is defined.'))
83
  flags.DEFINE_boolean(
Hongkun Yu's avatar
Hongkun Yu committed
84
85
      name='enable_time_history',
      default=True,
86
      help='Whether to enable TimeHistory callback.')
Toby Boyd's avatar
Toby Boyd committed
87
  flags.DEFINE_boolean(
Hongkun Yu's avatar
Hongkun Yu committed
88
89
      name='enable_tensorboard',
      default=False,
Toby Boyd's avatar
Toby Boyd committed
90
      help='Whether to enable Tensorboard callback.')
91
  flags.DEFINE_boolean(
Hongkun Yu's avatar
Hongkun Yu committed
92
93
      name='enable_metrics_in_training',
      default=False,
94
      help='Whether to enable metrics during training.')
95
96
97
98
  flags.DEFINE_boolean(
      name='enable_mlir_bridge',
      default=False,
      help='Whether to enable the TF to XLA bridge.')
Toby Boyd's avatar
Toby Boyd committed
99
  # Set flags from the flags_core module as 'key flags' so they're listed when
100
101
102
103
104
105
  # the '-h' flag is used. Without this line, the flags defined above are
  # only shown in the full `--helpful` help text.
  flags.adopt_module_key_flags(flags_core)

  # Add transformer-specific flags
  flags.DEFINE_enum(
Hongkun Yu's avatar
Hongkun Yu committed
106
107
108
      name='param_set',
      short_name='mp',
      default='big',
109
110
      enum_values=PARAMS_MAP.keys(),
      help=flags_core.help_wrap(
Toby Boyd's avatar
Toby Boyd committed
111
112
113
114
115
116
          'Parameter set to use when creating and training the model. The '
          'parameters define the input shape (batch size and max length), '
          'model configuration (size of embedding, # of hidden layers, etc.), '
          'and various other settings. The big parameter set increases the '
          'default batch size, embedding/hidden size, and filter size. For a '
          'complete list of parameters, please see model/model_params.py.'))
117
118

  flags.DEFINE_bool(
Hongkun Yu's avatar
Hongkun Yu committed
119
120
121
      name='static_batch',
      short_name='sb',
      default=False,
122
      help=flags_core.help_wrap(
Toby Boyd's avatar
Toby Boyd committed
123
124
125
126
127
128
          'Whether the batches in the dataset should have static shapes. In '
          'general, this setting should be False. Dynamic shapes allow the '
          'inputs to be grouped so that the number of padding tokens is '
          'minimized, and helps model training. In cases where the input shape '
          'must be static (e.g. running on TPU), this setting will be ignored '
          'and static batching will always be used.'))
129
  flags.DEFINE_integer(
Hongkun Yu's avatar
Hongkun Yu committed
130
131
132
      name='max_length',
      short_name='ml',
      default=256,
133
134
135
136
      help=flags_core.help_wrap(
          'Max sentence length for Transformer. Default is 256. Note: Usually '
          'it is more effective to use a smaller max length if static_batch is '
          'enabled, e.g. 64.'))
137
138
139

  # Flags for training with steps (may be used for debugging)
  flags.DEFINE_integer(
Hongkun Yu's avatar
Hongkun Yu committed
140
141
142
      name='validation_steps',
      short_name='vs',
      default=64,
Toby Boyd's avatar
Toby Boyd committed
143
      help=flags_core.help_wrap('The number of steps used in validation.'))
144
145
146

  # BLEU score computation
  flags.DEFINE_string(
Hongkun Yu's avatar
Hongkun Yu committed
147
148
149
      name='bleu_source',
      short_name='bls',
      default=None,
150
      help=flags_core.help_wrap(
Toby Boyd's avatar
Toby Boyd committed
151
152
          'Path to source file containing text translate when calculating the '
          'official BLEU score. Both --bleu_source and --bleu_ref must be set. '
Hongkun Yu's avatar
Hongkun Yu committed
153
      ))
154
  flags.DEFINE_string(
Hongkun Yu's avatar
Hongkun Yu committed
155
156
157
      name='bleu_ref',
      short_name='blr',
      default=None,
158
      help=flags_core.help_wrap(
Toby Boyd's avatar
Toby Boyd committed
159
160
          'Path to source file containing text translate when calculating the '
          'official BLEU score. Both --bleu_source and --bleu_ref must be set. '
Hongkun Yu's avatar
Hongkun Yu committed
161
      ))
162
  flags.DEFINE_string(
Hongkun Yu's avatar
Hongkun Yu committed
163
164
165
      name='vocab_file',
      short_name='vf',
      default=None,
166
      help=flags_core.help_wrap(
Toby Boyd's avatar
Toby Boyd committed
167
168
169
          'Path to subtoken vocabulary file. If data_download.py was used to '
          'download and encode the training data, look in the data_dir to find '
          'the vocab file.'))
170
  flags.DEFINE_string(
Hongkun Yu's avatar
Hongkun Yu committed
171
172
      name='mode',
      default='train',
Toby Boyd's avatar
Toby Boyd committed
173
      help=flags_core.help_wrap('mode: train, eval, or predict'))
A. Unique TensorFlower's avatar
A. Unique TensorFlower committed
174
175
176
177
178
  flags.DEFINE_bool(
      name='use_ctl',
      default=False,
      help=flags_core.help_wrap(
          'Whether the model runs with custom training loop.'))
A. Unique TensorFlower's avatar
A. Unique TensorFlower committed
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
  flags.DEFINE_integer(
      name='decode_batch_size',
      default=32,
      help=flags_core.help_wrap(
          'Global batch size used for Transformer autoregressive decoding on '
          'TPU.'))
  flags.DEFINE_integer(
      name='decode_max_length',
      default=97,
      help=flags_core.help_wrap(
          'Max sequence length of the decode/eval data. This is used by '
          'Transformer autoregressive decoding on TPU to have minimum '
          'paddings.'))
  flags.DEFINE_bool(
      name='padded_decode',
      default=False,
      help=flags_core.help_wrap(
          'Whether the autoregressive decoding runs with input data padded to '
          'the decode_max_length. For TPU/XLA-GPU runs, this flag has to be '
          'set due the static shape requirement. Although CPU/GPU could also '
          'use padded_decode, it has not been tested. In addition, this method '
          'will introduce unnecessary overheads which grow quadratically with '
          'the max sequence length.'))
A. Unique TensorFlower's avatar
A. Unique TensorFlower committed
202
203
204
205
206
207
  flags.DEFINE_bool(
      name='enable_checkpointing',
      default=True,
      help=flags_core.help_wrap(
          'Whether to do checkpointing during training. When running under '
          'benchmark harness, we will avoid checkpointing.'))
208
209
210
211
212
213
214
215
  flags.DEFINE_bool(
      name='save_weights_only',
      default=True,
      help=flags_core.help_wrap(
          'Only used when above `enable_checkpointing` is True. '
          'If True, then only the model\'s weights will be saved '
          '(`model.save_weights(filepath)`), else the full model is saved '
          '(`model.save(filepath)`)'))
216

Hongkun Yu's avatar
Hongkun Yu committed
217
218
219
220
  flags_core.set_defaults(
      data_dir='/tmp/translate_ende',
      model_dir='/tmp/transformer_model',
      batch_size=None)
221
222
223

  # pylint: disable=unused-variable
  @flags.multi_flags_validator(
Toby Boyd's avatar
Toby Boyd committed
224
225
      ['bleu_source', 'bleu_ref'],
      message='Both or neither --bleu_source and --bleu_ref must be defined.')
226
  def _check_bleu_files(flags_dict):
Toby Boyd's avatar
Toby Boyd committed
227
228
    return (flags_dict['bleu_source'] is None) == (
        flags_dict['bleu_ref'] is None)
229
230

  @flags.multi_flags_validator(
Toby Boyd's avatar
Toby Boyd committed
231
232
      ['bleu_source', 'bleu_ref', 'vocab_file'],
      message='--vocab_file must be defined if --bleu_source and --bleu_ref '
Hongkun Yu's avatar
Hongkun Yu committed
233
      'are defined.')
234
  def _check_bleu_vocab_file(flags_dict):
Toby Boyd's avatar
Toby Boyd committed
235
236
    if flags_dict['bleu_source'] and flags_dict['bleu_ref']:
      return flags_dict['vocab_file'] is not None
237
    return True
Hongkun Yu's avatar
Hongkun Yu committed
238

239
240
  # pylint: enable=unused-variable

def get_callbacks():
Toby Boyd's avatar
Toby Boyd committed
243
244
  """Returns common callbacks."""
  callbacks = []
245
  if FLAGS.enable_time_history:
Will Cromar's avatar
Will Cromar committed
246
247
248
    time_callback = keras_utils.TimeHistory(
        FLAGS.batch_size,
        FLAGS.log_steps,
Abdullah Rashwan's avatar
Abdullah Rashwan committed
249
        logdir=FLAGS.model_dir if FLAGS.enable_tensorboard else None)
250
    callbacks.append(time_callback)
Toby Boyd's avatar
Toby Boyd committed
251
252
253
254
255
256
257
258
259

  if FLAGS.enable_tensorboard:
    tensorboard_callback = tf.keras.callbacks.TensorBoard(
        log_dir=FLAGS.model_dir)
    callbacks.append(tensorboard_callback)

  return callbacks


def update_stats(history, stats, callbacks):
  """Normalizes and updates dictionary of stats.
Toby Boyd's avatar
Toby Boyd committed
262
263
264

  Args:
    history: Results of the training step.
Tayo Oguntebi's avatar
Tayo Oguntebi committed
265
    stats: Dict with pre-existing training stats.
Toby Boyd's avatar
Toby Boyd committed
266
267
268
269
270
271
272
    callbacks: a list of callbacks which might include a time history callback
      used during keras.fit.
  """

  if history and history.history:
    train_hist = history.history
    # Gets final loss from training.
273
    stats['loss'] = float(train_hist['loss'][-1])
Toby Boyd's avatar
Toby Boyd committed
274
275

  if not callbacks:
Tayo Oguntebi's avatar
Tayo Oguntebi committed
276
    return
Toby Boyd's avatar
Toby Boyd committed
277
278
279
280
281
282
283
284
285
286

  # Look for the time history callback which was used during keras.fit
  for callback in callbacks:
    if isinstance(callback, keras_utils.TimeHistory):
      timestamp_log = callback.timestamp_log
      stats['step_timestamp_log'] = timestamp_log
      stats['train_finish_time'] = callback.train_finish_time
      if len(timestamp_log) > 1:
        stats['avg_exp_per_second'] = (
            callback.batch_size * callback.log_steps *
Hongkun Yu's avatar
Hongkun Yu committed
287
            (len(callback.timestamp_log) - 1) /
Toby Boyd's avatar
Toby Boyd committed
288
            (timestamp_log[-1].timestamp - timestamp_log[0].timestamp))