# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the 'License');
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an 'AS IS' BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Misc for Transformer."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

# pylint: disable=g-bad-import-order
from absl import flags
import tensorflow as tf

# TODO(tianlin) Import internal library. Remove this when some functions for
# different TF versions are fixed.
from tensorflow.python import tf2 as tf2_internal

from official.transformer.model import model_params
from official.utils.flags import core as flags_core
from official.utils.misc import keras_utils

FLAGS = flags.FLAGS

# Maps each value accepted by the --param_set flag to its predefined parameter
# set in model_params. get_model_params() hands out copies of these entries.
PARAMS_MAP = {
    'tiny': model_params.TINY_PARAMS,
    'base': model_params.BASE_PARAMS,
    'big': model_params.BIG_PARAMS,
}


42
43
44
45
46
def is_v2():
  """Returns whether it is v2.

  Returns:
    Whatever `tf2_internal.enabled()` reports, where `tf2_internal` is the
    internal `tensorflow.python.tf2` module imported at the top of this file
    (presumably True when TensorFlow 2 behavior is enabled).
  """
  return tf2_internal.enabled()


47
48
49
def get_model_params(param_set, num_gpus):
  """Gets a copy of the predefined model params for `param_set`.

  Args:
    param_set: Name of the parameter set. With at most one GPU any key of
      PARAMS_MAP is accepted; with more than one GPU only 'big' and 'base'
      are accepted.
    num_gpus: Number of GPUs. A value greater than 1 selects the multi-GPU
      variant of the parameter set from model_params.

  Returns:
    The result of calling `.copy()` on the selected parameter set, so callers
    do not receive the module-level object itself.

  Raises:
    ValueError: If `num_gpus` > 1 and `param_set` is neither 'big' nor 'base'.
    KeyError: If `num_gpus` <= 1 and `param_set` is not a key of PARAMS_MAP.
  """
  if num_gpus > 1:
    # Only the 'big' and 'base' sets have a dedicated multi-GPU variant.
    if param_set == 'big':
      return model_params.BIG_MULTI_GPU_PARAMS.copy()
    if param_set == 'base':
      return model_params.BASE_MULTI_GPU_PARAMS.copy()
    raise ValueError('Not valid params: param_set={} num_gpus={}'.format(
        param_set, num_gpus))

  return PARAMS_MAP[param_set].copy()


def define_transformer_flags():
  """Add flags and flag validators for running transformer_main.

  Defines, in order: the shared base/performance/benchmark/device flags from
  flags_core, the training-loop and callback flags, the transformer-specific
  flags (parameter set, batching, max length, validation, BLEU, vocab, mode),
  then overrides some flags_core defaults and registers validators that
  enforce the dependencies between the flags.
  """
  # Add common flags (data_dir, model_dir, train_epochs, etc.).
  flags_core.define_base()
  flags_core.define_performance(
      num_parallel_calls=True,
      inter_op=False,
      intra_op=False,
      synthetic_data=True,
      max_train_steps=False,
      dtype=True,
      loss_scale=True,
      all_reduce_alg=True,
      enable_xla=True,
      force_v2_in_keras_compile=True
  )

  # Additional performance flags
  # TODO(b/76028325): Remove when generic layout optimizer is ready.
  flags.DEFINE_boolean(
      name='enable_grappler_layout_optimizer',
      default=True,
      help='Enable Grappler layout optimizer. Currently Grappler can '
           'de-optimize fp16 graphs by forcing NCHW layout for all '
           'convolutions and batch normalizations, and this flag allows to '
           'disable it.'
  )

  flags_core.define_benchmark()
  flags_core.define_device(tpu=True)

  flags.DEFINE_integer(
      name='train_steps', short_name='ts', default=300000,
      help=flags_core.help_wrap('The number of steps used to train.'))
  flags.DEFINE_integer(
      name='steps_between_evals', short_name='sbe', default=1000,
      help=flags_core.help_wrap(
          'The Number of training steps to run between evaluations. This is '
          'used if --train_steps is defined.'))
  flags.DEFINE_boolean(
      name='enable_time_history', default=True,
      help='Whether to enable TimeHistory callback.')
  flags.DEFINE_boolean(
      name='enable_tensorboard', default=False,
      help='Whether to enable Tensorboard callback.')
  flags.DEFINE_boolean(
      name='enable_metrics_in_training', default=False,
      help='Whether to enable metrics during training.')
  flags.DEFINE_string(
      name='profile_steps', default=None,
      help='Save profiling data to model dir at given range of steps. The '
      'value must be a comma separated pair of positive integers, specifying '
      'the first and last step to profile. For example, "--profile_steps=2,4" '
      'triggers the profiler to process 3 steps, starting from the 2nd step. '
      'Note that profiler has a non-trivial performance overhead, and the '
      'output file can be gigantic if profiling many steps.')
  # Set flags from the flags_core module as 'key flags' so they're listed when
  # the '-h' flag is used. Without this line, the flags defined above are
  # only shown in the full `--helpful` help text.
  flags.adopt_module_key_flags(flags_core)

  # Add transformer-specific flags
  flags.DEFINE_enum(
      name='param_set', short_name='mp', default='big',
      # Pass a concrete list rather than a live dict view of PARAMS_MAP.
      enum_values=list(PARAMS_MAP),
      help=flags_core.help_wrap(
          'Parameter set to use when creating and training the model. The '
          'parameters define the input shape (batch size and max length), '
          'model configuration (size of embedding, # of hidden layers, etc.), '
          'and various other settings. The big parameter set increases the '
          'default batch size, embedding/hidden size, and filter size. For a '
          'complete list of parameters, please see model/model_params.py.'))

  flags.DEFINE_bool(
      name='static_batch', short_name='sb', default=False,
      help=flags_core.help_wrap(
          'Whether the batches in the dataset should have static shapes. In '
          'general, this setting should be False. Dynamic shapes allow the '
          'inputs to be grouped so that the number of padding tokens is '
          'minimized, and helps model training. In cases where the input shape '
          'must be static (e.g. running on TPU), this setting will be ignored '
          'and static batching will always be used.'))
  flags.DEFINE_integer(
      name='max_length', short_name='ml', default=256,
      help=flags_core.help_wrap(
          'Max sentence length for Transformer. Default is 256. Note: Usually '
          'it is more effective to use a smaller max length if static_batch is '
          'enabled, e.g. 64.'))

  # Flags for training with steps (may be used for debugging)
  flags.DEFINE_integer(
      name='validation_steps', short_name='vs', default=64,
      help=flags_core.help_wrap('The number of steps used in validation.'))

  # BLEU score computation
  flags.DEFINE_string(
      name='bleu_source', short_name='bls', default=None,
      help=flags_core.help_wrap(
          'Path to source file containing text translate when calculating the '
          'official BLEU score. Both --bleu_source and --bleu_ref must be set. '
          'Use the flag --stop_threshold to stop the script based on the '
          'uncased BLEU score.'))
  flags.DEFINE_string(
      name='bleu_ref', short_name='blr', default=None,
      # This help text used to be a verbatim copy of --bleu_source's and
      # described the wrong file.
      help=flags_core.help_wrap(
          'Path to file containing the reference translation when calculating '
          'the official BLEU score. Both --bleu_source and --bleu_ref must be '
          'set. Use the flag --stop_threshold to stop the script based on the '
          'uncased BLEU score.'))
  flags.DEFINE_string(
      name='vocab_file', short_name='vf', default=None,
      help=flags_core.help_wrap(
          'Path to subtoken vocabulary file. If data_download.py was used to '
          'download and encode the training data, look in the data_dir to find '
          'the vocab file.'))
  flags.DEFINE_string(
      name='mode', default='train',
      help=flags_core.help_wrap('mode: train, eval, or predict'))

  flags_core.set_defaults(data_dir='/tmp/translate_ende',
                          model_dir='/tmp/transformer_model',
                          batch_size=None,
                          train_epochs=10)

  # The validators below are registered through their decorators and are never
  # called by name, hence the pylint suppression.
  # pylint: disable=unused-variable
  @flags.multi_flags_validator(
      ['mode', 'train_epochs'],
      message='--train_epochs must be defined in train mode')
  def _check_train_limits(flag_dict):
    """Requires --train_epochs to be set when --mode is 'train'."""
    if flag_dict['mode'] == 'train':
      return flag_dict['train_epochs'] is not None
    return True

  @flags.multi_flags_validator(
      ['bleu_source', 'bleu_ref'],
      message='Both or neither --bleu_source and --bleu_ref must be defined.')
  def _check_bleu_files(flags_dict):
    """Requires --bleu_source and --bleu_ref to be set together or not at all."""
    return (flags_dict['bleu_source'] is None) == (
        flags_dict['bleu_ref'] is None)

  @flags.multi_flags_validator(
      ['bleu_source', 'bleu_ref', 'vocab_file'],
      message='--vocab_file must be defined if --bleu_source and --bleu_ref '
              'are defined.')
  def _check_bleu_vocab_file(flags_dict):
    """Requires --vocab_file whenever both BLEU files are given."""
    if flags_dict['bleu_source'] and flags_dict['bleu_ref']:
      return flags_dict['vocab_file'] is not None
    return True

  # NOTE(review): --export_dir is not defined in this function; presumably it
  # comes from flags_core.define_base() -- confirm.
  @flags.multi_flags_validator(
      ['export_dir', 'vocab_file'],
      message='--vocab_file must be defined if --export_dir is set.')
  def _check_export_vocab_file(flags_dict):
    """Requires --vocab_file whenever --export_dir is set."""
    if flags_dict['export_dir']:
      return flags_dict['vocab_file'] is not None
    return True
  # pylint: enable=unused-variable

  flags_core.require_cloud_storage(['data_dir', 'model_dir', 'export_dir'])


def get_callbacks():
  """Returns common callbacks.

  Which callbacks are created is driven entirely by the parsed FLAGS:
    * --enable_time_history adds a keras_utils.TimeHistory built from
      --batch_size and --log_steps.
    * --enable_tensorboard adds a tf.keras TensorBoard callback that logs to
      --model_dir.
    * --profile_steps adds the profiler callback returned by
      keras_utils.get_profiler_callback.

  Returns:
    A list of the enabled callbacks, in the order above. The list is empty
    when none of the flags is set.
  """
  callbacks = []

  if FLAGS.enable_time_history:
    time_callback = keras_utils.TimeHistory(FLAGS.batch_size, FLAGS.log_steps)
    callbacks.append(time_callback)

  if FLAGS.enable_tensorboard:
    tensorboard_callback = tf.keras.callbacks.TensorBoard(
        log_dir=FLAGS.model_dir)
    callbacks.append(tensorboard_callback)

  if FLAGS.profile_steps:
    profiler_callback = keras_utils.get_profiler_callback(
        FLAGS.model_dir,
        FLAGS.profile_steps,
        FLAGS.enable_tensorboard)
    callbacks.append(profiler_callback)

  return callbacks


def build_stats(history, callbacks):
  """Normalizes and returns dictionary of stats.

  Args:
    history: Results of the training step. Expected to expose a `history`
      mapping whose 'loss' entry is a sequence of per-epoch losses. May be
      None or have an empty `history`, in which case no loss is reported.
    callbacks: a list of callbacks which might include a time history callback
      used during keras.fit. May be None or empty.

  Returns:
    Dictionary of normalized results. Possible keys:
      'loss': the last recorded training loss.
      'step_timestamp_log', 'train_finish_time': copied from the
        keras_utils.TimeHistory callback, if one is present.
      'avg_exp_per_second': average examples per second, present only when
        the timestamp log has more than one entry spanning a positive time
        interval.
  """
  stats = {}

  if history and history.history:
    train_hist = history.history
    # Gets final loss from training. Depending on the TF/Keras version the
    # recorded value is either a NumPy scalar (which has `.item()`) or already
    # a plain Python number (which does not), so handle both.
    final_loss = train_hist['loss'][-1]
    if hasattr(final_loss, 'item'):
      stats['loss'] = final_loss.item()
    else:
      stats['loss'] = float(final_loss)

  if not callbacks:
    return stats

  # Look for the time history callback which was used during keras.fit
  for callback in callbacks:
    if isinstance(callback, keras_utils.TimeHistory):
      timestamp_log = callback.timestamp_log
      stats['step_timestamp_log'] = timestamp_log
      stats['train_finish_time'] = callback.train_finish_time
      if len(timestamp_log) > 1:
        elapsed = timestamp_log[-1].timestamp - timestamp_log[0].timestamp
        # A non-positive interval cannot yield a meaningful rate and would
        # divide by zero, so the metric is omitted in that case.
        if elapsed > 0:
          stats['avg_exp_per_second'] = (
              callback.batch_size * callback.log_steps *
              (len(timestamp_log) - 1) / elapsed)
  return stats