"voicechat2/llama.cpp/.devops/full.Dockerfile" did not exist on "8939e76b9f33e66527aa707553a147a0242aace3"
resnet_imagenet_main.py 13.5 KB
Newer Older
1
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Runs a ResNet model on the ImageNet dataset."""

Hongkun Yu's avatar
Hongkun Yu committed
17
18
import os

Hongkun Yu's avatar
Hongkun Yu committed
19
# Import libraries
Hongkun Yu's avatar
Hongkun Yu committed
20
from absl import app
21
from absl import flags
22
from absl import logging
23
import tensorflow as tf
24
from official.common import distribute_utils
Fan Yang's avatar
Fan Yang committed
25
26
27
28
from official.legacy.image_classification import test_utils
from official.legacy.image_classification.resnet import common
from official.legacy.image_classification.resnet import imagenet_preprocessing
from official.legacy.image_classification.resnet import resnet_model
29
from official.modeling import performance
30
from official.utils.flags import core as flags_core
Toby Boyd's avatar
Toby Boyd committed
31
from official.utils.misc import keras_utils
32
from official.utils.misc import model_helpers
33
34


35
36
37
38
def _cluster_last_three_conv2d_layers(model):
  """Clusters the weights of the last three Conv2D layers of `model`.

  The model is cloned with `tf.keras.models.clone_model`; every layer other
  than the last three Conv2D layers is passed through unchanged. Of the three
  targeted layers, the first two are clustered with 256 clusters and the last
  with 32 clusters, both using linear centroid initialization.

  Args:
    model: A `tf.keras.Model` containing at least three Conv2D layers
      (NOTE(review): fewer than three Conv2D layers would silently cluster
      whatever is present — confirm callers guarantee this).

  Returns:
    A cloned `tf.keras.Model` with the targeted layers wrapped for clustering.
  """
  import tensorflow_model_optimization as tfmot  # pylint: disable=g-import-not-at-top
  last_three_conv2d_layers = [
      layer for layer in model.layers
      if isinstance(layer, tf.keras.layers.Conv2D)
  ][-3:]

  cluster_weights = tfmot.clustering.keras.cluster_weights
  centroid_initialization = tfmot.clustering.keras.CentroidInitialization

  def cluster_fn(layer):
    # Layers outside the targeted set are cloned as-is.
    if layer not in last_three_conv2d_layers:
      return layer

    # First two targeted layers get a finer 256-cluster codebook; the last
    # one uses a coarser 32-cluster codebook.
    if layer in (last_three_conv2d_layers[0], last_three_conv2d_layers[1]):
      clustered = cluster_weights(
          layer,
          number_of_clusters=256,
          cluster_centroids_init=centroid_initialization.LINEAR)
      logging.info('Clustered %s with 256 clusters', layer.name)
    else:
      clustered = cluster_weights(
          layer,
          number_of_clusters=32,
          cluster_centroids_init=centroid_initialization.LINEAR)
      logging.info('Clustered %s with 32 clusters', layer.name)
    return clustered

  return tf.keras.models.clone_model(model, clone_function=cluster_fn)
62
63


Shining Sun's avatar
Shining Sun committed
64
def run(flags_obj):
  """Run ResNet ImageNet training and eval loop using native Keras APIs.

  Args:
    flags_obj: An object containing parsed flag values.

  Raises:
    ValueError: If fp16 is passed as it is not currently supported.
    NotImplementedError: If some features are not currently supported.

  Returns:
    Dictionary of training and eval stats.
  """
  keras_utils.set_session_config(
      enable_xla=flags_obj.enable_xla)
  # Execute flag override logic for better model performance.
  if flags_obj.tf_gpu_thread_mode:
    keras_utils.set_gpu_thread_mode_and_count(
        per_gpu_thread_count=flags_obj.per_gpu_thread_count,
        gpu_thread_mode=flags_obj.tf_gpu_thread_mode,
        num_gpus=flags_obj.num_gpus,
        datasets_num_private_threads=flags_obj.datasets_num_private_threads)
  common.set_cudnn_batchnorm_mode()

  dtype = flags_core.get_tf_dtype(flags_obj)
  performance.set_mixed_precision_policy(
      flags_core.get_tf_dtype(flags_obj))

  # Default the data format from hardware: channels_first on GPU (faster with
  # cuDNN), channels_last otherwise.
  data_format = flags_obj.data_format
  if data_format is None:
    data_format = ('channels_first' if tf.config.list_physical_devices('GPU')
                   else 'channels_last')
  tf.keras.backend.set_image_data_format(data_format)

  # Configures cluster spec for distribution strategy.
  _ = distribute_utils.configure_cluster(flags_obj.worker_hosts,
                                         flags_obj.task_index)

  strategy = distribute_utils.get_distribution_strategy(
      distribution_strategy=flags_obj.distribution_strategy,
      num_gpus=flags_obj.num_gpus,
      all_reduce_alg=flags_obj.all_reduce_alg,
      num_packs=flags_obj.num_packs,
      tpu_address=flags_obj.tpu)

  if strategy:
    # flags_obj.enable_get_next_as_optional controls whether enabling
    # get_next_as_optional behavior in DistributedIterator. If true, last
    # partial batch can be supported.
    strategy.extended.experimental_enable_get_next_as_optional = (
        flags_obj.enable_get_next_as_optional
    )

  strategy_scope = distribute_utils.get_strategy_scope(strategy)

  # pylint: disable=protected-access
  if flags_obj.use_synthetic_data:
    # Synthetic data avoids disk I/O so the input pipeline is never the
    # bottleneck (useful for benchmarking).
    input_fn = common.get_synth_input_fn(
        height=imagenet_preprocessing.DEFAULT_IMAGE_SIZE,
        width=imagenet_preprocessing.DEFAULT_IMAGE_SIZE,
        num_channels=imagenet_preprocessing.NUM_CHANNELS,
        num_classes=imagenet_preprocessing.NUM_CLASSES,
        dtype=dtype,
        drop_remainder=True)
  else:
    input_fn = imagenet_preprocessing.input_fn

  # When `enable_xla` is True, we always drop the remainder of the batches
  # in the dataset, as XLA-GPU doesn't support dynamic shapes.
  drop_remainder = flags_obj.enable_xla

  # Current resnet_model.resnet50 input format is always channel-last.
  # We use the keras_applications mobilenet model whose input format depends
  # on the keras backend image data format.
  # This use_keras_image_data_format flag indicates whether the image
  # preprocessor output format should be the same as the keras backend image
  # data format or just channel-last format.
  use_keras_image_data_format = \
    (flags_obj.model == 'mobilenet' or flags_obj.model == 'mobilenet_pretrained')
  train_input_dataset = input_fn(
      is_training=True,
      data_dir=flags_obj.data_dir,
      batch_size=flags_obj.batch_size,
      parse_record_fn=imagenet_preprocessing.get_parse_record_fn(
          use_keras_image_data_format=use_keras_image_data_format),
      datasets_num_private_threads=flags_obj.datasets_num_private_threads,
      dtype=dtype,
      drop_remainder=drop_remainder,
      tf_data_experimental_slack=flags_obj.tf_data_experimental_slack,
      training_dataset_cache=flags_obj.training_dataset_cache,
  )

  eval_input_dataset = None
  if not flags_obj.skip_eval:
    eval_input_dataset = input_fn(
        is_training=False,
        data_dir=flags_obj.data_dir,
        batch_size=flags_obj.batch_size,
        parse_record_fn=imagenet_preprocessing.get_parse_record_fn(
            use_keras_image_data_format=use_keras_image_data_format),
        dtype=dtype,
        drop_remainder=drop_remainder)

  # Piecewise-constant LR schedule with warmup, scaled by batch size; the
  # boundaries/multipliers come from common.LR_SCHEDULE.
  lr_schedule = common.PiecewiseConstantDecayWithWarmup(
      batch_size=flags_obj.batch_size,
      epoch_size=imagenet_preprocessing.NUM_IMAGES['train'],
      warmup_epochs=common.LR_SCHEDULE[0][1],
      boundaries=list(p[1] for p in common.LR_SCHEDULE[1:]),
      multipliers=list(p[0] for p in common.LR_SCHEDULE),
      compute_lr_on_cpu=True)
  steps_per_epoch = (
      imagenet_preprocessing.NUM_IMAGES['train'] // flags_obj.batch_size)

  # Model and optimizer must be created under the strategy scope so their
  # variables are mirrored across replicas.
  with strategy_scope:
    if flags_obj.optimizer == 'resnet50_default':
      optimizer = common.get_optimizer(lr_schedule)
    elif flags_obj.optimizer == 'mobilenet_default' or flags_obj.optimizer == 'mobilenet_fine_tune':
      initial_learning_rate = \
          flags_obj.initial_learning_rate_per_sample * flags_obj.batch_size
      # Fine-tuning overrides the computed LR with a small fixed value.
      if flags_obj.optimizer == 'mobilenet_fine_tune':
        initial_learning_rate = 1e-5
      optimizer = tf.keras.optimizers.SGD(
          learning_rate=tf.keras.optimizers.schedules.ExponentialDecay(
              initial_learning_rate,
              decay_steps=steps_per_epoch * flags_obj.num_epochs_per_decay,
              decay_rate=flags_obj.lr_decay_factor,
              staircase=True),
          momentum=0.9)
    # Wraps the optimizer with loss scaling when running in float16.
    optimizer = performance.configure_optimizer(
        optimizer,
        use_float16=flags_core.get_tf_dtype(flags_obj) == tf.float16,
        loss_scale=flags_core.get_loss_scale(flags_obj, default_for_fp16=128),)

    # TODO(hongkuny): Remove trivial model usage and move it to benchmark.
    if flags_obj.use_trivial_model:
      model = test_utils.trivial_model(imagenet_preprocessing.NUM_CLASSES)
    elif flags_obj.model == 'resnet50_v1.5':
      model = resnet_model.resnet50(
          num_classes=imagenet_preprocessing.NUM_CLASSES)
    elif flags_obj.model == 'mobilenet' or flags_obj.model == 'mobilenet_pretrained':
      # TODO(kimjaehong): Remove layers attribute when minimum TF version
      # support 2.0 layers by default.
      if flags_obj.model == 'mobilenet_pretrained':
        # Pretrained ImageNet weights require the original 1000-class head.
        classes_labels = 1000
        initial_weights = 'imagenet'
      else:
        classes_labels = imagenet_preprocessing.NUM_CLASSES
        initial_weights = None
      model = tf.keras.applications.mobilenet.MobileNet(
          weights=initial_weights,
          classes=classes_labels,
          layers=tf.keras.layers)

    if flags_obj.pretrained_filepath:
      model.load_weights(flags_obj.pretrained_filepath)

    if flags_obj.pruning_method == 'polynomial_decay':
      # NOTE: this local import also provides the `tfmot` binding used by
      # strip_pruning/strip_clustering after training below.
      import tensorflow_model_optimization as tfmot  # pylint: disable=g-import-not-at-top
      if dtype != tf.float32:
        raise NotImplementedError(
            'Pruning is currently only supported on dtype=tf.float32.')
      pruning_params = {
          'pruning_schedule':
              tfmot.sparsity.keras.PolynomialDecay(
                  initial_sparsity=flags_obj.pruning_initial_sparsity,
                  final_sparsity=flags_obj.pruning_final_sparsity,
                  begin_step=flags_obj.pruning_begin_step,
                  end_step=flags_obj.pruning_end_step,
                  frequency=flags_obj.pruning_frequency),
      }
      model = tfmot.sparsity.keras.prune_low_magnitude(model, **pruning_params)
    elif flags_obj.pruning_method:
      raise NotImplementedError('Only polynomial_decay is currently supported.')

    if flags_obj.clustering_method == 'selective_clustering':
      import tensorflow_model_optimization as tfmot  # pylint: disable=g-import-not-at-top
      if dtype != tf.float32:
        raise NotImplementedError(
            'Clustering is currently only supported on dtype=tf.float32.')
      model = _cluster_last_three_conv2d_layers(model)
    elif flags_obj.clustering_method:
      raise NotImplementedError(
          'Only selective_clustering is implemented.')

    model.compile(
        loss='sparse_categorical_crossentropy',
        optimizer=optimizer,
        metrics=(['sparse_categorical_accuracy']
                 if flags_obj.report_accuracy_metrics else None),
        run_eagerly=flags_obj.run_eagerly)

  train_epochs = flags_obj.train_epochs

  callbacks = common.get_callbacks(
      pruning_method=flags_obj.pruning_method,
      enable_checkpoint_and_export=flags_obj.enable_checkpoint_and_export,
      model_dir=flags_obj.model_dir)

  # If multiple epochs, ignore the train_steps flag.
  if train_epochs <= 1 and flags_obj.train_steps:
    steps_per_epoch = min(flags_obj.train_steps, steps_per_epoch)
    train_epochs = 1

  num_eval_steps = (
      imagenet_preprocessing.NUM_IMAGES['validation'] // flags_obj.batch_size)

  validation_data = eval_input_dataset
  if flags_obj.skip_eval:
    # Only build the training graph. This reduces memory usage introduced by
    # control flow ops in layers that have different implementations for
    # training and inference (e.g., batch norm).
    if flags_obj.set_learning_phase_to_train:
      # TODO(haoyuzhang): Understand slowdown of setting learning phase when
      # not using distribution strategy.
      tf.keras.backend.set_learning_phase(1)
    num_eval_steps = None
    validation_data = None

  if not strategy and flags_obj.explicit_gpu_placement:
    # TODO(b/135607227): Add device scope automatically in Keras training loop
    # when not using distribution strategy.
    # The matching __exit__ is invoked after export, near the end of run().
    no_dist_strat_device = tf.device('/device:GPU:0')
    no_dist_strat_device.__enter__()

  history = model.fit(train_input_dataset,
                      epochs=train_epochs,
                      steps_per_epoch=steps_per_epoch,
                      callbacks=callbacks,
                      validation_steps=num_eval_steps,
                      validation_data=validation_data,
                      validation_freq=flags_obj.epochs_between_evals,
                      verbose=2)

  eval_output = None
  if not flags_obj.skip_eval:
    eval_output = model.evaluate(eval_input_dataset,
                                 steps=num_eval_steps,
                                 verbose=2)

  # Remove the pruning/clustering wrappers before export. `tfmot` was bound
  # by the local imports above; these branches are only reachable when the
  # corresponding import branch ran (other method values raised earlier).
  if flags_obj.pruning_method:
    model = tfmot.sparsity.keras.strip_pruning(model)
  if flags_obj.clustering_method:
    model = tfmot.clustering.keras.strip_clustering(model)

  if flags_obj.enable_checkpoint_and_export:
    if dtype == tf.bfloat16:
      logging.warning('Keras model.save does not support bfloat16 dtype.')
    else:
      # Keras model.save assumes a float32 input signature.
      export_path = os.path.join(flags_obj.model_dir, 'saved_model')
      model.save(export_path, include_optimizer=False)

  if not strategy and flags_obj.explicit_gpu_placement:
    no_dist_strat_device.__exit__()

  stats = common.build_stats(history, eval_output, callbacks)
  return stats
Shining Sun's avatar
bug fix  
Shining Sun committed
323

Shining Sun's avatar
Shining Sun committed
324

Toby Boyd's avatar
Toby Boyd committed
325
def define_imagenet_keras_flags():
  """Registers all command-line flags used by this ImageNet Keras runner."""
  # Core Keras flags, plus the model/optimizer/pretrained-checkpoint extras.
  common.define_keras_flags(
      model=True, optimizer=True, pretrained_filepath=True)
  # Optional model-optimization flag groups.
  common.define_pruning_flags()
  common.define_clustering_flags()
  flags_core.set_defaults()
  flags.adopt_module_key_flags(common)
Toby Boyd's avatar
Toby Boyd committed
334
335


336
def main(_):
  """App entry point: cleans the model dir if requested, trains, logs stats."""
  model_helpers.apply_clean(flags.FLAGS)
  run_stats = run(flags.FLAGS)
  logging.info('Run stats:\n%s', run_stats)
340
341
342


if __name__ == '__main__':
  # Surface INFO-level logs (per-epoch progress, final run stats).
  logging.set_verbosity(logging.INFO)
  # Flags must be defined before app.run parses argv.
  define_imagenet_keras_flags()
  app.run(main)