Fix the two types of failed tests:

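1. The benchmarks set a misspelled flag, `dataset_num_private_threads`; the correct flag name is `datasets_num_private_threads`.
2. Clustering does not support fp16 or mixed-precision training, so it is now also rejected when `fp16_implementation` is `graph_rewrite`.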

Fix the two types of failed tests:
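1. The benchmarks set a misspelled flag, `dataset_num_private_threads`; the correct flag name is `datasets_num_private_threads`.
2. Clustering does not support fp16 or mixed-precision training, so it is now also rejected when `fp16_implementation` is `graph_rewrite`.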
7dfef01d · Ruomei Yan · 55018881 · 7dfef01d · 7dfef01d · 7dfef01d
Commit 7dfef01d authored Jun 08, 2020 by Ruomei Yan
3 changed files
--- a/official/benchmark/keras_imagenet_benchmark.py
+++ b/official/benchmark/keras_imagenet_benchmark.py
@@ -929,7 +929,7 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'mirrored'
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_tweaked')
-    FLAGS.batch_size = 128 * 8  # 8 GPUs
+    FLAGS.batch_size = 128 * 8
    FLAGS.datasets_num_private_threads = 14
    self._run_and_report_benchmark()

@@ -996,7 +996,7 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_fp16_tweaked')
    FLAGS.batch_size = 256 * 8  # 8 GPUs
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
-    FLAGS.dataset_num_private_threads = 40
+    FLAGS.datasets_num_private_threads = 40
    self._run_and_report_benchmark()

  def benchmark_8_gpu_fp16_dynamic_tweaked(self):
@@ -1012,7 +1012,7 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
    FLAGS.batch_size = 256 * 8  # 8 GPUs
    FLAGS.loss_scale = 'dynamic'
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
-    FLAGS.dataset_num_private_threads = 40
+    FLAGS.datasets_num_private_threads = 40
    self._run_and_report_benchmark()

  def benchmark_xla_8_gpu_fp16(self):
@@ -1870,6 +1870,8 @@ class KerasClusteringBenchmarkRealBase(Resnet50KerasBenchmarkBase):
        'skip_eval': True,
        'report_accuracy_metrics': False,
        'data_dir': os.path.join(root_data_dir, 'imagenet'),
+        'clustering_method': 'selective_clustering',
+        'number_of_clusters': 256,
        'train_steps': 110,
        'log_steps': 10,
    })

--- a/official/benchmark/models/resnet_imagenet_main.py
+++ b/official/benchmark/models/resnet_imagenet_main.py
@@ -243,15 +243,12 @@ def run(flags_obj):
          classes=imagenet_preprocessing.NUM_CLASSES,
          layers=tf.keras.layers)
    elif flags_obj.model == 'mobilenet_pretrained':
-      shape = (224, 224, 3)
      model = tf.keras.applications.mobilenet.MobileNet(
-          input_shape=shape,
          alpha=1.0,
          depth_multiplier=1,
          dropout=1e-7,
          include_top=True,
          weights='imagenet',
-          input_tensor=tf.keras.layers.Input(shape),
          pooling=None,
          classes=1000,
          layers=tf.keras.layers)
@@ -277,7 +274,7 @@ def run(flags_obj):
      raise NotImplementedError('Only polynomial_decay is currently supported.')

    if flags_obj.clustering_method == 'selective_clustering':
-      if dtype != tf.float32:
+      if dtype != tf.float32 or flags_obj.fp16_implementation == 'graph_rewrite':
        raise NotImplementedError(
            'Clustering is currently only supported on dtype=tf.float32.')
      clustering_params1 = {

--- a/official/vision/image_classification/augment.py
+++ b/official/vision/image_classification/augment.py
@@ -28,7 +28,7 @@ import math
 import tensorflow as tf
 from typing import Any, Dict, List, Optional, Text, Tuple

-from tensorflow.python.keras.layers.preprocessing import image_preprocessing as image_ops
+from tensorflow.python.keras.layers import image_preprocessing as image_ops

 # This signifies the max integer that the controller RNN could predict for the
 # augmentation scheme.