Unverified Commit cf1a276a authored by Toby Boyd's avatar Toby Boyd Committed by GitHub
Browse files

Reduce iterations from 20 to 12 and add FP16 dynamic. (#7168)

* reduce iterations from 20 to 12.

* add fp16 dynamic batch accuracy check.

* fix existing lint issue.
parent cad067d8
...@@ -340,7 +340,8 @@ def run_ncf(_): ...@@ -340,7 +340,8 @@ def run_ncf(_):
grads = tape.gradient(loss, keras_model.trainable_variables) grads = tape.gradient(loss, keras_model.trainable_variables)
# Converting gradients to dense form helps in perf on GPU for NCF # Converting gradients to dense form helps in perf on GPU for NCF
grads = neumf_model.sparse_to_dense_grads(list(zip(grads, keras_model.trainable_variables))) grads = neumf_model.sparse_to_dense_grads(
list(zip(grads, keras_model.trainable_variables)))
optimizer.apply_gradients(grads) optimizer.apply_gradients(grads)
return loss return loss
......
...@@ -249,7 +249,10 @@ class TransformerBigKerasAccuracy(TransformerBenchmark): ...@@ -249,7 +249,10 @@ class TransformerBigKerasAccuracy(TransformerBenchmark):
def benchmark_8_gpu(self): def benchmark_8_gpu(self):
"""Benchmark 8 gpu. """Benchmark 8 gpu.
Should converge to 28.4 BLEU (uncased). This has not been verified yet." Over 6 runs with eval every 20K steps the average highest value was 28.195
(bleu uncased). 28.424 was the highest and 27.96 the lowest. The values are
the highest value seen during a run and occurred at a median of iteration 9.
Iterations are not epochs, an iteration is a number of steps between evals.
""" """
self._setup() self._setup()
FLAGS.num_gpus = 8 FLAGS.num_gpus = 8
...@@ -260,7 +263,7 @@ class TransformerBigKerasAccuracy(TransformerBenchmark): ...@@ -260,7 +263,7 @@ class TransformerBigKerasAccuracy(TransformerBenchmark):
FLAGS['bleu_ref'].value = self.bleu_ref FLAGS['bleu_ref'].value = self.bleu_ref
FLAGS.param_set = 'big' FLAGS.param_set = 'big'
FLAGS.batch_size = 3072*8 FLAGS.batch_size = 3072*8
FLAGS.train_steps = 400000 FLAGS.train_steps = 20000 * 12
FLAGS.steps_between_evals = 20000 FLAGS.steps_between_evals = 20000
FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu') FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu')
self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size, self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size,
...@@ -284,7 +287,7 @@ class TransformerBigKerasAccuracy(TransformerBenchmark): ...@@ -284,7 +287,7 @@ class TransformerBigKerasAccuracy(TransformerBenchmark):
FLAGS.batch_size = 3072*8 FLAGS.batch_size = 3072*8
FLAGS.static_batch = True FLAGS.static_batch = True
FLAGS.max_length = 64 FLAGS.max_length = 64
FLAGS.train_steps = 400000 FLAGS.train_steps = 20000 * 12
FLAGS.steps_between_evals = 20000 FLAGS.steps_between_evals = 20000
FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_static_batch') FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_static_batch')
self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size, self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size,
...@@ -292,6 +295,29 @@ class TransformerBigKerasAccuracy(TransformerBenchmark): ...@@ -292,6 +295,29 @@ class TransformerBigKerasAccuracy(TransformerBenchmark):
bleu_min=28, bleu_min=28,
bleu_max=29) bleu_max=29)
def benchmark_8_gpu_fp16(self):
  """Benchmark 8 gpu with dynamic batch and fp16.

  Should converge to 28.4 BLEU (uncased). This has not been verified yet.
  """
  self._setup()
  FLAGS.num_gpus = 8
  FLAGS.dtype = 'fp16'
  FLAGS.data_dir = self.train_data_dir
  FLAGS.vocab_file = self.vocab_file
  # Sets values directly to avoid validation check.
  FLAGS['bleu_source'].value = self.bleu_source
  FLAGS['bleu_ref'].value = self.bleu_ref
  FLAGS.param_set = 'big'
  FLAGS.batch_size = 3072*8
  # 12 eval iterations of 20K steps each, consistent with the other
  # 8-GPU accuracy benchmarks in this class (reduced from 400K total,
  # which was 20 iterations).
  FLAGS.train_steps = 20000 * 12
  FLAGS.steps_between_evals = 20000
  FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_fp16')
  self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size,
                                 log_steps=FLAGS.log_steps,
                                 bleu_min=28,
                                 bleu_max=29)
def benchmark_8_gpu_static_batch_fp16(self): def benchmark_8_gpu_static_batch_fp16(self):
"""Benchmark 8 gpu with static batch and fp16. """Benchmark 8 gpu with static batch and fp16.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment