Unverified Commit cf1a276a authored by Toby Boyd's avatar Toby Boyd Committed by GitHub
Browse files

Reduce iterations from 20 to 12 and add FP16 dynamic. (#7168)

* reduce iterations from 20 to 12.

* add fp16 dynamic batch accuracy check.

* fix existing lint issue.
parent cad067d8
...@@ -340,7 +340,8 @@ def run_ncf(_): ...@@ -340,7 +340,8 @@ def run_ncf(_):
grads = tape.gradient(loss, keras_model.trainable_variables) grads = tape.gradient(loss, keras_model.trainable_variables)
# Converting gradients to dense form helps in perf on GPU for NCF # Converting gradients to dense form helps in perf on GPU for NCF
grads = neumf_model.sparse_to_dense_grads(list(zip(grads, keras_model.trainable_variables))) grads = neumf_model.sparse_to_dense_grads(
list(zip(grads, keras_model.trainable_variables)))
optimizer.apply_gradients(grads) optimizer.apply_gradients(grads)
return loss return loss
......
...@@ -249,7 +249,10 @@ class TransformerBigKerasAccuracy(TransformerBenchmark): ...@@ -249,7 +249,10 @@ class TransformerBigKerasAccuracy(TransformerBenchmark):
def benchmark_8_gpu(self): def benchmark_8_gpu(self):
"""Benchmark 8 gpu. """Benchmark 8 gpu.
Should converge to 28.4 BLEU (uncased). This has not been verified yet." Over 6 runs with eval every 20K steps the average highest value was 28.195
(bleu uncased). 28.424 was the highest and 27.96 the lowest. The values are
the highest value seen during a run and occurred at a median of iteration 9.
Iterations are not epochs, an iteration is a number of steps between evals.
""" """
self._setup() self._setup()
FLAGS.num_gpus = 8 FLAGS.num_gpus = 8
...@@ -260,7 +263,7 @@ class TransformerBigKerasAccuracy(TransformerBenchmark): ...@@ -260,7 +263,7 @@ class TransformerBigKerasAccuracy(TransformerBenchmark):
FLAGS['bleu_ref'].value = self.bleu_ref FLAGS['bleu_ref'].value = self.bleu_ref
FLAGS.param_set = 'big' FLAGS.param_set = 'big'
FLAGS.batch_size = 3072*8 FLAGS.batch_size = 3072*8
FLAGS.train_steps = 400000 FLAGS.train_steps = 20000 * 12
FLAGS.steps_between_evals = 20000 FLAGS.steps_between_evals = 20000
FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu') FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu')
self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size, self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size,
...@@ -284,7 +287,7 @@ class TransformerBigKerasAccuracy(TransformerBenchmark): ...@@ -284,7 +287,7 @@ class TransformerBigKerasAccuracy(TransformerBenchmark):
FLAGS.batch_size = 3072*8 FLAGS.batch_size = 3072*8
FLAGS.static_batch = True FLAGS.static_batch = True
FLAGS.max_length = 64 FLAGS.max_length = 64
FLAGS.train_steps = 400000 FLAGS.train_steps = 20000 * 12
FLAGS.steps_between_evals = 20000 FLAGS.steps_between_evals = 20000
FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_static_batch') FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_static_batch')
self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size, self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size,
...@@ -292,6 +295,29 @@ class TransformerBigKerasAccuracy(TransformerBenchmark): ...@@ -292,6 +295,29 @@ class TransformerBigKerasAccuracy(TransformerBenchmark):
bleu_min=28, bleu_min=28,
bleu_max=29) bleu_max=29)
def benchmark_8_gpu_fp16(self):
  """Benchmark 8 gpu with dynamic batch and fp16.

  Should converge to 28.4 BLEU (uncased). This has not been verified yet.
  """
  self._setup()
  FLAGS.num_gpus = 8
  FLAGS.dtype = 'fp16'
  FLAGS.data_dir = self.train_data_dir
  FLAGS.vocab_file = self.vocab_file
  # Sets values directly to avoid validation check.
  FLAGS['bleu_source'].value = self.bleu_source
  FLAGS['bleu_ref'].value = self.bleu_ref
  FLAGS.param_set = 'big'
  FLAGS.batch_size = 3072*8
  # 12 eval iterations of 20K steps each, consistent with the other
  # 8-GPU accuracy benchmarks in this class (reduced from 400K total,
  # which was 20 iterations).
  FLAGS.train_steps = 20000 * 12
  FLAGS.steps_between_evals = 20000
  FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_fp16')
  self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size,
                                 log_steps=FLAGS.log_steps,
                                 bleu_min=28,
                                 bleu_max=29)
def benchmark_8_gpu_static_batch_fp16(self): def benchmark_8_gpu_static_batch_fp16(self):
"""Benchmark 8 gpu with static batch and fp16. """Benchmark 8 gpu with static batch and fp16.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment