Unverified commit 695265c8 authored by Toby Boyd, committed by GitHub

Transformer XLA and FP16 benchmarks (#7061)

* Add XLA benchmark tests, FP32 only for now.

* Add FP16 XLA tests.

* Add FP16-only tests.
parent 0cc52905
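Every method added below follows the same recipe: reset flags with _setup(), pick the GPU count, scale the total batch size from batch_per_gpu, point model_dir at a per-test directory, optionally turn on enable_xla, the fp16 dtype, and static batching, then call _run_and_report_benchmark(). A minimal sketch of that recipe as a single hypothetical helper (not part of this change; it assumes the TransformerKerasBenchmark class context, its helper methods, and the absl FLAGS object shown in the diff):

  # Hypothetical helper (illustration only, not in this commit) capturing the
  # pattern repeated by the new benchmark methods. It assumes it lives on
  # TransformerKerasBenchmark, which provides _setup(), _get_model_dir(),
  # _run_and_report_benchmark(), and batch_per_gpu.
  def _benchmark_gpu(self, name, num_gpus, use_xla=False, fp16=False,
                     static_batch=False):
    self._setup()
    FLAGS.num_gpus = num_gpus
    FLAGS.batch_size = self.batch_per_gpu * num_gpus  # total batch scales with GPUs
    FLAGS.model_dir = self._get_model_dir(name)
    if use_xla:
      FLAGS.enable_xla = True  # compile the model with XLA
    if fp16:
      FLAGS.dtype = 'fp16'  # half-precision run
    if static_batch:
      FLAGS.static_batch = True
      FLAGS.max_length = 64  # fixed sequence length for static batching
    self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size,
                                   log_steps=FLAGS.log_steps)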
@@ -346,6 +346,37 @@ class TransformerKerasBenchmark(TransformerBenchmark):
    self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size,
                                   log_steps=FLAGS.log_steps)
  def benchmark_1_gpu_fp16(self):
    """Benchmark 1 gpu FP16."""
    self._setup()
    FLAGS.num_gpus = 1
    FLAGS.batch_size = self.batch_per_gpu
    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_fp16')
    FLAGS.dtype = 'fp16'
    self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size,
                                   log_steps=FLAGS.log_steps)

  def benchmark_xla_1_gpu(self):
    """Benchmark 1 gpu w/xla."""
    self._setup()
    FLAGS.num_gpus = 1
    FLAGS.batch_size = self.batch_per_gpu
    FLAGS.model_dir = self._get_model_dir('benchmark_xla_1_gpu')
    FLAGS.enable_xla = True
    self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size,
                                   log_steps=FLAGS.log_steps)

  def benchmark_xla_1_gpu_fp16(self):
    """Benchmark 1 gpu w/xla and FP16."""
    self._setup()
    FLAGS.num_gpus = 1
    FLAGS.batch_size = self.batch_per_gpu
    FLAGS.model_dir = self._get_model_dir('benchmark_xla_1_gpu_fp16')
    FLAGS.enable_xla = True
    FLAGS.dtype = 'fp16'
    self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size,
                                   log_steps=FLAGS.log_steps)

  def benchmark_1_gpu_static_batch(self):
    """Benchmark 1 gpu with static batch."""
    self._setup()
@@ -357,6 +388,45 @@ class TransformerKerasBenchmark(TransformerBenchmark):
    self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size,
                                   log_steps=FLAGS.log_steps)
  def benchmark_xla_1_gpu_static_batch(self):
    """Benchmark 1 gpu with static batch w/xla."""
    self._setup()
    FLAGS.num_gpus = 1
    FLAGS.batch_size = self.batch_per_gpu
    FLAGS.model_dir = self._get_model_dir('benchmark_xla_1_gpu_static_batch')
    FLAGS.static_batch = True
    FLAGS.max_length = 64
    FLAGS.enable_xla = True
    self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size,
                                   log_steps=FLAGS.log_steps)

  def benchmark_1_gpu_static_batch_fp16(self):
    """Benchmark 1 gpu with static batch FP16."""
    self._setup()
    FLAGS.num_gpus = 1
    FLAGS.batch_size = self.batch_per_gpu
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_1_gpu_static_batch_fp16')
    FLAGS.static_batch = True
    FLAGS.max_length = 64
    FLAGS.dtype = 'fp16'
    self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size,
                                   log_steps=FLAGS.log_steps)

  def benchmark_xla_1_gpu_static_batch_fp16(self):
    """Benchmark 1 gpu with static batch w/xla and FP16."""
    self._setup()
    FLAGS.num_gpus = 1
    FLAGS.batch_size = self.batch_per_gpu
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_xla_1_gpu_static_batch_fp16')
    FLAGS.static_batch = True
    FLAGS.max_length = 64
    FLAGS.enable_xla = True
    FLAGS.dtype = 'fp16'
    self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size,
                                   log_steps=FLAGS.log_steps)

  def benchmark_8_gpu(self):
    """Benchmark 8 gpu."""
    self._setup()
@@ -366,6 +436,37 @@ class TransformerKerasBenchmark(TransformerBenchmark):
    self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size,
                                   log_steps=FLAGS.log_steps)
  def benchmark_8_gpu_fp16(self):
    """Benchmark 8 gpu FP16."""
    self._setup()
    FLAGS.num_gpus = 8
    FLAGS.dtype = 'fp16'
    FLAGS.batch_size = self.batch_per_gpu * 8
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_fp16')
    self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size,
                                   log_steps=FLAGS.log_steps)

  def benchmark_xla_8_gpu(self):
    """Benchmark 8 gpu w/xla."""
    self._setup()
    FLAGS.num_gpus = 8
    FLAGS.enable_xla = True
    FLAGS.batch_size = self.batch_per_gpu * 8
    FLAGS.model_dir = self._get_model_dir('benchmark_xla_8_gpu')
    self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size,
                                   log_steps=FLAGS.log_steps)

  def benchmark_xla_8_gpu_fp16(self):
    """Benchmark 8 gpu w/xla and FP16."""
    self._setup()
    FLAGS.num_gpus = 8
    FLAGS.enable_xla = True
    FLAGS.dtype = 'fp16'
    FLAGS.batch_size = self.batch_per_gpu * 8
    FLAGS.model_dir = self._get_model_dir('benchmark_xla_8_gpu_fp16')
    self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size,
                                   log_steps=FLAGS.log_steps)

  def benchmark_8_gpu_static_batch(self):
    """Benchmark 8 gpu with static batch."""
    self._setup()
@@ -377,6 +478,45 @@ class TransformerKerasBenchmark(TransformerBenchmark):
    self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size,
                                   log_steps=FLAGS.log_steps)
  def benchmark_8_gpu_static_batch_fp16(self):
    """Benchmark 8 gpu with static batch FP16."""
    self._setup()
    FLAGS.num_gpus = 8
    FLAGS.dtype = 'fp16'
    FLAGS.batch_size = self.batch_per_gpu * 8
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_8_gpu_static_batch_fp16')
    FLAGS.static_batch = True
    FLAGS.max_length = 64
    self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size,
                                   log_steps=FLAGS.log_steps)

  def benchmark_xla_8_gpu_static_batch(self):
    """Benchmark 8 gpu with static batch w/xla."""
    self._setup()
    FLAGS.num_gpus = 8
    FLAGS.enable_xla = True
    FLAGS.batch_size = self.batch_per_gpu * 8
    FLAGS.model_dir = self._get_model_dir('benchmark_xla_8_gpu_static_batch')
    FLAGS.static_batch = True
    FLAGS.max_length = 64
    self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size,
                                   log_steps=FLAGS.log_steps)

  def benchmark_xla_8_gpu_static_batch_fp16(self):
    """Benchmark 8 gpu with static batch w/xla and FP16."""
    self._setup()
    FLAGS.num_gpus = 8
    FLAGS.enable_xla = True
    FLAGS.dtype = 'fp16'
    FLAGS.batch_size = self.batch_per_gpu * 8
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_xla_8_gpu_static_batch_fp16')
    FLAGS.static_batch = True
    FLAGS.max_length = 64
    self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size,
                                   log_steps=FLAGS.log_steps)


class TransformerBaseKerasBenchmarkReal(TransformerKerasBenchmark):
  """Transformer based version real data benchmark tests."""
...
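For a quick local smoke test, one of the new methods can also be invoked by instantiating the benchmark class directly. The module path, constructor arguments, and output directory below are assumptions, not part of this change; in practice these benchmarks are normally driven by the PerfZero harness.

# Hypothetical one-off driver; module path and constructor signature are
# assumptions and may differ in your checkout of tensorflow/models.
from official.transformer.v2 import transformer_benchmark

bm = transformer_benchmark.TransformerBaseKerasBenchmarkReal(
    output_dir='/tmp/transformer_benchmark')
bm.benchmark_xla_1_gpu_fp16()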