Unverified commit 829190e6, authored by Toby Boyd, committed by GitHub

Lower MLPerf hr@10 target (#7285)

parent 296d0d3f
@@ -57,16 +57,25 @@ class NCFKerasBenchmarkBase(tf.test.Benchmark):
     else:
       flagsaver.restore_flag_values(NCFKerasBenchmarkBase.local_flags)
 
-  def _run_and_report_benchmark(self):
+  def _run_and_report_benchmark(self, hr_at_10_min=0, hr_at_10_max=0):
     start_time_sec = time.time()
     stats = ncf_keras_main.run_ncf(FLAGS)
     wall_time_sec = time.time() - start_time_sec
-    metrics = self._extract_benchmark_report_extras(stats)
-    self.report_benchmark(iters=-1, wall_time=wall_time_sec, metrics=metrics)
+    metrics = []
+    metrics.append({'name': 'exp_per_second',
+                    'value': stats['avg_exp_per_second']})
 
-  def _extract_benchmark_report_extras(self, stats):
-    raise NotImplementedError('Not implemented')
+    if hr_at_10_min > 0:
+      metrics.append({'name': 'hr_at_10',
+                      'value': stats['eval_hit_rate'],
+                      'min_value': hr_at_10_min,
+                      'max_value': hr_at_10_max})
+    metrics.append({'name': 'train_loss',
+                    'value': stats['loss']})
+    self.report_benchmark(iters=-1, wall_time=wall_time_sec, metrics=metrics)
 
 
 class NCFKerasAccuracy(NCFKerasBenchmarkBase):
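
For context (not part of this commit): with the base class now assembling the metrics list itself, a subclass only has to decide whether to pass hr@10 bounds. A minimal illustrative sketch, assuming a hypothetical subclass and the same FLAGS setup pattern used elsewhere in this file:

# Hypothetical subclass for illustration only; not part of the diff above.
class NCFKerasExampleBenchmark(NCFKerasBenchmarkBase):

  def benchmark_1_gpu_bounded(self):
    """Records hr@10 and gates it against explicit bounds."""
    self._setup()
    FLAGS.train_epochs = 7
    # A positive hr_at_10_min makes the base method attach min/max bounds
    # to the 'hr_at_10' metric passed to report_benchmark().
    self._run_and_report_benchmark(hr_at_10_min=0.61, hr_at_10_max=0.64)

  def benchmark_1_gpu_unbounded(self):
    """Reports throughput and loss only; hr@10 is skipped when hr_at_10_min is 0."""
    self._setup()
    FLAGS.train_epochs = 7
    self._run_and_report_benchmark()
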
@@ -100,22 +109,27 @@ class NCFKerasAccuracy(NCFKerasBenchmarkBase):
         default_flags=default_flags,
         **kwargs)
 
-  def _extract_benchmark_report_extras(self, stats):
-    metrics = []
-    metrics.append({'name': 'exp_per_second',
-                    'value': stats['avg_exp_per_second']})
+  def _run_and_report_benchmark_mlperf_like(self):
+    """Run test and report results.
 
-    # Target is 0.635, but some runs are below that level. Until we have
-    # multi-run tests, we have to accept a lower target.
-    metrics.append({'name': 'hr_at_10',
-                    'value': stats['eval_hit_rate'],
-                    'min_value': 0.630,
-                    'max_value': 0.640})
+    Note: MLPerf-like tests are not tuned to hit a specific hr@10 value, but
+    we want it recorded.
+    """
+    super(NCFKerasAccuracy, self)._run_and_report_benchmark(hr_at_10_min=0.61)
 
-    metrics.append({'name': 'train_loss',
-                    'value': stats['loss']})
+  def _run_and_report_benchmark(self, hr_at_10_min=0.630, hr_at_10_max=0.640):
+    """Run test and report results.
 
-    return metrics
+    Note: Target is 0.635, but some runs are below that level. Until we have
+    multi-run tests, we have to accept a lower target.
+
+    Args:
+      hr_at_10_min: Minimum acceptable hr@10 value.
+      hr_at_10_max: Maximum acceptable hr@10 value.
+    """
+    super(NCFKerasAccuracy, self)._run_and_report_benchmark(
+        hr_at_10_min=hr_at_10_min,
+        hr_at_10_max=hr_at_10_max)
 
   def benchmark_1_gpu_early_stop(self):
     self._setup()
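
To make the two reporting paths above concrete (illustrative only, values are placeholders): an MLPerf-like run now passes only the lowered 0.61 floor, so hr_at_10_max stays at its default of 0, while the default accuracy path keeps the 0.630-0.640 window. Under those assumptions, the 'hr_at_10' entries built by the base class would look like:

# Placeholder eval_hit_rate values; not real benchmark results.

# _run_and_report_benchmark_mlperf_like -> hr_at_10_min=0.61, hr_at_10_max left at 0
mlperf_like_hr_metric = {'name': 'hr_at_10',
                         'value': 0.62,      # stats['eval_hit_rate']
                         'min_value': 0.61,
                         'max_value': 0}

# _run_and_report_benchmark (accuracy default) -> 0.630 <= hr@10 <= 0.640
accuracy_hr_metric = {'name': 'hr_at_10',
                      'value': 0.636,        # stats['eval_hit_rate']
                      'min_value': 0.630,
                      'max_value': 0.640}
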
@@ -202,7 +216,7 @@ class NCFKerasAccuracy(NCFKerasBenchmarkBase):
     """1 GPU using keras fit/compile."""
     self._setup()
     FLAGS.train_epochs = 7
-    self._run_and_report_benchmark()
+    self._run_and_report_benchmark_mlperf_like()
 
   def benchmark_1_gpu_no_dist_strat_force_v2_mlperf_like(self):
     """1 GPU using compile/fit without dist_strat."""
@@ -217,29 +231,28 @@ class NCFKerasAccuracy(NCFKerasBenchmarkBase):
     self._setup()
     FLAGS.train_epochs = 7
     FLAGS.distribution_strategy = 'off'
-    self._run_and_report_benchmark()
+    self._run_and_report_benchmark_mlperf_like()
 
   def benchmark_1_gpu_no_dist_strat_run_eagerly_mlperf_like(self):
     """1 GPU using compile/fit without dist_strat and force run eager."""
     self._setup()
     FLAGS.train_epochs = 7
     FLAGS.distribution_strategy = 'off'
     FLAGS.run_eagerly = True
-    self._run_and_report_benchmark()
+    self._run_and_report_benchmark_mlperf_like()
 
   def benchmark_xla_1_gpu_mlperf_like(self):
     """1 GPU using compile/fit with XLA."""
     self._setup()
     FLAGS.train_epochs = 7
     FLAGS.enable_xla = True
-    self._run_and_report_benchmark()
+    self._run_and_report_benchmark_mlperf_like()
 
   def benchmark_1_gpu_ctl_mlperf_like(self):
     """1 GPU using CTL."""
     self._setup()
     FLAGS.keras_use_ctl = True
     FLAGS.train_epochs = 7
-    self._run_and_report_benchmark()
+    self._run_and_report_benchmark_mlperf_like()
 
   def benchmark_xla_1_gpu_ctl_mlperf_like(self):
     """1 GPU using CTL with XLA."""
@@ -247,7 +260,7 @@ class NCFKerasAccuracy(NCFKerasBenchmarkBase):
     FLAGS.keras_use_ctl = True
     FLAGS.enable_xla = True
     FLAGS.train_epochs = 7
-    self._run_and_report_benchmark()
+    self._run_and_report_benchmark_mlperf_like()
 
   def benchmark_8_gpu_mlperf_like(self):
     """8 GPU using keras fit/compile."""
@@ -259,7 +272,7 @@ class NCFKerasAccuracy(NCFKerasBenchmarkBase):
     FLAGS.beta1 = 0.25
     FLAGS.beta2 = 0.5
     FLAGS.epsilon = 1e-8
-    self._run_and_report_benchmark()
+    self._run_and_report_benchmark_mlperf_like()
 
   def benchmark_xla_8_gpu_mlperf_like(self):
     """8 GPU using keras fit/compile with XLA."""
@@ -272,7 +285,7 @@ class NCFKerasAccuracy(NCFKerasBenchmarkBase):
     FLAGS.beta1 = 0.25
     FLAGS.beta2 = 0.5
     FLAGS.epsilon = 1e-8
-    self._run_and_report_benchmark()
+    self._run_and_report_benchmark_mlperf_like()
 
   def benchmark_8_gpu_ctl_mlperf_like(self):
     """8 GPU using CTL."""
@@ -285,7 +298,7 @@ class NCFKerasAccuracy(NCFKerasBenchmarkBase):
     FLAGS.beta1 = 0.25
     FLAGS.beta2 = 0.5
     FLAGS.epsilon = 1e-8
-    self._run_and_report_benchmark()
+    self._run_and_report_benchmark_mlperf_like()
 
   def benchmark_xla_8_gpu_ctl_mlperf_like(self):
     """8 GPU using CTL with XLA."""
@@ -299,7 +312,7 @@ class NCFKerasAccuracy(NCFKerasBenchmarkBase):
     FLAGS.beta1 = 0.25
     FLAGS.beta2 = 0.5
     FLAGS.epsilon = 1e-8
-    self._run_and_report_benchmark()
+    self._run_and_report_benchmark_mlperf_like()
 
 
 class NCFKerasSynth(NCFKerasBenchmarkBase):
@@ -329,12 +342,6 @@ class NCFKerasSynth(NCFKerasBenchmarkBase):
         default_flags=default_flags,
         **kwargs)
 
-  def _extract_benchmark_report_extras(self, stats):
-    metrics = []
-    metrics.append({'name': 'exp_per_second',
-                    'value': stats['avg_exp_per_second']})
-    return metrics
-
   def benchmark_1_gpu(self):
     self._setup()
     self._run_and_report_benchmark()
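
For readers unfamiliar with the reporting call used throughout this file, here is a minimal standalone sketch of tf.test.Benchmark.report_benchmark with a metrics list; the numbers are placeholders, and a real benchmark would time actual work:

import tensorflow as tf

class TinyBenchmark(tf.test.Benchmark):

  def benchmark_noop(self):
    # Each metrics entry needs 'name' and 'value'; 'min_value'/'max_value'
    # are optional bounds, as in the NCF benchmarks above.
    metrics = [{'name': 'exp_per_second', 'value': 123456.0},
               {'name': 'hr_at_10', 'value': 0.635,
                'min_value': 0.61, 'max_value': 0.64}]
    self.report_benchmark(iters=-1, wall_time=42.0, metrics=metrics)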