Commit 294660bd authored by Toby Boyd's avatar Toby Boyd Committed by Shining Sun
Browse files

Add accuracy check. (#6694)

* Add accuracy check.

* Avoid double flag init, move data_dir to real data.

* Comment on lower accuracy target.
parent 0f6f656f
...@@ -20,7 +20,6 @@ from __future__ import print_function ...@@ -20,7 +20,6 @@ from __future__ import print_function
import os import os
import time import time
import json
from absl import flags from absl import flags
from absl.testing import flagsaver from absl.testing import flagsaver
...@@ -31,6 +30,8 @@ from official.recommendation import ncf_keras_main ...@@ -31,6 +30,8 @@ from official.recommendation import ncf_keras_main
from official.utils.flags import core from official.utils.flags import core
FLAGS = flags.FLAGS FLAGS = flags.FLAGS
NCF_DATA_DIR_NAME = 'movielens_data'
class KerasNCFBenchmarkBase(tf.test.Benchmark): class KerasNCFBenchmarkBase(tf.test.Benchmark):
"""Base class for NCF model benchmark.""" """Base class for NCF model benchmark."""
...@@ -38,21 +39,16 @@ class KerasNCFBenchmarkBase(tf.test.Benchmark): ...@@ -38,21 +39,16 @@ class KerasNCFBenchmarkBase(tf.test.Benchmark):
def __init__(self, def __init__(self,
output_dir=None, output_dir=None,
root_data_dir=None,
default_flags=None, default_flags=None,
**kwargs): **kwargs):
self.output_dir = output_dir self.output_dir = output_dir
self.default_flags = default_flags or {} self.default_flags = default_flags or {}
ncf_common.define_ncf_flags()
if root_data_dir:
FLAGS.data_dir = os.path.join(root_data_dir, 'movielens_data')
def _setup(self): def _setup(self):
"""Sets up and resets flags before each test.""" """Sets up and resets flags before each test."""
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.DEBUG) tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.DEBUG)
if KerasNCFBenchmarkBase.local_flags is None: if KerasNCFBenchmarkBase.local_flags is None:
ncf_common.define_ncf_flags()
# Loads flags to get defaults to then override. List cannot be empty. # Loads flags to get defaults to then override. List cannot be empty.
flags.FLAGS(['foo']) flags.FLAGS(['foo'])
core.set_defaults(**self.default_flags) core.set_defaults(**self.default_flags)
...@@ -66,11 +62,11 @@ class KerasNCFBenchmarkBase(tf.test.Benchmark): ...@@ -66,11 +62,11 @@ class KerasNCFBenchmarkBase(tf.test.Benchmark):
stats = ncf_keras_main.run_ncf(FLAGS) stats = ncf_keras_main.run_ncf(FLAGS)
wall_time_sec = time.time() - start_time_sec wall_time_sec = time.time() - start_time_sec
extras = self._extract_benchmark_report_extras(stats) metrics = self._extract_benchmark_report_extras(stats)
self.report_benchmark(iters=-1, wall_time=wall_time_sec, extras=extras) self.report_benchmark(iters=-1, wall_time=wall_time_sec, metrics=metrics)
def _extract_benchmark_report_extras(self, stats): def _extract_benchmark_report_extras(self, stats):
raise NotImplementedError("Not implemented") raise NotImplementedError('Not implemented')
class KerasNCFRealData(KerasNCFBenchmarkBase): class KerasNCFRealData(KerasNCFBenchmarkBase):
...@@ -78,6 +74,7 @@ class KerasNCFRealData(KerasNCFBenchmarkBase): ...@@ -78,6 +74,7 @@ class KerasNCFRealData(KerasNCFBenchmarkBase):
def __init__(self, def __init__(self,
output_dir=None, output_dir=None,
root_data_dir=None,
default_flags=None, default_flags=None,
**kwargs): **kwargs):
...@@ -96,6 +93,7 @@ class KerasNCFRealData(KerasNCFBenchmarkBase): ...@@ -96,6 +93,7 @@ class KerasNCFRealData(KerasNCFBenchmarkBase):
default_flags['hr_threshold'] = 0.635 default_flags['hr_threshold'] = 0.635
default_flags['ml_perf'] = True default_flags['ml_perf'] = True
default_flags['use_synthetic_data'] = False default_flags['use_synthetic_data'] = False
default_flags['data_dir'] = os.path.join(root_data_dir, NCF_DATA_DIR_NAME)
super(KerasNCFRealData, self).__init__( super(KerasNCFRealData, self).__init__(
output_dir=output_dir, output_dir=output_dir,
...@@ -103,11 +101,21 @@ class KerasNCFRealData(KerasNCFBenchmarkBase): ...@@ -103,11 +101,21 @@ class KerasNCFRealData(KerasNCFBenchmarkBase):
**kwargs) **kwargs)
def _extract_benchmark_report_extras(self, stats): def _extract_benchmark_report_extras(self, stats):
extras = {} metrics = []
extras['train_loss'] = stats['loss'] metrics.append({'name': 'exp_per_second',
extras['hr_at_10'] = stats['eval_hit_rate'] 'value': stats['avg_exp_per_second']})
extras['exp_per_second'] = stats['avg_exp_per_second']
return extras # Target is 0.625, but some runs are below that level. Until we have
# multi-run tests, we have to accept a lower target.
metrics.append({'name': 'hr_at_10',
'value': stats['eval_hit_rate'],
'min_value': 0.618,
'max_value': 0.635})
metrics.append({'name': 'train_loss',
'value': stats['loss']})
return metrics
def benchmark_1_gpu(self): def benchmark_1_gpu(self):
self._setup() self._setup()
...@@ -158,9 +166,10 @@ class KerasNCFSyntheticData(KerasNCFBenchmarkBase): ...@@ -158,9 +166,10 @@ class KerasNCFSyntheticData(KerasNCFBenchmarkBase):
**kwargs) **kwargs)
def _extract_benchmark_report_extras(self, stats): def _extract_benchmark_report_extras(self, stats):
extras = {} metrics = []
extras['exp_per_second'] = stats['avg_exp_per_second'] metrics.append({'name': 'exp_per_second',
return extras 'value': stats['avg_exp_per_second']})
return metrics
def benchmark_1_gpu(self): def benchmark_1_gpu(self):
self._setup() self._setup()
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment