Unverified commit 4298c3a3, authored by Reed, committed by GitHub

Split --ml_perf into two flags. (#5615)

--ml_perf now only changes the model to make it MLPerf compliant. --output_ml_perf_compliance_logging separately controls whether the MLPerf compliance logs are emitted.
parent 2644707c
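
All of the changes below hinge on mlperf_helper.LOGGER being a reusable, enable-gated context manager: constructing it with enable=False turns compliance logging into a no-op. A minimal sketch of that pattern, with names mirroring mlperf_helper for readability (an illustration, not the module's actual implementation):

    class _ComplianceLogger(object):
      """Illustrative stand-in for mlperf_helper.LOGGER (not the real class)."""

      def __init__(self):
        self.enabled = False   # read elsewhere as LOGGER.enabled
        self.log_file = None   # consulted by stitch_ncf()

      def __call__(self, enable=False):
        self.enabled = bool(enable)
        return self

      def __enter__(self):
        return self

      def __exit__(self, *exc_info):
        self.enabled = False


    LOGGER = _ComplianceLogger()

    with LOGGER(enable=True):
      assert LOGGER.enabled  # compliance tags would only be emitted here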
official/recommendation/data_async_generation.py

@@ -555,7 +555,8 @@ def main(_):
   if flags.FLAGS.seed is not None:
     np.random.seed(flags.FLAGS.seed)
-  with mlperf_helper.LOGGER(enable=flags.FLAGS.ml_perf):
+  with mlperf_helper.LOGGER(
+      enable=flags.FLAGS.output_ml_perf_compliance_logging):
     mlperf_helper.set_ncf_root(os.path.split(os.path.abspath(__file__))[0])
     _generation_loop(
         num_workers=flags.FLAGS.num_workers,
@@ -623,6 +624,9 @@ def define_flags():
                        "specified, a seed will not be set.")
   flags.DEFINE_boolean(name="ml_perf", default=None,
                        help="Match MLPerf. See ncf_main.py for details.")
+  flags.DEFINE_bool(name="output_ml_perf_compliance_logging", default=None,
+                    help="Output the MLPerf compliance logging. See "
+                         "ncf_main.py for details.")
   flags.mark_flags_as_required(["data_dir", "cache_id"])
official/recommendation/data_preprocessing.py

@@ -461,6 +461,7 @@ def instantiate_pipeline(dataset, data_dir, batch_size, eval_batch_size,
       "redirect_logs": use_subprocess,
       "use_tf_logging": not use_subprocess,
       "ml_perf": match_mlperf,
+      "output_ml_perf_compliance_logging": mlperf_helper.LOGGER.enabled,
   }

   if use_subprocess:
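
instantiate_pipeline forwards the logger's current state to the async generation subprocess through this flags dict, keeping the parent's --output_ml_perf_compliance_logging setting and the child's in sync. A hypothetical illustration of how such a dict could map onto command-line arguments (the real plumbing lives elsewhere in the pipeline code):

    flags_dict = {
        "ml_perf": True,
        "output_ml_perf_compliance_logging": False,
        "redirect_logs": True,
    }
    # Render each entry as an absl-style --name=value argument.
    argv = ["--{}={}".format(k, v) for k, v in sorted(flags_dict.items())]
    # ['--ml_perf=True', '--output_ml_perf_compliance_logging=False',
    #  '--redirect_logs=True']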
official/recommendation/ncf_main.py

@@ -50,6 +50,9 @@ from official.utils.misc import distribution_utils
 from official.utils.misc import model_helpers

+FLAGS = flags.FLAGS
+
+
 def construct_estimator(num_gpus, model_dir, params, batch_size,
                         eval_batch_size):
   """Construct either an Estimator or TPUEstimator for NCF.
@@ -118,7 +121,8 @@ def construct_estimator(num_gpus, model_dir, params, batch_size,
 def main(_):
-  with logger.benchmark_context(FLAGS), mlperf_helper.LOGGER(FLAGS.ml_perf):
+  with logger.benchmark_context(FLAGS), \
+      mlperf_helper.LOGGER(FLAGS.output_ml_perf_compliance_logging):
     mlperf_helper.set_ncf_root(os.path.split(os.path.abspath(__file__))[0])
     run_ncf(FLAGS)
     mlperf_helper.stitch_ncf()
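
The rewrite above only wraps a long line: a multi-item with statement enters its contexts left to right and exits them in reverse, so the benchmark context still encloses the compliance logger. A standalone illustration:

    import contextlib

    @contextlib.contextmanager
    def ctx(name):
      print("enter", name)
      yield
      print("exit", name)

    with ctx("benchmark_context"), \
        ctx("mlperf_logger"):
      pass
    # enter benchmark_context, enter mlperf_logger,
    # exit mlperf_logger, exit benchmark_context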
@@ -417,6 +421,18 @@ def define_ncf_flags():
           "which performs better due to the fact the sorting algorithms are "
           "not stable."))

+  flags.DEFINE_bool(
+      name="output_ml_perf_compliance_logging", default=False,
+      help=flags_core.help_wrap(
+          "If set, output the MLPerf compliance logging. This is only useful "
+          "if one is running the model for MLPerf. See "
+          "https://github.com/mlperf/policies/blob/master/training_rules.adoc"
+          "#submission-compliance-logs for details. This uses sudo and so may "
+          "ask for your password, as root access is needed to clear the "
+          "system caches, which is required for MLPerf compliance."
+      )
+  )
+
   flags.DEFINE_integer(
       name="seed", default=None, help=flags_core.help_wrap(
           "This value will be used to seed both NumPy and TensorFlow."))
@@ -460,5 +476,4 @@ def define_ncf_flags():
 if __name__ == "__main__":
   tf.logging.set_verbosity(tf.logging.INFO)
   define_ncf_flags()
-  FLAGS = flags.FLAGS
   absl_app.run(main)
official/recommendation/ncf_test.py

@@ -19,11 +19,14 @@ from __future__ import division
 from __future__ import print_function

 import math

+import mock
 import numpy as np
 import tensorflow as tf

+from absl.testing import flagsaver
 from official.recommendation import constants as rconst
+from official.recommendation import data_preprocessing
 from official.recommendation import neumf_model
 from official.recommendation import ncf_main
 from official.recommendation import stat_utils
@@ -33,6 +36,12 @@ NUM_TRAIN_NEG = 4
 class NcfTest(tf.test.TestCase):

+  @classmethod
+  def setUpClass(cls):  # pylint: disable=invalid-name
+    super(NcfTest, cls).setUpClass()
+    ncf_main.define_ncf_flags()
+
   def setUp(self):
     self.top_k_old = rconst.TOP_K
     self.num_eval_negatives_old = rconst.NUM_EVAL_NEGATIVES
@@ -224,6 +233,22 @@ class NcfTest(tf.test.TestCase):
     self.assertAlmostEqual(ndcg, (1 + 2 * math.log(2) / math.log(3) +
                                   math.log(2) / math.log(4)) / 4)

+  _BASE_END_TO_END_FLAGS = {
+      "batch_size": 1024,
+      "train_epochs": 1,
+      "use_synthetic_data": True
+  }
+
+  @flagsaver.flagsaver(**_BASE_END_TO_END_FLAGS)
+  @mock.patch.object(data_preprocessing, "SYNTHETIC_BATCHES_PER_EPOCH", 100)
+  def test_end_to_end(self):
+    ncf_main.main(None)
+
+  @flagsaver.flagsaver(ml_perf=True, **_BASE_END_TO_END_FLAGS)
+  @mock.patch.object(data_preprocessing, "SYNTHETIC_BATCHES_PER_EPOCH", 100)
+  def test_end_to_end_mlperf(self):
+    ncf_main.main(None)
+
 if __name__ == "__main__":
   tf.logging.set_verbosity(tf.logging.INFO)
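
The end-to-end tests lean on absl's flagsaver, which snapshots flag values before the decorated call and restores them afterwards, so overrides like ml_perf=True cannot leak between test cases. A self-contained sketch of the mechanism (demo_batch_size is an invented flag for illustration):

    from absl import flags
    from absl.testing import flagsaver

    flags.DEFINE_integer("demo_batch_size", 256, "Hypothetical example flag.")
    FLAGS = flags.FLAGS
    FLAGS.mark_as_parsed()  # normally done by absl's app.run()

    @flagsaver.flagsaver(demo_batch_size=1024)
    def run_with_override():
      assert FLAGS.demo_batch_size == 1024  # override visible inside

    run_with_override()
    assert FLAGS.demo_batch_size == 256  # original value restored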
official/utils/logs/mlperf_helper.py

@@ -192,7 +192,8 @@ def stitch_ncf():
     return

   if LOGGER.log_file is None or not tf.gfile.Exists(LOGGER.log_file):
-    tf.logging.error("Could not find log file to stitch.")
+    tf.logging.warning("Could not find log file to stitch.")
+    return

   log_lines = []
   num_eval_users = None