Unverified Commit 4298c3a3 authored by Reed, committed by GitHub

Split --ml_perf into two flags. (#5615)

--ml_perf now only changes the model to make it MLPerf-compliant; --output_ml_perf_compliance_logging adds the MLPerf compliance logging.
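For illustration, a minimal sketch (not part of this commit) of how the two flags can now be toggled independently, following the flagsaver pattern used in ncf_test.py below. It assumes define_ncf_flags() has already been called and the absl flags have been parsed, as in the test's setUpClass():

```python
# Hedged sketch only; it mirrors the test pattern in ncf_test.py, not a new API.
from absl.testing import flagsaver
from official.recommendation import ncf_main

# MLPerf-compliant model without the compliance logs:
with flagsaver.flagsaver(ml_perf=True,
                         output_ml_perf_compliance_logging=False):
  ncf_main.main(None)

# MLPerf-compliant model plus the compliance logs (per the flag's help text,
# this may prompt for sudo because clearing system caches needs root):
with flagsaver.flagsaver(ml_perf=True,
                         output_ml_perf_compliance_logging=True):
  ncf_main.main(None)
```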
parent 2644707c
@@ -555,7 +555,8 @@ def main(_):
if flags.FLAGS.seed is not None:
np.random.seed(flags.FLAGS.seed)
with mlperf_helper.LOGGER(enable=flags.FLAGS.ml_perf):
with mlperf_helper.LOGGER(
enable=flags.FLAGS.output_ml_perf_compliance_logging):
mlperf_helper.set_ncf_root(os.path.split(os.path.abspath(__file__))[0])
_generation_loop(
num_workers=flags.FLAGS.num_workers,
@@ -623,6 +624,9 @@ def define_flags():
"specified, a seed will not be set.")
flags.DEFINE_boolean(name="ml_perf", default=None,
help="Match MLPerf. See ncf_main.py for details.")
flags.DEFINE_bool(name="output_ml_perf_compliance_logging", default=None,
help="Output the MLPerf compliance logging. See "
"ncf_main.py for details.")
flags.mark_flags_as_required(["data_dir", "cache_id"])
@@ -461,6 +461,7 @@ def instantiate_pipeline(dataset, data_dir, batch_size, eval_batch_size,
"redirect_logs": use_subprocess,
"use_tf_logging": not use_subprocess,
"ml_perf": match_mlperf,
"output_ml_perf_compliance_logging": mlperf_helper.LOGGER.enabled,
}
if use_subprocess:
@@ -50,6 +50,9 @@ from official.utils.misc import distribution_utils
from official.utils.misc import model_helpers
FLAGS = flags.FLAGS
def construct_estimator(num_gpus, model_dir, params, batch_size,
eval_batch_size):
"""Construct either an Estimator or TPUEstimator for NCF.
@@ -118,7 +121,8 @@ def construct_estimator(num_gpus, model_dir, params, batch_size,
def main(_):
with logger.benchmark_context(FLAGS), mlperf_helper.LOGGER(FLAGS.ml_perf):
with logger.benchmark_context(FLAGS), \
mlperf_helper.LOGGER(FLAGS.output_ml_perf_compliance_logging):
mlperf_helper.set_ncf_root(os.path.split(os.path.abspath(__file__))[0])
run_ncf(FLAGS)
mlperf_helper.stitch_ncf()
@@ -417,6 +421,18 @@ def define_ncf_flags():
"which performs better due to the fact the sorting algorithms are "
"not stable."))
flags.DEFINE_bool(
name="output_ml_perf_compliance_logging", default=False,
help=flags_core.help_wrap(
"If set, output the MLPerf compliance logging. This is only useful "
"if one is running the model for MLPerf. See "
"https://github.com/mlperf/policies/blob/master/training_rules.adoc"
"#submission-compliance-logs for details. This uses sudo and so may "
"ask for your password, as root access is needed to clear the system "
"caches, which is required for MLPerf compliance."
)
)
flags.DEFINE_integer(
name="seed", default=None, help=flags_core.help_wrap(
"This value will be used to seed both NumPy and TensorFlow."))
@@ -460,5 +476,4 @@ def define_ncf_flags():
if __name__ == "__main__":
tf.logging.set_verbosity(tf.logging.INFO)
define_ncf_flags()
FLAGS = flags.FLAGS
absl_app.run(main)
@@ -19,11 +19,14 @@ from __future__ import division
from __future__ import print_function
import math
import mock
import numpy as np
import tensorflow as tf
from absl.testing import flagsaver
from official.recommendation import constants as rconst
from official.recommendation import data_preprocessing
from official.recommendation import neumf_model
from official.recommendation import ncf_main
from official.recommendation import stat_utils
@@ -33,6 +36,12 @@ NUM_TRAIN_NEG = 4
class NcfTest(tf.test.TestCase):
@classmethod
def setUpClass(cls): # pylint: disable=invalid-name
super(NcfTest, cls).setUpClass()
ncf_main.define_ncf_flags()
def setUp(self):
self.top_k_old = rconst.TOP_K
self.num_eval_negatives_old = rconst.NUM_EVAL_NEGATIVES
@@ -224,6 +233,22 @@ class NcfTest(tf.test.TestCase):
self.assertAlmostEqual(ndcg, (1 + 2 * math.log(2) / math.log(3) +
math.log(2) / math.log(4)) / 4)
_BASE_END_TO_END_FLAGS = {
"batch_size": 1024,
"train_epochs": 1,
"use_synthetic_data": True
}
@flagsaver.flagsaver(**_BASE_END_TO_END_FLAGS)
@mock.patch.object(data_preprocessing, "SYNTHETIC_BATCHES_PER_EPOCH", 100)
def test_end_to_end(self):
ncf_main.main(None)
@flagsaver.flagsaver(ml_perf=True, **_BASE_END_TO_END_FLAGS)
@mock.patch.object(data_preprocessing, "SYNTHETIC_BATCHES_PER_EPOCH", 100)
def test_end_to_end_mlperf(self):
ncf_main.main(None)
if __name__ == "__main__":
tf.logging.set_verbosity(tf.logging.INFO)
@@ -192,7 +192,8 @@ def stitch_ncf():
return
if LOGGER.log_file is None or not tf.gfile.Exists(LOGGER.log_file):
tf.logging.error("Could not find log file to stitch.")
tf.logging.warning("Could not find log file to stitch.")
return
log_lines = []
num_eval_users = None