"...resnet50_tensorflow.git" did not exist on "ea70bc22178169f42f8163d5542e770526c75c63"
Commit 85addcf3 authored by A. Unique TensorFlower

Merge pull request #7436 from nnigania:ncf_f16

PiperOrigin-RevId: 265165355
parents ee016fb0 5b0ef1fc
@@ -154,8 +154,10 @@ def define_ncf_flags():
       intra_op=False,
       synthetic_data=True,
       max_train_steps=False,
-      dtype=False,
+      dtype=True,
       all_reduce_alg=False,
+      loss_scale=True,
+      dynamic_loss_scale=True,
       enable_xla=True,
       force_v2_in_keras_compile=True
   )
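For context only (not part of the diff): turning on dtype, loss_scale and dynamic_loss_scale in define_performance exposes the corresponding command-line flags, and their resolution is delegated to flags_core.get_loss_scale later in this commit. The sketch below is a hypothetical, standalone approximation of that behaviour using plain absl flags; the names and defaults here are assumptions for illustration, not the model-garden definitions.

from absl import flags

# Hypothetical re-creation of the flag behaviour enabled above. The real
# definitions live in official/utils/flags; this only mirrors the intent:
# fp16 training falls back to dynamic loss scaling unless a fixed scale
# (e.g. 8192) is requested explicitly.
flags.DEFINE_string("dtype", "fp32", "Data type for training: fp16 or fp32.")
flags.DEFINE_integer("loss_scale", None, "Fixed loss scale; None uses the default.")
flags.DEFINE_bool("dynamic_loss_scale", True,
                  "Use dynamic loss scaling when no fixed scale is given.")

def get_loss_scale(flag_values, default_for_fp16="dynamic"):
  """Approximates the flags_core.get_loss_scale usage seen in this commit."""
  if flag_values.loss_scale is not None:
    return flag_values.loss_scale
  return default_for_fp16 if flag_values.dtype == "fp16" else 1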
@@ -263,6 +263,15 @@ class NCFKerasAccuracy(NCFKerasBenchmarkBase):
     FLAGS.train_epochs = 7
     self._run_and_report_benchmark_mlperf_like()

+  def benchmark_1_gpu_ctl_fp16_mlperf_like(self):
+    """1 GPU using CTL."""
+    self._setup()
+    FLAGS.keras_use_ctl = True
+    FLAGS.train_epochs = 7
+    FLAGS.dtype = 'fp16'
+    FLAGS.loss_scale = 8192
+    self._run_and_report_benchmark_mlperf_like()
+
   def benchmark_1_gpu_ctl_run_eagerly_mlperf_like(self):
     """1 GPU using CTL with eager and distribution strategy."""
     self._setup()
@@ -279,6 +288,16 @@ class NCFKerasAccuracy(NCFKerasBenchmarkBase):
     FLAGS.train_epochs = 7
     self._run_and_report_benchmark_mlperf_like()

+  def benchmark_xla_1_gpu_ctl_fp16_mlperf_like(self):
+    """1 GPU using CTL with XLA."""
+    self._setup()
+    FLAGS.keras_use_ctl = True
+    FLAGS.enable_xla = True
+    FLAGS.train_epochs = 7
+    FLAGS.dtype = 'fp16'
+    FLAGS.loss_scale = 8192
+    self._run_and_report_benchmark_mlperf_like()
+
   def benchmark_8_gpu_mlperf_like(self):
     """8 GPU using keras fit/compile."""
     self._setup()
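The new fp16 benchmarks pin FLAGS.loss_scale to 8192. As a quick, self-contained illustration (my own sketch, not repository code) of why a large power-of-two scale is needed with float16: small gradients underflow to zero in fp16, while scaling the loss (and hence the gradients) keeps them representable, and dividing by the same factor before the optimizer update recovers the original magnitudes exactly, since the scale is a power of two.

import numpy as np

grad = np.float32(1e-8)            # a tiny gradient value in fp32
print(np.float16(grad))            # 0.0 -- underflows in fp16
scale = np.float32(8192.0)         # the static loss scale used above
scaled = np.float16(grad * scale)  # ~8.2e-05 -- representable in fp16
print(np.float32(scaled) / scale)  # ~1e-8 -- recovered after unscaling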
@@ -42,6 +42,7 @@ from official.utils.logs import mlperf_helper
 from official.utils.misc import distribution_utils
 from official.utils.misc import keras_utils
 from official.utils.misc import model_helpers
+from official.utils.flags import core as flags_core
 from official.utils.misc import tpu_lib

 FLAGS = flags.FLAGS
@@ -267,6 +268,12 @@ def run_ncf(_):
         beta_1=params["beta1"],
         beta_2=params["beta2"],
         epsilon=params["epsilon"])
+    if FLAGS.dtype == "fp16":
+      optimizer = \
+          tf.compat.v1.train.experimental.enable_mixed_precision_graph_rewrite(
+              optimizer,
+              loss_scale=flags_core.get_loss_scale(FLAGS,
+                                                   default_for_fp16="dynamic"))

   if params["keras_use_ctl"]:
     train_loss, eval_results = run_ncf_custom_training(
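The same graph-rewrite call works outside the model garden as well. The standalone sketch below (assuming a TF 1.14+/early TF 2.x runtime where the compat v1 API is available; the toy model and learning rate are illustrative) mirrors what the branch above does for the non-CTL keras compile/fit path: the rewrite casts eligible ops to float16 and wraps the optimizer so loss scaling is applied automatically.

import tensorflow as tf

# Sketch only: wrap an optimizer with the mixed-precision graph rewrite,
# using dynamic loss scaling as in the default_for_fp16="dynamic" case above.
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
optimizer = tf.compat.v1.train.experimental.enable_mixed_precision_graph_rewrite(
    optimizer, loss_scale="dynamic")

model = tf.keras.Sequential([tf.keras.layers.Dense(1)])
model.compile(optimizer=optimizer, loss="mse")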
@@ -371,8 +378,12 @@ def run_ncf_custom_training(params,
             softmax_logits,
             sample_weight=features[rconst.VALID_POINT_MASK])
         loss *= (1.0 / params["batch_size"])
+        if FLAGS.dtype == "fp16":
+          loss = optimizer.get_scaled_loss(loss)

       grads = tape.gradient(loss, keras_model.trainable_variables)
+      if FLAGS.dtype == "fp16":
+        grads = optimizer.get_unscaled_gradients(grads)
       # Converting gradients to dense form helps in perf on GPU for NCF
       grads = neumf_model.sparse_to_dense_grads(
           list(zip(grads, keras_model.trainable_variables)))
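In the custom-training-loop path, get_scaled_loss and get_unscaled_gradients are methods of the loss-scale optimizer wrapper. The self-contained sketch below (assuming the TF 2.0/2.1-era tf.keras.mixed_precision.experimental API, with a toy model and loss of my own choosing) shows the same scale-then-unscale pattern in isolation: scale the loss before differentiating, unscale the gradients before applying them.

import tensorflow as tf

# Sketch of the manual loss-scaling pattern added to the training loop above;
# a fixed scale of 8192 matches the fp16 benchmarks in this commit.
model = tf.keras.Sequential([tf.keras.layers.Dense(1)])
optimizer = tf.keras.mixed_precision.experimental.LossScaleOptimizer(
    tf.keras.optimizers.Adam(), loss_scale=8192)

@tf.function
def train_step(x, y):
  with tf.GradientTape() as tape:
    loss = tf.reduce_mean(tf.square(model(x) - y))
    scaled_loss = optimizer.get_scaled_loss(loss)          # loss * 8192
  scaled_grads = tape.gradient(scaled_loss, model.trainable_variables)
  grads = optimizer.get_unscaled_gradients(scaled_grads)   # grads / 8192
  optimizer.apply_gradients(zip(grads, model.trainable_variables))
  return loss

# Example step: train_step(tf.random.normal([8, 4]), tf.random.normal([8, 1]))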