"include/vscode:/vscode.git/clone" did not exist on "d1db6a0c3ea190996bdae37adda191f746bfc34e"
Commit f6d64296 authored by Nimit Nigania's avatar Nimit Nigania
Browse files

fp16 support

This change adds mixed-precision (fp16) training support to the NCF Keras model: it enables the dtype, loss_scale, and dynamic_loss_scale flags, wraps the optimizer with the mixed-precision graph rewrite, applies loss scaling around the gradient computation in the custom training loop, and adds an fp16 variant of the 1-GPU CTL+XLA benchmark.

parent 52372782
@@ -164,8 +164,10 @@ def define_ncf_flags():
       intra_op=False,
       synthetic_data=True,
       max_train_steps=False,
-      dtype=False,
+      dtype=True,
       all_reduce_alg=False,
+      loss_scale=True,
+      dynamic_loss_scale=True,
       enable_xla=True,
       force_v2_in_keras_compile=True
   )
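For context, the newly enabled flags can be exercised directly from the command line. A hedged sketch of such an invocation follows; the script path and exact flag spellings are assumptions based on the official models repository layout, not part of this commit:

python official/recommendation/ncf_keras_main.py \
    --keras_use_ctl=true \
    --enable_xla=true \
    --dtype=fp16 \
    --loss_scale=8192 \
    --train_epochs=7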
@@ -266,6 +266,16 @@ class NCFKerasAccuracy(NCFKerasBenchmarkBase):
     FLAGS.train_epochs = 7
     self._run_and_report_benchmark_mlperf_like()

+  def benchmark_xla_1_gpu_ctl_fp16_mlperf_like(self):
+    """1 GPU using CTL with XLA and fp16."""
+    self._setup()
+    FLAGS.keras_use_ctl = True
+    FLAGS.enable_xla = True
+    FLAGS.train_epochs = 7
+    FLAGS.dtype = 'fp16'
+    FLAGS.loss_scale = 8192
+    self._run_and_report_benchmark_mlperf_like()
+
   def benchmark_8_gpu_mlperf_like(self):
     """8 GPU using keras fit/compile."""
     self._setup()
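A note on the fixed value: FLAGS.loss_scale = 8192 selects a static loss scale (a power of two, so scaling and unscaling are exact in floating point) rather than the dynamic loss scaling enabled by the flag defaults; a static scale avoids dynamic scaling's per-step bookkeeping in a benchmark run. This rationale is inferred from common mixed-precision practice, not stated in the commit.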
@@ -42,6 +42,7 @@ from official.utils.logs import mlperf_helper
 from official.utils.misc import distribution_utils
 from official.utils.misc import keras_utils
 from official.utils.misc import model_helpers
+from official.utils.flags import core as flags_core

 FLAGS = flags.FLAGS
@@ -277,6 +278,9 @@ def run_ncf(_):
       beta_1=params["beta1"],
       beta_2=params["beta2"],
       epsilon=params["epsilon"])
+  if FLAGS.dtype == "fp16":
+    optimizer = tf.compat.v1.train.experimental.enable_mixed_precision_graph_rewrite(
+        optimizer, loss_scale=flags_core.get_loss_scale(FLAGS, default_for_fp16="dynamic"))

   if params["keras_use_ctl"]:
     loss_object = tf.keras.losses.SparseCategoricalCrossentropy(
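Outside the diff, the optimizer wrapping above can be reproduced in isolation. A minimal sketch, assuming TensorFlow 1.14+ where the mixed-precision graph rewrite API is available; the learning rate is an arbitrary placeholder:

import tensorflow as tf

# Any Keras optimizer can be wrapped; Adam mirrors the NCF code above.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

# The rewrite casts eligible ops to fp16 in the graph and returns the
# optimizer wrapped so that loss scaling is applied automatically.
# loss_scale may be "dynamic" or a fixed number such as 8192.
optimizer = tf.compat.v1.train.experimental.enable_mixed_precision_graph_rewrite(
    optimizer, loss_scale="dynamic")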
@@ -296,8 +300,12 @@ def run_ncf(_):
           loss = loss_object(labels, softmax_logits,
                              sample_weight=features[rconst.VALID_POINT_MASK])
           loss *= (1.0 / (batch_size * strategy.num_replicas_in_sync))
+          if FLAGS.dtype == "fp16":
+            loss = optimizer.get_scaled_loss(loss)

         grads = tape.gradient(loss, keras_model.trainable_variables)
+        if FLAGS.dtype == "fp16":
+          grads = optimizer.get_unscaled_gradients(grads)
         # Converting gradients to dense form helps performance on GPU for NCF.
         grads = neumf_model.sparse_to_dense_grads(
             list(zip(grads, keras_model.trainable_variables)))
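The loss-scaling pattern in this custom training loop generalizes beyond NCF. Below is a self-contained sketch, assuming an optimizer that exposes get_scaled_loss() and get_unscaled_gradients() (as the wrapped optimizer above does); the model, loss_object, and use_fp16 names are illustrative placeholders:

import tensorflow as tf

def train_step(model, optimizer, loss_object, features, labels, use_fp16):
  with tf.GradientTape() as tape:
    logits = model(features, training=True)
    loss = loss_object(labels, logits)
    if use_fp16:
      # Multiply the loss by the loss scale so small fp16 gradients
      # do not underflow to zero during backprop.
      loss = optimizer.get_scaled_loss(loss)
  grads = tape.gradient(loss, model.trainable_variables)
  if use_fp16:
    # Divide the scaling back out before the update is applied.
    grads = optimizer.get_unscaled_gradients(grads)
  optimizer.apply_gradients(zip(grads, model.trainable_variables))
  return loss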