"src/include/threadwise_tensor_slice_copy.hpp" did not exist on "c9fa46af0bf70701e73a6d2cd9741759d179e5ee"
Commit f6d64296 authored by Nimit Nigania

fp16 support

parent 52372782
@@ -164,8 +164,10 @@ def define_ncf_flags():
       intra_op=False,
       synthetic_data=True,
       max_train_steps=False,
-      dtype=False,
+      dtype=True,
       all_reduce_alg=False,
+      loss_scale=True,
+      dynamic_loss_scale=True,
       enable_xla=True,
       force_v2_in_keras_compile=True
   )
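The booleans above toggle which shared performance flags get registered; with dtype, loss_scale, and dynamic_loss_scale turned on, the runner gains --dtype, --loss_scale, and --dynamic_loss_scale command-line flags. A minimal sketch of the presumed surrounding call, with the exact keyword list in the NCF flag-definition code assumed rather than verified (the call may take more arguments than shown here):

from official.utils.flags import core as flags_core

def define_ncf_flags():
  # Each keyword toggles registration of one shared performance flag:
  # dtype=True adds --dtype (fp16/fp32), loss_scale=True adds --loss_scale,
  # and dynamic_loss_scale=True allows --loss_scale=dynamic.
  flags_core.define_performance(
      intra_op=False,
      synthetic_data=True,
      max_train_steps=False,
      dtype=True,
      all_reduce_alg=False,
      loss_scale=True,
      dynamic_loss_scale=True,
      enable_xla=True,
      force_v2_in_keras_compile=True)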
@@ -266,6 +266,16 @@ class NCFKerasAccuracy(NCFKerasBenchmarkBase):
     FLAGS.train_epochs = 7
     self._run_and_report_benchmark_mlperf_like()
 
+  def benchmark_xla_1_gpu_ctl_fp16_mlperf_like(self):
+    """1 GPU using CTL with XLA."""
+    self._setup()
+    FLAGS.keras_use_ctl = True
+    FLAGS.enable_xla = True
+    FLAGS.train_epochs = 7
+    FLAGS.dtype = 'fp16'
+    FLAGS.loss_scale = 8192
+    self._run_and_report_benchmark_mlperf_like()
+
   def benchmark_8_gpu_mlperf_like(self):
     """8 GPU using keras fit/compile."""
     self._setup()
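For reference, the new benchmark amounts to driving the NCF Keras trainer with these flags set: fp16 compute with a fixed loss scale of 8192, under the custom training loop and XLA. A hypothetical direct invocation (the script path and any dataset flags are assumed, not part of this commit):

python official/recommendation/ncf_keras_main.py \
    --keras_use_ctl --enable_xla \
    --dtype=fp16 --loss_scale=8192 \
    --train_epochs=7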
@@ -42,6 +42,7 @@ from official.utils.logs import mlperf_helper
 from official.utils.misc import distribution_utils
 from official.utils.misc import keras_utils
 from official.utils.misc import model_helpers
+from official.utils.flags import core as flags_core
 
 FLAGS = flags.FLAGS
@@ -277,6 +278,9 @@ def run_ncf(_):
         beta_1=params["beta1"],
         beta_2=params["beta2"],
         epsilon=params["epsilon"])
+  if FLAGS.dtype == "fp16":
+    optimizer = tf.compat.v1.train.experimental.enable_mixed_precision_graph_rewrite(
+        optimizer, loss_scale=flags_core.get_loss_scale(FLAGS, default_for_fp16="dynamic"))
 
   if params["keras_use_ctl"]:
     loss_object = tf.keras.losses.SparseCategoricalCrossentropy(
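enable_mixed_precision_graph_rewrite wraps the optimizer in a loss-scale optimizer and enables the graph pass that rewrites eligible ops to run in fp16. A standalone sketch of the pattern, assuming a TF 2.0-era runtime (the Adam instance here is illustrative, not the diff's optimizer):

import tensorflow as tf

optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
# Returns a loss-scale-wrapped optimizer and turns on the automatic
# mixed precision graph rewrite. "dynamic" adjusts the scale during
# training; passing an int (e.g. 8192) fixes it, matching --loss_scale.
optimizer = tf.compat.v1.train.experimental.enable_mixed_precision_graph_rewrite(
    optimizer, loss_scale="dynamic")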
@@ -296,8 +300,12 @@ def run_ncf(_):
         loss = loss_object(labels, softmax_logits,
                            sample_weight=features[rconst.VALID_POINT_MASK])
         loss *= (1.0 / (batch_size*strategy.num_replicas_in_sync))
+        if FLAGS.dtype == "fp16":
+          loss = optimizer.get_scaled_loss(loss)
 
       grads = tape.gradient(loss, keras_model.trainable_variables)
+      if FLAGS.dtype == "fp16":
+        grads = optimizer.get_unscaled_gradients(grads)
       # Converting gradients to dense form helps in perf on GPU for NCF
       grads = neumf_model.sparse_to_dense_grads(
           list(zip(grads, keras_model.trainable_variables)))
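In the custom training loop the loss is multiplied up before the tape computes gradients, and the gradients are divided back down before they are applied; without this, small fp16 gradients underflow to zero. A self-contained sketch of the same scale/unscale pattern using the experimental Keras LossScaleOptimizer of that era (the toy model and data are illustrative):

import tensorflow as tf
from tensorflow.keras.mixed_precision import experimental as mixed_precision

model = tf.keras.Sequential([tf.keras.layers.Dense(1)])
optimizer = mixed_precision.LossScaleOptimizer(
    tf.keras.optimizers.SGD(0.1), loss_scale=8192)

x = tf.random.normal([8, 4])
y = tf.random.normal([8, 1])

with tf.GradientTape() as tape:
  loss = tf.reduce_mean(tf.square(model(x) - y))
  # Scale the loss up so small fp16 gradients survive the backward pass.
  scaled_loss = optimizer.get_scaled_loss(loss)
grads = tape.gradient(scaled_loss, model.trainable_variables)
# Undo the scaling before the update so step sizes are unaffected.
grads = optimizer.get_unscaled_gradients(grads)
optimizer.apply_gradients(zip(grads, model.trainable_variables))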