Unverified Commit 44ff121d authored by nnigania, committed by GitHub

NCF CTL Perf optimization to convert gradients from sparse to dense (#7102)

* Borrow a TF 1.x optimization that converts gradients from sparse to dense form for better performance

* Clean up after code review
parent 5afa9569
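
For readers skimming the change: sparse_to_dense_grads walks the (gradient, variable) pairs and materializes any tf.IndexedSlices gradient into an ordinary dense Tensor. Below is a minimal sketch of the helper, consistent with the docstring visible in the diff; the exact body in neumf_model.py may differ.

import tensorflow as tf

def sparse_to_dense_grads(grads_and_vars):
  """Convert sparse gradients to dense gradients (sketch).

  Sparse gradients, represented as tf.IndexedSlices, are converted to dense
  Tensors; dense gradients and None pass through unchanged.
  """
  def to_dense(grad):
    # tf.convert_to_tensor materializes an IndexedSlices (indices + values)
    # into a dense Tensor of the variable's full shape.
    if isinstance(grad, tf.IndexedSlices):
      return tf.convert_to_tensor(grad)
    return grad
  return [(to_dense(grad), var) for grad, var in grads_and_vars]

Densifying trades some memory for speed: apply_gradients on dense Tensors takes the regular update path instead of scatter-style sparse updates, which the commit message reports is faster on GPU for NCF.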
official/recommendation/ncf_keras_main.py
@@ -339,8 +339,9 @@ def run_ncf(_):
       loss *= (1.0 / (batch_size*strategy.num_replicas_in_sync))
       grads = tape.gradient(loss, keras_model.trainable_variables)
-      optimizer.apply_gradients(list(zip(grads,
-                                         keras_model.trainable_variables)))
+      # Converting gradients to dense form helps in perf on GPU for NCF
+      grads = neumf_model.sparse_to_dense_grads(list(zip(grads, keras_model.trainable_variables)))
+      optimizer.apply_gradients(grads)
       return loss
     per_replica_losses = strategy.experimental_run(step_fn,
official/recommendation/neumf_model.py
@@ -45,7 +45,7 @@ from official.recommendation import stat_utils
 from official.utils.logs import mlperf_helper

-def _sparse_to_dense_grads(grads_and_vars):
+def sparse_to_dense_grads(grads_and_vars):
   """Convert sparse gradients to dense gradients.

   All sparse gradients, which are represented as instances of tf.IndexedSlices,
@@ -135,7 +135,7 @@ def neumf_model_fn(features, labels, mode, params):
     tvars = tf.compat.v1.trainable_variables()
     gradients = optimizer.compute_gradients(
         loss, tvars, colocate_gradients_with_ops=True)
-    gradients = _sparse_to_dense_grads(gradients)
+    gradients = sparse_to_dense_grads(gradients)
     minimize_op = optimizer.apply_gradients(
         gradients, global_step=global_step, name="train")
     update_ops = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.UPDATE_OPS)
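
Background on why these gradients are sparse in the first place: the gradient of an embedding lookup (tf.gather) with respect to the embedding table arrives as tf.IndexedSlices, carrying values only for the rows that were actually looked up. A small self-contained illustration follows; the names here are hypothetical, not from the patch.

import tensorflow as tf

embeddings = tf.Variable(tf.random.normal([1000, 16]))  # hypothetical table
ids = tf.constant([3, 7, 7, 42])                        # toy batch of ids

with tf.GradientTape() as tape:
  looked_up = tf.gather(embeddings, ids)  # embedding lookup, as in NCF
  loss = tf.reduce_sum(looked_up)

grad = tape.gradient(loss, embeddings)
print(type(grad))                   # tf.IndexedSlices: a sparse gradient
dense = tf.convert_to_tensor(grad)  # dense Tensor of shape [1000, 16]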