Commit ffb6dbf3 authored by Hongkun Yu's avatar Hongkun Yu Committed by A. Unique TensorFlower

Use sparse_categorical_crossentropy for the test, as the loss object's default reduction does not work with TPUStrategy, and the single task trainer already handles the reduction.

PiperOrigin-RevId: 367757677
parent e353e4e5
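
For context, a minimal sketch (not part of the commit) of the two ways of supplying the loss that the message contrasts: a `tf.keras.losses.Loss` object defaults to `Reduction.AUTO`, which raises an error when invoked under a `tf.distribute` strategy outside of `Model.fit`, whereas the plain function returns unreduced per-example losses that the trainer can reduce itself.

```python
import tensorflow as tf

# Illustrative sketch only; not part of the commit.
# A Loss object's default reduction (AUTO) raises an error when called
# under a tf.distribute strategy outside of the built-in training loops,
# so the reduction would have to be set explicitly:
loss_obj = tf.keras.losses.SparseCategoricalCrossentropy(
    from_logits=True, reduction=tf.keras.losses.Reduction.NONE)

# The plain function returns unreduced per-example losses, which the
# single task trainer reduces and scales itself:
loss_fn = tf.keras.losses.sparse_categorical_crossentropy
```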
@@ -107,6 +107,10 @@ class SingleTaskTrainer(orbit.StandardTrainer):
       # replicas. This ensures that we don't end up multiplying our loss by
       # the number of workers - gradients are summed, not averaged, across
       # replicas during the apply_gradients call.
+      # Note: the loss reduction is handled explicitly here and scaled by
+      # num_replicas_in_sync, so a plain loss function is recommended.
+      # If you use a tf.keras.losses.Loss object, you may need to set its
+      # reduction argument explicitly.
       loss = tf.reduce_mean(self.loss_fn(target, output))
       scaled_loss = loss / self.strategy.num_replicas_in_sync
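
A minimal, self-contained sketch of the reduction described in the comment above (names such as `loss_fn`, `target`, and `output` are assumed from the surrounding trainer code):

```python
import tensorflow as tf

strategy = tf.distribute.get_strategy()  # any tf.distribute strategy

def compute_scaled_loss(loss_fn, target, output):
  # loss_fn returns one loss value per example, e.g.
  # tf.keras.losses.sparse_categorical_crossentropy.
  per_example_loss = loss_fn(target, output)
  # Mean over the per-replica batch.
  loss = tf.reduce_mean(per_example_loss)
  # Gradients are summed, not averaged, across replicas during
  # apply_gradients, so dividing by the replica count keeps the
  # effective loss a global batch average.
  return loss / strategy.num_replicas_in_sync
```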
@@ -30,14 +30,15 @@ class SingleTaskTrainerTest(tf.test.TestCase):
         tf.keras.Input(shape=(4,), name='features'),
         tf.keras.layers.Dense(10, activation=tf.nn.relu),
         tf.keras.layers.Dense(10, activation=tf.nn.relu),
-        tf.keras.layers.Dense(3)
+        tf.keras.layers.Dense(3),
+        tf.keras.layers.Softmax(),
     ])
     trainer = single_task_trainer.SingleTaskTrainer(
         train_ds,
         label_key='label',
         model=model,
-        loss_fn=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
+        loss_fn=tf.keras.losses.sparse_categorical_crossentropy,
         optimizer=tf.keras.optimizers.SGD(learning_rate=0.01))
     controller = orbit.Controller(
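
The plain function defaults to `from_logits=False`, which is why the test model gains a final `Softmax` layer in this hunk. A hedged, self-contained sketch of that pairing (random data, illustrative only):

```python
import tensorflow as tf

model = tf.keras.Sequential([
    tf.keras.Input(shape=(4,), name='features'),
    tf.keras.layers.Dense(10, activation=tf.nn.relu),
    tf.keras.layers.Dense(10, activation=tf.nn.relu),
    tf.keras.layers.Dense(3),
    tf.keras.layers.Softmax(),  # emits probabilities, matching from_logits=False
])

features = tf.random.uniform([8, 4])
labels = tf.random.uniform([8], maxval=3, dtype=tf.int32)

# Returns an unreduced [8] vector of per-example losses; the trainer
# applies tf.reduce_mean and the replica scaling shown in the first hunk.
per_example = tf.keras.losses.sparse_categorical_crossentropy(
    labels, model(features))
assert per_example.shape == (8,)
```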