Small changes in video classification task.

PiperOrigin-RevId: 335548133

Small changes in video classification task.
PiperOrigin-RevId: 335548133
1407e274 · Yeqing Li · A. Unique TensorFlower · 75c6e3bc · 1407e274 · 1407e274
Commit 1407e274, authored Oct 05, 2020 by Yeqing Li; committed by A. Unique TensorFlower on Oct 05, 2020.
Showing 2 changed files with 11 additions and 1 deletion.

official/vision/beta/dataloaders/video_input.py +3 -0

official/vision/beta/tasks/video_classification.py +8 -1

No files found.
--- a/official/vision/beta/dataloaders/video_input.py
+++ b/official/vision/beta/dataloaders/video_input.py
@@ -192,6 +192,7 @@ class Parser(parser.Parser):
    self._num_classes = input_params.num_classes
    self._image_key = image_key
    self._label_key = label_key
+    self._dtype = tf.dtypes.as_dtype(input_params.dtype)
  def _parse_train_data(
      self, decoded_tensors: Dict[str, tf.Tensor]
@@ -208,6 +209,7 @@ class Parser(parser.Parser):
        num_test_clips=self._num_test_clips,
        min_resize=self._min_resize,
        crop_size=self._crop_size)
+    image = tf.cast(image, dtype=self._dtype)
    label = _process_label(label, self._one_hot_label, self._num_classes)
    return {'image': image}, label
@@ -226,6 +228,7 @@ class Parser(parser.Parser):
        num_test_clips=self._num_test_clips,
        min_resize=self._min_resize,
        crop_size=self._crop_size)
+    image = tf.cast(image, dtype=self._dtype)
    label = _process_label(label, self._one_hot_label, self._num_classes)
    return {'image': image}, label

--- a/official/vision/beta/tasks/video_classification.py
+++ b/official/vision/beta/tasks/video_classification.py
@@ -14,6 +14,7 @@
 # limitations under the License.
 # ==============================================================================
 """Video classification task definition."""
+from absl import logging
 import tensorflow as tf
 from official.core import base_task
 from official.core import input_reader
@@ -30,7 +31,13 @@ class VideoClassificationTask(base_task.Task):
  def build_model(self):
    """Builds video classification model."""
-    input_specs = tf.keras.layers.InputSpec(shape=[None, None, None, None, 3])
+    common_input_shape = [
+        d1 if d1 == d2 else None
+        for d1, d2 in zip(self.task_config.train_data.feature_shape,
+                          self.task_config.validation_data.feature_shape)
+    ]
+    input_specs = tf.keras.layers.InputSpec(shape=[None] + common_input_shape)
+    logging.info('Build model input %r', common_input_shape)
    l2_weight_decay = self.task_config.losses.l2_weight_decay
    # Divide weight decay by 2.0 to match the implementation of tf.nn.l2_loss.