Commit ebac9847 authored by Yeqing Li, committed by A. Unique TensorFlower
Browse files

Small refactors of the video input interface and fixes bugs.

PiperOrigin-RevId: 361233109
parent 5c02f1ef
...@@ -29,7 +29,7 @@ IMAGE_KEY = 'image/encoded' ...@@ -29,7 +29,7 @@ IMAGE_KEY = 'image/encoded'
LABEL_KEY = 'clip/label/index' LABEL_KEY = 'clip/label/index'
def _process_image(image: tf.Tensor, def process_image(image: tf.Tensor,
is_training: bool = True, is_training: bool = True,
num_frames: int = 32, num_frames: int = 32,
stride: int = 1, stride: int = 1,
...@@ -112,7 +112,7 @@ def _process_image(image: tf.Tensor, ...@@ -112,7 +112,7 @@ def _process_image(image: tf.Tensor,
return preprocess_ops_3d.normalize_image(image, zero_centering_image) return preprocess_ops_3d.normalize_image(image, zero_centering_image)
def _postprocess_image(image: tf.Tensor, def postprocess_image(image: tf.Tensor,
is_training: bool = True, is_training: bool = True,
num_frames: int = 32, num_frames: int = 32,
num_test_clips: int = 1, num_test_clips: int = 1,
...@@ -147,7 +147,7 @@ def _postprocess_image(image: tf.Tensor, ...@@ -147,7 +147,7 @@ def _postprocess_image(image: tf.Tensor,
return image return image
def _process_label(label: tf.Tensor, def process_label(label: tf.Tensor,
one_hot_label: bool = True, one_hot_label: bool = True,
num_classes: Optional[int] = None) -> tf.Tensor: num_classes: Optional[int] = None) -> tf.Tensor:
"""Processes label Tensor.""" """Processes label Tensor."""
...@@ -175,15 +175,13 @@ class Decoder(decoder.Decoder): ...@@ -175,15 +175,13 @@ class Decoder(decoder.Decoder):
"""A tf.Example decoder for classification task.""" """A tf.Example decoder for classification task."""
def __init__(self, image_key: str = IMAGE_KEY, label_key: str = LABEL_KEY):
  """Initializes the tf.SequenceExample decoder for the classification task.

  Args:
    image_key: Feature key of the JPEG-encoded frame strings in the
      sequence part of the example.
    label_key: Feature key of the integer label stored in the context
      part of the example.
  """
  # Context carries one variable-length integer feature: the clip label.
  self._context_description = {label_key: tf.io.VarLenFeature(tf.int64)}
  # Sequence carries one string feature per frame: a JPEG-encoded image.
  self._sequence_description = {
      image_key: tf.io.FixedLenSequenceFeature((), tf.string),
  }
def add_feature(self, feature_name: str, def add_feature(self, feature_name: str,
...@@ -245,7 +243,7 @@ class Parser(parser.Parser): ...@@ -245,7 +243,7 @@ class Parser(parser.Parser):
"""Parses data for training.""" """Parses data for training."""
# Process image and label. # Process image and label.
image = decoded_tensors[self._image_key] image = decoded_tensors[self._image_key]
image = _process_image( image = process_image(
image=image, image=image,
is_training=True, is_training=True,
num_frames=self._num_frames, num_frames=self._num_frames,
...@@ -261,7 +259,7 @@ class Parser(parser.Parser): ...@@ -261,7 +259,7 @@ class Parser(parser.Parser):
features = {'image': image} features = {'image': image}
label = decoded_tensors[self._label_key] label = decoded_tensors[self._label_key]
label = _process_label(label, self._one_hot_label, self._num_classes) label = process_label(label, self._one_hot_label, self._num_classes)
if self._output_audio: if self._output_audio:
audio = decoded_tensors[self._audio_feature] audio = decoded_tensors[self._audio_feature]
...@@ -279,7 +277,7 @@ class Parser(parser.Parser): ...@@ -279,7 +277,7 @@ class Parser(parser.Parser):
) -> Tuple[Dict[str, tf.Tensor], tf.Tensor]: ) -> Tuple[Dict[str, tf.Tensor], tf.Tensor]:
"""Parses data for evaluation.""" """Parses data for evaluation."""
image = decoded_tensors[self._image_key] image = decoded_tensors[self._image_key]
image = _process_image( image = process_image(
image=image, image=image,
is_training=False, is_training=False,
num_frames=self._num_frames, num_frames=self._num_frames,
...@@ -292,14 +290,14 @@ class Parser(parser.Parser): ...@@ -292,14 +290,14 @@ class Parser(parser.Parser):
features = {'image': image} features = {'image': image}
label = decoded_tensors[self._label_key] label = decoded_tensors[self._label_key]
label = _process_label(label, self._one_hot_label, self._num_classes) label = process_label(label, self._one_hot_label, self._num_classes)
if self._output_audio: if self._output_audio:
audio = decoded_tensors[self._audio_feature] audio = decoded_tensors[self._audio_feature]
audio = tf.cast(audio, dtype=self._dtype) audio = tf.cast(audio, dtype=self._dtype)
audio = preprocess_ops_3d.sample_sequence( audio = preprocess_ops_3d.sample_sequence(
audio, 20, random=False, stride=1) audio, 20, random=False, stride=1)
audio = tf.ensure_shape(audio, [20, 2048]) audio = tf.ensure_shape(audio, self._audio_shape)
features['audio'] = audio features['audio'] = audio
return features, label return features, label
...@@ -318,9 +316,9 @@ class PostBatchProcessor(object): ...@@ -318,9 +316,9 @@ class PostBatchProcessor(object):
def __call__(self, features: Dict[str, tf.Tensor], def __call__(self, features: Dict[str, tf.Tensor],
label: tf.Tensor) -> Tuple[Dict[str, tf.Tensor], tf.Tensor]: label: tf.Tensor) -> Tuple[Dict[str, tf.Tensor], tf.Tensor]:
"""Parses a single tf.Example into image and label tensors.""" """Parses a single tf.Example into image and label tensors."""
for key in ['image', 'audio']: for key in ['image']:
if key in features: if key in features:
features[key] = _postprocess_image( features[key] = postprocess_image(
image=features[key], image=features[key],
is_training=self._is_training, is_training=self._is_training,
num_frames=self._num_frames, num_frames=self._num_frames,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment