Internal change

PiperOrigin-RevId: 406396382

Internal change
PiperOrigin-RevId: 406396382
fa46f548 · Abdullah Rashwan · A. Unique TensorFlower · 19620a5d · fa46f548 · fa46f548
Commit fa46f548 authored Oct 29, 2021 by Abdullah Rashwan Committed by A. Unique TensorFlower Oct 29, 2021
3 changed files
--- a/official/vision/beta/configs/semantic_segmentation.py
+++ b/official/vision/beta/configs/semantic_segmentation.py
@@ -50,6 +50,7 @@ class DataConfig(cfg.DataConfig):
  aug_scale_min: float = 1.0
  aug_scale_max: float = 1.0
  aug_rand_hflip: bool = True
+  preserve_aspect_ratio: bool = True
  aug_policy: Optional[str] = None
  drop_remainder: bool = True
  file_type: str = 'tfrecord'

--- a/official/vision/beta/dataloaders/segmentation_input.py
+++ b/official/vision/beta/dataloaders/segmentation_input.py
@@ -48,6 +48,7 @@ class Parser(parser.Parser):
               groundtruth_padded_size=None,
               ignore_label=255,
               aug_rand_hflip=False,
+               preserve_aspect_ratio=True,
               aug_scale_min=1.0,
               aug_scale_max=1.0,
               dtype='float32'):
@@ -69,6 +70,8 @@ class Parser(parser.Parser):
        and evaluation.
      aug_rand_hflip: `bool`, if True, augment training with random
        horizontal flip.
+      preserve_aspect_ratio: `bool`, if True, the aspect ratio is preserved;
+        otherwise, the image is resized to `output_size`.
      aug_scale_min: `float`, the minimum scale applied to `output_size` for
        data augmentation during training.
      aug_scale_max: `float`, the maximum scale applied to `output_size` for
@@ -83,6 +86,7 @@ class Parser(parser.Parser):
                       'specified when resize_eval_groundtruth is False.')
    self._groundtruth_padded_size = groundtruth_padded_size
    self._ignore_label = ignore_label
+    self._preserve_aspect_ratio = preserve_aspect_ratio
    # Data augmentation.
    self._aug_rand_hflip = aug_rand_hflip
@@ -105,6 +109,13 @@ class Parser(parser.Parser):
    label = tf.cast(label, tf.float32)
    # Normalizes image with mean and std pixel values.
    image = preprocess_ops.normalize_image(image)
+    if not self._preserve_aspect_ratio:
+      label = tf.reshape(label, [data['image/height'], data['image/width'], 1])
+      image = tf.image.resize(image, self._output_size, method='bilinear')
+      label = tf.image.resize(label, self._output_size, method='nearest')
+      label = tf.reshape(label[:, :, -1], [1] + self._output_size)
    return image, label
  def _parse_train_data(self, data):

--- a/official/vision/beta/tasks/semantic_segmentation.py
+++ b/official/vision/beta/tasks/semantic_segmentation.py
@@ -100,6 +100,7 @@ class SemanticSegmentationTask(base_task.Task):
        aug_scale_min=params.aug_scale_min,
        aug_scale_max=params.aug_scale_max,
        aug_rand_hflip=params.aug_rand_hflip,
+        preserve_aspect_ratio=params.preserve_aspect_ratio,
        dtype=params.dtype)
    reader = input_reader_factory.input_reader_generator(