Commit 7d53f9da authored by Vishnu Banna

address comments

parent a0904210
@@ -14,6 +14,7 @@
"""Detection Data parser and processing for YOLO."""
import tensorflow as tf
import numpy as np
from official.vision.beta.projects.yolo.ops import preprocessing_ops
from official.vision.beta.projects.yolo.ops import anchor
from official.vision.beta.ops import preprocess_ops
@@ -56,7 +57,8 @@ class Parser(parser.Parser):
Args:
output_size: `Tensor` or `List` for [height, width] of output image. The
output_size should be divisible by the largest feature stride 2^max_level.
anchors: `Dict[List[Union[int, float]]]` values for each anchor box.
anchors: `Dict[List[Union[int, float]]]` of anchor boxes to be used
in each level.
expanded_strides: `Dict[int]` for how much the model scales down the
images at the largest level. For example, level 3 downsamples the image
by a factor of 16; in the expanded strides dictionary, we will pass
@@ -115,7 +117,7 @@ class Parser(parser.Parser):
from {"float32", "float16", "bfloat16"}.
seed: `int` the seed for random number generation.
"""
for key in anchors.keys():
for key in anchors:
# Assert that the width and height are viable
assert output_size[1] % expanded_strides[str(key)] == 0
assert output_size[0] % expanded_strides[str(key)] == 0
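For reference, a minimal sketch of parser inputs that satisfy the divisibility check above; the stride and anchor values below are hypothetical, not taken from this commit's configs:

```python
# Hypothetical example: every output dimension must be divisible by each
# level's expanded stride, matching the assertions above.
output_size = [512, 512]                       # [height, width]
expanded_strides = {'3': 8, '4': 16, '5': 32}  # per-level downsample factor
anchors = {3: [[12, 16], [19, 36]],            # per-level anchor boxes (w, h)
           4: [[40, 28], [36, 75]],
           5: [[142, 110], [192, 243]]}

for key in anchors:
  assert output_size[1] % expanded_strides[str(key)] == 0
  assert output_size[0] % expanded_strides[str(key)] == 0
```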
@@ -371,5 +373,3 @@ class Parser(parser.Parser):
groundtruths, self._max_num_instances)
labels['groundtruths'] = groundtruths
return image, labels
@@ -558,6 +558,7 @@ def _anchor_free_scale_boxes(encoded_boxes,
pred_box = scale_down(scaled_box, (scaler * stride))
return (scaler, scaled_box, pred_box)
def get_predicted_box(width,
height,
encoded_boxes,
......
@@ -105,11 +105,11 @@ class Mosaic:
"""Generate a random center to use for slicing and patching the images."""
if self._mosaic_crop_mode == 'crop':
min_offset = self._mosaic_center
cut_x = preprocessing_ops.rand_uniform_strong(
cut_x = preprocessing_ops.random_uniform_strong(
self._output_size[1] * min_offset,
self._output_size[1] * (1 - min_offset),
seed=self._seed)
cut_y = preprocessing_ops.rand_uniform_strong(
cut_y = preprocessing_ops.random_uniform_strong(
self._output_size[0] * min_offset,
self._output_size[0] * (1 - min_offset),
seed=self._seed)
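As a rough worked example of the crop-center sampling above (the numbers are hypothetical): with a 640x640 output and a mosaic center fraction of 0.25, the cut point lands in the middle half of the canvas:

```python
# Hypothetical values for the sampling above.
output_size = [640, 640]  # [height, width]
min_offset = 0.25         # self._mosaic_center

# cut_x is drawn uniformly from [160, 480]; cut_y uses the same bounds here
# only because the output is square.
cut_x_bounds = (output_size[1] * min_offset, output_size[1] * (1 - min_offset))
cut_y_bounds = (output_size[0] * min_offset, output_size[0] * (1 - min_offset))
```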
@@ -190,10 +190,10 @@ class Mosaic:
# shift the center of the image by applying a translation to the whole
# image
ch = tf.math.round(
preprocessing_ops.rand_uniform_strong(
preprocessing_ops.random_uniform_strong(
-center[0], center[0], seed=self._seed))
cw = tf.math.round(
preprocessing_ops.rand_uniform_strong(
preprocessing_ops.random_uniform_strong(
-center[1], center[1], seed=self._seed))
# clip the boxes to those within the image
@@ -302,7 +302,7 @@ class Mosaic:
if self._mosaic_frequency >= 1.0:
domo = 1.0
else:
domo = preprocessing_ops.rand_uniform_strong(
domo = preprocessing_ops.random_uniform_strong(
0.0, 1.0, dtype=tf.float32, seed=self._seed)
noop = one.copy()
@@ -324,14 +324,14 @@ class Mosaic:
if self._mixup_frequency >= 1.0:
domo = 1.0
else:
domo = preprocessing_ops.rand_uniform_strong(
domo = preprocessing_ops.random_uniform_strong(
0.0, 1.0, dtype=tf.float32, seed=self._seed)
noop = one.copy()
if domo >= (1 - self._mixup_frequency):
sample = one
otype = one["image"].dtype
r = preprocessing_ops.rand_uniform_strong(
r = preprocessing_ops.random_uniform_strong(
0.4, 0.6, tf.float32, seed=self._seed)
sample['image'] = (
r * tf.cast(one["image"], tf.float32) +
......
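A minimal sketch of the mixup blend this hunk touches, assuming the truncated continuation weights the second sample's image by (1 - r) as in standard mixup; the helper name and shapes below are illustrative only:

```python
import tensorflow as tf

def mixup_images(image_one, image_two, r):
  """Blend two images with weight r (standard mixup)."""
  return (r * tf.cast(image_one, tf.float32) +
          (1.0 - r) * tf.cast(image_two, tf.float32))

# In the code above, r is drawn uniformly from [0.4, 0.6], so both samples
# always contribute a comparable share to the blend, and the op only fires
# when domo >= 1 - self._mixup_frequency.
```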
@@ -27,7 +27,7 @@ def set_random_seeds(seed=0):
tf.random.set_seed(seed)
np.random.seed(seed)
def rand_uniform_strong(minval, maxval, dtype=tf.float32, seed=None, shape=[]):
def random_uniform_strong(minval, maxval, dtype=tf.float32, seed=None, shape=[]):
"""A unified function for consistent random number generation.
Equivalent to tf.random.uniform, except that minval and maxval are flipped if
@@ -52,7 +52,7 @@ def rand_uniform_strong(minval, maxval, dtype=tf.float32, seed=None, shape=[]):
shape=shape, minval=minval, maxval=maxval, seed=seed, dtype=dtype)
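For context, a minimal sketch of the behavior the renamed helper documents (swap the bounds when maxval is below minval, then defer to tf.random.uniform); this is an assumption based on the visible docstring and call, not a copy of the full body:

```python
import tensorflow as tf

def random_uniform_strong_sketch(minval, maxval, dtype=tf.float32,
                                 seed=None, shape=()):
  """Like tf.random.uniform, but tolerant of swapped bounds."""
  if maxval < minval:
    minval, maxval = maxval, minval
  return tf.random.uniform(
      shape=shape, minval=minval, maxval=maxval, seed=seed, dtype=dtype)
```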
def rand_scale(val, dtype=tf.float32, seed=None):
def random_scale(val, dtype=tf.float32, seed=None):
"""Generates a random number for scaling a parameter by multiplication.
Generates a random number for the scale. Half of the time, the value is
@@ -68,8 +68,8 @@ def rand_scale(val, dtype=tf.float32, seed=None):
Returns:
The random scale.
"""
scale = rand_uniform_strong(1.0, val, dtype=dtype, seed=seed)
do_ret = rand_uniform_strong(minval=0, maxval=2, dtype=tf.int32, seed=seed)
scale = random_uniform_strong(1.0, val, dtype=dtype, seed=seed)
do_ret = random_uniform_strong(minval=0, maxval=2, dtype=tf.int32, seed=seed)
if (do_ret == 1):
return scale
return 1.0 / scale
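A small usage sketch of the renamed random_scale helper, assuming preprocessing_ops is imported from this project as in the parser above; the value 1.5 is hypothetical:

```python
# Draws s uniformly from [1.0, 1.5), then returns either s or 1/s with equal
# probability, so the parameter is as likely to shrink as it is to grow.
saturation_scale = preprocessing_ops.random_scale(1.5, seed=42)
```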
@@ -145,13 +145,13 @@ def get_image_shape(image):
def _augment_hsv_darknet(image, rh, rs, rv, seed=None):
"""Randomize the hue, saturation, and brightness via the darknet method."""
if rh > 0.0:
delta = rand_uniform_strong(-rh, rh, seed=seed)
delta = random_uniform_strong(-rh, rh, seed=seed)
image = tf.image.adjust_hue(image, delta)
if rs > 0.0:
delta = rand_scale(rs, seed=seed)
delta = random_scale(rs, seed=seed)
image = tf.image.adjust_saturation(image, delta)
if rv > 0.0:
delta = rand_scale(rv, seed=seed)
delta = random_scale(rv, seed=seed)
image *= delta
# clip the values of the image between 0.0 and 1.0
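A rough usage sketch of the darknet-style HSV jitter updated here; the image, jitter magnitudes, and direct call to the private helper are all illustrative assumptions:

```python
import tensorflow as tf

# Hypothetical float image in [0, 1] with shape (height, width, 3).
image = tf.random.uniform([416, 416, 3], 0.0, 1.0)

# rh shifts hue by a delta in [-rh, rh]; rs and rv rescale saturation and
# brightness by a factor between 1/1.5 and 1.5 via random_scale.
augmented = preprocessing_ops._augment_hsv_darknet(
    image, rh=0.1, rs=1.5, rv=1.5, seed=0)
```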
@@ -166,7 +166,7 @@ def _augment_hsv_torch(image, rh, rs, rv, seed=None):
image = tf.image.rgb_to_hsv(image)
gen_range = tf.cast([rh, rs, rv], image.dtype)
scale = tf.cast([180, 255, 255], image.dtype)
r = rand_uniform_strong(
r = random_uniform_strong(
-1, 1, shape=[3], dtype=image.dtype, seed=seed) * gen_range + 1
image = tf.math.floor(tf.cast(image, scale.dtype) * scale)
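For context, a small sketch of how the torch-style variant builds its per-channel gains; the magnitudes are hypothetical and tf.random.uniform stands in for the project's random_uniform_strong:

```python
import tensorflow as tf

rh, rs, rv = 0.015, 0.7, 0.4  # hypothetical hue/saturation/value magnitudes
gen_range = tf.constant([rh, rs, rv], tf.float32)

# One independent gain per HSV channel, each in [1 - magnitude, 1 + magnitude],
# later applied to the HSV image scaled by [180, 255, 255].
r = tf.random.uniform([3], -1.0, 1.0) * gen_range + 1.0
```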
@@ -374,13 +374,13 @@ def resize_and_jitter_image(image,
# location of the corner points.
jitter_width = original_width * jitter
jitter_height = original_height * jitter
pleft = rand_uniform_strong(
pleft = random_uniform_strong(
-jitter_width, jitter_width, jitter_width.dtype, seed=seed)
pright = rand_uniform_strong(
pright = random_uniform_strong(
-jitter_width, jitter_width, jitter_width.dtype, seed=seed)
ptop = rand_uniform_strong(
ptop = random_uniform_strong(
-jitter_height, jitter_height, jitter_height.dtype, seed=seed)
pbottom = rand_uniform_strong(
pbottom = random_uniform_strong(
-jitter_height, jitter_height, jitter_height.dtype, seed=seed)
# Letter box the image.
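A rough worked example of the corner jitter sampled above, with tf.random.uniform standing in for the project helper; the image size and jitter fraction are hypothetical:

```python
import tensorflow as tf

original_width, original_height, jitter = 640.0, 480.0, 0.3
jitter_width = original_width * jitter    # 192.0
jitter_height = original_height * jitter  # 144.0

# Each padding offset is drawn independently, so the crop window can both
# grow and shrink on every side of the image.
pleft = tf.random.uniform([], -jitter_width, jitter_width)
ptop = tf.random.uniform([], -jitter_height, jitter_height)
```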
@@ -530,7 +530,7 @@ def _build_transform(image,
# Compute a random rotation to apply.
rotation = tf.eye(3, dtype=tf.float32)
a = deg_to_rad(rand_uniform_strong(-degrees, degrees, seed=seed))
a = deg_to_rad(random_uniform_strong(-degrees, degrees, seed=seed))
cos = tf.math.cos(a)
sin = tf.math.sin(a)
rotation = tf.tensor_scatter_nd_update(rotation,
@@ -542,8 +542,8 @@ def _build_transform(image,
# Compute a random perspective change to apply.
prespective_warp = tf.eye(3)
Px = rand_uniform_strong(-perspective, perspective, seed=seed)
Py = rand_uniform_strong(-perspective, perspective, seed=seed)
Px = random_uniform_strong(-perspective, perspective, seed=seed)
Py = random_uniform_strong(-perspective, perspective, seed=seed)
prespective_warp = tf.tensor_scatter_nd_update(prespective_warp,
[[2, 0], [2, 1]], [Px, Py])
prespective_warp_boxes = tf.tensor_scatter_nd_update(prespective_warp,
@@ -552,7 +552,7 @@ def _build_transform(image,
# Compute a random scaling to apply.
scale = tf.eye(3, dtype=tf.float32)
s = rand_uniform_strong(scale_min, scale_max, seed=seed)
s = random_uniform_strong(scale_min, scale_max, seed=seed)
scale = tf.tensor_scatter_nd_update(scale, [[0, 0], [1, 1]], [1 / s, 1 / s])
scale_boxes = tf.tensor_scatter_nd_update(scale, [[0, 0], [1, 1]], [s, s])
@@ -562,14 +562,14 @@ def _build_transform(image,
# The image is contained within the image and arbitrarily translated to
# locations within the image.
center = center_boxes = tf.eye(3, dtype=tf.float32)
Tx = rand_uniform_strong(-1, 0, seed=seed) * (cw / s - width)
Ty = rand_uniform_strong(-1, 0, seed=seed) * (ch / s - height)
Tx = random_uniform_strong(-1, 0, seed=seed) * (cw / s - width)
Ty = random_uniform_strong(-1, 0, seed=seed) * (ch / s - height)
else:
# The image can be translated outside of the output resolution window,
# but the image is translated relative to the output resolution, not the
# input image resolution.
Tx = rand_uniform_strong(0.5 - translate, 0.5 + translate, seed=seed)
Ty = rand_uniform_strong(0.5 - translate, 0.5 + translate, seed=seed)
Tx = random_uniform_strong(0.5 - translate, 0.5 + translate, seed=seed)
Ty = random_uniform_strong(0.5 - translate, 0.5 + translate, seed=seed)
# Center and scale the image such that the window of translation is
# contained within the output resolution.
......
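A minimal sketch of how one of the 3x3 transform components in _build_transform is assembled, using the standard 2D rotation layout as an assumption since the exact scatter indices are collapsed in this diff; the angle is fixed purely for illustration:

```python
import tensorflow as tf

a = 0.1  # rotation angle in radians (the real code samples it randomly)
cos, sin = tf.math.cos(a), tf.math.sin(a)

# Embed a 2D rotation in a 3x3 homogeneous-coordinate matrix.
rotation = tf.eye(3, dtype=tf.float32)
rotation = tf.tensor_scatter_nd_update(
    rotation,
    [[0, 0], [0, 1], [1, 0], [1, 1]],
    [cos, -sin, sin, cos])
```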