Commit 7d53f9da authored by Vishnu Banna's avatar Vishnu Banna
Browse files

address comments

parent a0904210
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
"""Detection Data parser and processing for YOLO.""" """Detection Data parser and processing for YOLO."""
import tensorflow as tf import tensorflow as tf
import numpy as np
from official.vision.beta.projects.yolo.ops import preprocessing_ops from official.vision.beta.projects.yolo.ops import preprocessing_ops
from official.vision.beta.projects.yolo.ops import anchor from official.vision.beta.projects.yolo.ops import anchor
from official.vision.beta.ops import preprocess_ops from official.vision.beta.ops import preprocess_ops
...@@ -56,7 +57,8 @@ class Parser(parser.Parser): ...@@ -56,7 +57,8 @@ class Parser(parser.Parser):
Args: Args:
output_size: `Tensor` or `List` for [height, width] of output image. The output_size: `Tensor` or `List` for [height, width] of output image. The
output_size should be divided by the largest feature stride 2^max_level. output_size should be divided by the largest feature stride 2^max_level.
anchors: `Dict[List[Union[int, float]]]` values for each anchor box. anchors: `Dict[List[Union[int, float]]]` of anchor boxes to be used
in each level.
expanded_strides: `Dict[int]` for how much the model scales down the expanded_strides: `Dict[int]` for how much the model scales down the
images at the largest level. For example, level 3 down samples the image images at the largest level. For example, level 3 down samples the image
by a factor of 16, in the expanded strides dictionary, we will pass by a factor of 16, in the expanded strides dictionary, we will pass
...@@ -115,7 +117,7 @@ class Parser(parser.Parser): ...@@ -115,7 +117,7 @@ class Parser(parser.Parser):
from {"float32", "float16", "bfloat16"}. from {"float32", "float16", "bfloat16"}.
seed: `int` the seed for random number generation. seed: `int` the seed for random number generation.
""" """
for key in anchors.keys(): for key in anchors:
# Assert that the width and height is viable # Assert that the width and height is viable
assert output_size[1] % expanded_strides[str(key)] == 0 assert output_size[1] % expanded_strides[str(key)] == 0
assert output_size[0] % expanded_strides[str(key)] == 0 assert output_size[0] % expanded_strides[str(key)] == 0
...@@ -371,5 +373,3 @@ class Parser(parser.Parser): ...@@ -371,5 +373,3 @@ class Parser(parser.Parser):
groundtruths, self._max_num_instances) groundtruths, self._max_num_instances)
labels['groundtruths'] = groundtruths labels['groundtruths'] = groundtruths
return image, labels return image, labels
...@@ -558,6 +558,7 @@ def _anchor_free_scale_boxes(encoded_boxes, ...@@ -558,6 +558,7 @@ def _anchor_free_scale_boxes(encoded_boxes,
pred_box = scale_down(scaled_box, (scaler * stride)) pred_box = scale_down(scaled_box, (scaler * stride))
return (scaler, scaled_box, pred_box) return (scaler, scaled_box, pred_box)
def get_predicted_box(width, def get_predicted_box(width,
height, height,
encoded_boxes, encoded_boxes,
......
...@@ -105,11 +105,11 @@ class Mosaic: ...@@ -105,11 +105,11 @@ class Mosaic:
"""Generate a random center to use for slicing and patching the images.""" """Generate a random center to use for slicing and patching the images."""
if self._mosaic_crop_mode == 'crop': if self._mosaic_crop_mode == 'crop':
min_offset = self._mosaic_center min_offset = self._mosaic_center
cut_x = preprocessing_ops.rand_uniform_strong( cut_x = preprocessing_ops.random_uniform_strong(
self._output_size[1] * min_offset, self._output_size[1] * min_offset,
self._output_size[1] * (1 - min_offset), self._output_size[1] * (1 - min_offset),
seed=self._seed) seed=self._seed)
cut_y = preprocessing_ops.rand_uniform_strong( cut_y = preprocessing_ops.random_uniform_strong(
self._output_size[0] * min_offset, self._output_size[0] * min_offset,
self._output_size[0] * (1 - min_offset), self._output_size[0] * (1 - min_offset),
seed=self._seed) seed=self._seed)
...@@ -190,10 +190,10 @@ class Mosaic: ...@@ -190,10 +190,10 @@ class Mosaic:
# shift the center of the image by applying a translation to the whole # shift the center of the image by applying a translation to the whole
# image # image
ch = tf.math.round( ch = tf.math.round(
preprocessing_ops.rand_uniform_strong( preprocessing_ops.random_uniform_strong(
-center[0], center[0], seed=self._seed)) -center[0], center[0], seed=self._seed))
cw = tf.math.round( cw = tf.math.round(
preprocessing_ops.rand_uniform_strong( preprocessing_ops.random_uniform_strong(
-center[1], center[1], seed=self._seed)) -center[1], center[1], seed=self._seed))
# clip the boxes to those within the image # clip the boxes to those within the image
...@@ -302,7 +302,7 @@ class Mosaic: ...@@ -302,7 +302,7 @@ class Mosaic:
if self._mosaic_frequency >= 1.0: if self._mosaic_frequency >= 1.0:
domo = 1.0 domo = 1.0
else: else:
domo = preprocessing_ops.rand_uniform_strong( domo = preprocessing_ops.random_uniform_strong(
0.0, 1.0, dtype=tf.float32, seed=self._seed) 0.0, 1.0, dtype=tf.float32, seed=self._seed)
noop = one.copy() noop = one.copy()
...@@ -324,14 +324,14 @@ class Mosaic: ...@@ -324,14 +324,14 @@ class Mosaic:
if self._mixup_frequency >= 1.0: if self._mixup_frequency >= 1.0:
domo = 1.0 domo = 1.0
else: else:
domo = preprocessing_ops.rand_uniform_strong( domo = preprocessing_ops.random_uniform_strong(
0.0, 1.0, dtype=tf.float32, seed=self._seed) 0.0, 1.0, dtype=tf.float32, seed=self._seed)
noop = one.copy() noop = one.copy()
if domo >= (1 - self._mixup_frequency): if domo >= (1 - self._mixup_frequency):
sample = one sample = one
otype = one["image"].dtype otype = one["image"].dtype
r = preprocessing_ops.rand_uniform_strong( r = preprocessing_ops.random_uniform_strong(
0.4, 0.6, tf.float32, seed=self._seed) 0.4, 0.6, tf.float32, seed=self._seed)
sample['image'] = ( sample['image'] = (
r * tf.cast(one["image"], tf.float32) + r * tf.cast(one["image"], tf.float32) +
......
...@@ -27,7 +27,7 @@ def set_random_seeds(seed=0): ...@@ -27,7 +27,7 @@ def set_random_seeds(seed=0):
tf.random.set_seed(seed) tf.random.set_seed(seed)
np.random.seed(seed) np.random.seed(seed)
def rand_uniform_strong(minval, maxval, dtype=tf.float32, seed=None, shape=[]): def random_uniform_strong(minval, maxval, dtype=tf.float32, seed=None, shape=[]):
"""A unified function for consistent random number generation. """A unified function for consistent random number generation.
Equivalent to tf.random.uniform, except that minval and maxval are flipped if Equivalent to tf.random.uniform, except that minval and maxval are flipped if
...@@ -52,7 +52,7 @@ def rand_uniform_strong(minval, maxval, dtype=tf.float32, seed=None, shape=[]): ...@@ -52,7 +52,7 @@ def rand_uniform_strong(minval, maxval, dtype=tf.float32, seed=None, shape=[]):
shape=shape, minval=minval, maxval=maxval, seed=seed, dtype=dtype) shape=shape, minval=minval, maxval=maxval, seed=seed, dtype=dtype)
def rand_scale(val, dtype=tf.float32, seed=None): def random_scale(val, dtype=tf.float32, seed=None):
"""Generates a random number for scaling a parameter by multiplication. """Generates a random number for scaling a parameter by multiplication.
Generates a random number for the scale. Half of the time, the value is Generates a random number for the scale. Half of the time, the value is
...@@ -68,8 +68,8 @@ def rand_scale(val, dtype=tf.float32, seed=None): ...@@ -68,8 +68,8 @@ def rand_scale(val, dtype=tf.float32, seed=None):
Returns: Returns:
The random scale. The random scale.
""" """
scale = rand_uniform_strong(1.0, val, dtype=dtype, seed=seed) scale = random_uniform_strong(1.0, val, dtype=dtype, seed=seed)
do_ret = rand_uniform_strong(minval=0, maxval=2, dtype=tf.int32, seed=seed) do_ret = random_uniform_strong(minval=0, maxval=2, dtype=tf.int32, seed=seed)
if (do_ret == 1): if (do_ret == 1):
return scale return scale
return 1.0 / scale return 1.0 / scale
...@@ -145,13 +145,13 @@ def get_image_shape(image): ...@@ -145,13 +145,13 @@ def get_image_shape(image):
def _augment_hsv_darknet(image, rh, rs, rv, seed=None): def _augment_hsv_darknet(image, rh, rs, rv, seed=None):
"""Randomize the hue, saturation, and brightness via the darknet method.""" """Randomize the hue, saturation, and brightness via the darknet method."""
if rh > 0.0: if rh > 0.0:
delta = rand_uniform_strong(-rh, rh, seed=seed) delta = random_uniform_strong(-rh, rh, seed=seed)
image = tf.image.adjust_hue(image, delta) image = tf.image.adjust_hue(image, delta)
if rs > 0.0: if rs > 0.0:
delta = rand_scale(rs, seed=seed) delta = random_scale(rs, seed=seed)
image = tf.image.adjust_saturation(image, delta) image = tf.image.adjust_saturation(image, delta)
if rv > 0.0: if rv > 0.0:
delta = rand_scale(rv, seed=seed) delta = random_scale(rv, seed=seed)
image *= delta image *= delta
# clip the values of the image between 0.0 and 1.0 # clip the values of the image between 0.0 and 1.0
...@@ -166,7 +166,7 @@ def _augment_hsv_torch(image, rh, rs, rv, seed=None): ...@@ -166,7 +166,7 @@ def _augment_hsv_torch(image, rh, rs, rv, seed=None):
image = tf.image.rgb_to_hsv(image) image = tf.image.rgb_to_hsv(image)
gen_range = tf.cast([rh, rs, rv], image.dtype) gen_range = tf.cast([rh, rs, rv], image.dtype)
scale = tf.cast([180, 255, 255], image.dtype) scale = tf.cast([180, 255, 255], image.dtype)
r = rand_uniform_strong( r = random_uniform_strong(
-1, 1, shape=[3], dtype=image.dtype, seed=seed) * gen_range + 1 -1, 1, shape=[3], dtype=image.dtype, seed=seed) * gen_range + 1
image = tf.math.floor(tf.cast(image, scale.dtype) * scale) image = tf.math.floor(tf.cast(image, scale.dtype) * scale)
...@@ -374,13 +374,13 @@ def resize_and_jitter_image(image, ...@@ -374,13 +374,13 @@ def resize_and_jitter_image(image,
# location of the corner points. # location of the corner points.
jitter_width = original_width * jitter jitter_width = original_width * jitter
jitter_height = original_height * jitter jitter_height = original_height * jitter
pleft = rand_uniform_strong( pleft = random_uniform_strong(
-jitter_width, jitter_width, jitter_width.dtype, seed=seed) -jitter_width, jitter_width, jitter_width.dtype, seed=seed)
pright = rand_uniform_strong( pright = random_uniform_strong(
-jitter_width, jitter_width, jitter_width.dtype, seed=seed) -jitter_width, jitter_width, jitter_width.dtype, seed=seed)
ptop = rand_uniform_strong( ptop = random_uniform_strong(
-jitter_height, jitter_height, jitter_height.dtype, seed=seed) -jitter_height, jitter_height, jitter_height.dtype, seed=seed)
pbottom = rand_uniform_strong( pbottom = random_uniform_strong(
-jitter_height, jitter_height, jitter_height.dtype, seed=seed) -jitter_height, jitter_height, jitter_height.dtype, seed=seed)
# Letter box the image. # Letter box the image.
...@@ -530,7 +530,7 @@ def _build_transform(image, ...@@ -530,7 +530,7 @@ def _build_transform(image,
# Compute a random rotation to apply. # Compute a random rotation to apply.
rotation = tf.eye(3, dtype=tf.float32) rotation = tf.eye(3, dtype=tf.float32)
a = deg_to_rad(rand_uniform_strong(-degrees, degrees, seed=seed)) a = deg_to_rad(random_uniform_strong(-degrees, degrees, seed=seed))
cos = tf.math.cos(a) cos = tf.math.cos(a)
sin = tf.math.sin(a) sin = tf.math.sin(a)
rotation = tf.tensor_scatter_nd_update(rotation, rotation = tf.tensor_scatter_nd_update(rotation,
...@@ -542,8 +542,8 @@ def _build_transform(image, ...@@ -542,8 +542,8 @@ def _build_transform(image,
# Compute a random perspective change to apply. # Compute a random perspective change to apply.
prespective_warp = tf.eye(3) prespective_warp = tf.eye(3)
Px = rand_uniform_strong(-perspective, perspective, seed=seed) Px = random_uniform_strong(-perspective, perspective, seed=seed)
Py = rand_uniform_strong(-perspective, perspective, seed=seed) Py = random_uniform_strong(-perspective, perspective, seed=seed)
prespective_warp = tf.tensor_scatter_nd_update(prespective_warp, prespective_warp = tf.tensor_scatter_nd_update(prespective_warp,
[[2, 0], [2, 1]], [Px, Py]) [[2, 0], [2, 1]], [Px, Py])
prespective_warp_boxes = tf.tensor_scatter_nd_update(prespective_warp, prespective_warp_boxes = tf.tensor_scatter_nd_update(prespective_warp,
...@@ -552,7 +552,7 @@ def _build_transform(image, ...@@ -552,7 +552,7 @@ def _build_transform(image,
# Compute a random scaling to apply. # Compute a random scaling to apply.
scale = tf.eye(3, dtype=tf.float32) scale = tf.eye(3, dtype=tf.float32)
s = rand_uniform_strong(scale_min, scale_max, seed=seed) s = random_uniform_strong(scale_min, scale_max, seed=seed)
scale = tf.tensor_scatter_nd_update(scale, [[0, 0], [1, 1]], [1 / s, 1 / s]) scale = tf.tensor_scatter_nd_update(scale, [[0, 0], [1, 1]], [1 / s, 1 / s])
scale_boxes = tf.tensor_scatter_nd_update(scale, [[0, 0], [1, 1]], [s, s]) scale_boxes = tf.tensor_scatter_nd_update(scale, [[0, 0], [1, 1]], [s, s])
...@@ -562,14 +562,14 @@ def _build_transform(image, ...@@ -562,14 +562,14 @@ def _build_transform(image,
# The image is contained within the image and arbitrarily translated to # The image is contained within the image and arbitrarily translated to
# locations within the image. # locations within the image.
center = center_boxes = tf.eye(3, dtype=tf.float32) center = center_boxes = tf.eye(3, dtype=tf.float32)
Tx = rand_uniform_strong(-1, 0, seed=seed) * (cw / s - width) Tx = random_uniform_strong(-1, 0, seed=seed) * (cw / s - width)
Ty = rand_uniform_strong(-1, 0, seed=seed) * (ch / s - height) Ty = random_uniform_strong(-1, 0, seed=seed) * (ch / s - height)
else: else:
# The image can be translated outside of the output resolution window # The image can be translated outside of the output resolution window
# but the image is translated relative to the output resolution not the # but the image is translated relative to the output resolution not the
# input image resolution. # input image resolution.
Tx = rand_uniform_strong(0.5 - translate, 0.5 + translate, seed=seed) Tx = random_uniform_strong(0.5 - translate, 0.5 + translate, seed=seed)
Ty = rand_uniform_strong(0.5 - translate, 0.5 + translate, seed=seed) Ty = random_uniform_strong(0.5 - translate, 0.5 + translate, seed=seed)
# Center and Scale the image such that the window of translation is # Center and Scale the image such that the window of translation is
# contained to the output resolution. # contained to the output resolution.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment