Commit 7d53f9da authored by Vishnu Banna

address comments

parent a0904210
@@ -14,6 +14,7 @@
"""Detection Data parser and processing for YOLO."""
import tensorflow as tf
import numpy as np
from official.vision.beta.projects.yolo.ops import preprocessing_ops
from official.vision.beta.projects.yolo.ops import anchor
from official.vision.beta.ops import preprocess_ops
@@ -56,7 +57,8 @@ class Parser(parser.Parser):
Args:
output_size: `Tensor` or `List` for [height, width] of output image. The
output_size should be divisible by the largest feature stride 2^max_level.
anchors: `Dict[List[Union[int, float]]]` values for each anchor box.
anchors: `Dict[List[Union[int, float]]]` of anchor boxes to be used
in each level.
expanded_strides: `Dict[int]` for how much the model scales down the
images at the largest level. For example, level 3 downsamples the image
by a factor of 16; in the expanded strides dictionary, we will pass
@@ -115,7 +117,7 @@ class Parser(parser.Parser):
from {"float32", "float16", "bfloat16"}.
seed: `int` the seed for random number generation.
"""
for key in anchors.keys():
for key in anchors:
# Assert that the width and height are viable
assert output_size[1] % expanded_strides[str(key)] == 0
assert output_size[0] % expanded_strides[str(key)] == 0
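For reference, a minimal sketch of parser inputs that satisfy the divisibility check above; the stride and anchor values below are hypothetical, not taken from this commit's configs:

```python
# Hypothetical example: every output dimension must be divisible by each
# level's expanded stride, matching the assertions above.
output_size = [512, 512]                       # [height, width]
expanded_strides = {'3': 8, '4': 16, '5': 32}  # per-level downsample factor
anchors = {3: [[12, 16], [19, 36]],            # per-level anchor boxes (w, h)
           4: [[40, 28], [36, 75]],
           5: [[142, 110], [192, 243]]}

for key in anchors:
  assert output_size[1] % expanded_strides[str(key)] == 0
  assert output_size[0] % expanded_strides[str(key)] == 0
```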
@@ -371,5 +373,3 @@ class Parser(parser.Parser):
groundtruths, self._max_num_instances)
labels['groundtruths'] = groundtruths
return image, labels
@@ -558,6 +558,7 @@ def _anchor_free_scale_boxes(encoded_boxes,
pred_box = scale_down(scaled_box, (scaler * stride))
return (scaler, scaled_box, pred_box)
def get_predicted_box(width,
height,
encoded_boxes,
......
@@ -105,11 +105,11 @@ class Mosaic:
"""Generate a random center to use for slicing and patching the images."""
if self._mosaic_crop_mode == 'crop':
min_offset = self._mosaic_center
cut_x = preprocessing_ops.rand_uniform_strong(
cut_x = preprocessing_ops.random_uniform_strong(
self._output_size[1] * min_offset,
self._output_size[1] * (1 - min_offset),
seed=self._seed)
cut_y = preprocessing_ops.rand_uniform_strong(
cut_y = preprocessing_ops.random_uniform_strong(
self._output_size[0] * min_offset,
self._output_size[0] * (1 - min_offset),
seed=self._seed)
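As a rough worked example of the crop-center sampling above (the numbers are hypothetical): with a 640x640 output and a mosaic center fraction of 0.25, the cut point lands in the middle half of the canvas:

```python
# Hypothetical values for the sampling above.
output_size = [640, 640]  # [height, width]
min_offset = 0.25         # self._mosaic_center

# cut_x is drawn uniformly from [160, 480]; cut_y uses the same bounds here
# only because the output is square.
cut_x_bounds = (output_size[1] * min_offset, output_size[1] * (1 - min_offset))
cut_y_bounds = (output_size[0] * min_offset, output_size[0] * (1 - min_offset))
```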
@@ -190,10 +190,10 @@ class Mosaic:
# shift the center of the image by applying a translation to the whole
# image
ch = tf.math.round(
preprocessing_ops.rand_uniform_strong(
preprocessing_ops.random_uniform_strong(
-center[0], center[0], seed=self._seed))
cw = tf.math.round(
preprocessing_ops.rand_uniform_strong(
preprocessing_ops.random_uniform_strong(
-center[1], center[1], seed=self._seed))
# clip the boxes to those within the image
@@ -302,7 +302,7 @@ class Mosaic:
if self._mosaic_frequency >= 1.0:
domo = 1.0
else:
domo = preprocessing_ops.rand_uniform_strong(
domo = preprocessing_ops.random_uniform_strong(
0.0, 1.0, dtype=tf.float32, seed=self._seed)
noop = one.copy()
@@ -324,14 +324,14 @@ class Mosaic:
if self._mixup_frequency >= 1.0:
domo = 1.0
else:
domo = preprocessing_ops.rand_uniform_strong(
domo = preprocessing_ops.random_uniform_strong(
0.0, 1.0, dtype=tf.float32, seed=self._seed)
noop = one.copy()
if domo >= (1 - self._mixup_frequency):
sample = one
otype = one["image"].dtype
r = preprocessing_ops.rand_uniform_strong(
r = preprocessing_ops.random_uniform_strong(
0.4, 0.6, tf.float32, seed=self._seed)
sample['image'] = (
r * tf.cast(one["image"], tf.float32) +
......
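A minimal sketch of the mixup blend this hunk touches, assuming the truncated continuation weights the second sample's image by (1 - r) as in standard mixup; the helper name and shapes below are illustrative only:

```python
import tensorflow as tf

def mixup_images(image_one, image_two, r):
  """Blend two images with weight r (standard mixup)."""
  return (r * tf.cast(image_one, tf.float32) +
          (1.0 - r) * tf.cast(image_two, tf.float32))

# In the code above, r is drawn uniformly from [0.4, 0.6], so both samples
# always contribute a comparable share to the blend, and the op only fires
# when domo >= 1 - self._mixup_frequency.
```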
@@ -27,7 +27,7 @@ def set_random_seeds(seed=0):
tf.random.set_seed(seed)
np.random.seed(seed)
def rand_uniform_strong(minval, maxval, dtype=tf.float32, seed=None, shape=[]):
def random_uniform_strong(minval, maxval, dtype=tf.float32, seed=None, shape=[]):
"""A unified function for consistent random number generation.
Equivalent to tf.random.uniform, except that minval and maxval are flipped if
@@ -52,7 +52,7 @@ def rand_uniform_strong(minval, maxval, dtype=tf.float32, seed=None, shape=[]):
shape=shape, minval=minval, maxval=maxval, seed=seed, dtype=dtype)
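For context, a minimal sketch of the behavior the renamed helper documents (swap the bounds when maxval is below minval, then defer to tf.random.uniform); this is an assumption based on the visible docstring and call, not a copy of the full body:

```python
import tensorflow as tf

def random_uniform_strong_sketch(minval, maxval, dtype=tf.float32,
                                 seed=None, shape=()):
  """Like tf.random.uniform, but tolerant of swapped bounds."""
  if maxval < minval:
    minval, maxval = maxval, minval
  return tf.random.uniform(
      shape=shape, minval=minval, maxval=maxval, seed=seed, dtype=dtype)
```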
def rand_scale(val, dtype=tf.float32, seed=None):
def random_scale(val, dtype=tf.float32, seed=None):
"""Generates a random number for scaling a parameter by multiplication.
Generates a random number for the scale. Half of the time, the value is
@@ -68,8 +68,8 @@ def rand_scale(val, dtype=tf.float32, seed=None):
Returns:
The random scale.
"""
scale = rand_uniform_strong(1.0, val, dtype=dtype, seed=seed)
do_ret = rand_uniform_strong(minval=0, maxval=2, dtype=tf.int32, seed=seed)
scale = random_uniform_strong(1.0, val, dtype=dtype, seed=seed)
do_ret = random_uniform_strong(minval=0, maxval=2, dtype=tf.int32, seed=seed)
if (do_ret == 1):
return scale
return 1.0 / scale
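A small usage sketch of the renamed random_scale helper, assuming preprocessing_ops is imported from this project as in the parser above; the value 1.5 is hypothetical:

```python
# Draws s uniformly from [1.0, 1.5), then returns either s or 1/s with equal
# probability, so the parameter is as likely to shrink as it is to grow.
saturation_scale = preprocessing_ops.random_scale(1.5, seed=42)
```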
@@ -145,13 +145,13 @@ def get_image_shape(image):
def _augment_hsv_darknet(image, rh, rs, rv, seed=None):
"""Randomize the hue, saturation, and brightness via the darknet method."""
if rh > 0.0:
delta = rand_uniform_strong(-rh, rh, seed=seed)
delta = random_uniform_strong(-rh, rh, seed=seed)
image = tf.image.adjust_hue(image, delta)
if rs > 0.0:
delta = rand_scale(rs, seed=seed)
delta = random_scale(rs, seed=seed)
image = tf.image.adjust_saturation(image, delta)
if rv > 0.0:
delta = rand_scale(rv, seed=seed)
delta = random_scale(rv, seed=seed)
image *= delta
# clip the values of the image between 0.0 and 1.0
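A rough usage sketch of the darknet-style HSV jitter updated here; the image, jitter magnitudes, and direct call to the private helper are all illustrative assumptions:

```python
import tensorflow as tf

# Hypothetical float image in [0, 1] with shape (height, width, 3).
image = tf.random.uniform([416, 416, 3], 0.0, 1.0)

# rh shifts hue by a delta in [-rh, rh]; rs and rv rescale saturation and
# brightness by a factor between 1/1.5 and 1.5 via random_scale.
augmented = preprocessing_ops._augment_hsv_darknet(
    image, rh=0.1, rs=1.5, rv=1.5, seed=0)
```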
@@ -166,7 +166,7 @@ def _augment_hsv_torch(image, rh, rs, rv, seed=None):
image = tf.image.rgb_to_hsv(image)
gen_range = tf.cast([rh, rs, rv], image.dtype)
scale = tf.cast([180, 255, 255], image.dtype)
r = rand_uniform_strong(
r = random_uniform_strong(
-1, 1, shape=[3], dtype=image.dtype, seed=seed) * gen_range + 1
image = tf.math.floor(tf.cast(image, scale.dtype) * scale)
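For context, a small sketch of how the torch-style variant builds its per-channel gains; the magnitudes are hypothetical and tf.random.uniform stands in for the project's random_uniform_strong:

```python
import tensorflow as tf

rh, rs, rv = 0.015, 0.7, 0.4  # hypothetical hue/saturation/value magnitudes
gen_range = tf.constant([rh, rs, rv], tf.float32)

# One independent gain per HSV channel, each in [1 - magnitude, 1 + magnitude],
# later applied to the HSV image scaled by [180, 255, 255].
r = tf.random.uniform([3], -1.0, 1.0) * gen_range + 1.0
```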
@@ -374,13 +374,13 @@ def resize_and_jitter_image(image,
# location of the corner points.
jitter_width = original_width * jitter
jitter_height = original_height * jitter
pleft = rand_uniform_strong(
pleft = random_uniform_strong(
-jitter_width, jitter_width, jitter_width.dtype, seed=seed)
pright = rand_uniform_strong(
pright = random_uniform_strong(
-jitter_width, jitter_width, jitter_width.dtype, seed=seed)
ptop = rand_uniform_strong(
ptop = random_uniform_strong(
-jitter_height, jitter_height, jitter_height.dtype, seed=seed)
pbottom = rand_uniform_strong(
pbottom = random_uniform_strong(
-jitter_height, jitter_height, jitter_height.dtype, seed=seed)
# Letter box the image.
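A rough worked example of the corner jitter sampled above, with tf.random.uniform standing in for the project helper; the image size and jitter fraction are hypothetical:

```python
import tensorflow as tf

original_width, original_height, jitter = 640.0, 480.0, 0.3
jitter_width = original_width * jitter    # 192.0
jitter_height = original_height * jitter  # 144.0

# Each padding offset is drawn independently, so the crop window can both
# grow and shrink on every side of the image.
pleft = tf.random.uniform([], -jitter_width, jitter_width)
ptop = tf.random.uniform([], -jitter_height, jitter_height)
```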
@@ -530,7 +530,7 @@ def _build_transform(image,
# Compute a random rotation to apply.
rotation = tf.eye(3, dtype=tf.float32)
a = deg_to_rad(rand_uniform_strong(-degrees, degrees, seed=seed))
a = deg_to_rad(random_uniform_strong(-degrees, degrees, seed=seed))
cos = tf.math.cos(a)
sin = tf.math.sin(a)
rotation = tf.tensor_scatter_nd_update(rotation,
@@ -542,8 +542,8 @@ def _build_transform(image,
# Compute a random perspective change to apply.
prespective_warp = tf.eye(3)
Px = rand_uniform_strong(-perspective, perspective, seed=seed)
Py = rand_uniform_strong(-perspective, perspective, seed=seed)
Px = random_uniform_strong(-perspective, perspective, seed=seed)
Py = random_uniform_strong(-perspective, perspective, seed=seed)
prespective_warp = tf.tensor_scatter_nd_update(prespective_warp,
[[2, 0], [2, 1]], [Px, Py])
prespective_warp_boxes = tf.tensor_scatter_nd_update(prespective_warp,
@@ -552,7 +552,7 @@ def _build_transform(image,
# Compute a random scaling to apply.
scale = tf.eye(3, dtype=tf.float32)
s = rand_uniform_strong(scale_min, scale_max, seed=seed)
s = random_uniform_strong(scale_min, scale_max, seed=seed)
scale = tf.tensor_scatter_nd_update(scale, [[0, 0], [1, 1]], [1 / s, 1 / s])
scale_boxes = tf.tensor_scatter_nd_update(scale, [[0, 0], [1, 1]], [s, s])
@@ -562,14 +562,14 @@ def _build_transform(image,
# The image is contained within the image and arbitrarily translated to
# locations within the image.
center = center_boxes = tf.eye(3, dtype=tf.float32)
Tx = rand_uniform_strong(-1, 0, seed=seed) * (cw / s - width)
Ty = rand_uniform_strong(-1, 0, seed=seed) * (ch / s - height)
Tx = random_uniform_strong(-1, 0, seed=seed) * (cw / s - width)
Ty = random_uniform_strong(-1, 0, seed=seed) * (ch / s - height)
else:
# The image can be translated outside of the output resolution window,
# but the image is translated relative to the output resolution, not the
# input image resolution.
Tx = rand_uniform_strong(0.5 - translate, 0.5 + translate, seed=seed)
Ty = rand_uniform_strong(0.5 - translate, 0.5 + translate, seed=seed)
Tx = random_uniform_strong(0.5 - translate, 0.5 + translate, seed=seed)
Ty = random_uniform_strong(0.5 - translate, 0.5 + translate, seed=seed)
# Center and scale the image such that the window of translation is
# contained within the output resolution.
......
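A minimal sketch of how one of the 3x3 transform components in _build_transform is assembled, using the standard 2D rotation layout as an assumption since the exact scatter indices are collapsed in this diff; the angle is fixed purely for illustration:

```python
import tensorflow as tf

a = 0.1  # rotation angle in radians (the real code samples it randomly)
cos, sin = tf.math.cos(a), tf.math.sin(a)

# Embed a 2D rotation in a 3x3 homogeneous-coordinate matrix.
rotation = tf.eye(3, dtype=tf.float32)
rotation = tf.tensor_scatter_nd_update(
    rotation,
    [[0, 0], [0, 1], [1, 0], [1, 1]],
    [cos, -sin, sin, cos])
```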