ModelZoo / ResNet50_tensorflow · Commit 7d1cfc1e

Authored Oct 23, 2019 by Yeqing Li; committed by A. Unique TensorFlower on Oct 23, 2019.

    Adds files to utils folder.

    PiperOrigin-RevId: 276317091

Parent: 638ba7a4

Showing 5 changed files with 567 additions and 29 deletions (+567 -29):
    official/vision/detection/utils/box_utils.py                      +306  -25
    official/vision/detection/utils/class_utils.py                     +44   -0
    official/vision/detection/utils/dataloader_utils.py                +40   -0
    official/vision/detection/utils/input_utils.py                    +103   -0
    official/vision/detection/utils/object_detection/preprocessor.py   +74   -4
official/vision/detection/utils/box_utils.py  (+306 -25)

```diff
@@ -26,6 +26,75 @@ EPSILON = 1e-8
 BBOX_XFORM_CLIP = np.log(1000. / 16.)
 
 
+def yxyx_to_xywh(boxes):
+  """Converts boxes from ymin, xmin, ymax, xmax to xmin, ymin, width, height.
+
+  Args:
+    boxes: a numpy array whose last dimension is 4 representing the
+      coordinates of boxes in ymin, xmin, ymax, xmax order.
+
+  Returns:
+    boxes: a numpy array whose shape is the same as `boxes` in new format.
+
+  Raises:
+    ValueError: If the last dimension of boxes is not 4.
+  """
+  if boxes.shape[-1] != 4:
+    raise ValueError(
+        'boxes.shape[-1] is {:d}, but must be 4.'.format(boxes.shape[-1]))
+
+  boxes_ymin = boxes[..., 0]
+  boxes_xmin = boxes[..., 1]
+  boxes_width = boxes[..., 3] - boxes[..., 1]
+  boxes_height = boxes[..., 2] - boxes[..., 0]
+  new_boxes = np.stack(
+      [boxes_xmin, boxes_ymin, boxes_width, boxes_height], axis=-1)
+
+  return new_boxes
+
+
+def jitter_boxes(boxes, noise_scale=0.025):
+  """Jitter the box coordinates by some noise distribution.
+
+  Args:
+    boxes: a tensor whose last dimension is 4 representing the coordinates of
+      boxes in ymin, xmin, ymax, xmax order.
+    noise_scale: a python float which specifies the magnitude of noise. The
+      rule of thumb is to set this between (0, 0.1]. The default value is
+      found to mimic the noisy detections best empirically.
+
+  Returns:
+    jittered_boxes: a tensor whose shape is the same as `boxes` representing
+      the jittered boxes.
+
+  Raises:
+    ValueError: If the last dimension of boxes is not 4.
+  """
+  if boxes.shape[-1] != 4:
+    raise ValueError(
+        'boxes.shape[-1] is {:d}, but must be 4.'.format(boxes.shape[-1]))
+
+  with tf.name_scope('jitter_boxes'):
+    bbox_jitters = tf.random.normal(boxes.get_shape(), stddev=noise_scale)
+    ymin = boxes[..., 0:1]
+    xmin = boxes[..., 1:2]
+    ymax = boxes[..., 2:3]
+    xmax = boxes[..., 3:4]
+    width = xmax - xmin
+    height = ymax - ymin
+    new_center_x = (xmin + xmax) / 2.0 + bbox_jitters[..., 0:1] * width
+    new_center_y = (ymin + ymax) / 2.0 + bbox_jitters[..., 1:2] * height
+    new_width = width * tf.math.exp(bbox_jitters[..., 2:3])
+    new_height = height * tf.math.exp(bbox_jitters[..., 3:4])
+    jittered_boxes = tf.concat(
+        [new_center_y - new_height * 0.5, new_center_x - new_width * 0.5,
+         new_center_y + new_height * 0.5, new_center_x + new_width * 0.5],
+        axis=-1)
+
+    return jittered_boxes
+
+
 def normalize_boxes(boxes, image_shape):
   """Converts boxes to the normalized coordinates.
```
```diff
@@ -44,8 +113,8 @@ def normalize_boxes(boxes, image_shape):
     ValueError: If the last dimension of boxes is not 4.
   """
   if boxes.shape[-1] != 4:
     raise ValueError(
-        'boxes.shape[1] is {:d}, but must be 4.'.format(boxes.shape[1]))
+        'boxes.shape[-1] is {:d}, but must be 4.'.format(boxes.shape[-1]))
 
   with tf.name_scope('normalize_boxes'):
     if isinstance(image_shape, list) or isinstance(image_shape, tuple):
```
```diff
@@ -86,13 +155,13 @@ def denormalize_boxes(boxes, image_shape):
       height, width = image_shape
     else:
       image_shape = tf.cast(image_shape, dtype=boxes.dtype)
-      height = image_shape[..., 0:1]
-      width = image_shape[..., 1:2]
+      height, width = tf.split(image_shape, 2, axis=-1)
 
-    ymin = boxes[..., 0:1] * height
-    xmin = boxes[..., 1:2] * width
-    ymax = boxes[..., 2:3] * height
-    xmax = boxes[..., 3:4] * width
+    ymin, xmin, ymax, xmax = tf.split(boxes, 4, axis=-1)
+    ymin = ymin * height
+    xmin = xmin * width
+    ymax = ymax * height
+    xmax = xmax * width
 
     denormalized_boxes = tf.concat([ymin, xmin, ymax, xmax], axis=-1)
     return denormalized_boxes
```
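A hedged round-trip sketch of the refactored normalize/denormalize pair (not from the commit); it assumes an image of [height, width] = [480, 640] and eager TF 2.x.

```python
import tensorflow as tf

from official.vision.detection.utils import box_utils

boxes = tf.constant([[24.0, 64.0, 240.0, 320.0]])          # pixel coords
normalized = box_utils.normalize_boxes(boxes, [480, 640])  # fractions of image
restored = box_utils.denormalize_boxes(normalized, [480, 640])
# `restored` equals `boxes` up to float rounding.
```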
```diff
@@ -116,10 +185,10 @@ def clip_boxes(boxes, image_shape):
     ValueError: If the last dimension of boxes is not 4.
   """
   if boxes.shape[-1] != 4:
     raise ValueError(
-        'boxes.shape[1] is {:d}, but must be 4.'.format(boxes.shape[1]))
+        'boxes.shape[-1] is {:d}, but must be 4.'.format(boxes.shape[-1]))
 
-  with tf.name_scope('crop_boxes'):
+  with tf.name_scope('clip_boxes'):
     if isinstance(image_shape, list) or isinstance(image_shape, tuple):
       height, width = image_shape
     else:
```
```diff
@@ -132,10 +201,10 @@ def clip_boxes(boxes, image_shape):
     ymax = boxes[..., 2:3]
     xmax = boxes[..., 3:4]
 
-    clipped_ymin = tf.maximum(tf.minimum(ymin, height - 1.0), 0.0)
-    clipped_ymax = tf.maximum(tf.minimum(ymax, height - 1.0), 0.0)
-    clipped_xmin = tf.maximum(tf.minimum(xmin, width - 1.0), 0.0)
-    clipped_xmax = tf.maximum(tf.minimum(xmax, width - 1.0), 0.0)
+    clipped_ymin = tf.math.maximum(tf.math.minimum(ymin, height - 1.0), 0.0)
+    clipped_ymax = tf.math.maximum(tf.math.minimum(ymax, height - 1.0), 0.0)
+    clipped_xmin = tf.math.maximum(tf.math.minimum(xmin, width - 1.0), 0.0)
+    clipped_xmax = tf.math.maximum(tf.math.minimum(xmax, width - 1.0), 0.0)
 
     clipped_boxes = tf.concat(
         [clipped_ymin, clipped_xmin, clipped_ymax, clipped_xmax],
```
```diff
@@ -143,14 +212,47 @@ def clip_boxes(boxes, image_shape):
     return clipped_boxes
 
 
+def compute_outer_boxes(boxes, image_shape, scale=1.0):
+  """Computes outer boxes that enclose an object with a margin.
+
+  Args:
+    boxes: a tensor whose last dimension is 4 representing the coordinates of
+      boxes in ymin, xmin, ymax, xmax order.
+    image_shape: a list of two integers, a two-element vector or a tensor such
+      that all but the last dimensions are `broadcastable` to `boxes`. The
+      last dimension is 2, which represents [height, width].
+    scale: a float number specifying the scale of output outer boxes to input
+      `boxes`.
+
+  Returns:
+    outer_boxes: a tensor whose shape is the same as `boxes` representing the
+      outer boxes.
+  """
+  if scale < 1.0:
+    raise ValueError(
+        'scale is {}, but outer box scale must be greater than 1.0.'.format(
+            scale))
+  centers_y = (boxes[..., 0] + boxes[..., 2]) / 2.0
+  centers_x = (boxes[..., 1] + boxes[..., 3]) / 2.0
+  box_height = (boxes[..., 2] - boxes[..., 0]) * scale
+  box_width = (boxes[..., 3] - boxes[..., 1]) * scale
+  outer_boxes = tf.stack(
+      [centers_y - box_height / 2.0, centers_x - box_width / 2.0,
+       centers_y + box_height / 2.0, centers_x + box_width / 2.0],
+      axis=1)
+  outer_boxes = clip_boxes(outer_boxes, image_shape)
+  return outer_boxes
+
+
 def encode_boxes(boxes, anchors, weights=None):
   """Encode boxes to targets.
 
   Args:
     boxes: a tensor whose last dimension is 4 representing the coordinates
       of boxes in ymin, xmin, ymax, xmax order.
-    anchors: a tensor whose shape is the same as `boxes` representing the
-      coordinates of anchors in ymin, xmin, ymax, xmax order.
+    anchors: a tensor whose shape is the same as, or `broadcastable` to
+      `boxes`, representing the coordinates of anchors in ymin, xmin, ymax,
+      xmax order.
     weights: None or a list of four float numbers used to scale coordinates.
 
   Returns:
```
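A small sketch (not part of the commit) of the new compute_outer_boxes helper: grow each box by 25% around its center, then clip to the image. Assumes eager TF 2.x and the import path used above.

```python
import tensorflow as tf

from official.vision.detection.utils import box_utils

boxes = tf.constant([[100.0, 100.0, 200.0, 300.0]])  # [N, 4], pixel coords
outer = box_utils.compute_outer_boxes(
    boxes, image_shape=[512, 640], scale=1.25)
# Centers stay at (150, 200); height/width grow from (100, 200) to (125, 250).
```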
```diff
@@ -161,8 +263,8 @@ def encode_boxes(boxes, anchors, weights=None):
     ValueError: If the last dimension of boxes is not 4.
   """
   if boxes.shape[-1] != 4:
     raise ValueError(
-        'boxes.shape[1] is {:d}, but must be 4.'.format(boxes.shape[1]))
+        'boxes.shape[-1] is {:d}, but must be 4.'.format(boxes.shape[-1]))
 
   with tf.name_scope('encode_boxes'):
     boxes = tf.cast(boxes, dtype=anchors.dtype)
```
```diff
@@ -206,14 +308,18 @@ def decode_boxes(encoded_boxes, anchors, weights=None):
   Args:
     encoded_boxes: a tensor whose last dimension is 4 representing the
       coordinates of encoded boxes in ymin, xmin, ymax, xmax order.
-    anchors: a tensor whose shape is the same as `boxes` representing the
-      coordinates of anchors in ymin, xmin, ymax, xmax order.
+    anchors: a tensor whose shape is the same as, or `broadcastable` to
+      `boxes`, representing the coordinates of anchors in ymin, xmin, ymax,
+      xmax order.
     weights: None or a list of four float numbers used to scale coordinates.
 
   Returns:
     encoded_boxes: a tensor whose shape is the same as `boxes` representing
       the decoded box targets.
   """
+  if encoded_boxes.shape[-1] != 4:
+    raise ValueError(
+        'encoded_boxes.shape[-1] is {:d}, but must be 4.'.format(
+            encoded_boxes.shape[-1]))
+
   with tf.name_scope('decode_boxes'):
     encoded_boxes = tf.cast(encoded_boxes, dtype=anchors.dtype)
     dy = encoded_boxes[..., 0:1]
```
```diff
@@ -225,8 +331,8 @@ def decode_boxes(encoded_boxes, anchors, weights=None):
       dx /= weights[1]
       dh /= weights[2]
       dw /= weights[3]
-    dh = tf.minimum(dh, BBOX_XFORM_CLIP)
-    dw = tf.minimum(dw, BBOX_XFORM_CLIP)
+    dh = tf.math.minimum(dh, BBOX_XFORM_CLIP)
+    dw = tf.math.minimum(dw, BBOX_XFORM_CLIP)
 
     anchor_ymin = anchors[..., 0:1]
     anchor_xmin = anchors[..., 1:2]
```
```diff
@@ -239,8 +345,8 @@ def decode_boxes(encoded_boxes, anchors, weights=None):
     decoded_boxes_yc = dy * anchor_h + anchor_yc
     decoded_boxes_xc = dx * anchor_w + anchor_xc
-    decoded_boxes_h = tf.exp(dh) * anchor_h
-    decoded_boxes_w = tf.exp(dw) * anchor_w
+    decoded_boxes_h = tf.math.exp(dh) * anchor_h
+    decoded_boxes_w = tf.math.exp(dw) * anchor_w
 
     decoded_boxes_ymin = decoded_boxes_yc - 0.5 * decoded_boxes_h
     decoded_boxes_xmin = decoded_boxes_xc - 0.5 * decoded_boxes_w
```
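A hedged sketch of the encode/decode round trip these functions implement (standard Faster R-CNN box coding). The weights [10., 10., 5., 5.] are a conventional choice, not something this file mandates.

```python
import tensorflow as tf

from official.vision.detection.utils import box_utils

anchors = tf.constant([[100.0, 100.0, 200.0, 200.0]])
gt_boxes = tf.constant([[110.0, 120.0, 230.0, 250.0]])

targets = box_utils.encode_boxes(
    gt_boxes, anchors, weights=[10.0, 10.0, 5.0, 5.0])
decoded = box_utils.decode_boxes(
    targets, anchors, weights=[10.0, 10.0, 5.0, 5.0])
# `decoded` recovers `gt_boxes` up to float error; the tf.math.minimum
# against BBOX_XFORM_CLIP only matters for extreme dh/dw values.
```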
```diff
@@ -252,3 +358,178 @@ def decode_boxes(encoded_boxes, anchors, weights=None):
          decoded_boxes_ymax, decoded_boxes_xmax],
         axis=-1)
     return decoded_boxes
+
+
+def filter_boxes(boxes, scores, image_shape, min_size_threshold):
+  """Filters and removes boxes that are too small or fall outside the image.
+
+  Args:
+    boxes: a tensor whose last dimension is 4 representing the coordinates of
+      boxes in ymin, xmin, ymax, xmax order.
+    scores: a tensor whose shape is the same as tf.shape(boxes)[:-1]
+      representing the original scores of the boxes.
+    image_shape: a tensor whose shape is the same as, or `broadcastable` to
+      `boxes` except the last dimension, which is 2, representing [height,
+      width] of the scaled image.
+    min_size_threshold: a float representing the minimal box size in each
+      side (w.r.t. the scaled image). Boxes whose sides are smaller than it
+      will be filtered out.
+
+  Returns:
+    filtered_boxes: a tensor whose shape is the same as `boxes` but with
+      the positions of the filtered boxes filled with 0.
+    filtered_scores: a tensor whose shape is the same as `scores` but with
+      the positions of the filtered boxes filled with 0.
+  """
+  if boxes.shape[-1] != 4:
+    raise ValueError(
+        'boxes.shape[-1] is {:d}, but must be 4.'.format(boxes.shape[-1]))
+
+  with tf.name_scope('filter_boxes'):
+    if isinstance(image_shape, list) or isinstance(image_shape, tuple):
+      height, width = image_shape
+    else:
+      image_shape = tf.cast(image_shape, dtype=boxes.dtype)
+      height = image_shape[..., 0]
+      width = image_shape[..., 1]
+
+    ymin = boxes[..., 0]
+    xmin = boxes[..., 1]
+    ymax = boxes[..., 2]
+    xmax = boxes[..., 3]
+
+    h = ymax - ymin + 1.0
+    w = xmax - xmin + 1.0
+    yc = ymin + 0.5 * h
+    xc = xmin + 0.5 * w
+
+    min_size = tf.cast(
+        tf.math.maximum(min_size_threshold, 1.0), dtype=boxes.dtype)
+
+    filtered_size_mask = tf.math.logical_and(
+        tf.math.greater(h, min_size), tf.math.greater(w, min_size))
+    filtered_center_mask = tf.logical_and(
+        tf.math.logical_and(
+            tf.math.greater(yc, 0.0), tf.math.less(yc, height)),
+        tf.math.logical_and(
+            tf.math.greater(xc, 0.0), tf.math.less(xc, width)))
+    filtered_mask = tf.math.logical_and(
+        filtered_size_mask, filtered_center_mask)
+
+    filtered_scores = tf.where(filtered_mask, scores, tf.zeros_like(scores))
+    filtered_boxes = tf.cast(
+        tf.expand_dims(filtered_mask, axis=-1), dtype=boxes.dtype) * boxes
+
+    return filtered_boxes, filtered_scores
+
+
+def filter_boxes_by_scores(boxes, scores, min_score_threshold):
+  """Filters and removes boxes whose scores are smaller than the threshold.
+
+  Args:
+    boxes: a tensor whose last dimension is 4 representing the coordinates of
+      boxes in ymin, xmin, ymax, xmax order.
+    scores: a tensor whose shape is the same as tf.shape(boxes)[:-1]
+      representing the original scores of the boxes.
+    min_score_threshold: a float representing the minimal box score
+      threshold. Boxes whose score are smaller than it will be filtered out.
+
+  Returns:
+    filtered_boxes: a tensor whose shape is the same as `boxes` but with
+      the positions of the filtered boxes filled with 0.
+    filtered_scores: a tensor whose shape is the same as `scores` but with
+      the positions of the filtered boxes filled with 0.
+  """
+  if boxes.shape[-1] != 4:
+    raise ValueError(
+        'boxes.shape[-1] is {:d}, but must be 4.'.format(boxes.shape[-1]))
+
+  with tf.name_scope('filter_boxes_by_scores'):
+    filtered_mask = tf.math.greater(scores, min_score_threshold)
+    filtered_scores = tf.where(filtered_mask, scores, tf.zeros_like(scores))
+    filtered_boxes = tf.cast(
+        tf.expand_dims(filtered_mask, axis=-1), dtype=boxes.dtype) * boxes
+
+    return filtered_boxes, filtered_scores
+
+
+def top_k_boxes(boxes, scores, k):
+  """Sorts and selects top k boxes according to the scores.
+
+  Args:
+    boxes: a tensor of shape [batch_size, N, 4] representing the coordinates
+      of the boxes. N is the number of boxes per image.
+    scores: a tensor of shape [batch_size, N] representing the scores of the
+      boxes.
+    k: an integer or a tensor indicating the top k number.
+
+  Returns:
+    selected_boxes: a tensor of shape [batch_size, k, 4] representing the
+      selected top k box coordinates.
+    selected_scores: a tensor of shape [batch_size, k] representing the
+      selected top k box scores.
+  """
+  with tf.name_scope('top_k_boxes'):
+    selected_scores, top_k_indices = tf.nn.top_k(scores, k=k, sorted=True)
+
+    batch_size, _ = scores.get_shape().as_list()
+    if batch_size == 1:
+      selected_boxes = tf.squeeze(
+          tf.gather(boxes, top_k_indices, axis=1), axis=1)
+    else:
+      top_k_indices_shape = tf.shape(top_k_indices)
+      batch_indices = (
+          tf.expand_dims(tf.range(top_k_indices_shape[0]), axis=-1) *
+          tf.ones([1, top_k_indices_shape[-1]], dtype=tf.int32))
+      gather_nd_indices = tf.stack([batch_indices, top_k_indices], axis=-1)
+      selected_boxes = tf.gather_nd(boxes, gather_nd_indices)
+
+    return selected_boxes, selected_scores
+
+
+def bbox_overlap(boxes, gt_boxes):
+  """Calculates the overlap between proposal and ground truth boxes.
+
+  Some `gt_boxes` may have been padded. The returned `iou` tensor for these
+  boxes will be -1.
+
+  Args:
+    boxes: a tensor with a shape of [batch_size, N, 4]. N is the number of
+      proposals before groundtruth assignment (e.g., rpn_post_nms_topn). The
+      last dimension is the pixel coordinates in [ymin, xmin, ymax, xmax]
+      form.
+    gt_boxes: a tensor with a shape of [batch_size, MAX_NUM_INSTANCES, 4].
+      This tensor might have paddings with a negative value.
+
+  Returns:
+    iou: a tensor with a shape of [batch_size, N, MAX_NUM_INSTANCES].
+  """
+  with tf.name_scope('bbox_overlap'):
+    bb_y_min, bb_x_min, bb_y_max, bb_x_max = tf.split(
+        value=boxes, num_or_size_splits=4, axis=2)
+    gt_y_min, gt_x_min, gt_y_max, gt_x_max = tf.split(
+        value=gt_boxes, num_or_size_splits=4, axis=2)
+
+    # Calculates the intersection area.
+    i_xmin = tf.math.maximum(bb_x_min, tf.transpose(gt_x_min, [0, 2, 1]))
+    i_xmax = tf.math.minimum(bb_x_max, tf.transpose(gt_x_max, [0, 2, 1]))
+    i_ymin = tf.math.maximum(bb_y_min, tf.transpose(gt_y_min, [0, 2, 1]))
+    i_ymax = tf.math.minimum(bb_y_max, tf.transpose(gt_y_max, [0, 2, 1]))
+    i_area = tf.math.maximum((i_xmax - i_xmin), 0) * tf.math.maximum(
+        (i_ymax - i_ymin), 0)
+
+    # Calculates the union area.
+    bb_area = (bb_y_max - bb_y_min) * (bb_x_max - bb_x_min)
+    gt_area = (gt_y_max - gt_y_min) * (gt_x_max - gt_x_min)
+    # Adds a small epsilon to avoid divide-by-zero.
+    u_area = bb_area + tf.transpose(gt_area, [0, 2, 1]) - i_area + 1e-8
+
+    # Calculates IoU.
+    iou = i_area / u_area
+
+    # Fills -1 for IoU entries between the padded ground truth boxes.
+    gt_invalid_mask = tf.less(
+        tf.reduce_max(gt_boxes, axis=-1, keepdims=True), 0.0)
+    padding_mask = tf.logical_or(
+        tf.zeros_like(bb_x_min, dtype=tf.bool),
+        tf.transpose(gt_invalid_mask, [0, 2, 1]))
+    iou = tf.where(padding_mask, -tf.ones_like(iou), iou)
+
+    return iou
```
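A minimal usage sketch for the new batched helpers (illustrative only; shapes follow the docstrings above, and the random inputs stand in for real proposals).

```python
import tensorflow as tf

from official.vision.detection.utils import box_utils

boxes = tf.random.uniform([1, 8, 4])   # [batch, N, 4] proposals
gt = tf.random.uniform([1, 3, 4])      # [batch, MAX_NUM_INSTANCES, 4]
scores = tf.random.uniform([1, 8])     # [batch, N]

iou = box_utils.bbox_overlap(boxes, gt)  # [1, 8, 3]; -1 at padded gt rows
top_boxes, top_scores = box_utils.top_k_boxes(boxes, scores, k=4)
```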
official/vision/detection/utils/class_utils.py  (new file, 0 → 100644, +44 -0)

```python
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Utility functions for handling dataset object categories."""


def coco_split_class_ids(split_name):
  """Return the COCO class split ids based on split name and training mode.

  Args:
    split_name: The name of dataset split.

  Returns:
    class_ids: a python list of integer.
  """
  if split_name == 'all':
    return []
  elif split_name == 'voc':
    return [1, 2, 3, 4, 5, 6, 7, 9, 16, 17, 18, 19, 20, 21, 44, 62, 63, 64,
            67, 72]
  elif split_name == 'nonvoc':
    return [8, 10, 11, 13, 14, 15, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34,
            35, 36, 37, 38, 39, 40, 41, 42, 43, 46, 47, 48, 49, 50, 51, 52,
            53, 54, 55, 56, 57, 58, 59, 60, 61, 65, 70, 73, 74, 75, 76, 77,
            78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90]
  else:
    raise ValueError('Invalid split name {}!!!'.format(split_name))
```
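A quick sanity check (illustrative, not in the commit): the 'voc' and 'nonvoc' splits partition the 80 COCO categories without overlap.

```python
from official.vision.detection.utils import class_utils

voc = class_utils.coco_split_class_ids('voc')
nonvoc = class_utils.coco_split_class_ids('nonvoc')
assert len(voc) == 20 and len(nonvoc) == 60
assert not set(voc) & set(nonvoc)
```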
official/vision/detection/utils/dataloader_utils.py  (new file, 0 → 100644, +40 -0)

```python
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Utility functions for dataloader."""

import tensorflow.compat.v2 as tf

from official.vision.detection.utils import input_utils


def process_source_id(source_id):
  """Processes source_id to the right format."""
  if source_id.dtype == tf.string:
    source_id = tf.cast(tf.strings.to_number(source_id), tf.int64)
  with tf.control_dependencies([source_id]):
    source_id = tf.cond(
        pred=tf.equal(tf.size(input=source_id), 0),
        true_fn=lambda: tf.cast(tf.constant(-1), tf.int64),
        false_fn=lambda: tf.identity(source_id))
  return source_id


def pad_groundtruths_to_fixed_size(gt, n):
  """Pads the first dimension of groundtruth labels to the fixed size."""
  gt['boxes'] = input_utils.pad_to_fixed_size(gt['boxes'], n, -1)
  gt['is_crowds'] = input_utils.pad_to_fixed_size(gt['is_crowds'], n, 0)
  gt['areas'] = input_utils.pad_to_fixed_size(gt['areas'], n, -1)
  gt['classes'] = input_utils.pad_to_fixed_size(gt['classes'], n, -1)
  return gt
```
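An illustrative call (not in the commit) showing what process_source_id does: string ids become int64, and an empty tensor falls back to -1.

```python
import tensorflow as tf

from official.vision.detection.utils import dataloader_utils

sid = dataloader_utils.process_source_id(tf.constant('12345'))
# sid == tf.constant(12345, tf.int64); an empty input would yield -1.
```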
official/vision/detection/utils/input_utils.py  (+103 -0)

```diff
@@ -182,6 +182,109 @@ def resize_and_crop_image(image,
     scaled_image = tf.image.resize(
         image, tf.cast(scaled_size, tf.int32), method=method)
+
+    if random_jittering:
+      scaled_image = scaled_image[
+          offset[0]:offset[0] + desired_size[0],
+          offset[1]:offset[1] + desired_size[1], :]
+
+    output_image = tf.image.pad_to_bounding_box(
+        scaled_image, 0, 0, padded_size[0], padded_size[1])
+
+    image_info = tf.stack([
+        image_size, scaled_size, image_scale,
+        tf.cast(offset, tf.float32)])
+    return output_image, image_info
+
+
+def resize_and_crop_image_v2(image,
+                             short_side,
+                             long_side,
+                             padded_size,
+                             aug_scale_min=1.0,
+                             aug_scale_max=1.0,
+                             seed=1,
+                             method=tf.image.ResizeMethod.BILINEAR):
+  """Resizes the input image to output size (Faster R-CNN style).
+
+  Resize and pad images given the specified short / long side length and the
+  stride size.
+
+  Here are the preprocessing steps.
+  1. For a given image, keep its aspect ratio and first try to rescale the
+     short side of the original image to `short_side`.
+  2. If the scaled image after 1 has a long side that exceeds `long_side`,
+     keep the aspect ratio and rescale the long side of the image to
+     `long_side`.
+  3. Pad the rescaled image to the padded_size.
+
+  Args:
+    image: a `Tensor` of shape [height, width, 3] representing an image.
+    short_side: a scalar `Tensor` or `int` representing the desired short
+      side to be rescaled to.
+    long_side: a scalar `Tensor` or `int` representing the desired long side
+      to be rescaled to.
+    padded_size: a `Tensor` or `int` list/tuple of two elements representing
+      [height, width] of the padded output image size. Padding will be
+      applied after scaling the image to the desired_size.
+    aug_scale_min: a `float` with range between [0, 1.0] representing minimum
+      random scale applied to desired_size for training scale jittering.
+    aug_scale_max: a `float` with range between [1.0, inf] representing
+      maximum random scale applied to desired_size for training scale
+      jittering.
+    seed: seed for random scale jittering.
+    method: function to resize input image to scaled image.
+
+  Returns:
+    output_image: `Tensor` of shape [height, width, 3] where [height, width]
+      equals to `output_size`.
+    image_info: a 2D `Tensor` that encodes the information of the image and
+      the applied preprocessing. It is in the format of
+      [[original_height, original_width], [scaled_height, scaled_width],
+      [y_scale, x_scale], [y_offset, x_offset]], where [scaled_height,
+      scaled_width] is the actual scaled image size, and [y_scale, x_scale]
+      is the scaling factor, which is the ratio of
+      scaled dimension / original dimension.
+  """
+  with tf.name_scope('resize_and_crop_image_v2'):
+    image_size = tf.cast(tf.shape(image)[0:2], tf.float32)
+
+    scale_using_short_side = (
+        short_side / tf.math.minimum(image_size[0], image_size[1]))
+    scale_using_long_side = (
+        long_side / tf.math.maximum(image_size[0], image_size[1]))
+
+    scaled_size = tf.math.round(image_size * scale_using_short_side)
+    scaled_size = tf.where(
+        tf.math.greater(
+            tf.math.maximum(scaled_size[0], scaled_size[1]), long_side),
+        tf.math.round(image_size * scale_using_long_side),
+        scaled_size)
+    desired_size = scaled_size
+
+    random_jittering = (aug_scale_min != 1.0 or aug_scale_max != 1.0)
+
+    if random_jittering:
+      random_scale = tf.random.uniform(
+          [], aug_scale_min, aug_scale_max, seed=seed)
+      scaled_size = tf.math.round(random_scale * scaled_size)
+
+    # Computes 2D image_scale.
+    image_scale = scaled_size / image_size
+
+    # Selects non-zero random offset (x, y) if scaled image is larger than
+    # desired_size.
+    if random_jittering:
+      max_offset = scaled_size - desired_size
+      max_offset = tf.where(
+          tf.math.less(max_offset, 0), tf.zeros_like(max_offset), max_offset)
+      offset = max_offset * tf.random.uniform([2, ], 0, 1, seed=seed)
+      offset = tf.cast(offset, tf.int32)
+    else:
+      offset = tf.zeros((2,), tf.int32)
+
+    scaled_image = tf.image.resize(
+        image, tf.cast(scaled_size, tf.int32), method=method)
+
     if random_jittering:
       scaled_image = scaled_image[
           offset[0]:offset[0] + desired_size[0],
```
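A hedged sketch of the Faster R-CNN-style resize added here: short side to 800, long side capped at 1333, padded to a stride-divisible size. The 800/1333/padded values are conventional choices, not defaults in this file, and no scale jittering is used, so only the resize-and-pad path runs.

```python
import tensorflow as tf

from official.vision.detection.utils import input_utils

image = tf.zeros([600, 1200, 3], tf.float32)
output_image, image_info = input_utils.resize_and_crop_image_v2(
    image, short_side=800, long_side=1333, padded_size=[832, 1344])
# Scaling 600x1200 by 800/600 would make the long side 1600 > 1333, so the
# long-side rule wins: scale = 1333/1200 and scaled size is about [666, 1333],
# padded to [832, 1344, 3].
```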
official/vision/detection/utils/object_detection/preprocessor.py  (+74 -4)

```diff
@@ -100,7 +100,9 @@ def keypoint_flip_horizontal(keypoints, flip_point, flip_permutation,
   Returns:
     new_keypoints: a tensor of shape [num_instances, num_keypoints, 2]
   """
-  with tf.name_scope(scope, 'FlipHorizontal'):
+  if not scope:
+    scope = 'FlipHorizontal'
+  with tf.name_scope(scope):
     keypoints = tf.transpose(a=keypoints, perm=[1, 0, 2])
     keypoints = tf.gather(keypoints, flip_permutation)
     v, u = tf.split(value=keypoints, num_or_size_splits=2, axis=2)
```

```diff
@@ -110,6 +112,70 @@ def keypoint_flip_horizontal(keypoints, flip_point, flip_permutation,
     return new_keypoints
 
 
+def keypoint_change_coordinate_frame(keypoints, window, scope=None):
+  """Changes coordinate frame of the keypoints to be relative to window's frame.
+
+  Given a window of the form [y_min, x_min, y_max, x_max], changes keypoint
+  coordinates from keypoints of shape [num_instances, num_keypoints, 2]
+  to be relative to this window.
+
+  An example use case is data augmentation: where we are given groundtruth
+  keypoints and would like to randomly crop the image to some window. In this
+  case we need to change the coordinate frame of each groundtruth keypoint to
+  be relative to this new window.
+
+  Args:
+    keypoints: a tensor of shape [num_instances, num_keypoints, 2]
+    window: a tensor of shape [4] representing the [y_min, x_min, y_max,
+      x_max] window we should change the coordinate frame to.
+    scope: name scope.
+
+  Returns:
+    new_keypoints: a tensor of shape [num_instances, num_keypoints, 2]
+  """
+  if not scope:
+    scope = 'ChangeCoordinateFrame'
+  with tf.name_scope(scope):
+    win_height = window[2] - window[0]
+    win_width = window[3] - window[1]
+    new_keypoints = box_list_ops.scale(
+        keypoints - [window[0], window[1]], 1.0 / win_height,
+        1.0 / win_width)
+    return new_keypoints
+
+
+def keypoint_prune_outside_window(keypoints, window, scope=None):
+  """Prunes keypoints that fall outside a given window.
+
+  This function replaces keypoints that fall outside the given window with
+  nan. See also clip_to_window which clips any keypoints that fall outside
+  the given window.
+
+  Args:
+    keypoints: a tensor of shape [num_instances, num_keypoints, 2]
+    window: a tensor of shape [4] representing the [y_min, x_min, y_max,
+      x_max] window outside of which the op should prune the keypoints.
+    scope: name scope.
+
+  Returns:
+    new_keypoints: a tensor of shape [num_instances, num_keypoints, 2]
+  """
+  if not scope:
+    scope = 'PruneOutsideWindow'
+  with tf.name_scope(scope):
+    y, x = tf.split(value=keypoints, num_or_size_splits=2, axis=2)
+    win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window)
+
+    valid_indices = tf.logical_and(
+        tf.logical_and(y >= win_y_min, y <= win_y_max),
+        tf.logical_and(x >= win_x_min, x <= win_x_max))
+
+    new_y = tf.where(valid_indices, y, np.nan * tf.ones_like(y))
+    new_x = tf.where(valid_indices, x, np.nan * tf.ones_like(x))
+    new_keypoints = tf.concat([new_y, new_x], 2)
+
+    return new_keypoints
+
+
 def random_horizontal_flip(image,
                            boxes=None,
                            masks=None,
```

```diff
@@ -334,7 +400,7 @@ def resize_to_range(image,
   if len(image.get_shape()) != 3:
     raise ValueError('Image should be 3D tensor')
 
-  with tf.name_scope('ResizeToRange', values=[image, min_dimension]):
+  with tf.name_scope('ResizeToRange'):
     if image.get_shape().is_fully_defined():
       new_size = _compute_new_static_size(image, min_dimension,
                                           max_dimension)
     else:
```

```diff
@@ -389,7 +455,9 @@ def box_list_scale(boxlist, y_scale, x_scale, scope=None):
   Returns:
     boxlist: BoxList holding N boxes
   """
-  with tf.name_scope(scope, 'Scale'):
+  if not scope:
+    scope = 'Scale'
+  with tf.name_scope(scope):
     y_scale = tf.cast(y_scale, tf.float32)
     x_scale = tf.cast(x_scale, tf.float32)
     y_min, x_min, y_max, x_max = tf.split(
```

```diff
@@ -415,7 +483,9 @@ def keypoint_scale(keypoints, y_scale, x_scale, scope=None):
   Returns:
     new_keypoints: a tensor of shape [num_instances, num_keypoints, 2]
   """
-  with tf.name_scope(scope, 'Scale'):
+  if not scope:
+    scope = 'Scale'
+  with tf.name_scope(scope):
     y_scale = tf.cast(y_scale, tf.float32)
     x_scale = tf.cast(x_scale, tf.float32)
     new_keypoints = keypoints * [[[y_scale, x_scale]]]
```
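A sketch (illustrative, not from the commit) of the two new keypoint utilities: prune keypoints outside a crop window, then re-express the survivors in the window's coordinate frame. It assumes eager TF 2.x and that the module's `box_list_ops` and `np` imports resolve; expected values below follow the scale-and-shift math in the functions.

```python
import tensorflow as tf

from official.vision.detection.utils.object_detection import preprocessor

keypoints = tf.constant([[[0.2, 0.3], [0.9, 0.9]]])  # [1 instance, 2 kpts, 2]
window = tf.constant([0.1, 0.1, 0.6, 0.6])           # [y_min, x_min, y_max, x_max]

pruned = preprocessor.keypoint_prune_outside_window(keypoints, window)
# -> [[[0.2, 0.3], [nan, nan]]]: (0.9, 0.9) lies outside the window.
moved = preprocessor.keypoint_change_coordinate_frame(pruned, window)
# -> [[[0.2, 0.4], [nan, nan]]]: ((0.2-0.1)/0.5, (0.3-0.1)/0.5).
```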