Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
ResNet50_tensorflow
Commits
999fae62
Commit
999fae62
authored
Aug 12, 2020
by
Hongkun Yu
Committed by
A. Unique TensorFlower
Aug 12, 2020
Browse files
Internal change
PiperOrigin-RevId: 326286926
parent
94561082
Changes
205
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
452 additions
and
511 deletions
+452
-511
official/vision/detection/modeling/retinanet_model.py
official/vision/detection/modeling/retinanet_model.py
+5
-5
official/vision/detection/modeling/shapemask_model.py
official/vision/detection/modeling/shapemask_model.py
+66
-74
official/vision/detection/ops/nms.py
official/vision/detection/ops/nms.py
+7
-11
official/vision/detection/ops/postprocess_ops.py
official/vision/detection/ops/postprocess_ops.py
+25
-29
official/vision/detection/ops/roi_ops.py
official/vision/detection/ops/roi_ops.py
+22
-26
official/vision/detection/ops/spatial_transform_ops.py
official/vision/detection/ops/spatial_transform_ops.py
+40
-45
official/vision/detection/ops/target_ops.py
official/vision/detection/ops/target_ops.py
+61
-69
official/vision/detection/utils/box_utils.py
official/vision/detection/utils/box_utils.py
+17
-17
official/vision/detection/utils/input_utils.py
official/vision/detection/utils/input_utils.py
+21
-28
official/vision/detection/utils/mask_utils.py
official/vision/detection/utils/mask_utils.py
+13
-34
official/vision/detection/utils/object_detection/balanced_positive_negative_sampler.py
...ls/object_detection/balanced_positive_negative_sampler.py
+26
-23
official/vision/detection/utils/object_detection/box_coder.py
...cial/vision/detection/utils/object_detection/box_coder.py
+5
-7
official/vision/detection/utils/object_detection/box_list.py
official/vision/detection/utils/object_detection/box_list.py
+4
-5
official/vision/detection/utils/object_detection/box_list_ops.py
...l/vision/detection/utils/object_detection/box_list_ops.py
+78
-65
official/vision/detection/utils/object_detection/faster_rcnn_box_coder.py
...detection/utils/object_detection/faster_rcnn_box_coder.py
+3
-4
official/vision/detection/utils/object_detection/matcher.py
official/vision/detection/utils/object_detection/matcher.py
+11
-14
official/vision/detection/utils/object_detection/minibatch_sampler.py
...ion/detection/utils/object_detection/minibatch_sampler.py
+4
-5
official/vision/detection/utils/object_detection/ops.py
official/vision/detection/utils/object_detection/ops.py
+5
-6
official/vision/detection/utils/object_detection/preprocessor.py
...l/vision/detection/utils/object_detection/preprocessor.py
+39
-43
official/vision/detection/utils/object_detection/region_similarity_calculator.py
...on/utils/object_detection/region_similarity_calculator.py
+0
-1
No files found.
official/vision/detection/modeling/retinanet_model.py
View file @
999fae62
...
@@ -52,15 +52,15 @@ class RetinanetModel(base_model.Model):
...
@@ -52,15 +52,15 @@ class RetinanetModel(base_model.Model):
# Predict function.
# Predict function.
self
.
_generate_detections_fn
=
postprocess_ops
.
MultilevelDetectionGenerator
(
self
.
_generate_detections_fn
=
postprocess_ops
.
MultilevelDetectionGenerator
(
params
.
architecture
.
min_level
,
params
.
architecture
.
min_level
,
params
.
architecture
.
max_level
,
params
.
architecture
.
max_level
,
params
.
postprocess
)
params
.
postprocess
)
self
.
_transpose_input
=
params
.
train
.
transpose_input
self
.
_transpose_input
=
params
.
train
.
transpose_input
assert
not
self
.
_transpose_input
,
'Transpose input is not supported.'
assert
not
self
.
_transpose_input
,
'Transpose input is not supported.'
# Input layer.
# Input layer.
self
.
_input_layer
=
tf
.
keras
.
layers
.
Input
(
self
.
_input_layer
=
tf
.
keras
.
layers
.
Input
(
shape
=
(
None
,
None
,
params
.
retinanet_parser
.
num_channels
),
name
=
''
,
shape
=
(
None
,
None
,
params
.
retinanet_parser
.
num_channels
),
name
=
''
,
dtype
=
tf
.
bfloat16
if
self
.
_use_bfloat16
else
tf
.
float32
)
dtype
=
tf
.
bfloat16
if
self
.
_use_bfloat16
else
tf
.
float32
)
def
build_outputs
(
self
,
inputs
,
mode
):
def
build_outputs
(
self
,
inputs
,
mode
):
...
@@ -141,8 +141,8 @@ class RetinanetModel(base_model.Model):
...
@@ -141,8 +141,8 @@ class RetinanetModel(base_model.Model):
raise
ValueError
(
'"%s" is missing in outputs, requried %s found %s'
,
raise
ValueError
(
'"%s" is missing in outputs, requried %s found %s'
,
field
,
required_label_fields
,
labels
.
keys
())
field
,
required_label_fields
,
labels
.
keys
())
boxes
,
scores
,
classes
,
valid_detections
=
self
.
_generate_detections_fn
(
boxes
,
scores
,
classes
,
valid_detections
=
self
.
_generate_detections_fn
(
outputs
[
'box_outputs'
],
outputs
[
'cls_outputs'
],
outputs
[
'box_outputs'
],
outputs
[
'cls_outputs'
],
labels
[
'anchor_boxes'
],
labels
[
'anchor_boxes'
],
labels
[
'image_info'
][:,
1
:
2
,
:])
labels
[
'image_info'
][:,
1
:
2
,
:])
# Discards the old output tensors to save memory. The `cls_outputs` and
# Discards the old output tensors to save memory. The `cls_outputs` and
# `box_outputs` are pretty big and could potentiall lead to memory issue.
# `box_outputs` are pretty big and could potentiall lead to memory issue.
outputs
=
{
outputs
=
{
...
...
official/vision/detection/modeling/shapemask_model.py
View file @
999fae62
...
@@ -61,13 +61,11 @@ class ShapeMaskModel(base_model.Model):
...
@@ -61,13 +61,11 @@ class ShapeMaskModel(base_model.Model):
params
.
shapemask_loss
.
shape_prior_loss_weight
)
params
.
shapemask_loss
.
shape_prior_loss_weight
)
self
.
_coarse_mask_loss_weight
=
(
self
.
_coarse_mask_loss_weight
=
(
params
.
shapemask_loss
.
coarse_mask_loss_weight
)
params
.
shapemask_loss
.
coarse_mask_loss_weight
)
self
.
_fine_mask_loss_weight
=
(
self
.
_fine_mask_loss_weight
=
(
params
.
shapemask_loss
.
fine_mask_loss_weight
)
params
.
shapemask_loss
.
fine_mask_loss_weight
)
# Predict function.
# Predict function.
self
.
_generate_detections_fn
=
postprocess_ops
.
MultilevelDetectionGenerator
(
self
.
_generate_detections_fn
=
postprocess_ops
.
MultilevelDetectionGenerator
(
params
.
architecture
.
min_level
,
params
.
architecture
.
min_level
,
params
.
architecture
.
max_level
,
params
.
architecture
.
max_level
,
params
.
postprocess
)
params
.
postprocess
)
def
build_outputs
(
self
,
inputs
,
mode
):
def
build_outputs
(
self
,
inputs
,
mode
):
...
@@ -79,10 +77,8 @@ class ShapeMaskModel(base_model.Model):
...
@@ -79,10 +77,8 @@ class ShapeMaskModel(base_model.Model):
else
:
else
:
anchor_boxes
=
anchor
.
Anchor
(
anchor_boxes
=
anchor
.
Anchor
(
self
.
_params
.
architecture
.
min_level
,
self
.
_params
.
architecture
.
min_level
,
self
.
_params
.
architecture
.
max_level
,
self
.
_params
.
architecture
.
max_level
,
self
.
_params
.
anchor
.
num_scales
,
self
.
_params
.
anchor
.
num_scales
,
self
.
_params
.
anchor
.
aspect_ratios
,
self
.
_params
.
anchor
.
anchor_size
,
self
.
_params
.
anchor
.
aspect_ratios
,
self
.
_params
.
anchor
.
anchor_size
,
images
.
get_shape
().
as_list
()[
1
:
3
]).
multilevel_boxes
images
.
get_shape
().
as_list
()[
1
:
3
]).
multilevel_boxes
batch_size
=
tf
.
shape
(
images
)[
0
]
batch_size
=
tf
.
shape
(
images
)[
0
]
...
@@ -96,8 +92,7 @@ class ShapeMaskModel(base_model.Model):
...
@@ -96,8 +92,7 @@ class ShapeMaskModel(base_model.Model):
fpn_features
,
is_training
=
is_training
)
fpn_features
,
is_training
=
is_training
)
valid_boxes
,
valid_scores
,
valid_classes
,
valid_detections
=
(
valid_boxes
,
valid_scores
,
valid_classes
,
valid_detections
=
(
self
.
_generate_detections_fn
(
box_outputs
,
cls_outputs
,
self
.
_generate_detections_fn
(
box_outputs
,
cls_outputs
,
anchor_boxes
,
anchor_boxes
,
inputs
[
'image_info'
][:,
1
:
2
,
:]))
inputs
[
'image_info'
][:,
1
:
2
,
:]))
image_size
=
images
.
get_shape
().
as_list
()[
1
:
3
]
image_size
=
images
.
get_shape
().
as_list
()[
1
:
3
]
...
@@ -124,22 +119,18 @@ class ShapeMaskModel(base_model.Model):
...
@@ -124,22 +119,18 @@ class ShapeMaskModel(base_model.Model):
return
boxes
,
classes
,
outer_boxes
return
boxes
,
classes
,
outer_boxes
boxes
,
classes
,
outer_boxes
=
SampledBoxesLayer
()(
boxes
,
classes
,
outer_boxes
=
SampledBoxesLayer
()(
inputs
,
valid_boxes
,
valid_classes
,
inputs
,
valid_outer_boxes
,
training
=
is_training
)
valid_boxes
,
valid_classes
,
instance_features
,
prior_masks
=
self
.
_shape_prior_head_fn
(
fpn_features
,
valid_outer_boxes
,
boxes
,
training
=
is_training
)
outer_boxes
,
classes
,
instance_features
,
prior_masks
=
self
.
_shape_prior_head_fn
(
is_training
)
fpn_features
,
boxes
,
outer_boxes
,
classes
,
is_training
)
coarse_mask_logits
=
self
.
_coarse_mask_fn
(
instance_features
,
coarse_mask_logits
=
self
.
_coarse_mask_fn
(
instance_features
,
prior_masks
,
prior_masks
,
classes
,
is_training
)
classes
,
fine_mask_logits
=
self
.
_fine_mask_fn
(
instance_features
,
coarse_mask_logits
,
is_training
)
classes
,
is_training
)
fine_mask_logits
=
self
.
_fine_mask_fn
(
instance_features
,
coarse_mask_logits
,
classes
,
is_training
)
model_outputs
=
{
model_outputs
=
{
'cls_outputs'
:
cls_outputs
,
'cls_outputs'
:
cls_outputs
,
...
@@ -177,18 +168,15 @@ class ShapeMaskModel(base_model.Model):
...
@@ -177,18 +168,15 @@ class ShapeMaskModel(base_model.Model):
labels
[
'num_positives'
])
labels
[
'num_positives'
])
# Adds Shapemask model losses.
# Adds Shapemask model losses.
shape_prior_loss
=
self
.
_shapemask_prior_loss_fn
(
shape_prior_loss
=
self
.
_shapemask_prior_loss_fn
(
outputs
[
'prior_masks'
],
outputs
[
'prior_masks'
],
labels
[
'mask_targets'
],
labels
[
'mask_targets'
],
labels
[
'mask_is_valid'
])
labels
[
'mask_is_valid'
])
coarse_mask_loss
=
self
.
_shapemask_loss_fn
(
outputs
[
'coarse_mask_logits'
],
coarse_mask_loss
=
self
.
_shapemask_loss_fn
(
labels
[
'mask_targets'
],
outputs
[
'coarse_mask_logits'
],
labels
[
'mask_is_valid'
])
labels
[
'mask_targets'
],
fine_mask_loss
=
self
.
_shapemask_loss_fn
(
outputs
[
'fine_mask_logits'
],
labels
[
'mask_is_valid'
])
labels
[
'fine_mask_targets'
],
fine_mask_loss
=
self
.
_shapemask_loss_fn
(
labels
[
'mask_is_valid'
])
outputs
[
'fine_mask_logits'
],
labels
[
'fine_mask_targets'
],
labels
[
'mask_is_valid'
])
model_loss
=
(
model_loss
=
(
cls_loss
+
self
.
_box_loss_weight
*
box_loss
+
cls_loss
+
self
.
_box_loss_weight
*
box_loss
+
...
@@ -222,43 +210,46 @@ class ShapeMaskModel(base_model.Model):
...
@@ -222,43 +210,46 @@ class ShapeMaskModel(base_model.Model):
if
is_training
:
if
is_training
:
batch_size
=
params
.
train
.
batch_size
batch_size
=
params
.
train
.
batch_size
input_layer
=
{
input_layer
=
{
'image'
:
tf
.
keras
.
layers
.
Input
(
'image'
:
shape
=
input_shape
,
tf
.
keras
.
layers
.
Input
(
batch_size
=
batch_size
,
shape
=
input_shape
,
name
=
'image'
,
batch_size
=
batch_size
,
dtype
=
tf
.
bfloat16
if
self
.
_use_bfloat16
else
tf
.
float32
),
name
=
'image'
,
'image_info'
:
tf
.
keras
.
layers
.
Input
(
dtype
=
tf
.
bfloat16
if
self
.
_use_bfloat16
else
tf
.
float32
),
shape
=
[
4
,
2
],
'image_info'
:
batch_size
=
batch_size
,
tf
.
keras
.
layers
.
Input
(
name
=
'image_info'
),
shape
=
[
4
,
2
],
batch_size
=
batch_size
,
name
=
'image_info'
),
'mask_classes'
:
tf
.
keras
.
layers
.
Input
(
'mask_classes'
:
shape
=
[
params
.
shapemask_parser
.
num_sampled_masks
],
tf
.
keras
.
layers
.
Input
(
batch_size
=
batch_size
,
shape
=
[
params
.
shapemask_parser
.
num_sampled_masks
],
name
=
'mask_classes'
,
batch_size
=
batch_size
,
dtype
=
tf
.
int64
),
name
=
'mask_classes'
,
'mask_outer_boxes'
:
tf
.
keras
.
layers
.
Input
(
dtype
=
tf
.
int64
),
shape
=
[
params
.
shapemask_parser
.
num_sampled_masks
,
4
],
'mask_outer_boxes'
:
batch_size
=
batch_size
,
tf
.
keras
.
layers
.
Input
(
name
=
'mask_outer_boxes'
,
shape
=
[
params
.
shapemask_parser
.
num_sampled_masks
,
4
],
dtype
=
tf
.
float32
),
batch_size
=
batch_size
,
'mask_boxes'
:
tf
.
keras
.
layers
.
Input
(
name
=
'mask_outer_boxes'
,
shape
=
[
params
.
shapemask_parser
.
num_sampled_masks
,
4
],
dtype
=
tf
.
float32
),
batch_size
=
batch_size
,
'mask_boxes'
:
name
=
'mask_boxes'
,
tf
.
keras
.
layers
.
Input
(
dtype
=
tf
.
float32
),
shape
=
[
params
.
shapemask_parser
.
num_sampled_masks
,
4
],
batch_size
=
batch_size
,
name
=
'mask_boxes'
,
dtype
=
tf
.
float32
),
}
}
else
:
else
:
batch_size
=
params
.
eval
.
batch_size
batch_size
=
params
.
eval
.
batch_size
input_layer
=
{
input_layer
=
{
'image'
:
tf
.
keras
.
layers
.
Input
(
'image'
:
shape
=
input_shape
,
tf
.
keras
.
layers
.
Input
(
batch_size
=
batch_siz
e
,
shape
=
input_shap
e
,
name
=
'image'
,
batch_size
=
batch_size
,
dtype
=
tf
.
bfloat16
if
self
.
_use_bfloat16
else
tf
.
float32
)
,
name
=
'image'
,
'image_info'
:
tf
.
keras
.
layers
.
Input
(
dtype
=
tf
.
bfloat16
if
self
.
_use_bfloat16
else
tf
.
float32
),
shape
=
[
4
,
2
],
'image_info'
:
batch_size
=
batch_size
,
tf
.
keras
.
layers
.
Input
(
name
=
'image_info'
),
shape
=
[
4
,
2
],
batch_size
=
batch_size
,
name
=
'image_info'
),
}
}
return
input_layer
return
input_layer
...
@@ -277,9 +268,10 @@ class ShapeMaskModel(base_model.Model):
...
@@ -277,9 +268,10 @@ class ShapeMaskModel(base_model.Model):
return
self
.
_keras_model
return
self
.
_keras_model
def
post_processing
(
self
,
labels
,
outputs
):
def
post_processing
(
self
,
labels
,
outputs
):
required_output_fields
=
[
'num_detections'
,
'detection_boxes'
,
required_output_fields
=
[
'detection_classes'
,
'detection_masks'
,
'num_detections'
,
'detection_boxes'
,
'detection_classes'
,
'detection_scores'
]
'detection_masks'
,
'detection_scores'
]
for
field
in
required_output_fields
:
for
field
in
required_output_fields
:
if
field
not
in
outputs
:
if
field
not
in
outputs
:
...
...
official/vision/detection/ops/nms.py
View file @
999fae62
...
@@ -22,7 +22,6 @@ import tensorflow as tf
...
@@ -22,7 +22,6 @@ import tensorflow as tf
from
official.vision.detection.utils
import
box_utils
from
official.vision.detection.utils
import
box_utils
NMS_TILE_SIZE
=
512
NMS_TILE_SIZE
=
512
...
@@ -106,9 +105,7 @@ def _suppression_loop_body(boxes, iou_threshold, output_size, idx):
...
@@ -106,9 +105,7 @@ def _suppression_loop_body(boxes, iou_threshold, output_size, idx):
return
boxes
,
iou_threshold
,
output_size
,
idx
+
1
return
boxes
,
iou_threshold
,
output_size
,
idx
+
1
def
sorted_non_max_suppression_padded
(
scores
,
def
sorted_non_max_suppression_padded
(
scores
,
boxes
,
max_output_size
,
boxes
,
max_output_size
,
iou_threshold
):
iou_threshold
):
"""A wrapper that handles non-maximum suppression.
"""A wrapper that handles non-maximum suppression.
...
@@ -177,19 +174,18 @@ def sorted_non_max_suppression_padded(scores,
...
@@ -177,19 +174,18 @@ def sorted_non_max_suppression_padded(scores,
idx
<
num_boxes
//
NMS_TILE_SIZE
)
idx
<
num_boxes
//
NMS_TILE_SIZE
)
selected_boxes
,
_
,
output_size
,
_
=
tf
.
while_loop
(
selected_boxes
,
_
,
output_size
,
_
=
tf
.
while_loop
(
_loop_cond
,
_suppression_loop_body
,
[
_loop_cond
,
_suppression_loop_body
,
boxes
,
iou_threshold
,
[
boxes
,
iou_threshold
,
tf
.
zeros
([
batch_size
],
tf
.
int32
),
tf
.
zeros
([
batch_size
],
tf
.
int32
),
tf
.
constant
(
0
)
tf
.
constant
(
0
)])
])
idx
=
num_boxes
-
tf
.
cast
(
idx
=
num_boxes
-
tf
.
cast
(
tf
.
nn
.
top_k
(
tf
.
nn
.
top_k
(
tf
.
cast
(
tf
.
reduce_any
(
selected_boxes
>
0
,
[
2
]),
tf
.
int32
)
*
tf
.
cast
(
tf
.
reduce_any
(
selected_boxes
>
0
,
[
2
]),
tf
.
int32
)
*
tf
.
expand_dims
(
tf
.
range
(
num_boxes
,
0
,
-
1
),
0
),
max_output_size
)[
0
],
tf
.
expand_dims
(
tf
.
range
(
num_boxes
,
0
,
-
1
),
0
),
max_output_size
)[
0
],
tf
.
int32
)
tf
.
int32
)
idx
=
tf
.
minimum
(
idx
,
num_boxes
-
1
)
idx
=
tf
.
minimum
(
idx
,
num_boxes
-
1
)
idx
=
tf
.
reshape
(
idx
=
tf
.
reshape
(
idx
+
tf
.
reshape
(
tf
.
range
(
batch_size
)
*
num_boxes
,
[
-
1
,
1
]),
idx
+
tf
.
reshape
(
tf
.
range
(
batch_size
)
*
num_boxes
,
[
-
1
,
1
]),
[
-
1
])
[
-
1
])
boxes
=
tf
.
reshape
(
boxes
=
tf
.
reshape
(
tf
.
gather
(
tf
.
reshape
(
boxes
,
[
-
1
,
4
]),
idx
),
tf
.
gather
(
tf
.
reshape
(
boxes
,
[
-
1
,
4
]),
idx
),
[
batch_size
,
max_output_size
,
4
])
[
batch_size
,
max_output_size
,
4
])
...
...
official/vision/detection/ops/postprocess_ops.py
View file @
999fae62
...
@@ -19,6 +19,7 @@ from __future__ import division
...
@@ -19,6 +19,7 @@ from __future__ import division
from
__future__
import
print_function
from
__future__
import
print_function
import
functools
import
functools
import
tensorflow
as
tf
import
tensorflow
as
tf
from
official.vision.detection.ops
import
nms
from
official.vision.detection.ops
import
nms
...
@@ -202,15 +203,14 @@ def _generate_detections_per_image(boxes,
...
@@ -202,15 +203,14 @@ def _generate_detections_per_image(boxes,
scores_i
,
k
=
tf
.
minimum
(
tf
.
shape
(
input
=
scores_i
)[
-
1
],
pre_nms_num_boxes
))
scores_i
,
k
=
tf
.
minimum
(
tf
.
shape
(
input
=
scores_i
)[
-
1
],
pre_nms_num_boxes
))
boxes_i
=
tf
.
gather
(
boxes_i
,
indices
)
boxes_i
=
tf
.
gather
(
boxes_i
,
indices
)
(
nmsed_indices_i
,
(
nmsed_indices_i
,
nmsed_num_valid_i
)
=
tf
.
image
.
non_max_suppression_padded
(
nmsed_num_valid_i
)
=
tf
.
image
.
non_max_suppression_padded
(
tf
.
cast
(
boxes_i
,
tf
.
float32
),
tf
.
cast
(
boxes_i
,
tf
.
float32
),
tf
.
cast
(
scores_i
,
tf
.
float32
),
tf
.
cast
(
scores_i
,
tf
.
float32
),
max_total_size
,
max_total_size
,
iou_threshold
=
nms_iou_threshold
,
iou_threshold
=
nms_iou_threshold
,
score_threshold
=
score_threshold
,
score_threshold
=
score_threshold
,
pad_to_max_output_size
=
True
,
pad_to_max_output_size
=
True
,
name
=
'nms_detections_'
+
str
(
i
))
name
=
'nms_detections_'
+
str
(
i
))
nmsed_boxes_i
=
tf
.
gather
(
boxes_i
,
nmsed_indices_i
)
nmsed_boxes_i
=
tf
.
gather
(
boxes_i
,
nmsed_indices_i
)
nmsed_scores_i
=
tf
.
gather
(
scores_i
,
nmsed_indices_i
)
nmsed_scores_i
=
tf
.
gather
(
scores_i
,
nmsed_indices_i
)
# Sets scores of invalid boxes to -1.
# Sets scores of invalid boxes to -1.
...
@@ -235,11 +235,8 @@ def _generate_detections_per_image(boxes,
...
@@ -235,11 +235,8 @@ def _generate_detections_per_image(boxes,
return
nmsed_boxes
,
nmsed_scores
,
nmsed_classes
,
valid_detections
return
nmsed_boxes
,
nmsed_scores
,
nmsed_classes
,
valid_detections
def
_generate_detections_batched
(
boxes
,
def
_generate_detections_batched
(
boxes
,
scores
,
max_total_size
,
scores
,
nms_iou_threshold
,
score_threshold
):
max_total_size
,
nms_iou_threshold
,
score_threshold
):
"""Generates detected boxes with scores and classes for one-stage detector.
"""Generates detected boxes with scores and classes for one-stage detector.
The function takes output of multi-level ConvNets and anchor boxes and
The function takes output of multi-level ConvNets and anchor boxes and
...
@@ -247,19 +244,20 @@ def _generate_detections_batched(boxes,
...
@@ -247,19 +244,20 @@ def _generate_detections_batched(boxes,
supported on TPU currently.
supported on TPU currently.
Args:
Args:
boxes: a tensor with shape [batch_size, N, num_classes, 4] or
boxes: a tensor with shape [batch_size, N, num_classes, 4] or
[batch_size,
[batch_size,
N, 1, 4], which box predictions on all feature levels. The N
N, 1, 4], which box predictions on all feature levels. The N
is the number
is the number
of total anchors on all levels.
of total anchors on all levels.
scores: a tensor with shape [batch_size, N, num_classes], which
scores: a tensor with shape [batch_size, N, num_classes], which
stacks class
stacks class
probability on all feature levels. The N is the number of
probability on all feature levels. The N is the number of
total anchors on
total anchors on
all levels. The num_classes is the number of classes
all levels. The num_classes is the number of classes
predicted by the
predicted by the
model. Note that the class_outputs here is the raw score.
model. Note that the class_outputs here is the raw score.
max_total_size: a scalar representing maximum number of boxes retained over
max_total_size: a scalar representing maximum number of boxes retained over
all classes.
all classes.
nms_iou_threshold: a float representing the threshold for deciding whether
nms_iou_threshold: a float representing the threshold for deciding whether
boxes overlap too much with respect to IOU.
boxes overlap too much with respect to IOU.
score_threshold: a float representing the threshold for deciding when to
score_threshold: a float representing the threshold for deciding when to
remove boxes based on score.
remove boxes based on score.
Returns:
Returns:
nms_boxes: `float` Tensor of shape [batch_size, max_total_size, 4]
nms_boxes: `float` Tensor of shape [batch_size, max_total_size, 4]
representing top detected boxes in [y1, x1, y2, x2].
representing top detected boxes in [y1, x1, y2, x2].
...
@@ -285,7 +283,8 @@ def _generate_detections_batched(boxes,
...
@@ -285,7 +283,8 @@ def _generate_detections_batched(boxes,
max_total_size
=
max_total_size
,
max_total_size
=
max_total_size
,
iou_threshold
=
nms_iou_threshold
,
iou_threshold
=
nms_iou_threshold
,
score_threshold
=
score_threshold
,
score_threshold
=
score_threshold
,
pad_per_class
=
False
,)
pad_per_class
=
False
,
)
# De-normalizes box cooridinates.
# De-normalizes box cooridinates.
nmsed_boxes
*=
normalizer
nmsed_boxes
*=
normalizer
nmsed_classes
=
tf
.
cast
(
nmsed_classes
,
tf
.
int32
)
nmsed_classes
=
tf
.
cast
(
nmsed_classes
,
tf
.
int32
)
...
@@ -382,16 +381,13 @@ class GenericDetectionGenerator(object):
...
@@ -382,16 +381,13 @@ class GenericDetectionGenerator(object):
box_outputs
=
tf
.
reshape
(
box_outputs
=
tf
.
reshape
(
box_outputs
,
box_outputs
,
tf
.
stack
([
batch_size
,
num_locations
,
num_classes
,
4
],
axis
=-
1
))
tf
.
stack
([
batch_size
,
num_locations
,
num_classes
,
4
],
axis
=-
1
))
box_outputs
=
tf
.
slice
(
box_outputs
=
tf
.
slice
(
box_outputs
,
[
0
,
0
,
1
,
0
],
[
-
1
,
-
1
,
-
1
,
-
1
])
box_outputs
,
[
0
,
0
,
1
,
0
],
[
-
1
,
-
1
,
-
1
,
-
1
])
anchor_boxes
=
tf
.
tile
(
anchor_boxes
=
tf
.
tile
(
tf
.
expand_dims
(
anchor_boxes
,
axis
=
2
),
[
1
,
1
,
num_classes
-
1
,
1
])
tf
.
expand_dims
(
anchor_boxes
,
axis
=
2
),
[
1
,
1
,
num_classes
-
1
,
1
])
box_outputs
=
tf
.
reshape
(
box_outputs
=
tf
.
reshape
(
box_outputs
,
box_outputs
,
tf
.
stack
([
batch_size
,
num_detections
,
4
],
axis
=-
1
))
tf
.
stack
([
batch_size
,
num_detections
,
4
],
axis
=-
1
))
anchor_boxes
=
tf
.
reshape
(
anchor_boxes
=
tf
.
reshape
(
anchor_boxes
,
anchor_boxes
,
tf
.
stack
([
batch_size
,
num_detections
,
4
],
axis
=-
1
))
tf
.
stack
([
batch_size
,
num_detections
,
4
],
axis
=-
1
))
# Box decoding.
# Box decoding.
decoded_boxes
=
box_utils
.
decode_boxes
(
decoded_boxes
=
box_utils
.
decode_boxes
(
...
...
official/vision/detection/ops/roi_ops.py
View file @
999fae62
...
@@ -56,8 +56,8 @@ def multilevel_propose_rois(rpn_boxes,
...
@@ -56,8 +56,8 @@ def multilevel_propose_rois(rpn_boxes,
rpn_scores: a dict with keys representing FPN levels and values representing
rpn_scores: a dict with keys representing FPN levels and values representing
logit tensors of shape [batch_size, feature_h, feature_w, num_anchors].
logit tensors of shape [batch_size, feature_h, feature_w, num_anchors].
anchor_boxes: a dict with keys representing FPN levels and values
anchor_boxes: a dict with keys representing FPN levels and values
representing anchor box tensors of shape
representing anchor box tensors of shape
[batch_size, feature_h,
[batch_size, feature_h,
feature_w, num_anchors * 4].
feature_w, num_anchors * 4].
image_shape: a tensor of shape [batch_size, 2] where the last dimension are
image_shape: a tensor of shape [batch_size, 2] where the last dimension are
[height, width] of the scaled image.
[height, width] of the scaled image.
rpn_pre_nms_top_k: an integer of top scoring RPN proposals *per level* to
rpn_pre_nms_top_k: an integer of top scoring RPN proposals *per level* to
...
@@ -112,17 +112,14 @@ def multilevel_propose_rois(rpn_boxes,
...
@@ -112,17 +112,14 @@ def multilevel_propose_rois(rpn_boxes,
this_level_scores
=
tf
.
sigmoid
(
this_level_scores
)
this_level_scores
=
tf
.
sigmoid
(
this_level_scores
)
if
decode_boxes
:
if
decode_boxes
:
this_level_boxes
=
box_utils
.
decode_boxes
(
this_level_boxes
=
box_utils
.
decode_boxes
(
this_level_boxes
,
this_level_boxes
,
this_level_anchors
)
this_level_anchors
)
if
clip_boxes
:
if
clip_boxes
:
this_level_boxes
=
box_utils
.
clip_boxes
(
this_level_boxes
=
box_utils
.
clip_boxes
(
this_level_boxes
,
image_shape
)
this_level_boxes
,
image_shape
)
if
rpn_min_size_threshold
>
0.0
:
if
rpn_min_size_threshold
>
0.0
:
this_level_boxes
,
this_level_scores
=
box_utils
.
filter_boxes
(
this_level_boxes
,
this_level_scores
=
box_utils
.
filter_boxes
(
this_level_boxes
,
this_level_boxes
,
this_level_scores
,
image_shape
,
this_level_scores
,
image_shape
,
rpn_min_size_threshold
)
rpn_min_size_threshold
)
this_level_pre_nms_top_k
=
min
(
num_boxes
,
rpn_pre_nms_top_k
)
this_level_pre_nms_top_k
=
min
(
num_boxes
,
rpn_pre_nms_top_k
)
...
@@ -142,8 +139,9 @@ def multilevel_propose_rois(rpn_boxes,
...
@@ -142,8 +139,9 @@ def multilevel_propose_rois(rpn_boxes,
else
:
else
:
if
rpn_score_threshold
>
0.0
:
if
rpn_score_threshold
>
0.0
:
this_level_boxes
,
this_level_scores
=
(
this_level_boxes
,
this_level_scores
=
(
box_utils
.
filter_boxes_by_scores
(
box_utils
.
filter_boxes_by_scores
(
this_level_boxes
,
this_level_boxes
,
this_level_scores
,
rpn_score_threshold
))
this_level_scores
,
rpn_score_threshold
))
this_level_boxes
,
this_level_scores
=
box_utils
.
top_k_boxes
(
this_level_boxes
,
this_level_scores
=
box_utils
.
top_k_boxes
(
this_level_boxes
,
this_level_scores
,
k
=
this_level_pre_nms_top_k
)
this_level_boxes
,
this_level_scores
,
k
=
this_level_pre_nms_top_k
)
this_level_roi_scores
,
this_level_rois
=
(
this_level_roi_scores
,
this_level_rois
=
(
...
@@ -154,9 +152,7 @@ def multilevel_propose_rois(rpn_boxes,
...
@@ -154,9 +152,7 @@ def multilevel_propose_rois(rpn_boxes,
iou_threshold
=
rpn_nms_threshold
))
iou_threshold
=
rpn_nms_threshold
))
else
:
else
:
this_level_rois
,
this_level_roi_scores
=
box_utils
.
top_k_boxes
(
this_level_rois
,
this_level_roi_scores
=
box_utils
.
top_k_boxes
(
this_level_rois
,
this_level_rois
,
this_level_scores
,
k
=
this_level_post_nms_top_k
)
this_level_scores
,
k
=
this_level_post_nms_top_k
)
rois
.
append
(
this_level_rois
)
rois
.
append
(
this_level_rois
)
roi_scores
.
append
(
this_level_roi_scores
)
roi_scores
.
append
(
this_level_roi_scores
)
...
@@ -199,8 +195,8 @@ class ROIGenerator(object):
...
@@ -199,8 +195,8 @@ class ROIGenerator(object):
scores: a dict with keys representing FPN levels and values representing
scores: a dict with keys representing FPN levels and values representing
logit tensors of shape [batch_size, feature_h, feature_w, num_anchors].
logit tensors of shape [batch_size, feature_h, feature_w, num_anchors].
anchor_boxes: a dict with keys representing FPN levels and values
anchor_boxes: a dict with keys representing FPN levels and values
representing anchor box tensors of shape
representing anchor box tensors of shape
[batch_size, feature_h,
[batch_size, feature_h,
feature_w, num_anchors * 4].
feature_w, num_anchors * 4].
image_shape: a tensor of shape [batch_size, 2] where the last dimension
image_shape: a tensor of shape [batch_size, 2] where the last dimension
are [height, width] of the scaled image.
are [height, width] of the scaled image.
is_training: a bool indicating whether it is in training or inference
is_training: a bool indicating whether it is in training or inference
...
@@ -220,16 +216,16 @@ class ROIGenerator(object):
...
@@ -220,16 +216,16 @@ class ROIGenerator(object):
scores
,
scores
,
anchor_boxes
,
anchor_boxes
,
image_shape
,
image_shape
,
rpn_pre_nms_top_k
=
(
self
.
_rpn_pre_nms_top_k
if
is_training
rpn_pre_nms_top_k
=
(
self
.
_rpn_pre_nms_top_k
else
self
.
_test_rpn_pre_nms_top_k
),
if
is_training
else
self
.
_test_rpn_pre_nms_top_k
),
rpn_post_nms_top_k
=
(
self
.
_rpn_post_nms_top_k
if
is_training
rpn_post_nms_top_k
=
(
self
.
_rpn_post_nms_top_k
else
self
.
_test_rpn_post_nms_top_k
),
if
is_training
else
self
.
_test_rpn_post_nms_top_k
),
rpn_nms_threshold
=
(
self
.
_rpn_nms_threshold
if
is_training
rpn_nms_threshold
=
(
self
.
_rpn_nms_threshold
else
self
.
_test_rpn_nms_threshold
),
if
is_training
else
self
.
_test_rpn_nms_threshold
),
rpn_score_threshold
=
(
self
.
_rpn_score_threshold
if
is_training
rpn_score_threshold
=
(
self
.
_rpn_score_threshold
if
is_training
else
else
self
.
_test_rpn_score_threshold
),
self
.
_test_rpn_score_threshold
),
rpn_min_size_threshold
=
(
self
.
_rpn_min_size_threshold
if
is_training
rpn_min_size_threshold
=
(
self
.
_rpn_min_size_threshold
if
is_training
else
else
self
.
_test_rpn_min_size_threshold
),
self
.
_test_rpn_min_size_threshold
),
decode_boxes
=
True
,
decode_boxes
=
True
,
clip_boxes
=
True
,
clip_boxes
=
True
,
use_batched_nms
=
self
.
_use_batched_nms
,
use_batched_nms
=
self
.
_use_batched_nms
,
...
...
official/vision/detection/ops/spatial_transform_ops.py
View file @
999fae62
...
@@ -20,7 +20,6 @@ from __future__ import print_function
...
@@ -20,7 +20,6 @@ from __future__ import print_function
import
tensorflow
as
tf
import
tensorflow
as
tf
_EPSILON
=
1e-8
_EPSILON
=
1e-8
...
@@ -30,6 +29,7 @@ def nearest_upsampling(data, scale):
...
@@ -30,6 +29,7 @@ def nearest_upsampling(data, scale):
Args:
Args:
data: A tensor with a shape of [batch, height_in, width_in, channels].
data: A tensor with a shape of [batch, height_in, width_in, channels].
scale: An integer multiple to scale resolution of input data.
scale: An integer multiple to scale resolution of input data.
Returns:
Returns:
data_up: A tensor with a shape of
data_up: A tensor with a shape of
[batch, height_in*scale, width_in*scale, channels]. Same dtype as input
[batch, height_in*scale, width_in*scale, channels]. Same dtype as input
...
@@ -382,8 +382,7 @@ def multilevel_crop_and_resize(features, boxes, output_size=7):
...
@@ -382,8 +382,7 @@ def multilevel_crop_and_resize(features, boxes, output_size=7):
areas_sqrt
=
tf
.
sqrt
(
box_height
*
box_width
)
areas_sqrt
=
tf
.
sqrt
(
box_height
*
box_width
)
levels
=
tf
.
cast
(
levels
=
tf
.
cast
(
tf
.
math
.
floordiv
(
tf
.
math
.
floordiv
(
tf
.
math
.
log
(
tf
.
divide
(
areas_sqrt
,
224.0
)),
tf
.
math
.
log
(
2.0
))
+
tf
.
math
.
log
(
tf
.
divide
(
areas_sqrt
,
224.0
)),
tf
.
math
.
log
(
2.0
))
+
4.0
,
4.0
,
dtype
=
tf
.
int32
)
dtype
=
tf
.
int32
)
# Maps levels between [min_level, max_level].
# Maps levels between [min_level, max_level].
levels
=
tf
.
minimum
(
max_level
,
tf
.
maximum
(
levels
,
min_level
))
levels
=
tf
.
minimum
(
max_level
,
tf
.
maximum
(
levels
,
min_level
))
...
@@ -395,9 +394,12 @@ def multilevel_crop_and_resize(features, boxes, output_size=7):
...
@@ -395,9 +394,12 @@ def multilevel_crop_and_resize(features, boxes, output_size=7):
boxes
/=
tf
.
expand_dims
(
scale_to_level
,
axis
=
2
)
boxes
/=
tf
.
expand_dims
(
scale_to_level
,
axis
=
2
)
box_width
/=
scale_to_level
box_width
/=
scale_to_level
box_height
/=
scale_to_level
box_height
/=
scale_to_level
boxes
=
tf
.
concat
([
boxes
[:,
:,
0
:
2
],
boxes
=
tf
.
concat
([
tf
.
expand_dims
(
box_height
,
-
1
),
boxes
[:,
:,
0
:
2
],
tf
.
expand_dims
(
box_width
,
-
1
)],
axis
=-
1
)
tf
.
expand_dims
(
box_height
,
-
1
),
tf
.
expand_dims
(
box_width
,
-
1
)
],
axis
=-
1
)
# Maps levels to [0, max_level-min_level].
# Maps levels to [0, max_level-min_level].
levels
-=
min_level
levels
-=
min_level
...
@@ -464,12 +466,12 @@ def single_level_feature_crop(features, level_boxes, detection_prior_levels,
...
@@ -464,12 +466,12 @@ def single_level_feature_crop(features, level_boxes, detection_prior_levels,
Args:
Args:
features: a float tensor of shape [batch_size, num_levels,
features: a float tensor of shape [batch_size, num_levels,
max_feature_size,
max_feature_size,
max_feature_size,
num_downsample_channels].
max_feature_size, num_downsample_channels].
level_boxes: a float Tensor of the level boxes to crop from.
level_boxes: a float Tensor of the level boxes to crop from.
[batch_size,
[batch_size,
num_instances, 4].
num_instances, 4].
detection_prior_levels: an int Tensor of instance assigned level of shape
detection_prior_levels: an int Tensor of instance assigned level of shape
[batch_size, num_instances].
[batch_size, num_instances].
min_mask_level: minimum FPN level to crop mask feature from.
min_mask_level: minimum FPN level to crop mask feature from.
mask_crop_size: an int of mask crop size.
mask_crop_size: an int of mask crop size.
...
@@ -478,8 +480,8 @@ def single_level_feature_crop(features, level_boxes, detection_prior_levels,
...
@@ -478,8 +480,8 @@ def single_level_feature_crop(features, level_boxes, detection_prior_levels,
mask_crop_size, mask_crop_size, num_downsample_channels]. This is the
mask_crop_size, mask_crop_size, num_downsample_channels]. This is the
instance feature crop.
instance feature crop.
"""
"""
(
batch_size
,
num_levels
,
max_feature_size
,
(
batch_size
,
num_levels
,
max_feature_size
,
_
,
_
,
num_downsample_channels
)
=
features
.
get_shape
().
as_list
()
num_downsample_channels
)
=
features
.
get_shape
().
as_list
()
_
,
num_of_instances
,
_
=
level_boxes
.
get_shape
().
as_list
()
_
,
num_of_instances
,
_
=
level_boxes
.
get_shape
().
as_list
()
level_boxes
=
tf
.
cast
(
level_boxes
,
tf
.
int32
)
level_boxes
=
tf
.
cast
(
level_boxes
,
tf
.
int32
)
assert
num_of_instances
==
detection_prior_levels
.
get_shape
().
as_list
()[
1
]
assert
num_of_instances
==
detection_prior_levels
.
get_shape
().
as_list
()[
1
]
...
@@ -503,32 +505,25 @@ def single_level_feature_crop(features, level_boxes, detection_prior_levels,
...
@@ -503,32 +505,25 @@ def single_level_feature_crop(features, level_boxes, detection_prior_levels,
indices
=
tf
.
reshape
(
indices
=
tf
.
reshape
(
tf
.
tile
(
tf
.
tile
(
tf
.
reshape
(
tf
.
reshape
(
tf
.
range
(
batch_size
)
*
batch_dim_size
,
tf
.
range
(
batch_size
)
*
batch_dim_size
,
[
batch_size
,
1
,
1
,
1
]),
[
batch_size
,
1
,
1
,
1
]),
[
1
,
num_of_instances
,
mask_crop_size
,
mask_crop_size
])
+
tf
.
tile
(
[
1
,
num_of_instances
,
tf
.
reshape
(
levels
*
level_dim_size
,
mask_crop_size
,
mask_crop_size
])
+
[
batch_size
,
num_of_instances
,
1
,
1
]),
tf
.
tile
(
[
1
,
1
,
mask_crop_size
,
mask_crop_size
])
+
tf
.
tile
(
tf
.
reshape
(
levels
*
level_dim_size
,
tf
.
reshape
(
y_indices
*
height_dim_size
,
[
batch_size
,
num_of_instances
,
1
,
1
]),
[
batch_size
,
num_of_instances
,
mask_crop_size
,
1
]),
[
1
,
1
,
mask_crop_size
,
mask_crop_size
])
+
[
1
,
1
,
1
,
mask_crop_size
])
+
tf
.
tile
(
tf
.
reshape
(
y_indices
*
height_dim_size
,
[
batch_size
,
num_of_instances
,
mask_crop_size
,
1
]),
[
1
,
1
,
1
,
mask_crop_size
])
+
tf
.
tile
(
tf
.
tile
(
tf
.
reshape
(
x_indices
,
tf
.
reshape
(
x_indices
,
[
batch_size
,
num_of_instances
,
[
batch_size
,
num_of_instances
,
1
,
mask_crop_size
]),
1
,
mask_crop_size
]),
[
1
,
1
,
mask_crop_size
,
1
]),
[
-
1
])
[
1
,
1
,
mask_crop_size
,
1
]),
[
-
1
])
features_r2
=
tf
.
reshape
(
features
,
features_r2
=
tf
.
reshape
(
features
,
[
-
1
,
num_downsample_channels
])
[
-
1
,
num_downsample_channels
])
crop_features
=
tf
.
reshape
(
crop_features
=
tf
.
reshape
(
tf
.
gather
(
features_r2
,
indices
),
tf
.
gather
(
features_r2
,
indices
),
[
[
batch_size
*
num_of_instances
,
batch_size
*
num_of_instances
,
mask_crop_size
,
mask_crop_size
,
mask_crop_size
,
mask_crop_size
,
num_downsample_channels
num_downsample_channels
])
])
return
crop_features
return
crop_features
...
@@ -546,9 +541,9 @@ def crop_mask_in_target_box(masks,
...
@@ -546,9 +541,9 @@ def crop_mask_in_target_box(masks,
boxes: a float tensor representing box coordinates that tightly enclose
boxes: a float tensor representing box coordinates that tightly enclose
masks with a shape of [batch_size, num_masks, 4] in un-normalized
masks with a shape of [batch_size, num_masks, 4] in un-normalized
coordinates. A box is represented by [ymin, xmin, ymax, xmax].
coordinates. A box is represented by [ymin, xmin, ymax, xmax].
target_boxes: a float tensor representing target box coordinates for
target_boxes: a float tensor representing target box coordinates for
masks
masks
with a shape of [batch_size, num_masks, 4] in un-normalized
with a shape of [batch_size, num_masks, 4] in un-normalized
coordinates. A
coordinates. A
box is represented by [ymin, xmin, ymax, xmax].
box is represented by [ymin, xmin, ymax, xmax].
output_size: A scalar to indicate the output crop size. It currently only
output_size: A scalar to indicate the output crop size. It currently only
supports square-shaped outputs.
supports square-shaped outputs.
sample_offset: a float number in [0, 1] indicates the subpixel sample offset
sample_offset: a float number in [0, 1] indicates the subpixel sample offset
...
@@ -561,10 +556,10 @@ def crop_mask_in_target_box(masks,
...
@@ -561,10 +556,10 @@ def crop_mask_in_target_box(masks,
"""
"""
with
tf
.
name_scope
(
'crop_mask_in_target_box'
):
with
tf
.
name_scope
(
'crop_mask_in_target_box'
):
batch_size
,
num_masks
,
height
,
width
=
masks
.
get_shape
().
as_list
()
batch_size
,
num_masks
,
height
,
width
=
masks
.
get_shape
().
as_list
()
masks
=
tf
.
reshape
(
masks
,
[
batch_size
*
num_masks
,
height
,
width
,
1
])
masks
=
tf
.
reshape
(
masks
,
[
batch_size
*
num_masks
,
height
,
width
,
1
])
# Pad zeros on the boundary of masks.
# Pad zeros on the boundary of masks.
masks
=
tf
.
image
.
pad_to_bounding_box
(
masks
,
2
,
2
,
height
+
4
,
width
+
4
)
masks
=
tf
.
image
.
pad_to_bounding_box
(
masks
,
2
,
2
,
height
+
4
,
width
+
4
)
masks
=
tf
.
reshape
(
masks
,
[
batch_size
,
num_masks
,
height
+
4
,
width
+
4
,
1
])
masks
=
tf
.
reshape
(
masks
,
[
batch_size
,
num_masks
,
height
+
4
,
width
+
4
,
1
])
# Projects target box locations and sizes to corresponding cropped
# Projects target box locations and sizes to corresponding cropped
# mask coordinates.
# mask coordinates.
...
@@ -572,10 +567,10 @@ def crop_mask_in_target_box(masks,
...
@@ -572,10 +567,10 @@ def crop_mask_in_target_box(masks,
value
=
boxes
,
num_or_size_splits
=
4
,
axis
=
2
)
value
=
boxes
,
num_or_size_splits
=
4
,
axis
=
2
)
bb_y_min
,
bb_x_min
,
bb_y_max
,
bb_x_max
=
tf
.
split
(
bb_y_min
,
bb_x_min
,
bb_y_max
,
bb_x_max
=
tf
.
split
(
value
=
target_boxes
,
num_or_size_splits
=
4
,
axis
=
2
)
value
=
target_boxes
,
num_or_size_splits
=
4
,
axis
=
2
)
y_transform
=
(
bb_y_min
-
gt_y_min
)
*
height
/
(
y_transform
=
(
bb_y_min
-
gt_y_min
)
*
height
/
(
gt_y_max
-
gt_y_min
+
gt_y_max
-
gt_y_min
+
_EPSILON
)
+
2
_EPSILON
)
+
2
x_transform
=
(
bb_x_min
-
gt_x_min
)
*
height
/
(
x_transform
=
(
bb_x_min
-
gt_x_min
)
*
height
/
(
gt_x_max
-
gt_x_min
+
gt_x_max
-
gt_x_min
+
_EPSILON
)
+
2
_EPSILON
)
+
2
h_transform
=
(
bb_y_max
-
bb_y_min
)
*
width
/
(
h_transform
=
(
bb_y_max
-
bb_y_min
)
*
width
/
(
gt_y_max
-
gt_y_min
+
_EPSILON
)
gt_y_max
-
gt_y_min
+
_EPSILON
)
w_transform
=
(
bb_x_max
-
bb_x_min
)
*
width
/
(
w_transform
=
(
bb_x_max
-
bb_x_min
)
*
width
/
(
...
@@ -592,8 +587,8 @@ def crop_mask_in_target_box(masks,
...
@@ -592,8 +587,8 @@ def crop_mask_in_target_box(masks,
# Reshape tensors to have the right shape for selective_crop_and_resize.
# Reshape tensors to have the right shape for selective_crop_and_resize.
trasnformed_boxes
=
tf
.
concat
(
trasnformed_boxes
=
tf
.
concat
(
[
y_transform
,
x_transform
,
h_transform
,
w_transform
],
-
1
)
[
y_transform
,
x_transform
,
h_transform
,
w_transform
],
-
1
)
levels
=
tf
.
tile
(
tf
.
reshape
(
tf
.
range
(
num_masks
),
[
1
,
num_masks
]),
levels
=
tf
.
tile
(
[
batch_size
,
1
])
tf
.
reshape
(
tf
.
range
(
num_masks
),
[
1
,
num_masks
]),
[
batch_size
,
1
])
cropped_masks
=
selective_crop_and_resize
(
cropped_masks
=
selective_crop_and_resize
(
masks
,
masks
,
...
...
official/vision/detection/ops/target_ops.py
View file @
999fae62
...
@@ -87,18 +87,16 @@ def box_matching(boxes, gt_boxes, gt_classes):
...
@@ -87,18 +87,16 @@ def box_matching(boxes, gt_boxes, gt_classes):
matched_gt_boxes
)
matched_gt_boxes
)
matched_gt_classes
=
tf
.
gather_nd
(
gt_classes
,
gather_nd_indices
)
matched_gt_classes
=
tf
.
gather_nd
(
gt_classes
,
gather_nd_indices
)
matched_gt_classes
=
tf
.
where
(
matched_gt_classes
=
tf
.
where
(
background_box_mask
,
background_box_mask
,
tf
.
zeros_like
(
matched_gt_classes
),
tf
.
zeros_like
(
matched_gt_classes
),
matched_gt_classes
)
matched_gt_classes
)
matched_gt_indices
=
tf
.
where
(
matched_gt_indices
=
tf
.
where
(
background_box_mask
,
background_box_mask
,
-
tf
.
ones_like
(
argmax_iou_indices
),
-
tf
.
ones_like
(
argmax_iou_indices
),
argmax_iou_indices
)
argmax_iou_indices
)
return
(
matched_gt_boxes
,
matched_gt_classes
,
matched_gt_indices
,
return
(
matched_gt_boxes
,
matched_gt_classes
,
matched_gt_indices
,
matched_iou
,
matched_iou
,
iou
)
iou
)
def
assign_and_sample_proposals
(
proposed_boxes
,
def
assign_and_sample_proposals
(
proposed_boxes
,
...
@@ -121,22 +119,21 @@ def assign_and_sample_proposals(proposed_boxes,
...
@@ -121,22 +119,21 @@ def assign_and_sample_proposals(proposed_boxes,
returns box_targets, class_targets, and RoIs.
returns box_targets, class_targets, and RoIs.
Args:
Args:
proposed_boxes: a tensor of shape of [batch_size, N, 4]. N is the number
proposed_boxes: a tensor of shape of [batch_size, N, 4]. N is the number of
of proposals before groundtruth assignment. The last dimension is the
proposals before groundtruth assignment. The last dimension is the box
box coordinates w.r.t. the scaled images in [ymin, xmin, ymax, xmax]
coordinates w.r.t. the scaled images in [ymin, xmin, ymax, xmax] format.
format.
gt_boxes: a tensor of shape of [batch_size, MAX_NUM_INSTANCES, 4]. The
gt_boxes: a tensor of shape of [batch_size, MAX_NUM_INSTANCES, 4].
coordinates of gt_boxes are in the pixel coordinates of the scaled image.
The coordinates of gt_boxes are in the pixel coordinates of the scaled
This tensor might have padding of values -1 indicating the invalid box
image. This tensor might have padding of values -1 indicating the invalid
coordinates.
box coordinates.
gt_classes: a tensor with a shape of [batch_size, MAX_NUM_INSTANCES]. This
gt_classes: a tensor with a shape of [batch_size, MAX_NUM_INSTANCES]. This
tensor might have paddings with values of -1 indicating the invalid
tensor might have paddings with values of -1 indicating the invalid
classes.
classes.
num_samples_per_image: an integer representing the RoI minibatch size per image.
num_samples_per_image: an integer representing the RoI minibatch size per image.
mix_gt_boxes: a bool indicating whether to mix the groundtruth boxes before
mix_gt_boxes: a bool indicating whether to mix the groundtruth boxes before
sampling proposals.
sampling proposals.
fg_fraction: a float represents the target fraction of RoI minibatch that
fg_fraction: a float represents the target fraction of RoI minibatch that
is
is
labeled foreground (i.e., class > 0).
labeled foreground (i.e., class > 0).
fg_iou_thresh: a float represents the IoU overlap threshold for an RoI to be
fg_iou_thresh: a float represents the IoU overlap threshold for an RoI to be
considered foreground (if >= fg_iou_thresh).
considered foreground (if >= fg_iou_thresh).
bg_iou_thresh_hi: a float represents the IoU overlap threshold for an RoI to
bg_iou_thresh_hi: a float represents the IoU overlap threshold for an RoI to
...
@@ -163,8 +160,8 @@ def assign_and_sample_proposals(proposed_boxes,
...
@@ -163,8 +160,8 @@ def assign_and_sample_proposals(proposed_boxes,
else
:
else
:
boxes
=
proposed_boxes
boxes
=
proposed_boxes
(
matched_gt_boxes
,
matched_gt_classes
,
matched_gt_indices
,
(
matched_gt_boxes
,
matched_gt_classes
,
matched_gt_indices
,
matched_iou
,
matched_iou
,
_
)
=
box_matching
(
boxes
,
gt_boxes
,
gt_classes
)
_
)
=
box_matching
(
boxes
,
gt_boxes
,
gt_classes
)
positive_match
=
tf
.
greater
(
matched_iou
,
fg_iou_thresh
)
positive_match
=
tf
.
greater
(
matched_iou
,
fg_iou_thresh
)
negative_match
=
tf
.
logical_and
(
negative_match
=
tf
.
logical_and
(
...
@@ -173,10 +170,12 @@ def assign_and_sample_proposals(proposed_boxes,
...
@@ -173,10 +170,12 @@ def assign_and_sample_proposals(proposed_boxes,
ignored_match
=
tf
.
less
(
matched_iou
,
0.0
)
ignored_match
=
tf
.
less
(
matched_iou
,
0.0
)
# re-assign negatively matched boxes to the background class.
# re-assign negatively matched boxes to the background class.
matched_gt_classes
=
tf
.
where
(
matched_gt_classes
=
tf
.
where
(
negative_match
,
negative_match
,
tf
.
zeros_like
(
matched_gt_classes
),
matched_gt_classes
)
tf
.
zeros_like
(
matched_gt_classes
),
matched_gt_indices
=
tf
.
where
(
matched_gt_classes
)
negative_match
,
tf
.
zeros_like
(
matched_gt_indices
),
matched_gt_indices
)
matched_gt_indices
=
tf
.
where
(
negative_match
,
tf
.
zeros_like
(
matched_gt_indices
),
matched_gt_indices
)
sample_candidates
=
tf
.
logical_and
(
sample_candidates
=
tf
.
logical_and
(
tf
.
logical_or
(
positive_match
,
negative_match
),
tf
.
logical_or
(
positive_match
,
negative_match
),
...
@@ -189,8 +188,9 @@ def assign_and_sample_proposals(proposed_boxes,
...
@@ -189,8 +188,9 @@ def assign_and_sample_proposals(proposed_boxes,
batch_size
,
_
=
sample_candidates
.
get_shape
().
as_list
()
batch_size
,
_
=
sample_candidates
.
get_shape
().
as_list
()
sampled_indicators
=
[]
sampled_indicators
=
[]
for
i
in
range
(
batch_size
):
for
i
in
range
(
batch_size
):
sampled_indicator
=
sampler
.
subsample
(
sampled_indicator
=
sampler
.
subsample
(
sample_candidates
[
i
],
sample_candidates
[
i
],
num_samples_per_image
,
positive_match
[
i
])
num_samples_per_image
,
positive_match
[
i
])
sampled_indicators
.
append
(
sampled_indicator
)
sampled_indicators
.
append
(
sampled_indicator
)
sampled_indicators
=
tf
.
stack
(
sampled_indicators
)
sampled_indicators
=
tf
.
stack
(
sampled_indicators
)
_
,
sampled_indices
=
tf
.
nn
.
top_k
(
_
,
sampled_indices
=
tf
.
nn
.
top_k
(
...
@@ -206,10 +206,8 @@ def assign_and_sample_proposals(proposed_boxes,
...
@@ -206,10 +206,8 @@ def assign_and_sample_proposals(proposed_boxes,
sampled_rois
=
tf
.
gather_nd
(
boxes
,
gather_nd_indices
)
sampled_rois
=
tf
.
gather_nd
(
boxes
,
gather_nd_indices
)
sampled_gt_boxes
=
tf
.
gather_nd
(
matched_gt_boxes
,
gather_nd_indices
)
sampled_gt_boxes
=
tf
.
gather_nd
(
matched_gt_boxes
,
gather_nd_indices
)
sampled_gt_classes
=
tf
.
gather_nd
(
sampled_gt_classes
=
tf
.
gather_nd
(
matched_gt_classes
,
gather_nd_indices
)
matched_gt_classes
,
gather_nd_indices
)
sampled_gt_indices
=
tf
.
gather_nd
(
matched_gt_indices
,
gather_nd_indices
)
sampled_gt_indices
=
tf
.
gather_nd
(
matched_gt_indices
,
gather_nd_indices
)
return
(
sampled_rois
,
sampled_gt_boxes
,
sampled_gt_classes
,
return
(
sampled_rois
,
sampled_gt_boxes
,
sampled_gt_classes
,
sampled_gt_indices
)
sampled_gt_indices
)
...
@@ -237,8 +235,8 @@ def sample_and_crop_foreground_masks(candidate_rois,
...
@@ -237,8 +235,8 @@ def sample_and_crop_foreground_masks(candidate_rois,
candidate_gt_indices: a tensor of shape [batch_size, N], storing the
candidate_gt_indices: a tensor of shape [batch_size, N], storing the
corresponding groundtruth instance indices to the `candidate_gt_boxes`,
corresponding groundtruth instance indices to the `candidate_gt_boxes`,
i.e. gt_boxes[candidate_gt_indices[:, i]] = candidate_gt_boxes[:, i] and
i.e. gt_boxes[candidate_gt_indices[:, i]] = candidate_gt_boxes[:, i] and
gt_boxes which is of shape [batch_size, MAX_INSTANCES, 4], M >= N, is
the
gt_boxes which is of shape [batch_size, MAX_INSTANCES, 4], M >= N, is
superset of candidate_gt_boxes.
the
superset of candidate_gt_boxes.
gt_masks: a tensor of [batch_size, MAX_INSTANCES, mask_height, mask_width]
gt_masks: a tensor of [batch_size, MAX_INSTANCES, mask_height, mask_width]
containing all the groundtruth masks which sample masks are drawn from.
containing all the groundtruth masks which sample masks are drawn from.
num_mask_samples_per_image: an integer which specifies the number of masks
num_mask_samples_per_image: an integer which specifies the number of masks
...
@@ -266,27 +264,29 @@ def sample_and_crop_foreground_masks(candidate_rois,
...
@@ -266,27 +264,29 @@ def sample_and_crop_foreground_masks(candidate_rois,
tf
.
expand_dims
(
tf
.
range
(
fg_instance_indices_shape
[
0
]),
axis
=-
1
)
*
tf
.
expand_dims
(
tf
.
range
(
fg_instance_indices_shape
[
0
]),
axis
=-
1
)
*
tf
.
ones
([
1
,
fg_instance_indices_shape
[
-
1
]],
dtype
=
tf
.
int32
))
tf
.
ones
([
1
,
fg_instance_indices_shape
[
-
1
]],
dtype
=
tf
.
int32
))
gather_nd_instance_indices
=
tf
.
stack
(
gather_nd_instance_indices
=
tf
.
stack
([
batch_indices
,
fg_instance_indices
],
[
batch_indices
,
fg_instance_indices
],
axis
=-
1
)
axis
=-
1
)
foreground_rois
=
tf
.
gather_nd
(
foreground_rois
=
tf
.
gather_nd
(
candidate_rois
,
gather_nd_instance_indices
)
candidate_rois
,
gather_nd_instance_indices
)
foreground_boxes
=
tf
.
gather_nd
(
candidate_gt_boxes
,
foreground_boxes
=
tf
.
gather_nd
(
gather_nd_instance_indices
)
candidate_gt_boxes
,
gather_nd_instance_indices
)
foreground_classes
=
tf
.
gather_nd
(
candidate_gt_classes
,
foreground_classes
=
tf
.
gather_nd
(
gather_nd_instance_indices
)
candidate_gt_classes
,
gather_nd_instance_indices
)
foreground_gt_indices
=
tf
.
gather_nd
(
candidate_gt_indices
,
foreground_gt_indices
=
tf
.
gather_nd
(
gather_nd_instance_indices
)
candidate_gt_indices
,
gather_nd_instance_indices
)
foreground_gt_indices_shape
=
tf
.
shape
(
foreground_gt_indices
)
foreground_gt_indices_shape
=
tf
.
shape
(
foreground_gt_indices
)
batch_indices
=
(
batch_indices
=
(
tf
.
expand_dims
(
tf
.
range
(
foreground_gt_indices_shape
[
0
]),
axis
=-
1
)
*
tf
.
expand_dims
(
tf
.
range
(
foreground_gt_indices_shape
[
0
]),
axis
=-
1
)
*
tf
.
ones
([
1
,
foreground_gt_indices_shape
[
-
1
]],
dtype
=
tf
.
int32
))
tf
.
ones
([
1
,
foreground_gt_indices_shape
[
-
1
]],
dtype
=
tf
.
int32
))
gather_nd_gt_indices
=
tf
.
stack
(
gather_nd_gt_indices
=
tf
.
stack
(
[
batch_indices
,
foreground_gt_indices
],
[
batch_indices
,
foreground_gt_indices
],
axis
=-
1
)
axis
=-
1
)
foreground_masks
=
tf
.
gather_nd
(
gt_masks
,
gather_nd_gt_indices
)
foreground_masks
=
tf
.
gather_nd
(
gt_masks
,
gather_nd_gt_indices
)
cropped_foreground_masks
=
spatial_transform_ops
.
crop_mask_in_target_box
(
cropped_foreground_masks
=
spatial_transform_ops
.
crop_mask_in_target_box
(
foreground_masks
,
foreground_boxes
,
foreground_rois
,
mask_target_size
,
foreground_masks
,
foreground_boxes
,
foreground_rois
,
mask_target_size
,
sample_offset
=
0.5
)
sample_offset
=
0.5
)
return
foreground_rois
,
foreground_classes
,
cropped_foreground_masks
return
foreground_rois
,
foreground_classes
,
cropped_foreground_masks
...
@@ -307,12 +307,11 @@ class ROISampler(object):
...
@@ -307,12 +307,11 @@ class ROISampler(object):
"""Sample and assign RoIs for training.
"""Sample and assign RoIs for training.
Args:
Args:
rois: a tensor of shape of [batch_size, N, 4]. N is the number
rois: a tensor of shape of [batch_size, N, 4]. N is the number of
of proposals before groundtruth assignment. The last dimension is the
proposals before groundtruth assignment. The last dimension is the box
box coordinates w.r.t. the scaled images in [ymin, xmin, ymax, xmax]
coordinates w.r.t. the scaled images in [ymin, xmin, ymax, xmax] format.
format.
gt_boxes: a tensor of shape of [batch_size, MAX_NUM_INSTANCES, 4]. The
gt_boxes: a tensor of shape of [batch_size, MAX_NUM_INSTANCES, 4].
coordinates of gt_boxes are in the pixel coordinates of the scaled
The coordinates of gt_boxes are in the pixel coordinates of the scaled
image. This tensor might have padding of values -1 indicating the
image. This tensor might have padding of values -1 indicating the
invalid box coordinates.
invalid box coordinates.
gt_classes: a tensor with a shape of [batch_size, MAX_NUM_INSTANCES]. This
gt_classes: a tensor with a shape of [batch_size, MAX_NUM_INSTANCES]. This
...
@@ -350,12 +349,8 @@ class MaskSampler(object):
...
@@ -350,12 +349,8 @@ class MaskSampler(object):
self
.
_mask_target_size
=
mask_target_size
self
.
_mask_target_size
=
mask_target_size
self
.
_num_mask_samples_per_image
=
num_mask_samples_per_image
self
.
_num_mask_samples_per_image
=
num_mask_samples_per_image
def
__call__
(
self
,
def
__call__
(
self
,
candidate_rois
,
candidate_gt_boxes
,
candidate_gt_classes
,
candidate_rois
,
candidate_gt_indices
,
gt_masks
):
candidate_gt_boxes
,
candidate_gt_classes
,
candidate_gt_indices
,
gt_masks
):
"""Sample and create mask targets for training.
"""Sample and create mask targets for training.
Args:
Args:
...
@@ -371,8 +366,8 @@ class MaskSampler(object):
...
@@ -371,8 +366,8 @@ class MaskSampler(object):
candidate_gt_indices: a tensor of shape [batch_size, N], storing the
candidate_gt_indices: a tensor of shape [batch_size, N], storing the
corresponding groundtruth instance indices to the `candidate_gt_boxes`,
corresponding groundtruth instance indices to the `candidate_gt_boxes`,
i.e. gt_boxes[candidate_gt_indices[:, i]] = candidate_gt_boxes[:, i],
i.e. gt_boxes[candidate_gt_indices[:, i]] = candidate_gt_boxes[:, i],
where gt_boxes which is of shape [batch_size, MAX_INSTANCES, 4], M >=
N,
where gt_boxes which is of shape [batch_size, MAX_INSTANCES, 4], M >=
is the superset of candidate_gt_boxes.
N,
is the superset of candidate_gt_boxes.
gt_masks: a tensor of [batch_size, MAX_INSTANCES, mask_height, mask_width]
gt_masks: a tensor of [batch_size, MAX_INSTANCES, mask_height, mask_width]
containing all the groundtruth masks which sample masks are drawn from.
containing all the groundtruth masks which sample masks are drawn from.
after sampling. The output masks are resized w.r.t the sampled RoIs.
after sampling. The output masks are resized w.r.t the sampled RoIs.
...
@@ -388,12 +383,9 @@ class MaskSampler(object):
...
@@ -388,12 +383,9 @@ class MaskSampler(object):
cropped foreground masks used for training.
cropped foreground masks used for training.
"""
"""
foreground_rois
,
foreground_classes
,
cropped_foreground_masks
=
(
foreground_rois
,
foreground_classes
,
cropped_foreground_masks
=
(
sample_and_crop_foreground_masks
(
sample_and_crop_foreground_masks
(
candidate_rois
,
candidate_gt_boxes
,
candidate_rois
,
candidate_gt_classes
,
candidate_gt_boxes
,
candidate_gt_indices
,
gt_masks
,
candidate_gt_classes
,
self
.
_num_mask_samples_per_image
,
candidate_gt_indices
,
self
.
_mask_target_size
))
gt_masks
,
self
.
_num_mask_samples_per_image
,
self
.
_mask_target_size
))
return
foreground_rois
,
foreground_classes
,
cropped_foreground_masks
return
foreground_rois
,
foreground_classes
,
cropped_foreground_masks
official/vision/detection/utils/box_utils.py
View file @
999fae62
...
@@ -115,8 +115,8 @@ def normalize_boxes(boxes, image_shape):
...
@@ -115,8 +115,8 @@ def normalize_boxes(boxes, image_shape):
"""Converts boxes to the normalized coordinates.
"""Converts boxes to the normalized coordinates.
Args:
Args:
boxes: a tensor whose last dimension is 4 representing the coordinates
boxes: a tensor whose last dimension is 4 representing the coordinates
of
of
boxes in ymin, xmin, ymax, xmax order.
boxes in ymin, xmin, ymax, xmax order.
image_shape: a list of two integers, a two-element vector or a tensor such
image_shape: a list of two integers, a two-element vector or a tensor such
that all but the last dimensions are `broadcastable` to `boxes`. The last
that all but the last dimensions are `broadcastable` to `boxes`. The last
dimension is 2, which represents [height, width].
dimension is 2, which represents [height, width].
...
@@ -153,8 +153,8 @@ def denormalize_boxes(boxes, image_shape):
...
@@ -153,8 +153,8 @@ def denormalize_boxes(boxes, image_shape):
"""Converts boxes normalized by [height, width] to pixel coordinates.
"""Converts boxes normalized by [height, width] to pixel coordinates.
Args:
Args:
boxes: a tensor whose last dimension is 4 representing the coordinates
boxes: a tensor whose last dimension is 4 representing the coordinates
of
of
boxes in ymin, xmin, ymax, xmax order.
boxes in ymin, xmin, ymax, xmax order.
image_shape: a list of two integers, a two-element vector or a tensor such
image_shape: a list of two integers, a two-element vector or a tensor such
that all but the last dimensions are `broadcastable` to `boxes`. The last
that all but the last dimensions are `broadcastable` to `boxes`. The last
dimension is 2, which represents [height, width].
dimension is 2, which represents [height, width].
...
@@ -187,8 +187,8 @@ def clip_boxes(boxes, image_shape):
...
@@ -187,8 +187,8 @@ def clip_boxes(boxes, image_shape):
"""Clips boxes to image boundaries.
"""Clips boxes to image boundaries.
Args:
Args:
boxes: a tensor whose last dimension is 4 representing the coordinates
boxes: a tensor whose last dimension is 4 representing the coordinates
of
of
boxes in ymin, xmin, ymax, xmax order.
boxes in ymin, xmin, ymax, xmax order.
image_shape: a list of two integers, a two-element vector or a tensor such
image_shape: a list of two integers, a two-element vector or a tensor such
that all but the last dimensions are `broadcastable` to `boxes`. The last
that all but the last dimensions are `broadcastable` to `boxes`. The last
dimension is 2, which represents [height, width].
dimension is 2, which represents [height, width].
...
@@ -255,8 +255,8 @@ def encode_boxes(boxes, anchors, weights=None):
...
@@ -255,8 +255,8 @@ def encode_boxes(boxes, anchors, weights=None):
"""Encode boxes to targets.
"""Encode boxes to targets.
Args:
Args:
boxes: a tensor whose last dimension is 4 representing the coordinates
boxes: a tensor whose last dimension is 4 representing the coordinates
of
of
boxes in ymin, xmin, ymax, xmax order.
boxes in ymin, xmin, ymax, xmax order.
anchors: a tensor whose shape is the same as, or `broadcastable` to `boxes`,
anchors: a tensor whose shape is the same as, or `broadcastable` to `boxes`,
representing the coordinates of anchors in ymin, xmin, ymax, xmax order.
representing the coordinates of anchors in ymin, xmin, ymax, xmax order.
weights: None or a list of four float numbers used to scale coordinates.
weights: None or a list of four float numbers used to scale coordinates.
...
@@ -302,9 +302,8 @@ def encode_boxes(boxes, anchors, weights=None):
...
@@ -302,9 +302,8 @@ def encode_boxes(boxes, anchors, weights=None):
encoded_dh
*=
weights
[
2
]
encoded_dh
*=
weights
[
2
]
encoded_dw
*=
weights
[
3
]
encoded_dw
*=
weights
[
3
]
encoded_boxes
=
tf
.
concat
(
encoded_boxes
=
tf
.
concat
([
encoded_dy
,
encoded_dx
,
encoded_dh
,
encoded_dw
],
[
encoded_dy
,
encoded_dx
,
encoded_dh
,
encoded_dw
],
axis
=-
1
)
axis
=-
1
)
return
encoded_boxes
return
encoded_boxes
...
@@ -359,10 +358,11 @@ def decode_boxes(encoded_boxes, anchors, weights=None):
...
@@ -359,10 +358,11 @@ def decode_boxes(encoded_boxes, anchors, weights=None):
decoded_boxes_ymax
=
decoded_boxes_ymin
+
decoded_boxes_h
-
1.0
decoded_boxes_ymax
=
decoded_boxes_ymin
+
decoded_boxes_h
-
1.0
decoded_boxes_xmax
=
decoded_boxes_xmin
+
decoded_boxes_w
-
1.0
decoded_boxes_xmax
=
decoded_boxes_xmin
+
decoded_boxes_w
-
1.0
decoded_boxes
=
tf
.
concat
(
decoded_boxes
=
tf
.
concat
([
[
decoded_boxes_ymin
,
decoded_boxes_xmin
,
decoded_boxes_ymin
,
decoded_boxes_xmin
,
decoded_boxes_ymax
,
decoded_boxes_ymax
,
decoded_boxes_xmax
],
decoded_boxes_xmax
axis
=-
1
)
],
axis
=-
1
)
return
decoded_boxes
return
decoded_boxes
...
@@ -546,6 +546,6 @@ def get_non_empty_box_indices(boxes):
...
@@ -546,6 +546,6 @@ def get_non_empty_box_indices(boxes):
# Selects indices of boxes whose height and width are both greater than 0.
# Selects indices of boxes whose height and width are both greater than 0.
height
=
boxes
[:,
2
]
-
boxes
[:,
0
]
height
=
boxes
[:,
2
]
-
boxes
[:,
0
]
width
=
boxes
[:,
3
]
-
boxes
[:,
1
]
width
=
boxes
[:,
3
]
-
boxes
[:,
1
]
indices
=
tf
.
where
(
tf
.
logical_and
(
tf
.
greater
(
height
,
0
),
indices
=
tf
.
where
(
tf
.
greater
(
width
,
0
)))
tf
.
logical_and
(
tf
.
greater
(
height
,
0
),
tf
.
greater
(
width
,
0
)))
return
indices
[:,
0
]
return
indices
[:,
0
]
official/vision/detection/utils/input_utils.py
View file @
999fae62
...
@@ -15,6 +15,7 @@
...
@@ -15,6 +15,7 @@
"""Utility functions for input processing."""
"""Utility functions for input processing."""
import
math
import
math
import
tensorflow
as
tf
import
tensorflow
as
tf
from
official.vision.detection.utils
import
box_utils
from
official.vision.detection.utils
import
box_utils
...
@@ -91,12 +92,12 @@ def compute_padded_size(desired_size, stride):
...
@@ -91,12 +92,12 @@ def compute_padded_size(desired_size, stride):
[height, width] of the padded output image size.
[height, width] of the padded output image size.
"""
"""
if
isinstance
(
desired_size
,
list
)
or
isinstance
(
desired_size
,
tuple
):
if
isinstance
(
desired_size
,
list
)
or
isinstance
(
desired_size
,
tuple
):
padded_size
=
[
int
(
math
.
ceil
(
d
*
1.0
/
stride
)
*
stride
)
padded_size
=
[
for
d
in
desired_size
]
int
(
math
.
ceil
(
d
*
1.0
/
stride
)
*
stride
)
for
d
in
desired_size
]
else
:
else
:
padded_size
=
tf
.
cast
(
padded_size
=
tf
.
cast
(
tf
.
math
.
ceil
(
tf
.
math
.
ceil
(
tf
.
cast
(
desired_size
,
dtype
=
tf
.
float32
)
/
stride
)
*
stride
,
tf
.
cast
(
desired_size
,
dtype
=
tf
.
float32
)
/
stride
)
*
stride
,
tf
.
int32
)
tf
.
int32
)
return
padded_size
return
padded_size
...
@@ -158,8 +159,8 @@ def resize_and_crop_image(image,
...
@@ -158,8 +159,8 @@ def resize_and_crop_image(image,
else
:
else
:
scaled_size
=
desired_size
scaled_size
=
desired_size
scale
=
tf
.
minimum
(
scale
=
tf
.
minimum
(
scaled_size
[
0
]
/
image_size
[
0
],
scaled_size
[
0
]
/
image_size
[
0
],
scaled_size
[
1
]
/
image_size
[
1
])
scaled_size
[
1
]
/
image_size
[
1
])
scaled_size
=
tf
.
round
(
image_size
*
scale
)
scaled_size
=
tf
.
round
(
image_size
*
scale
)
# Computes 2D image_scale.
# Computes 2D image_scale.
...
@@ -169,9 +170,8 @@ def resize_and_crop_image(image,
...
@@ -169,9 +170,8 @@ def resize_and_crop_image(image,
# desired_size.
# desired_size.
if
random_jittering
:
if
random_jittering
:
max_offset
=
scaled_size
-
desired_size
max_offset
=
scaled_size
-
desired_size
max_offset
=
tf
.
where
(
tf
.
less
(
max_offset
,
0
),
max_offset
=
tf
.
where
(
tf
.
zeros_like
(
max_offset
),
tf
.
less
(
max_offset
,
0
),
tf
.
zeros_like
(
max_offset
),
max_offset
)
max_offset
)
offset
=
max_offset
*
tf
.
random
.
uniform
([
offset
=
max_offset
*
tf
.
random
.
uniform
([
2
,
2
,
],
0
,
1
,
seed
=
seed
)
],
0
,
1
,
seed
=
seed
)
...
@@ -191,9 +191,9 @@ def resize_and_crop_image(image,
...
@@ -191,9 +191,9 @@ def resize_and_crop_image(image,
image_info
=
tf
.
stack
([
image_info
=
tf
.
stack
([
image_size
,
image_size
,
tf
.
cast
(
desired_size
,
dtype
=
tf
.
float32
),
tf
.
cast
(
desired_size
,
dtype
=
tf
.
float32
),
image_scale
,
image_scale
,
tf
.
cast
(
offset
,
tf
.
float32
)
tf
.
cast
(
offset
,
tf
.
float32
)
])
])
return
output_image
,
image_info
return
output_image
,
image_info
...
@@ -288,25 +288,21 @@ def resize_and_crop_image_v2(image,
...
@@ -288,25 +288,21 @@ def resize_and_crop_image_v2(image,
image
,
tf
.
cast
(
scaled_size
,
tf
.
int32
),
method
=
method
)
image
,
tf
.
cast
(
scaled_size
,
tf
.
int32
),
method
=
method
)
if
random_jittering
:
if
random_jittering
:
scaled_image
=
scaled_image
[
scaled_image
=
scaled_image
[
offset
[
0
]:
offset
[
0
]
+
desired_size
[
0
],
offset
[
0
]:
offset
[
0
]
+
desired_size
[
0
],
offset
[
1
]:
offset
[
1
]
+
desired_size
[
1
],
:]
offset
[
1
]:
offset
[
1
]
+
desired_size
[
1
],
:]
output_image
=
tf
.
image
.
pad_to_bounding_box
(
output_image
=
tf
.
image
.
pad_to_bounding_box
(
scaled_image
,
0
,
0
,
scaled_image
,
0
,
0
,
padded_size
[
0
],
padded_size
[
1
])
padded_size
[
0
],
padded_size
[
1
])
image_info
=
tf
.
stack
([
image_info
=
tf
.
stack
([
image_size
,
image_size
,
tf
.
cast
(
desired_size
,
dtype
=
tf
.
float32
),
tf
.
cast
(
desired_size
,
dtype
=
tf
.
float32
),
image_scale
,
image_scale
,
tf
.
cast
(
offset
,
tf
.
float32
)
tf
.
cast
(
offset
,
tf
.
float32
)
])
])
return
output_image
,
image_info
return
output_image
,
image_info
def
resize_and_crop_boxes
(
boxes
,
def
resize_and_crop_boxes
(
boxes
,
image_scale
,
output_size
,
offset
):
image_scale
,
output_size
,
offset
):
"""Resizes boxes to output size with scale and offset.
"""Resizes boxes to output size with scale and offset.
Args:
Args:
...
@@ -329,10 +325,7 @@ def resize_and_crop_boxes(boxes,
...
@@ -329,10 +325,7 @@ def resize_and_crop_boxes(boxes,
return
boxes
return
boxes
def
resize_and_crop_masks
(
masks
,
def
resize_and_crop_masks
(
masks
,
image_scale
,
output_size
,
offset
):
image_scale
,
output_size
,
offset
):
"""Resizes boxes to output size with scale and offset.
"""Resizes boxes to output size with scale and offset.
Args:
Args:
...
...
official/vision/detection/utils/mask_utils.py
View file @
999fae62
...
@@ -18,14 +18,12 @@ from __future__ import division
...
@@ -18,14 +18,12 @@ from __future__ import division
from
__future__
import
print_function
from
__future__
import
print_function
import
math
import
math
import
numpy
as
np
import
numpy
as
np
import
cv2
import
cv2
def
paste_instance_masks
(
masks
,
def
paste_instance_masks
(
masks
,
detected_boxes
,
image_height
,
image_width
):
detected_boxes
,
image_height
,
image_width
):
"""Paste instance masks to generate the image segmentation results.
"""Paste instance masks to generate the image segmentation results.
Args:
Args:
...
@@ -95,10 +93,8 @@ def paste_instance_masks(masks,
...
@@ -95,10 +93,8 @@ def paste_instance_masks(masks,
y_0
=
min
(
max
(
ref_box
[
1
],
0
),
image_height
)
y_0
=
min
(
max
(
ref_box
[
1
],
0
),
image_height
)
y_1
=
min
(
max
(
ref_box
[
3
]
+
1
,
0
),
image_height
)
y_1
=
min
(
max
(
ref_box
[
3
]
+
1
,
0
),
image_height
)
im_mask
[
y_0
:
y_1
,
x_0
:
x_1
]
=
mask
[
im_mask
[
y_0
:
y_1
,
x_0
:
x_1
]
=
mask
[(
y_0
-
ref_box
[
1
]):(
y_1
-
ref_box
[
1
]),
(
y_0
-
ref_box
[
1
]):(
y_1
-
ref_box
[
1
]),
(
x_0
-
ref_box
[
0
]):(
x_1
-
ref_box
[
0
])]
(
x_0
-
ref_box
[
0
]):(
x_1
-
ref_box
[
0
])
]
segms
.
append
(
im_mask
)
segms
.
append
(
im_mask
)
segms
=
np
.
array
(
segms
)
segms
=
np
.
array
(
segms
)
...
@@ -106,10 +102,7 @@ def paste_instance_masks(masks,
...
@@ -106,10 +102,7 @@ def paste_instance_masks(masks,
return
segms
return
segms
def
paste_instance_masks_v2
(
masks
,
def
paste_instance_masks_v2
(
masks
,
detected_boxes
,
image_height
,
image_width
):
detected_boxes
,
image_height
,
image_width
):
"""Paste instance masks to generate the image segmentation (v2).
"""Paste instance masks to generate the image segmentation (v2).
Args:
Args:
...
@@ -146,34 +139,22 @@ def paste_instance_masks_v2(masks,
...
@@ -146,34 +139,22 @@ def paste_instance_masks_v2(masks,
beta
=
box
[
3
]
/
(
1.0
*
mask_height
)
beta
=
box
[
3
]
/
(
1.0
*
mask_height
)
# pylint: disable=invalid-name
# pylint: disable=invalid-name
# Transformation from mask pixel indices to image coordinate.
# Transformation from mask pixel indices to image coordinate.
M_mask_to_image
=
np
.
array
(
M_mask_to_image
=
np
.
array
([[
alpha
,
0
,
xmin
],
[
0
,
beta
,
ymin
],
[
0
,
0
,
1
]],
[[
alpha
,
0
,
xmin
],
dtype
=
np
.
float32
)
[
0
,
beta
,
ymin
],
[
0
,
0
,
1
]],
dtype
=
np
.
float32
)
# Transformation from image to cropped mask coordinate.
# Transformation from image to cropped mask coordinate.
M_image_to_crop
=
np
.
array
(
M_image_to_crop
=
np
.
array
(
[[
1
,
0
,
-
xmin_int
],
[[
1
,
0
,
-
xmin_int
],
[
0
,
1
,
-
ymin_int
],
[
0
,
0
,
1
]],
dtype
=
np
.
float32
)
[
0
,
1
,
-
ymin_int
],
[
0
,
0
,
1
]],
dtype
=
np
.
float32
)
M
=
np
.
dot
(
M_image_to_crop
,
M_mask_to_image
)
M
=
np
.
dot
(
M_image_to_crop
,
M_mask_to_image
)
# Compensate the half pixel offset that OpenCV has in the
# Compensate the half pixel offset that OpenCV has in the
# warpPerspective implementation: the top-left pixel is sampled
# warpPerspective implementation: the top-left pixel is sampled
# at (0,0), but we want it to be at (0.5, 0.5).
# at (0,0), but we want it to be at (0.5, 0.5).
M
=
np
.
dot
(
M
=
np
.
dot
(
np
.
dot
(
np
.
dot
(
np
.
array
([[
1
,
0
,
-
0.5
],
np
.
array
([[
1
,
0
,
-
0.5
],
[
0
,
1
,
-
0.5
],
[
0
,
0
,
1
]],
np
.
float32
),
M
),
[
0
,
1
,
-
0.5
],
np
.
array
([[
1
,
0
,
0.5
],
[
0
,
1
,
0.5
],
[
0
,
0
,
1
]],
np
.
float32
))
[
0
,
0
,
1
]],
np
.
float32
),
M
),
np
.
array
([[
1
,
0
,
0.5
],
[
0
,
1
,
0.5
],
[
0
,
0
,
1
]],
np
.
float32
))
# pylint: enable=invalid-name
# pylint: enable=invalid-name
cropped_mask
=
cv2
.
warpPerspective
(
cropped_mask
=
cv2
.
warpPerspective
(
mask
.
astype
(
np
.
float32
),
M
,
mask
.
astype
(
np
.
float32
),
M
,
(
xmax_int
-
xmin_int
,
ymax_int
-
ymin_int
))
(
xmax_int
-
xmin_int
,
ymax_int
-
ymin_int
))
cropped_mask
=
np
.
array
(
cropped_mask
>
0.5
,
dtype
=
np
.
uint8
)
cropped_mask
=
np
.
array
(
cropped_mask
>
0.5
,
dtype
=
np
.
uint8
)
img_mask
=
np
.
zeros
((
image_height
,
image_width
))
img_mask
=
np
.
zeros
((
image_height
,
image_width
))
...
@@ -181,12 +162,10 @@ def paste_instance_masks_v2(masks,
...
@@ -181,12 +162,10 @@ def paste_instance_masks_v2(masks,
x1
=
max
(
min
(
xmax_int
,
image_width
),
0
)
x1
=
max
(
min
(
xmax_int
,
image_width
),
0
)
y0
=
max
(
min
(
ymin_int
,
image_height
),
0
)
y0
=
max
(
min
(
ymin_int
,
image_height
),
0
)
y1
=
max
(
min
(
ymax_int
,
image_height
),
0
)
y1
=
max
(
min
(
ymax_int
,
image_height
),
0
)
img_mask
[
y0
:
y1
,
x0
:
x1
]
=
cropped_mask
[
img_mask
[
y0
:
y1
,
x0
:
x1
]
=
cropped_mask
[(
y0
-
ymin_int
):(
y1
-
ymin_int
),
(
y0
-
ymin_int
):(
y1
-
ymin_int
),
(
x0
-
xmin_int
):(
x1
-
xmin_int
)]
(
x0
-
xmin_int
):(
x1
-
xmin_int
)]
segms
.
append
(
img_mask
)
segms
.
append
(
img_mask
)
segms
=
np
.
array
(
segms
)
segms
=
np
.
array
(
segms
)
return
segms
return
segms
official/vision/detection/utils/object_detection/balanced_positive_negative_sampler.py
View file @
999fae62
...
@@ -12,7 +12,6 @@
...
@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
# ==============================================================================
# ==============================================================================
"""Class to subsample minibatches by balancing positives and negatives.
"""Class to subsample minibatches by balancing positives and negatives.
Subsamples minibatches based on a pre-specified positive fraction in range
Subsamples minibatches based on a pre-specified positive fraction in range
...
@@ -92,10 +91,10 @@ class BalancedPositiveNegativeSampler(minibatch_sampler.MinibatchSampler):
...
@@ -92,10 +91,10 @@ class BalancedPositiveNegativeSampler(minibatch_sampler.MinibatchSampler):
Args:
Args:
input_tensor: An int32 tensor of shape [N] to be sliced.
input_tensor: An int32 tensor of shape [N] to be sliced.
num_start_samples: Number of examples to be sliced from the beginning
num_start_samples: Number of examples to be sliced from the beginning
of
of
the input tensor.
the input tensor.
num_end_samples: Number of examples to be sliced from the end of the
num_end_samples: Number of examples to be sliced from the end of the
input
input
tensor.
tensor.
total_num_samples: Sum of is num_start_samples and num_end_samples. This
total_num_samples: Sum of is num_start_samples and num_end_samples. This
should be a scalar.
should be a scalar.
...
@@ -110,13 +109,16 @@ class BalancedPositiveNegativeSampler(minibatch_sampler.MinibatchSampler):
...
@@ -110,13 +109,16 @@ class BalancedPositiveNegativeSampler(minibatch_sampler.MinibatchSampler):
tf
.
range
(
input_length
),
input_length
-
num_end_samples
)
tf
.
range
(
input_length
),
input_length
-
num_end_samples
)
selected_positions
=
tf
.
logical_or
(
start_positions
,
end_positions
)
selected_positions
=
tf
.
logical_or
(
start_positions
,
end_positions
)
selected_positions
=
tf
.
cast
(
selected_positions
,
tf
.
float32
)
selected_positions
=
tf
.
cast
(
selected_positions
,
tf
.
float32
)
indexed_positions
=
tf
.
multiply
(
tf
.
cumsum
(
selected_positions
),
indexed_positions
=
tf
.
multiply
(
selected_positions
)
tf
.
cumsum
(
selected_positions
),
selected_positions
)
one_hot_selector
=
tf
.
one_hot
(
tf
.
cast
(
indexed_positions
,
tf
.
int32
)
-
1
,
one_hot_selector
=
tf
.
one_hot
(
total_num_samples
,
tf
.
cast
(
indexed_positions
,
tf
.
int32
)
-
1
,
dtype
=
tf
.
float32
)
total_num_samples
,
return
tf
.
cast
(
tf
.
tensordot
(
tf
.
cast
(
input_tensor
,
tf
.
float32
),
dtype
=
tf
.
float32
)
one_hot_selector
,
axes
=
[
0
,
0
]),
tf
.
int32
)
return
tf
.
cast
(
tf
.
tensordot
(
tf
.
cast
(
input_tensor
,
tf
.
float32
),
one_hot_selector
,
axes
=
[
0
,
0
]),
tf
.
int32
)
def
_static_subsample
(
self
,
indicator
,
batch_size
,
labels
):
def
_static_subsample
(
self
,
indicator
,
batch_size
,
labels
):
"""Returns subsampled minibatch.
"""Returns subsampled minibatch.
...
@@ -182,13 +184,12 @@ class BalancedPositiveNegativeSampler(minibatch_sampler.MinibatchSampler):
...
@@ -182,13 +184,12 @@ class BalancedPositiveNegativeSampler(minibatch_sampler.MinibatchSampler):
sorted_signed_indicator_idx
=
tf
.
nn
.
top_k
(
sorted_signed_indicator_idx
=
tf
.
nn
.
top_k
(
signed_indicator_idx
,
input_length
,
sorted
=
True
).
values
signed_indicator_idx
,
input_length
,
sorted
=
True
).
values
[
num_positive_samples
,
[
num_positive_samples
,
num_negative_samples
num_negative_samples
]
=
self
.
_get_num_pos_neg_samples
(
]
=
self
.
_get_num_pos_neg_samples
(
sorted_signed_indicator_idx
,
batch_size
)
sorted_signed_indicator_idx
,
batch_size
)
sampled_idx
=
self
.
_get_values_from_start_and_end
(
sampled_idx
=
self
.
_get_values_from_start_and_end
(
sorted_signed_indicator_idx
,
num_positive_samples
,
sorted_signed_indicator_idx
,
num_positive_samples
,
num_negative_samples
,
num_negative_samples
,
batch_size
)
batch_size
)
# Shift the indices to start from 0 and remove any samples that are set as
# Shift the indices to start from 0 and remove any samples that are set as
# False.
# False.
...
@@ -203,11 +204,13 @@ class BalancedPositiveNegativeSampler(minibatch_sampler.MinibatchSampler):
...
@@ -203,11 +204,13 @@ class BalancedPositiveNegativeSampler(minibatch_sampler.MinibatchSampler):
tf
.
bool
)
tf
.
bool
)
# project back the order based on stored permutations
# project back the order based on stored permutations
reprojections
=
tf
.
one_hot
(
permutation
,
depth
=
input_length
,
reprojections
=
tf
.
one_hot
(
dtype
=
tf
.
float32
)
permutation
,
depth
=
input_length
,
dtype
=
tf
.
float32
)
return
tf
.
cast
(
tf
.
tensordot
(
return
tf
.
cast
(
tf
.
cast
(
sampled_idx_indicator
,
tf
.
float32
),
tf
.
tensordot
(
reprojections
,
axes
=
[
0
,
0
]),
tf
.
bool
)
tf
.
cast
(
sampled_idx_indicator
,
tf
.
float32
),
reprojections
,
axes
=
[
0
,
0
]),
tf
.
bool
)
def
subsample
(
self
,
indicator
,
batch_size
,
labels
,
scope
=
None
):
def
subsample
(
self
,
indicator
,
batch_size
,
labels
,
scope
=
None
):
"""Returns subsampled minibatch.
"""Returns subsampled minibatch.
...
@@ -218,7 +221,7 @@ class BalancedPositiveNegativeSampler(minibatch_sampler.MinibatchSampler):
...
@@ -218,7 +221,7 @@ class BalancedPositiveNegativeSampler(minibatch_sampler.MinibatchSampler):
randomly selects negative samples so that the positive sample fraction
randomly selects negative samples so that the positive sample fraction
matches self._positive_fraction. It cannot be None is is_static is True.
matches self._positive_fraction. It cannot be None is is_static is True.
labels: boolean tensor of shape [N] denoting positive(=True) and negative
labels: boolean tensor of shape [N] denoting positive(=True) and negative
(=False) examples.
(=False) examples.
scope: name scope.
scope: name scope.
Returns:
Returns:
...
...
official/vision/detection/utils/object_detection/box_coder.py
View file @
999fae62
...
@@ -12,7 +12,6 @@
...
@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
# ==============================================================================
# ==============================================================================
"""Base box coder.
"""Base box coder.
Box coders convert between coordinate frames, namely image-centric
Box coders convert between coordinate frames, namely image-centric
...
@@ -32,7 +31,6 @@ from abc import abstractproperty
...
@@ -32,7 +31,6 @@ from abc import abstractproperty
import
tensorflow
as
tf
import
tensorflow
as
tf
# Box coder types.
# Box coder types.
FASTER_RCNN
=
'faster_rcnn'
FASTER_RCNN
=
'faster_rcnn'
KEYPOINT
=
'keypoint'
KEYPOINT
=
'keypoint'
...
@@ -138,11 +136,11 @@ def batch_decode(encoded_boxes, box_coder, anchors):
...
@@ -138,11 +136,11 @@ def batch_decode(encoded_boxes, box_coder, anchors):
"""
"""
encoded_boxes
.
get_shape
().
assert_has_rank
(
3
)
encoded_boxes
.
get_shape
().
assert_has_rank
(
3
)
if
encoded_boxes
.
get_shape
()[
1
].
value
!=
anchors
.
num_boxes_static
():
if
encoded_boxes
.
get_shape
()[
1
].
value
!=
anchors
.
num_boxes_static
():
raise
ValueError
(
'The number of anchors inferred from encoded_boxes'
raise
ValueError
(
' and anchors are inconsistent: shape[1] of
encoded_boxes'
'The number of anchors inferred from
encoded_boxes'
' %s should be equal to the number
of
a
nc
hors: %s.'
%
' and anchors are inconsistent: shape[1]
of
e
nc
oded_boxes'
(
encoded_boxes
.
get_shape
()[
1
].
value
,
' %s should be equal to the number of anchors: %s.'
%
anchors
.
num_boxes_static
()))
(
encoded_boxes
.
get_shape
()[
1
].
value
,
anchors
.
num_boxes_static
()))
decoded_boxes
=
tf
.
stack
([
decoded_boxes
=
tf
.
stack
([
box_coder
.
decode
(
boxes
,
anchors
).
get
()
box_coder
.
decode
(
boxes
,
anchors
).
get
()
...
...
official/vision/detection/utils/object_detection/box_list.py
View file @
999fae62
...
@@ -12,7 +12,6 @@
...
@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
# ==============================================================================
# ==============================================================================
"""Bounding Box List definition.
"""Bounding Box List definition.
BoxList represents a list of bounding boxes as tensorflow
BoxList represents a list of bounding boxes as tensorflow
...
@@ -126,8 +125,8 @@ class BoxList(object):
...
@@ -126,8 +125,8 @@ class BoxList(object):
it returns the box coordinates.
it returns the box coordinates.
Args:
Args:
field: this optional string parameter can be used to specify
field: this optional string parameter can be used to specify
a related
a related
field to be accessed.
field to be accessed.
Returns:
Returns:
a tensor representing the box collection or an associated field.
a tensor representing the box collection or an associated field.
...
@@ -192,8 +191,8 @@ class BoxList(object):
...
@@ -192,8 +191,8 @@ class BoxList(object):
"""Retrieves specified fields as a dictionary of tensors.
"""Retrieves specified fields as a dictionary of tensors.
Args:
Args:
fields: (optional) list of fields to return in the dictionary.
fields: (optional) list of fields to return in the dictionary.
If None
If None
(default), all fields are returned.
(default), all fields are returned.
Returns:
Returns:
tensor_dict: A dictionary of tensors specified by fields.
tensor_dict: A dictionary of tensors specified by fields.
...
...
official/vision/detection/utils/object_detection/box_list_ops.py
View file @
999fae62
...
@@ -12,7 +12,6 @@
...
@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
# ==============================================================================
# ==============================================================================
"""Bounding Box List operations.
"""Bounding Box List operations.
Example box operations that are supported:
Example box operations that are supported:
...
@@ -152,8 +151,8 @@ def prune_outside_window(boxlist, window, scope=None):
...
@@ -152,8 +151,8 @@ def prune_outside_window(boxlist, window, scope=None):
Args:
Args:
boxlist: a BoxList holding M_in boxes.
boxlist: a BoxList holding M_in boxes.
window: a float tensor of shape [4] representing [ymin, xmin, ymax, xmax]
window: a float tensor of shape [4] representing [ymin, xmin, ymax, xmax]
of
of
the window
the window
scope: name scope.
scope: name scope.
Returns:
Returns:
...
@@ -166,8 +165,10 @@ def prune_outside_window(boxlist, window, scope=None):
...
@@ -166,8 +165,10 @@ def prune_outside_window(boxlist, window, scope=None):
value
=
boxlist
.
get
(),
num_or_size_splits
=
4
,
axis
=
1
)
value
=
boxlist
.
get
(),
num_or_size_splits
=
4
,
axis
=
1
)
win_y_min
,
win_x_min
,
win_y_max
,
win_x_max
=
tf
.
unstack
(
window
)
win_y_min
,
win_x_min
,
win_y_max
,
win_x_max
=
tf
.
unstack
(
window
)
coordinate_violations
=
tf
.
concat
([
coordinate_violations
=
tf
.
concat
([
tf
.
less
(
y_min
,
win_y_min
),
tf
.
less
(
x_min
,
win_x_min
),
tf
.
less
(
y_min
,
win_y_min
),
tf
.
greater
(
y_max
,
win_y_max
),
tf
.
greater
(
x_max
,
win_x_max
)
tf
.
less
(
x_min
,
win_x_min
),
tf
.
greater
(
y_max
,
win_y_max
),
tf
.
greater
(
x_max
,
win_x_max
)
],
1
)
],
1
)
valid_indices
=
tf
.
reshape
(
valid_indices
=
tf
.
reshape
(
tf
.
where
(
tf
.
logical_not
(
tf
.
reduce_any
(
coordinate_violations
,
1
))),
[
-
1
])
tf
.
where
(
tf
.
logical_not
(
tf
.
reduce_any
(
coordinate_violations
,
1
))),
[
-
1
])
...
@@ -183,8 +184,8 @@ def prune_completely_outside_window(boxlist, window, scope=None):
...
@@ -183,8 +184,8 @@ def prune_completely_outside_window(boxlist, window, scope=None):
Args:
Args:
boxlist: a BoxList holding M_in boxes.
boxlist: a BoxList holding M_in boxes.
window: a float tensor of shape [4] representing [ymin, xmin, ymax, xmax]
window: a float tensor of shape [4] representing [ymin, xmin, ymax, xmax]
of
of
the window
the window
scope: name scope.
scope: name scope.
Returns:
Returns:
...
@@ -198,8 +199,10 @@ def prune_completely_outside_window(boxlist, window, scope=None):
...
@@ -198,8 +199,10 @@ def prune_completely_outside_window(boxlist, window, scope=None):
value
=
boxlist
.
get
(),
num_or_size_splits
=
4
,
axis
=
1
)
value
=
boxlist
.
get
(),
num_or_size_splits
=
4
,
axis
=
1
)
win_y_min
,
win_x_min
,
win_y_max
,
win_x_max
=
tf
.
unstack
(
window
)
win_y_min
,
win_x_min
,
win_y_max
,
win_x_max
=
tf
.
unstack
(
window
)
coordinate_violations
=
tf
.
concat
([
coordinate_violations
=
tf
.
concat
([
tf
.
greater_equal
(
y_min
,
win_y_max
),
tf
.
greater_equal
(
x_min
,
win_x_max
),
tf
.
greater_equal
(
y_min
,
win_y_max
),
tf
.
less_equal
(
y_max
,
win_y_min
),
tf
.
less_equal
(
x_max
,
win_x_min
)
tf
.
greater_equal
(
x_min
,
win_x_max
),
tf
.
less_equal
(
y_max
,
win_y_min
),
tf
.
less_equal
(
x_max
,
win_x_min
)
],
1
)
],
1
)
valid_indices
=
tf
.
reshape
(
valid_indices
=
tf
.
reshape
(
tf
.
where
(
tf
.
logical_not
(
tf
.
reduce_any
(
coordinate_violations
,
1
))),
[
-
1
])
tf
.
where
(
tf
.
logical_not
(
tf
.
reduce_any
(
coordinate_violations
,
1
))),
[
-
1
])
...
@@ -274,8 +277,8 @@ def iou(boxlist1, boxlist2, scope=None):
...
@@ -274,8 +277,8 @@ def iou(boxlist1, boxlist2, scope=None):
unions
=
(
unions
=
(
tf
.
expand_dims
(
areas1
,
1
)
+
tf
.
expand_dims
(
areas2
,
0
)
-
intersections
)
tf
.
expand_dims
(
areas1
,
1
)
+
tf
.
expand_dims
(
areas2
,
0
)
-
intersections
)
return
tf
.
where
(
return
tf
.
where
(
tf
.
equal
(
intersections
,
0.0
),
tf
.
equal
(
intersections
,
0.0
),
tf
.
zeros_like
(
intersections
),
tf
.
zeros_like
(
intersections
),
tf
.
truediv
(
intersections
,
unions
))
tf
.
truediv
(
intersections
,
unions
))
def
matched_iou
(
boxlist1
,
boxlist2
,
scope
=
None
):
def
matched_iou
(
boxlist1
,
boxlist2
,
scope
=
None
):
...
@@ -295,8 +298,8 @@ def matched_iou(boxlist1, boxlist2, scope=None):
...
@@ -295,8 +298,8 @@ def matched_iou(boxlist1, boxlist2, scope=None):
areas2
=
area
(
boxlist2
)
areas2
=
area
(
boxlist2
)
unions
=
areas1
+
areas2
-
intersections
unions
=
areas1
+
areas2
-
intersections
return
tf
.
where
(
return
tf
.
where
(
tf
.
equal
(
intersections
,
0.0
),
tf
.
equal
(
intersections
,
0.0
),
tf
.
zeros_like
(
intersections
),
tf
.
zeros_like
(
intersections
),
tf
.
truediv
(
intersections
,
unions
))
tf
.
truediv
(
intersections
,
unions
))
def
ioa
(
boxlist1
,
boxlist2
,
scope
=
None
):
def
ioa
(
boxlist1
,
boxlist2
,
scope
=
None
):
...
@@ -320,8 +323,10 @@ def ioa(boxlist1, boxlist2, scope=None):
...
@@ -320,8 +323,10 @@ def ioa(boxlist1, boxlist2, scope=None):
return
tf
.
truediv
(
intersections
,
areas
)
return
tf
.
truediv
(
intersections
,
areas
)
def
prune_non_overlapping_boxes
(
def
prune_non_overlapping_boxes
(
boxlist1
,
boxlist1
,
boxlist2
,
min_overlap
=
0.0
,
scope
=
None
):
boxlist2
,
min_overlap
=
0.0
,
scope
=
None
):
"""Prunes the boxes in boxlist1 that overlap less than thresh with boxlist2.
"""Prunes the boxes in boxlist1 that overlap less than thresh with boxlist2.
For each box in boxlist1, we want its IOA to be more than minoverlap with
For each box in boxlist1, we want its IOA to be more than minoverlap with
...
@@ -331,7 +336,7 @@ def prune_non_overlapping_boxes(
...
@@ -331,7 +336,7 @@ def prune_non_overlapping_boxes(
boxlist1: BoxList holding N boxes.
boxlist1: BoxList holding N boxes.
boxlist2: BoxList holding M boxes.
boxlist2: BoxList holding M boxes.
min_overlap: Minimum required overlap between boxes, to count them as
min_overlap: Minimum required overlap between boxes, to count them as
overlapping.
overlapping.
scope: name scope.
scope: name scope.
Returns:
Returns:
...
@@ -361,8 +366,8 @@ def prune_small_boxes(boxlist, min_side, scope=None):
...
@@ -361,8 +366,8 @@ def prune_small_boxes(boxlist, min_side, scope=None):
"""
"""
with
tf
.
name_scope
(
scope
,
'PruneSmallBoxes'
):
with
tf
.
name_scope
(
scope
,
'PruneSmallBoxes'
):
height
,
width
=
height_width
(
boxlist
)
height
,
width
=
height_width
(
boxlist
)
is_valid
=
tf
.
logical_and
(
tf
.
greater_equal
(
width
,
min_side
),
is_valid
=
tf
.
logical_and
(
tf
.
greater_equal
(
height
,
min_side
))
tf
.
greater_equal
(
width
,
min_side
),
tf
.
greater_equal
(
height
,
min_side
))
return
gather
(
boxlist
,
tf
.
reshape
(
tf
.
where
(
is_valid
),
[
-
1
]))
return
gather
(
boxlist
,
tf
.
reshape
(
tf
.
where
(
is_valid
),
[
-
1
]))
...
@@ -389,9 +394,10 @@ def change_coordinate_frame(boxlist, window, scope=None):
...
@@ -389,9 +394,10 @@ def change_coordinate_frame(boxlist, window, scope=None):
with
tf
.
name_scope
(
scope
,
'ChangeCoordinateFrame'
):
with
tf
.
name_scope
(
scope
,
'ChangeCoordinateFrame'
):
win_height
=
window
[
2
]
-
window
[
0
]
win_height
=
window
[
2
]
-
window
[
0
]
win_width
=
window
[
3
]
-
window
[
1
]
win_width
=
window
[
3
]
-
window
[
1
]
boxlist_new
=
scale
(
box_list
.
BoxList
(
boxlist_new
=
scale
(
boxlist
.
get
()
-
[
window
[
0
],
window
[
1
],
window
[
0
],
window
[
1
]]),
box_list
.
BoxList
(
boxlist
.
get
()
-
1.0
/
win_height
,
1.0
/
win_width
)
[
window
[
0
],
window
[
1
],
window
[
0
],
window
[
1
]]),
1.0
/
win_height
,
1.0
/
win_width
)
boxlist_new
=
_copy_extra_fields
(
boxlist_new
,
boxlist
)
boxlist_new
=
_copy_extra_fields
(
boxlist_new
,
boxlist
)
return
boxlist_new
return
boxlist_new
...
@@ -420,13 +426,17 @@ def sq_dist(boxlist1, boxlist2, scope=None):
...
@@ -420,13 +426,17 @@ def sq_dist(boxlist1, boxlist2, scope=None):
with
tf
.
name_scope
(
scope
,
'SqDist'
):
with
tf
.
name_scope
(
scope
,
'SqDist'
):
sqnorm1
=
tf
.
reduce_sum
(
tf
.
square
(
boxlist1
.
get
()),
1
,
keep_dims
=
True
)
sqnorm1
=
tf
.
reduce_sum
(
tf
.
square
(
boxlist1
.
get
()),
1
,
keep_dims
=
True
)
sqnorm2
=
tf
.
reduce_sum
(
tf
.
square
(
boxlist2
.
get
()),
1
,
keep_dims
=
True
)
sqnorm2
=
tf
.
reduce_sum
(
tf
.
square
(
boxlist2
.
get
()),
1
,
keep_dims
=
True
)
innerprod
=
tf
.
matmul
(
boxlist1
.
get
(),
boxlist2
.
get
(),
innerprod
=
tf
.
matmul
(
transpose_a
=
False
,
transpose_b
=
True
)
boxlist1
.
get
(),
boxlist2
.
get
(),
transpose_a
=
False
,
transpose_b
=
True
)
return
sqnorm1
+
tf
.
transpose
(
sqnorm2
)
-
2.0
*
innerprod
return
sqnorm1
+
tf
.
transpose
(
sqnorm2
)
-
2.0
*
innerprod
def
boolean_mask
(
boxlist
,
indicator
,
fields
=
None
,
scope
=
None
,
def
boolean_mask
(
boxlist
,
use_static_shapes
=
False
,
indicator_sum
=
None
):
indicator
,
fields
=
None
,
scope
=
None
,
use_static_shapes
=
False
,
indicator_sum
=
None
):
"""Select boxes from BoxList according to indicator and return new BoxList.
"""Select boxes from BoxList according to indicator and return new BoxList.
`boolean_mask` returns the subset of boxes that are marked as "True" by the
`boolean_mask` returns the subset of boxes that are marked as "True" by the
...
@@ -463,8 +473,7 @@ def boolean_mask(boxlist, indicator, fields=None, scope=None,
...
@@ -463,8 +473,7 @@ def boolean_mask(boxlist, indicator, fields=None, scope=None,
raise
ValueError
(
'`indicator_sum` must be a of type int'
)
raise
ValueError
(
'`indicator_sum` must be a of type int'
)
selected_positions
=
tf
.
cast
(
indicator
,
dtype
=
tf
.
float32
)
selected_positions
=
tf
.
cast
(
indicator
,
dtype
=
tf
.
float32
)
indexed_positions
=
tf
.
cast
(
indexed_positions
=
tf
.
cast
(
tf
.
multiply
(
tf
.
multiply
(
tf
.
cumsum
(
selected_positions
),
selected_positions
),
tf
.
cumsum
(
selected_positions
),
selected_positions
),
dtype
=
tf
.
int32
)
dtype
=
tf
.
int32
)
one_hot_selector
=
tf
.
one_hot
(
one_hot_selector
=
tf
.
one_hot
(
indexed_positions
-
1
,
indicator_sum
,
dtype
=
tf
.
float32
)
indexed_positions
-
1
,
indicator_sum
,
dtype
=
tf
.
float32
)
...
@@ -541,9 +550,8 @@ def concatenate(boxlists, fields=None, scope=None):
...
@@ -541,9 +550,8 @@ def concatenate(boxlists, fields=None, scope=None):
Args:
Args:
boxlists: list of BoxList objects
boxlists: list of BoxList objects
fields: optional list of fields to also concatenate. By default, all
fields: optional list of fields to also concatenate. By default, all fields
fields from the first BoxList in the list are included in the
from the first BoxList in the list are included in the concatenation.
concatenation.
scope: name scope.
scope: name scope.
Returns:
Returns:
...
@@ -637,8 +645,8 @@ def visualize_boxes_in_image(image, boxlist, normalized=False, scope=None):
...
@@ -637,8 +645,8 @@ def visualize_boxes_in_image(image, boxlist, normalized=False, scope=None):
Args:
Args:
image: an image tensor with shape [height, width, 3]
image: an image tensor with shape [height, width, 3]
boxlist: a BoxList
boxlist: a BoxList
normalized: (boolean) specify whether corners are to be interpreted
normalized: (boolean) specify whether corners are to be interpreted
as
as
absolute coordinates in image space or normalized with respect to the
absolute coordinates in image space or normalized with respect to the
image size.
image size.
scope: name scope.
scope: name scope.
...
@@ -648,8 +656,7 @@ def visualize_boxes_in_image(image, boxlist, normalized=False, scope=None):
...
@@ -648,8 +656,7 @@ def visualize_boxes_in_image(image, boxlist, normalized=False, scope=None):
with
tf
.
name_scope
(
scope
,
'VisualizeBoxesInImage'
):
with
tf
.
name_scope
(
scope
,
'VisualizeBoxesInImage'
):
if
not
normalized
:
if
not
normalized
:
height
,
width
,
_
=
tf
.
unstack
(
tf
.
shape
(
image
))
height
,
width
,
_
=
tf
.
unstack
(
tf
.
shape
(
image
))
boxlist
=
scale
(
boxlist
,
boxlist
=
scale
(
boxlist
,
1.0
/
tf
.
cast
(
height
,
tf
.
float32
),
1.0
/
tf
.
cast
(
height
,
tf
.
float32
),
1.0
/
tf
.
cast
(
width
,
tf
.
float32
))
1.0
/
tf
.
cast
(
width
,
tf
.
float32
))
corners
=
tf
.
expand_dims
(
boxlist
.
get
(),
0
)
corners
=
tf
.
expand_dims
(
boxlist
.
get
(),
0
)
image
=
tf
.
expand_dims
(
image
,
0
)
image
=
tf
.
expand_dims
(
image
,
0
)
...
@@ -714,9 +721,8 @@ def filter_greater_than(boxlist, thresh, scope=None):
...
@@ -714,9 +721,8 @@ def filter_greater_than(boxlist, thresh, scope=None):
if
len
(
scores
.
shape
.
as_list
())
==
2
and
scores
.
shape
.
as_list
()[
1
]
!=
1
:
if
len
(
scores
.
shape
.
as_list
())
==
2
and
scores
.
shape
.
as_list
()[
1
]
!=
1
:
raise
ValueError
(
'Scores should have rank 1 or have shape '
raise
ValueError
(
'Scores should have rank 1 or have shape '
'consistent with [None, 1]'
)
'consistent with [None, 1]'
)
high_score_indices
=
tf
.
cast
(
tf
.
reshape
(
high_score_indices
=
tf
.
cast
(
tf
.
where
(
tf
.
greater
(
scores
,
thresh
)),
tf
.
reshape
(
tf
.
where
(
tf
.
greater
(
scores
,
thresh
)),
[
-
1
]),
tf
.
int32
)
[
-
1
]),
tf
.
int32
)
return
gather
(
boxlist
,
high_score_indices
)
return
gather
(
boxlist
,
high_score_indices
)
...
@@ -748,8 +754,10 @@ def non_max_suppression(boxlist, thresh, max_output_size, scope=None):
...
@@ -748,8 +754,10 @@ def non_max_suppression(boxlist, thresh, max_output_size, scope=None):
if
not
boxlist
.
has_field
(
'scores'
):
if
not
boxlist
.
has_field
(
'scores'
):
raise
ValueError
(
'input boxlist must have
\'
scores
\'
field'
)
raise
ValueError
(
'input boxlist must have
\'
scores
\'
field'
)
selected_indices
=
tf
.
image
.
non_max_suppression
(
selected_indices
=
tf
.
image
.
non_max_suppression
(
boxlist
.
get
(),
boxlist
.
get_field
(
'scores'
),
boxlist
.
get
(),
max_output_size
,
iou_threshold
=
thresh
)
boxlist
.
get_field
(
'scores'
),
max_output_size
,
iou_threshold
=
thresh
)
return
gather
(
boxlist
,
selected_indices
)
return
gather
(
boxlist
,
selected_indices
)
...
@@ -768,8 +776,11 @@ def _copy_extra_fields(boxlist_to_copy_to, boxlist_to_copy_from):
...
@@ -768,8 +776,11 @@ def _copy_extra_fields(boxlist_to_copy_to, boxlist_to_copy_from):
return
boxlist_to_copy_to
return
boxlist_to_copy_to
def
to_normalized_coordinates
(
boxlist
,
height
,
width
,
def
to_normalized_coordinates
(
boxlist
,
check_range
=
True
,
scope
=
None
):
height
,
width
,
check_range
=
True
,
scope
=
None
):
"""Converts absolute box coordinates to normalized coordinates in [0, 1].
"""Converts absolute box coordinates to normalized coordinates in [0, 1].
Usually one uses the dynamic shape of the image or conv-layer tensor:
Usually one uses the dynamic shape of the image or conv-layer tensor:
...
@@ -797,8 +808,9 @@ def to_normalized_coordinates(boxlist, height, width,
...
@@ -797,8 +808,9 @@ def to_normalized_coordinates(boxlist, height, width,
if
check_range
:
if
check_range
:
max_val
=
tf
.
reduce_max
(
boxlist
.
get
())
max_val
=
tf
.
reduce_max
(
boxlist
.
get
())
max_assert
=
tf
.
Assert
(
tf
.
greater
(
max_val
,
1.01
),
max_assert
=
tf
.
Assert
(
[
'max value is lower than 1.01: '
,
max_val
])
tf
.
greater
(
max_val
,
1.01
),
[
'max value is lower than 1.01: '
,
max_val
])
with
tf
.
control_dependencies
([
max_assert
]):
with
tf
.
control_dependencies
([
max_assert
]):
width
=
tf
.
identity
(
width
)
width
=
tf
.
identity
(
width
)
...
@@ -822,8 +834,8 @@ def to_absolute_coordinates(boxlist,
...
@@ -822,8 +834,8 @@ def to_absolute_coordinates(boxlist,
height: Maximum value for height of absolute box coordinates.
height: Maximum value for height of absolute box coordinates.
width: Maximum value for width of absolute box coordinates.
width: Maximum value for width of absolute box coordinates.
check_range: If True, checks if the coordinates are normalized or not.
check_range: If True, checks if the coordinates are normalized or not.
maximum_normalized_coordinate: Maximum coordinate value to be considered
maximum_normalized_coordinate: Maximum coordinate value to be considered
as
as
normalized, default to 1.1.
normalized, default to 1.1.
scope: name scope.
scope: name scope.
Returns:
Returns:
...
@@ -838,9 +850,10 @@ def to_absolute_coordinates(boxlist,
...
@@ -838,9 +850,10 @@ def to_absolute_coordinates(boxlist,
if
check_range
:
if
check_range
:
box_maximum
=
tf
.
reduce_max
(
boxlist
.
get
())
box_maximum
=
tf
.
reduce_max
(
boxlist
.
get
())
max_assert
=
tf
.
Assert
(
max_assert
=
tf
.
Assert
(
tf
.
greater_equal
(
maximum_normalized_coordinate
,
box_maximum
),
tf
.
greater_equal
(
maximum_normalized_coordinate
,
box_maximum
),
[
[
'maximum box coordinate value is larger '
'maximum box coordinate value is larger '
'than %f: '
%
maximum_normalized_coordinate
,
box_maximum
])
'than %f: '
%
maximum_normalized_coordinate
,
box_maximum
])
with
tf
.
control_dependencies
([
max_assert
]):
with
tf
.
control_dependencies
([
max_assert
]):
width
=
tf
.
identity
(
width
)
width
=
tf
.
identity
(
width
)
...
@@ -924,13 +937,15 @@ def refine_boxes(pool_boxes,
...
@@ -924,13 +937,15 @@ def refine_boxes(pool_boxes,
if
not
pool_boxes
.
has_field
(
'scores'
):
if
not
pool_boxes
.
has_field
(
'scores'
):
raise
ValueError
(
'pool_boxes must have a
\'
scores
\'
field'
)
raise
ValueError
(
'pool_boxes must have a
\'
scores
\'
field'
)
nms_boxes
=
non_max_suppression
(
nms_boxes
=
non_max_suppression
(
pool_boxes
,
nms_iou_thresh
,
pool_boxes
,
nms_iou_thresh
,
nms_max_detections
)
nms_max_detections
)
return
box_voting
(
nms_boxes
,
pool_boxes
,
voting_iou_thresh
)
return
box_voting
(
nms_boxes
,
pool_boxes
,
voting_iou_thresh
)
def
box_voting
(
selected_boxes
,
pool_boxes
,
iou_thresh
=
0.5
):
def
box_voting
(
selected_boxes
,
pool_boxes
,
iou_thresh
=
0.5
):
"""Performs box voting as described in S. Gidaris and N. Komodakis, ICCV 2015.
"""Performs box voting as described in S. Gidaris and N.
Komodakis, ICCV 2015.
Performs box voting as described in 'Object detection via a multi-region &
Performs box voting as described in 'Object detection via a multi-region &
semantic segmentation-aware CNN model', Gidaris and Komodakis, ICCV 2015. For
semantic segmentation-aware CNN model', Gidaris and Komodakis, ICCV 2015. For
...
@@ -972,9 +987,10 @@ def box_voting(selected_boxes, pool_boxes, iou_thresh=0.5):
...
@@ -972,9 +987,10 @@ def box_voting(selected_boxes, pool_boxes, iou_thresh=0.5):
# match to any boxes in pool_boxes. For such boxes without any matches, we
# match to any boxes in pool_boxes. For such boxes without any matches, we
# should return the original boxes without voting.
# should return the original boxes without voting.
match_assert
=
tf
.
Assert
(
match_assert
=
tf
.
Assert
(
tf
.
reduce_all
(
tf
.
greater
(
num_matches
,
0
)),
tf
.
reduce_all
(
tf
.
greater
(
num_matches
,
0
)),
[
[
'Each box in selected_boxes must match with at least one box '
'Each box in selected_boxes must match with at least one box '
'in pool_boxes.'
])
'in pool_boxes.'
])
scores
=
tf
.
expand_dims
(
pool_boxes
.
get_field
(
'scores'
),
1
)
scores
=
tf
.
expand_dims
(
pool_boxes
.
get_field
(
'scores'
),
1
)
scores_assert
=
tf
.
Assert
(
scores_assert
=
tf
.
Assert
(
...
@@ -993,9 +1009,7 @@ def box_voting(selected_boxes, pool_boxes, iou_thresh=0.5):
...
@@ -993,9 +1009,7 @@ def box_voting(selected_boxes, pool_boxes, iou_thresh=0.5):
return
averaged_boxes
return
averaged_boxes
def
get_minimal_coverage_box
(
boxlist
,
def
get_minimal_coverage_box
(
boxlist
,
default_box
=
None
,
scope
=
None
):
default_box
=
None
,
scope
=
None
):
"""Creates a single bounding box which covers all boxes in the boxlist.
"""Creates a single bounding box which covers all boxes in the boxlist.
Args:
Args:
...
@@ -1045,9 +1059,9 @@ def sample_boxes_by_jittering(boxlist,
...
@@ -1045,9 +1059,9 @@ def sample_boxes_by_jittering(boxlist,
boxlist: A boxlist containing N boxes in normalized coordinates.
boxlist: A boxlist containing N boxes in normalized coordinates.
num_boxes_to_sample: A positive integer containing the number of boxes to
num_boxes_to_sample: A positive integer containing the number of boxes to
sample.
sample.
stddev: Standard deviation. This is used to draw random offsets for the
stddev: Standard deviation. This is used to draw random offsets for the
box
box
corners from a normal distribution. The offset is multiplied by the
corners from a normal distribution. The offset is multiplied by the
box
box
size so will be larger in terms of pixels for larger boxes.
size so will be larger in terms of pixels for larger boxes.
scope: Name scope.
scope: Name scope.
Returns:
Returns:
...
@@ -1056,11 +1070,10 @@ def sample_boxes_by_jittering(boxlist,
...
@@ -1056,11 +1070,10 @@ def sample_boxes_by_jittering(boxlist,
"""
"""
with
tf
.
name_scope
(
scope
,
'SampleBoxesByJittering'
):
with
tf
.
name_scope
(
scope
,
'SampleBoxesByJittering'
):
num_boxes
=
boxlist
.
num_boxes
()
num_boxes
=
boxlist
.
num_boxes
()
box_indices
=
tf
.
random_uniform
(
box_indices
=
tf
.
random_uniform
([
num_boxes_to_sample
],
[
num_boxes_to_sample
],
minval
=
0
,
minval
=
0
,
maxval
=
num_boxes
,
maxval
=
num_boxes
,
dtype
=
tf
.
int32
)
dtype
=
tf
.
int32
)
sampled_boxes
=
tf
.
gather
(
boxlist
.
get
(),
box_indices
)
sampled_boxes
=
tf
.
gather
(
boxlist
.
get
(),
box_indices
)
sampled_boxes_height
=
sampled_boxes
[:,
2
]
-
sampled_boxes
[:,
0
]
sampled_boxes_height
=
sampled_boxes
[:,
2
]
-
sampled_boxes
[:,
0
]
sampled_boxes_width
=
sampled_boxes
[:,
3
]
-
sampled_boxes
[:,
1
]
sampled_boxes_width
=
sampled_boxes
[:,
3
]
-
sampled_boxes
[:,
1
]
...
...
official/vision/detection/utils/object_detection/faster_rcnn_box_coder.py
View file @
999fae62
...
@@ -12,7 +12,6 @@
...
@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
# ==============================================================================
# ==============================================================================
"""Faster RCNN box coder.
"""Faster RCNN box coder.
Faster RCNN box coder follows the coding schema described below:
Faster RCNN box coder follows the coding schema described below:
...
@@ -43,9 +42,9 @@ class FasterRcnnBoxCoder(box_coder.BoxCoder):
...
@@ -43,9 +42,9 @@ class FasterRcnnBoxCoder(box_coder.BoxCoder):
"""Constructor for FasterRcnnBoxCoder.
"""Constructor for FasterRcnnBoxCoder.
Args:
Args:
scale_factors: List of 4 positive scalars to scale ty, tx, th and tw.
scale_factors: List of 4 positive scalars to scale ty, tx, th and tw.
If
If
set to None, does not perform scaling. For Faster RCNN,
set to None, does not perform scaling. For Faster RCNN,
the open-source
the open-source
implementation recommends using [10.0, 10.0, 5.0, 5.0].
implementation recommends using [10.0, 10.0, 5.0, 5.0].
"""
"""
if
scale_factors
:
if
scale_factors
:
assert
len
(
scale_factors
)
==
4
assert
len
(
scale_factors
)
==
4
...
...
official/vision/detection/utils/object_detection/matcher.py
View file @
999fae62
...
@@ -12,7 +12,6 @@
...
@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
# ==============================================================================
# ==============================================================================
"""Matcher interface and Match class.
"""Matcher interface and Match class.
This module defines the Matcher interface and the Match object. The job of the
This module defines the Matcher interface and the Match object. The job of the
...
@@ -49,9 +48,9 @@ class Match(object):
...
@@ -49,9 +48,9 @@ class Match(object):
Args:
Args:
match_results: Integer tensor of shape [N] with (1) match_results[i]>=0,
match_results: Integer tensor of shape [N] with (1) match_results[i]>=0,
meaning that column i is matched with row match_results[i].
meaning that column i is matched with row match_results[i].
(2)
(2)
match_results[i]=-1, meaning that column i is not matched.
match_results[i]=-1, meaning that column i is not matched.
(3)
(3)
match_results[i]=-2, meaning that column i is ignored.
match_results[i]=-2, meaning that column i is ignored.
Raises:
Raises:
ValueError: if match_results does not have rank 1 or is not an
ValueError: if match_results does not have rank 1 or is not an
...
@@ -168,8 +167,7 @@ class Match(object):
...
@@ -168,8 +167,7 @@ class Match(object):
def
_reshape_and_cast
(
self
,
t
):
def
_reshape_and_cast
(
self
,
t
):
return
tf
.
cast
(
tf
.
reshape
(
t
,
[
-
1
]),
tf
.
int32
)
return
tf
.
cast
(
tf
.
reshape
(
t
,
[
-
1
]),
tf
.
int32
)
def
gather_based_on_match
(
self
,
input_tensor
,
unmatched_value
,
def
gather_based_on_match
(
self
,
input_tensor
,
unmatched_value
,
ignored_value
):
ignored_value
):
"""Gathers elements from `input_tensor` based on match results.
"""Gathers elements from `input_tensor` based on match results.
For columns that are matched to a row, gathered_tensor[col] is set to
For columns that are matched to a row, gathered_tensor[col] is set to
...
@@ -190,16 +188,15 @@ class Match(object):
...
@@ -190,16 +188,15 @@ class Match(object):
The shape of the gathered tensor is [match_results.shape[0]] +
The shape of the gathered tensor is [match_results.shape[0]] +
input_tensor.shape[1:].
input_tensor.shape[1:].
"""
"""
input_tensor
=
tf
.
concat
(
[
tf
.
stack
([
ignored_value
,
unmatched_value
]),
input_tensor
=
tf
.
concat
(
input_tensor
],
axis
=
0
)
[
tf
.
stack
([
ignored_value
,
unmatched_value
]),
input_tensor
],
axis
=
0
)
gather_indices
=
tf
.
maximum
(
self
.
match_results
+
2
,
0
)
gather_indices
=
tf
.
maximum
(
self
.
match_results
+
2
,
0
)
gathered_tensor
=
tf
.
gather
(
input_tensor
,
gather_indices
)
gathered_tensor
=
tf
.
gather
(
input_tensor
,
gather_indices
)
return
gathered_tensor
return
gathered_tensor
class
Matcher
(
object
):
class
Matcher
(
object
):
"""Abstract base class for matcher.
"""Abstract base class for matcher."""
"""
__metaclass__
=
ABCMeta
__metaclass__
=
ABCMeta
def
match
(
self
,
similarity_matrix
,
scope
=
None
,
**
params
):
def
match
(
self
,
similarity_matrix
,
scope
=
None
,
**
params
):
...
@@ -212,8 +209,8 @@ class Matcher(object):
...
@@ -212,8 +209,8 @@ class Matcher(object):
similarity_matrix: Float tensor of shape [N, M] with pairwise similarity
similarity_matrix: Float tensor of shape [N, M] with pairwise similarity
where higher value means more similar.
where higher value means more similar.
scope: Op scope name. Defaults to 'Match' if None.
scope: Op scope name. Defaults to 'Match' if None.
**params: Additional keyword arguments for specific implementations of
**params: Additional keyword arguments for specific implementations of
the
the
Matcher.
Matcher.
Returns:
Returns:
A Match object with the results of matching.
A Match object with the results of matching.
...
@@ -230,8 +227,8 @@ class Matcher(object):
...
@@ -230,8 +227,8 @@ class Matcher(object):
Args:
Args:
similarity_matrix: Float tensor of shape [N, M] with pairwise similarity
similarity_matrix: Float tensor of shape [N, M] with pairwise similarity
where higher value means more similar.
where higher value means more similar.
**params: Additional keyword arguments for specific implementations of
**params: Additional keyword arguments for specific implementations of
the
the
Matcher.
Matcher.
Returns:
Returns:
match_results: Integer tensor of shape [M]: match_results[i]>=0 means
match_results: Integer tensor of shape [M]: match_results[i]>=0 means
...
...
official/vision/detection/utils/object_detection/minibatch_sampler.py
View file @
999fae62
...
@@ -12,7 +12,6 @@
...
@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
# ==============================================================================
# ==============================================================================
"""Base minibatch sampler module.
"""Base minibatch sampler module.
The job of the minibatch_sampler is to subsample a minibatch based on some
The job of the minibatch_sampler is to subsample a minibatch based on some
...
@@ -53,8 +52,8 @@ class MinibatchSampler(object):
...
@@ -53,8 +52,8 @@ class MinibatchSampler(object):
Args:
Args:
indicator: boolean tensor of shape [N] whose True entries can be sampled.
indicator: boolean tensor of shape [N] whose True entries can be sampled.
batch_size: desired batch size.
batch_size: desired batch size.
**params: additional keyword arguments for specific implementations of
**params: additional keyword arguments for specific implementations of
the
the
MinibatchSampler.
MinibatchSampler.
Returns:
Returns:
sample_indicator: boolean tensor of shape [N] whose True entries have been
sample_indicator: boolean tensor of shape [N] whose True entries have been
...
@@ -72,8 +71,8 @@ class MinibatchSampler(object):
...
@@ -72,8 +71,8 @@ class MinibatchSampler(object):
is returned.
is returned.
Args:
Args:
indicator: a 1-dimensional boolean tensor indicating which elements
indicator: a 1-dimensional boolean tensor indicating which elements
are
are
allowed to be sampled and which are not.
allowed to be sampled and which are not.
num_samples: int32 scalar tensor
num_samples: int32 scalar tensor
Returns:
Returns:
...
...
official/vision/detection/utils/object_detection/ops.py
View file @
999fae62
...
@@ -12,7 +12,6 @@
...
@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
# ==============================================================================
# ==============================================================================
"""A module for helper tensorflow ops.
"""A module for helper tensorflow ops.
This is originally implemented in TensorFlow Object Detection API.
This is originally implemented in TensorFlow Object Detection API.
...
@@ -37,7 +36,7 @@ def indices_to_dense_vector(indices,
...
@@ -37,7 +36,7 @@ def indices_to_dense_vector(indices,
Args:
Args:
indices: 1d Tensor with integer indices which are to be set to
indices: 1d Tensor with integer indices which are to be set to
indices_values.
indices_values.
size: scalar with size (integer) of output Tensor.
size: scalar with size (integer) of output Tensor.
indices_value: values of elements specified by indices in the output vector
indices_value: values of elements specified by indices in the output vector
default_value: values of other elements in the output vector.
default_value: values of other elements in the output vector.
...
@@ -61,10 +60,10 @@ def matmul_gather_on_zeroth_axis(params, indices, scope=None):
...
@@ -61,10 +60,10 @@ def matmul_gather_on_zeroth_axis(params, indices, scope=None):
TODO(rathodv, jonathanhuang): enable sparse matmul option.
TODO(rathodv, jonathanhuang): enable sparse matmul option.
Args:
Args:
params: A float32 Tensor. The tensor from which to gather values.
params: A float32 Tensor. The tensor from which to gather values.
Must be at
Must be at
least rank 1.
least rank 1.
indices: A Tensor. Must be one of the following types: int32, int64.
indices: A Tensor. Must be one of the following types: int32, int64.
Must be
Must be
in range [0, params.shape[0])
in range [0, params.shape[0])
scope: A name for the operation (optional).
scope: A name for the operation (optional).
Returns:
Returns:
...
...
official/vision/detection/utils/object_detection/preprocessor.py
View file @
999fae62
...
@@ -50,10 +50,9 @@ def _flip_boxes_left_right(boxes):
...
@@ -50,10 +50,9 @@ def _flip_boxes_left_right(boxes):
"""Left-right flip the boxes.
"""Left-right flip the boxes.
Args:
Args:
boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4]. Boxes
Boxes are in normalized form meaning their coordinates vary
are in normalized form meaning their coordinates vary between [0, 1]. Each
between [0, 1].
row is in the form of [ymin, xmin, ymax, xmax].
Each row is in the form of [ymin, xmin, ymax, xmax].
Returns:
Returns:
Flipped boxes.
Flipped boxes.
...
@@ -69,8 +68,8 @@ def _flip_masks_left_right(masks):
...
@@ -69,8 +68,8 @@ def _flip_masks_left_right(masks):
"""Left-right flip masks.
"""Left-right flip masks.
Args:
Args:
masks: rank 3 float32 tensor with shape
masks: rank 3 float32 tensor with shape
[num_instances, height, width]
[num_instances, height, width]
representing instance masks.
representing instance masks.
Returns:
Returns:
flipped masks: rank 3 float32 tensor with shape
flipped masks: rank 3 float32 tensor with shape
...
@@ -79,7 +78,9 @@ def _flip_masks_left_right(masks):
...
@@ -79,7 +78,9 @@ def _flip_masks_left_right(masks):
return
masks
[:,
:,
::
-
1
]
return
masks
[:,
:,
::
-
1
]
def
keypoint_flip_horizontal
(
keypoints
,
flip_point
,
flip_permutation
,
def
keypoint_flip_horizontal
(
keypoints
,
flip_point
,
flip_permutation
,
scope
=
None
):
scope
=
None
):
"""Flips the keypoints horizontally around the flip_point.
"""Flips the keypoints horizontally around the flip_point.
...
@@ -91,9 +92,9 @@ def keypoint_flip_horizontal(keypoints, flip_point, flip_permutation,
...
@@ -91,9 +92,9 @@ def keypoint_flip_horizontal(keypoints, flip_point, flip_permutation,
flip_point: (float) scalar tensor representing the x coordinate to flip the
flip_point: (float) scalar tensor representing the x coordinate to flip the
keypoints around.
keypoints around.
flip_permutation: rank 1 int32 tensor containing the keypoint flip
flip_permutation: rank 1 int32 tensor containing the keypoint flip
permutation. This specifies the mapping from original keypoint indices
permutation. This specifies the mapping from original keypoint indices
to
to
the flipped keypoint indices. This is used primarily for keypoints
the flipped keypoint indices. This is used primarily for keypoints
that
that
are not reflection invariant. E.g. Suppose there are 3 keypoints
are not reflection invariant. E.g. Suppose there are 3 keypoints
representing ['head', 'right_eye', 'left_eye'], then a logical choice for
representing ['head', 'right_eye', 'left_eye'], then a logical choice for
flip_permutation might be [0, 2, 1] since we want to swap the 'left_eye'
flip_permutation might be [0, 2, 1] since we want to swap the 'left_eye'
and 'right_eye' after a horizontal flip.
and 'right_eye' after a horizontal flip.
...
@@ -190,19 +191,16 @@ def random_horizontal_flip(image,
...
@@ -190,19 +191,16 @@ def random_horizontal_flip(image,
Args:
Args:
image: rank 3 float32 tensor with shape [height, width, channels].
image: rank 3 float32 tensor with shape [height, width, channels].
boxes: (optional) rank 2 float32 tensor with shape [N, 4]
boxes: (optional) rank 2 float32 tensor with shape [N, 4] containing the
containing the bounding boxes.
bounding boxes. Boxes are in normalized form meaning their coordinates
Boxes are in normalized form meaning their coordinates vary
vary between [0, 1]. Each row is in the form of [ymin, xmin, ymax, xmax].
between [0, 1].
masks: (optional) rank 3 float32 tensor with shape [num_instances, height,
Each row is in the form of [ymin, xmin, ymax, xmax].
width] containing instance masks. The masks are of the same height, width
masks: (optional) rank 3 float32 tensor with shape
as the input `image`.
[num_instances, height, width] containing instance masks. The masks
keypoints: (optional) rank 3 float32 tensor with shape [num_instances,
are of the same height, width as the input `image`.
num_keypoints, 2]. The keypoints are in y-x normalized coordinates.
keypoints: (optional) rank 3 float32 tensor with shape
[num_instances, num_keypoints, 2]. The keypoints are in y-x
normalized coordinates.
keypoint_flip_permutation: rank 1 int32 tensor containing the keypoint flip
keypoint_flip_permutation: rank 1 int32 tensor containing the keypoint flip
permutation.
permutation.
seed: random seed
seed: random seed
Returns:
Returns:
...
@@ -369,20 +367,19 @@ def resize_to_range(image,
...
@@ -369,20 +367,19 @@ def resize_to_range(image,
Args:
Args:
image: A 3D tensor of shape [height, width, channels]
image: A 3D tensor of shape [height, width, channels]
masks: (optional) rank 3 float32 tensor with shape
masks: (optional) rank 3 float32 tensor with shape
[num_instances, height,
[num_instances, height,
width] containing instance masks.
width] containing instance masks.
min_dimension: (optional) (scalar) desired size of the smaller image
min_dimension: (optional) (scalar) desired size of the smaller image
dimension.
dimension.
max_dimension: (optional) (scalar) maximum allowed size
max_dimension: (optional) (scalar) maximum allowed size
of the larger image
of the larger image
dimension.
dimension.
method: (optional) interpolation method used in resizing. Defaults to
method: (optional) interpolation method used in resizing. Defaults to
BILINEAR.
BILINEAR.
align_corners: bool. If true, exactly align all 4 corners of the input
align_corners: bool. If true, exactly align all 4 corners of the input and
and output. Defaults to False.
output. Defaults to False.
pad_to_max_dimension: Whether to resize the image and pad it with zeros
pad_to_max_dimension: Whether to resize the image and pad it with zeros so
so the resulting image is of the spatial size
the resulting image is of the spatial size [max_dimension, max_dimension].
[max_dimension, max_dimension]. If masks are included they are padded
If masks are included they are padded similarly.
similarly.
Returns:
Returns:
Note that the position of the resized_image_shape changes based on whether
Note that the position of the resized_image_shape changes based on whether
...
@@ -410,8 +407,8 @@ def resize_to_range(image,
...
@@ -410,8 +407,8 @@ def resize_to_range(image,
new_image
=
tf
.
image
.
resize
(
image
,
new_size
[:
-
1
],
method
=
method
)
new_image
=
tf
.
image
.
resize
(
image
,
new_size
[:
-
1
],
method
=
method
)
if
pad_to_max_dimension
:
if
pad_to_max_dimension
:
new_image
=
tf
.
image
.
pad_to_bounding_box
(
new_image
=
tf
.
image
.
pad_to_bounding_box
(
new_image
,
0
,
0
,
max_dimension
,
new_image
,
0
,
0
,
max_dimension
,
max_dimension
)
max_dimension
)
result
=
[
new_image
]
result
=
[
new_image
]
if
masks
is
not
None
:
if
masks
is
not
None
:
...
@@ -422,8 +419,8 @@ def resize_to_range(image,
...
@@ -422,8 +419,8 @@ def resize_to_range(image,
method
=
tf
.
image
.
ResizeMethod
.
NEAREST_NEIGHBOR
)
method
=
tf
.
image
.
ResizeMethod
.
NEAREST_NEIGHBOR
)
new_masks
=
tf
.
squeeze
(
new_masks
,
3
)
new_masks
=
tf
.
squeeze
(
new_masks
,
3
)
if
pad_to_max_dimension
:
if
pad_to_max_dimension
:
new_masks
=
tf
.
image
.
pad_to_bounding_box
(
new_masks
=
tf
.
image
.
pad_to_bounding_box
(
new_masks
,
0
,
0
,
max_dimension
,
new_masks
,
0
,
0
,
max_dimension
,
max_dimension
)
max_dimension
)
result
.
append
(
new_masks
)
result
.
append
(
new_masks
)
result
.
append
(
new_size
)
result
.
append
(
new_size
)
...
@@ -500,11 +497,10 @@ def scale_boxes_to_pixel_coordinates(image, boxes, keypoints=None):
...
@@ -500,11 +497,10 @@ def scale_boxes_to_pixel_coordinates(image, boxes, keypoints=None):
Args:
Args:
image: A 3D float32 tensor of shape [height, width, channels].
image: A 3D float32 tensor of shape [height, width, channels].
boxes: A 2D float32 tensor of shape [num_boxes, 4] containing the bounding
boxes: A 2D float32 tensor of shape [num_boxes, 4] containing the bounding
boxes in normalized coordinates. Each row is of the form
boxes in normalized coordinates. Each row is of the form [ymin, xmin,
[ymin, xmin, ymax, xmax].
ymax, xmax].
keypoints: (optional) rank 3 float32 tensor with shape
keypoints: (optional) rank 3 float32 tensor with shape [num_instances,
[num_instances, num_keypoints, 2]. The keypoints are in y-x normalized
num_keypoints, 2]. The keypoints are in y-x normalized coordinates.
coordinates.
Returns:
Returns:
image: unchanged input image.
image: unchanged input image.
...
...
official/vision/detection/utils/object_detection/region_similarity_calculator.py
View file @
999fae62
...
@@ -12,7 +12,6 @@
...
@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
# ==============================================================================
# ==============================================================================
"""Region Similarity Calculators for BoxLists.
"""Region Similarity Calculators for BoxLists.
Region Similarity Calculators compare a pairwise measure of similarity
Region Similarity Calculators compare a pairwise measure of similarity
...
...
Prev
1
…
5
6
7
8
9
10
11
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment