ModelZoo / ResNet50_tensorflow / Commits / cc748b2a

Commit cc748b2a, authored Sep 02, 2020 by Abdullah Rashwan; committed by A. Unique TensorFlower, Sep 02, 2020
Internal change
PiperOrigin-RevId: 329754787
Parent: 2f788e1d
Showing 20 changed files with 5317 additions and 0 deletions (+5317, -0)
official/vision/beta/modeling/maskrcnn_model_test.py               +279  -0
official/vision/beta/modeling/retinanet_model.py                   +141  -0
official/vision/beta/modeling/retinanet_model_test.py              +222  -0
official/vision/beta/modeling/video_classification_model.py        +114  -0
official/vision/beta/modeling/video_classification_model_test.py    +90  -0
official/vision/beta/ops/anchor.py                                 +342  -0
official/vision/beta/ops/anchor_test.py                            +168  -0
official/vision/beta/ops/box_ops.py                                +639  -0
official/vision/beta/ops/box_ops_test.py                           +859  -0
official/vision/beta/ops/experimental/anchor_generator.py          +182  -0
official/vision/beta/ops/experimental/anchor_generator_test.py     +158  -0
official/vision/beta/ops/mask_ops.py                               +190  -0
official/vision/beta/ops/mask_ops_test.py                           +55  -0
official/vision/beta/ops/nms.py                                    +202  -0
official/vision/beta/ops/nms_test.py                               +104  -0
official/vision/beta/ops/preprocess_ops.py                         +557  -0
official/vision/beta/ops/preprocess_ops_3d.py                      +260  -0
official/vision/beta/ops/preprocess_ops_3d_test.py                 +142  -0
official/vision/beta/ops/preprocess_ops_test.py                    +230  -0
official/vision/beta/ops/sampling_ops.py                           +383  -0

official/vision/beta/modeling/maskrcnn_model_test.py  (new file, mode 100644)

# Lint as: python3
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for maskrcnn_model.py."""
# Import libraries
from absl.testing import parameterized
import numpy as np
import tensorflow as tf

from official.vision.beta.modeling import maskrcnn_model
from official.vision.beta.modeling.backbones import resnet
from official.vision.beta.modeling.decoders import fpn
from official.vision.beta.modeling.heads import dense_prediction_heads
from official.vision.beta.modeling.heads import instance_heads
from official.vision.beta.modeling.layers import detection_generator
from official.vision.beta.modeling.layers import mask_sampler
from official.vision.beta.modeling.layers import roi_aligner
from official.vision.beta.modeling.layers import roi_generator
from official.vision.beta.modeling.layers import roi_sampler
from official.vision.beta.ops import anchor


class MaskRCNNModelTest(parameterized.TestCase, tf.test.TestCase):

  @parameterized.parameters(
      (3, 3, 7, 3, [1.0], 50, False, False, 41953246),
  )
  def test_num_params(self,
                      num_classes,
                      min_level,
                      max_level,
                      num_scales,
                      aspect_ratios,
                      resnet_model_id,
                      use_separable_conv,
                      include_mask,
                      expected_num_params):
    num_anchors_per_location = num_scales * len(aspect_ratios)
    image_size = 384
    images = np.random.rand(2, image_size, image_size, 3)
    image_shape = np.array([[image_size, image_size], [image_size, image_size]])

    anchor_boxes = anchor.Anchor(
        min_level=min_level,
        max_level=max_level,
        num_scales=num_scales,
        aspect_ratios=aspect_ratios,
        anchor_size=3,
        image_size=(image_size, image_size)).multilevel_boxes
    for l in anchor_boxes:
      anchor_boxes[l] = tf.tile(
          tf.expand_dims(anchor_boxes[l], axis=0), [2, 1, 1, 1])

    backbone = resnet.ResNet(model_id=resnet_model_id)
    decoder = fpn.FPN(
        input_specs=backbone.output_specs,
        min_level=min_level,
        max_level=max_level,
        use_separable_conv=use_separable_conv)
    rpn_head = dense_prediction_heads.RPNHead(
        min_level=min_level,
        max_level=max_level,
        num_anchors_per_location=num_anchors_per_location,
        num_convs=1)
    detection_head = instance_heads.DetectionHead(num_classes=num_classes)
    roi_generator_obj = roi_generator.MultilevelROIGenerator()
    roi_sampler_obj = roi_sampler.ROISampler()
    roi_aligner_obj = roi_aligner.MultilevelROIAligner()
    detection_generator_obj = detection_generator.DetectionGenerator()
    if include_mask:
      mask_head = instance_heads.MaskHead(
          num_classes=num_classes, upsample_factor=2)
      mask_sampler_obj = mask_sampler.MaskSampler(
          mask_target_size=28, num_sampled_masks=1)
      mask_roi_aligner_obj = roi_aligner.MultilevelROIAligner(crop_size=14)
    else:
      mask_head = None
      mask_sampler_obj = None
      mask_roi_aligner_obj = None
    model = maskrcnn_model.MaskRCNNModel(
        backbone,
        decoder,
        rpn_head,
        detection_head,
        roi_generator_obj,
        roi_sampler_obj,
        roi_aligner_obj,
        detection_generator_obj,
        mask_head,
        mask_sampler_obj,
        mask_roi_aligner_obj)

    gt_boxes = np.array(
        [[[10, 10, 15, 15], [2.5, 2.5, 7.5, 7.5], [-1, -1, -1, -1]],
         [[100, 100, 150, 150], [-1, -1, -1, -1], [-1, -1, -1, -1]]],
        dtype=np.float32)
    gt_classes = np.array([[2, 1, -1], [1, -1, -1]], dtype=np.int32)
    if include_mask:
      gt_masks = np.ones((2, 3, 100, 100))
    else:
      gt_masks = None

    _ = model(
        images,
        image_shape,
        anchor_boxes,
        gt_boxes,
        gt_classes,
        gt_masks,
        training=True)

    self.assertEqual(expected_num_params, model.count_params())

  @parameterized.parameters(
      (False, False,),
      (False, True,),
      (True, False,),
      (True, True,),
  )
  def test_forward(self, include_mask, training):
    num_classes = 3
    min_level = 3
    max_level = 4
    num_scales = 3
    aspect_ratios = [1.0]
    image_size = (256, 256)
    images = np.random.rand(2, image_size[0], image_size[1], 3)
    image_shape = np.array([[224, 100], [100, 224]])

    anchor_boxes = anchor.Anchor(
        min_level=min_level,
        max_level=max_level,
        num_scales=num_scales,
        aspect_ratios=aspect_ratios,
        anchor_size=3,
        image_size=image_size).multilevel_boxes
    num_anchors_per_location = len(aspect_ratios) * num_scales

    input_specs = tf.keras.layers.InputSpec(shape=[None, None, None, 3])
    backbone = resnet.ResNet(model_id=50, input_specs=input_specs)
    decoder = fpn.FPN(
        min_level=min_level,
        max_level=max_level,
        input_specs=backbone.output_specs)
    rpn_head = dense_prediction_heads.RPNHead(
        min_level=min_level,
        max_level=max_level,
        num_anchors_per_location=num_anchors_per_location)
    detection_head = instance_heads.DetectionHead(num_classes=num_classes)
    roi_generator_obj = roi_generator.MultilevelROIGenerator()
    roi_sampler_obj = roi_sampler.ROISampler()
    roi_aligner_obj = roi_aligner.MultilevelROIAligner()
    detection_generator_obj = detection_generator.DetectionGenerator()
    if include_mask:
      mask_head = instance_heads.MaskHead(
          num_classes=num_classes, upsample_factor=2)
      mask_sampler_obj = mask_sampler.MaskSampler(
          mask_target_size=28, num_sampled_masks=1)
      mask_roi_aligner_obj = roi_aligner.MultilevelROIAligner(crop_size=14)
    else:
      mask_head = None
      mask_sampler_obj = None
      mask_roi_aligner_obj = None
    model = maskrcnn_model.MaskRCNNModel(
        backbone,
        decoder,
        rpn_head,
        detection_head,
        roi_generator_obj,
        roi_sampler_obj,
        roi_aligner_obj,
        detection_generator_obj,
        mask_head,
        mask_sampler_obj,
        mask_roi_aligner_obj)

    gt_boxes = np.array(
        [[[10, 10, 15, 15], [2.5, 2.5, 7.5, 7.5], [-1, -1, -1, -1]],
         [[100, 100, 150, 150], [-1, -1, -1, -1], [-1, -1, -1, -1]]],
        dtype=np.float32)
    gt_classes = np.array([[2, 1, -1], [1, -1, -1]], dtype=np.int32)
    if include_mask:
      gt_masks = np.ones((2, 3, 100, 100))
    else:
      gt_masks = None

    results = model(
        images,
        image_shape,
        anchor_boxes,
        gt_boxes,
        gt_classes,
        gt_masks,
        training=training)

    self.assertIn('rpn_boxes', results)
    self.assertIn('rpn_scores', results)
    if training:
      self.assertIn('class_targets', results)
      self.assertIn('box_targets', results)
      self.assertIn('class_outputs', results)
      self.assertIn('box_outputs', results)
      if include_mask:
        self.assertIn('mask_outputs', results)
    else:
      self.assertIn('detection_boxes', results)
      self.assertIn('detection_scores', results)
      self.assertIn('detection_classes', results)
      self.assertIn('num_detections', results)
      if include_mask:
        self.assertIn('detection_masks', results)

  @parameterized.parameters(
      (False,),
      (True,),
  )
  def test_serialize_deserialize(self, include_mask):
    input_specs = tf.keras.layers.InputSpec(shape=[None, None, None, 3])
    backbone = resnet.ResNet(model_id=50, input_specs=input_specs)
    decoder = fpn.FPN(
        min_level=3, max_level=7, input_specs=backbone.output_specs)
    rpn_head = dense_prediction_heads.RPNHead(
        min_level=3, max_level=7, num_anchors_per_location=3)
    detection_head = instance_heads.DetectionHead(num_classes=2)
    roi_generator_obj = roi_generator.MultilevelROIGenerator()
    roi_sampler_obj = roi_sampler.ROISampler()
    roi_aligner_obj = roi_aligner.MultilevelROIAligner()
    detection_generator_obj = detection_generator.DetectionGenerator()
    if include_mask:
      mask_head = instance_heads.MaskHead(num_classes=2, upsample_factor=2)
      mask_sampler_obj = mask_sampler.MaskSampler(
          mask_target_size=28, num_sampled_masks=1)
      mask_roi_aligner_obj = roi_aligner.MultilevelROIAligner(crop_size=14)
    else:
      mask_head = None
      mask_sampler_obj = None
      mask_roi_aligner_obj = None
    model = maskrcnn_model.MaskRCNNModel(
        backbone,
        decoder,
        rpn_head,
        detection_head,
        roi_generator_obj,
        roi_sampler_obj,
        roi_aligner_obj,
        detection_generator_obj,
        mask_head,
        mask_sampler_obj,
        mask_roi_aligner_obj)

    config = model.get_config()
    new_model = maskrcnn_model.MaskRCNNModel.from_config(config)

    # Validate that the config can be forced to JSON.
    _ = new_model.to_json()

    # If the serialization was successful, the new config should match the old.
    self.assertAllEqual(model.get_config(), new_model.get_config())


if __name__ == '__main__':
  tf.test.main()

official/vision/beta/modeling/retinanet_model.py  (new file, mode 100644)

# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""RetinaNet."""
# Import libraries
import tensorflow as tf


@tf.keras.utils.register_keras_serializable(package='Vision')
class RetinaNetModel(tf.keras.Model):
  """The RetinaNet model class."""

  def __init__(self,
               backbone,
               decoder,
               head,
               detection_generator,
               **kwargs):
    """Classification initialization function.

    Args:
      backbone: `tf.keras.Model` a backbone network.
      decoder: `tf.keras.Model` a decoder network.
      head: `RetinaNetHead`, the RetinaNet head.
      detection_generator: the detection generator.
      **kwargs: keyword arguments to be passed.
    """
    super(RetinaNetModel, self).__init__(**kwargs)
    self._config_dict = {
        'backbone': backbone,
        'decoder': decoder,
        'head': head,
        'detection_generator': detection_generator,
    }
    self._backbone = backbone
    self._decoder = decoder
    self._head = head
    self._detection_generator = detection_generator

  def call(self, images, image_shape=None, anchor_boxes=None, training=None):
    """Forward pass of the RetinaNet model.

    Args:
      images: `Tensor`, the input batched images, whose shape is
        [batch, height, width, 3].
      image_shape: `Tensor`, the actual shape of the input images, whose shape
        is [batch, 2] where the last dimension is [height, width]. Note that
        this is the actual image shape excluding paddings. For example, images
        in the batch may be resized into different shapes before padding to the
        fixed size.
      anchor_boxes: a dict of tensors which includes multilevel anchors.
        - key: `int`, the level of the multilevel predictions.
        - values: `Tensor`, the anchor coordinates of a particular feature
          level, whose shape is [height_l, width_l, num_anchors_per_location].
      training: `bool`, indicating whether it is in training mode.

    Returns:
      scores: a dict of tensors which includes scores of the predictions.
        - key: `int`, the level of the multilevel predictions.
        - values: `Tensor`, the box scores predicted from a particular feature
          level, whose shape is
          [batch, height_l, width_l, num_classes * num_anchors_per_location].
      boxes: a dict of tensors which includes coordinates of the predictions.
        - key: `int`, the level of the multilevel predictions.
        - values: `Tensor`, the box coordinates predicted from a particular
          feature level, whose shape is
          [batch, height_l, width_l, 4 * num_anchors_per_location].
    """
    # Feature extraction.
    features = self.backbone(images)
    if self.decoder:
      features = self.decoder(features)

    # Dense prediction.
    raw_scores, raw_boxes = self.head(features)

    if training:
      return {
          'cls_outputs': raw_scores,
          'box_outputs': raw_boxes,
      }
    else:
      # Post-processing.
      final_results = self.detection_generator(raw_boxes, raw_scores,
                                               anchor_boxes, image_shape)
      return {
          'detection_boxes': final_results['detection_boxes'],
          'detection_scores': final_results['detection_scores'],
          'detection_classes': final_results['detection_classes'],
          'num_detections': final_results['num_detections'],
          'cls_outputs': raw_scores,
          'box_outputs': raw_boxes,
      }

  @property
  def checkpoint_items(self):
    """Returns a dictionary of items to be additionally checkpointed."""
    items = dict(backbone=self.backbone, head=self.head)
    if self.decoder is not None:
      items.update(decoder=self.decoder)
    return items

  @property
  def backbone(self):
    return self._backbone

  @property
  def decoder(self):
    return self._decoder

  @property
  def head(self):
    return self._head

  @property
  def detection_generator(self):
    return self._detection_generator

  def get_config(self):
    return self._config_dict

  @classmethod
  def from_config(cls, config):
    return cls(**config)
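
Note (illustrative only, not part of this commit): a minimal usage sketch of the RetinaNetModel call contract above. It wires together the same R50-FPN components that the test file below constructs; in training mode only the raw multilevel head outputs are returned, while inference mode additionally runs the detection generator, which requires anchor boxes and per-image shapes. All constructor arguments shown are example values.

import numpy as np

from official.vision.beta.modeling import retinanet_model
from official.vision.beta.modeling.backbones import resnet
from official.vision.beta.modeling.decoders import fpn
from official.vision.beta.modeling.heads import dense_prediction_heads
from official.vision.beta.modeling.layers import detection_generator

# Build a R50-FPN RetinaNet with 3 anchors per location (3 scales x 1 ratio).
backbone = resnet.ResNet(model_id=50)
decoder = fpn.FPN(input_specs=backbone.output_specs, min_level=3, max_level=7)
head = dense_prediction_heads.RetinaNetHead(
    min_level=3, max_level=7, num_classes=3, num_anchors_per_location=3)
generator = detection_generator.MultilevelDetectionGenerator(
    max_num_detections=10)
model = retinanet_model.RetinaNetModel(
    backbone=backbone, decoder=decoder, head=head,
    detection_generator=generator)

images = np.random.rand(2, 128, 128, 3)
# Training mode: raw per-level class and box outputs, keyed by pyramid level.
train_outputs = model(images, training=True)
print(sorted(train_outputs.keys()))  # ['box_outputs', 'cls_outputs']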

official/vision/beta/modeling/retinanet_model_test.py  (new file, mode 100644)

# Lint as: python3
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for RetinaNet models."""
# Import libraries
from absl.testing import parameterized
import numpy as np
import tensorflow as tf

from tensorflow.python.distribute import combinations
from tensorflow.python.distribute import strategy_combinations
from official.vision.beta.modeling import retinanet_model
from official.vision.beta.modeling.backbones import resnet
from official.vision.beta.modeling.decoders import fpn
from official.vision.beta.modeling.heads import dense_prediction_heads
from official.vision.beta.modeling.layers import detection_generator
from official.vision.beta.ops import anchor


class RetinaNetTest(parameterized.TestCase, tf.test.TestCase):

  @parameterized.parameters(
      (3, 3, 7, 3, [1.0], 50, False, 256, 4, 256, 32244949),
  )
  def test_num_params(self,
                      num_classes,
                      min_level,
                      max_level,
                      num_scales,
                      aspect_ratios,
                      resnet_model_id,
                      use_separable_conv,
                      fpn_num_filters,
                      head_num_convs,
                      head_num_filters,
                      expected_num_params):
    num_anchors_per_location = num_scales * len(aspect_ratios)
    image_size = 384
    images = np.random.rand(2, image_size, image_size, 3)
    image_shape = np.array([[image_size, image_size], [image_size, image_size]])

    anchor_boxes = anchor.Anchor(
        min_level=min_level,
        max_level=max_level,
        num_scales=num_scales,
        aspect_ratios=aspect_ratios,
        anchor_size=3,
        image_size=(image_size, image_size)).multilevel_boxes
    for l in anchor_boxes:
      anchor_boxes[l] = tf.tile(
          tf.expand_dims(anchor_boxes[l], axis=0), [2, 1, 1, 1])

    backbone = resnet.ResNet(model_id=resnet_model_id)
    decoder = fpn.FPN(
        input_specs=backbone.output_specs,
        min_level=min_level,
        max_level=max_level,
        num_filters=fpn_num_filters,
        use_separable_conv=use_separable_conv)
    head = dense_prediction_heads.RetinaNetHead(
        min_level=min_level,
        max_level=max_level,
        num_classes=num_classes,
        num_anchors_per_location=num_anchors_per_location,
        use_separable_conv=use_separable_conv,
        num_convs=head_num_convs,
        num_filters=head_num_filters)
    generator = detection_generator.MultilevelDetectionGenerator(
        max_num_detections=10)
    model = retinanet_model.RetinaNetModel(
        backbone=backbone,
        decoder=decoder,
        head=head,
        detection_generator=generator)

    _ = model(images, image_shape, anchor_boxes, training=True)

    self.assertEqual(expected_num_params, model.count_params())

  @combinations.generate(
      combinations.combine(
          strategy=[
              strategy_combinations.tpu_strategy,
              strategy_combinations.one_device_strategy_gpu,
          ],
          image_size=[(128, 128),],
          training=[True, False],
      )
  )
  def test_forward(self, strategy, image_size, training):
    """Test for creation of a R50-FPN RetinaNet."""
    tf.keras.backend.set_image_data_format('channels_last')
    num_classes = 3
    min_level = 3
    max_level = 7
    num_scales = 3
    aspect_ratios = [1.0]
    num_anchors_per_location = num_scales * len(aspect_ratios)

    images = np.random.rand(2, image_size[0], image_size[1], 3)
    image_shape = np.array(
        [[image_size[0], image_size[1]], [image_size[0], image_size[1]]])

    with strategy.scope():
      anchor_gen = anchor.build_anchor_generator(
          min_level=min_level,
          max_level=max_level,
          num_scales=num_scales,
          aspect_ratios=aspect_ratios,
          anchor_size=3)
      anchor_boxes = anchor_gen(image_size)
      for l in anchor_boxes:
        anchor_boxes[l] = tf.tile(
            tf.expand_dims(anchor_boxes[l], axis=0), [2, 1, 1, 1])

      backbone = resnet.ResNet(model_id=50)
      decoder = fpn.FPN(
          input_specs=backbone.output_specs,
          min_level=min_level,
          max_level=max_level)
      head = dense_prediction_heads.RetinaNetHead(
          min_level=min_level,
          max_level=max_level,
          num_classes=num_classes,
          num_anchors_per_location=num_anchors_per_location)
      generator = detection_generator.MultilevelDetectionGenerator(
          max_num_detections=10)
      model = retinanet_model.RetinaNetModel(
          backbone=backbone,
          decoder=decoder,
          head=head,
          detection_generator=generator)

      model_outputs = model(images, image_shape, anchor_boxes,
                            training=training)

    if training:
      cls_outputs = model_outputs['cls_outputs']
      box_outputs = model_outputs['box_outputs']
      for level in range(min_level, max_level + 1):
        self.assertIn(level, cls_outputs)
        self.assertIn(level, box_outputs)
        self.assertAllEqual([
            2,
            image_size[0] // 2**level,
            image_size[1] // 2**level,
            num_classes * num_anchors_per_location
        ], cls_outputs[level].numpy().shape)
        self.assertAllEqual([
            2,
            image_size[0] // 2**level,
            image_size[1] // 2**level,
            4 * num_anchors_per_location
        ], box_outputs[level].numpy().shape)
    else:
      self.assertIn('detection_boxes', model_outputs)
      self.assertIn('detection_scores', model_outputs)
      self.assertIn('detection_classes', model_outputs)
      self.assertIn('num_detections', model_outputs)
      self.assertAllEqual(
          [2, 10, 4], model_outputs['detection_boxes'].numpy().shape)
      self.assertAllEqual(
          [2, 10], model_outputs['detection_scores'].numpy().shape)
      self.assertAllEqual(
          [2, 10], model_outputs['detection_classes'].numpy().shape)
      self.assertAllEqual(
          [2,], model_outputs['num_detections'].numpy().shape)

  def test_serialize_deserialize(self):
    """Validate the network can be serialized and deserialized."""
    num_classes = 3
    min_level = 3
    max_level = 7
    num_scales = 3
    aspect_ratios = [1.0]
    num_anchors_per_location = num_scales * len(aspect_ratios)

    backbone = resnet.ResNet(model_id=50)
    decoder = fpn.FPN(
        input_specs=backbone.output_specs,
        min_level=min_level,
        max_level=max_level)
    head = dense_prediction_heads.RetinaNetHead(
        min_level=min_level,
        max_level=max_level,
        num_classes=num_classes,
        num_anchors_per_location=num_anchors_per_location)
    generator = detection_generator.MultilevelDetectionGenerator(
        max_num_detections=10)
    model = retinanet_model.RetinaNetModel(
        backbone=backbone,
        decoder=decoder,
        head=head,
        detection_generator=generator)

    config = model.get_config()
    new_model = retinanet_model.RetinaNetModel.from_config(config)

    # Validate that the config can be forced to JSON.
    _ = new_model.to_json()

    # If the serialization was successful, the new config should match the old.
    self.assertAllEqual(model.get_config(), new_model.get_config())


if __name__ == '__main__':
  tf.test.main()

official/vision/beta/modeling/video_classification_model.py  (new file, mode 100644)

# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Build video classification models."""
# Import libraries
import tensorflow as tf

layers = tf.keras.layers


@tf.keras.utils.register_keras_serializable(package='Vision')
class VideoClassificationModel(tf.keras.Model):
  """A video classification class builder."""

  def __init__(self,
               backbone,
               num_classes,
               input_specs=layers.InputSpec(shape=[None, None, None, None, 3]),
               dropout_rate=0.0,
               kernel_initializer='random_uniform',
               kernel_regularizer=None,
               bias_regularizer=None,
               add_head_batch_norm=False,
               use_sync_bn: bool = False,
               norm_momentum: float = 0.99,
               norm_epsilon: float = 0.001,
               **kwargs):
    """Video Classification initialization function.

    Args:
      backbone: a 3d backbone network.
      num_classes: `int` number of classes in classification task.
      input_specs: `tf.keras.layers.InputSpec` specs of the input tensor.
      dropout_rate: `float` rate for dropout regularization.
      kernel_initializer: kernel initializer for the dense layer.
      kernel_regularizer: tf.keras.regularizers.Regularizer object. Default to
        None.
      bias_regularizer: tf.keras.regularizers.Regularizer object. Default to
        None.
      add_head_batch_norm: `bool` whether to add a batch normalization layer
        before pool.
      use_sync_bn: `bool` if True, use synchronized batch normalization.
      norm_momentum: `float` normalization momentum for the moving average.
      norm_epsilon: `float` small float added to variance to avoid dividing by
        zero.
      **kwargs: keyword arguments to be passed.
    """
    self._self_setattr_tracking = False
    self._config_dict = {
        'backbone': backbone,
        'num_classes': num_classes,
        'input_specs': input_specs,
        'dropout_rate': dropout_rate,
        'kernel_initializer': kernel_initializer,
        'kernel_regularizer': kernel_regularizer,
        'bias_regularizer': bias_regularizer,
        'add_head_batch_norm': add_head_batch_norm,
        'use_sync_bn': use_sync_bn,
        'norm_momentum': norm_momentum,
        'norm_epsilon': norm_epsilon,
    }
    self._input_specs = input_specs
    self._kernel_regularizer = kernel_regularizer
    self._bias_regularizer = bias_regularizer
    self._backbone = backbone
    if use_sync_bn:
      self._norm = tf.keras.layers.experimental.SyncBatchNormalization
    else:
      self._norm = tf.keras.layers.BatchNormalization
    axis = -1 if tf.keras.backend.image_data_format() == 'channels_last' else 1

    inputs = tf.keras.Input(shape=input_specs.shape[1:])
    endpoints = backbone(inputs)
    x = endpoints[max(endpoints.keys())]

    if add_head_batch_norm:
      x = self._norm(axis=axis, momentum=norm_momentum,
                     epsilon=norm_epsilon)(x)
    x = tf.keras.layers.GlobalAveragePooling3D()(x)
    x = tf.keras.layers.Dropout(dropout_rate)(x)
    x = tf.keras.layers.Dense(
        num_classes,
        kernel_initializer=kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer)(x)

    super(VideoClassificationModel, self).__init__(
        inputs=inputs, outputs=x, **kwargs)

  @property
  def checkpoint_items(self):
    """Returns a dictionary of items to be additionally checkpointed."""
    return dict(backbone=self.backbone)

  @property
  def backbone(self):
    return self._backbone

  def get_config(self):
    return self._config_dict

  @classmethod
  def from_config(cls, config, custom_objects=None):
    return cls(**config)
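
Note (illustrative only, not part of this commit): a minimal usage sketch of the VideoClassificationModel builder above, mirroring the constructor arguments exercised by the test file below. The clip shape, 400-class head, and dropout rate are arbitrary example values.

import numpy as np
import tensorflow as tf

from official.vision.beta.modeling import backbones
from official.vision.beta.modeling import video_classification_model

# 8-frame, 112x112 RGB clips through a ResNet3D-50 backbone.
input_specs = tf.keras.layers.InputSpec(shape=[None, 8, 112, 112, 3])
backbone = backbones.ResNet3D(
    model_id=50,
    temporal_strides=[1, 1, 1, 1],
    temporal_kernel_sizes=[(3, 3, 3), (3, 1, 3, 1), (3, 1, 3, 1, 3, 1),
                           (1, 3, 1)],
    input_specs=input_specs)
model = video_classification_model.VideoClassificationModel(
    backbone=backbone, num_classes=400, input_specs=input_specs,
    dropout_rate=0.5)

clips = np.random.rand(2, 8, 112, 112, 3)
logits = model(clips, training=False)  # shape [2, 400]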

official/vision/beta/modeling/video_classification_model_test.py  (new file, mode 100644)

# Lint as: python3
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for video classification network."""
# Import libraries
from absl.testing import parameterized
import numpy as np
import tensorflow as tf

from official.vision.beta.modeling import backbones
from official.vision.beta.modeling import video_classification_model


class VideoClassificationNetworkTest(parameterized.TestCase, tf.test.TestCase):

  @parameterized.parameters(
      (50, 8, 112, 'relu'),
      (50, 8, 112, 'swish'),
  )
  def test_resnet3d_network_creation(self, model_id, temporal_size,
                                     spatial_size, activation):
    """Test for creation of a ResNet3D-50 classifier."""
    input_specs = tf.keras.layers.InputSpec(
        shape=[None, temporal_size, spatial_size, spatial_size, 3])
    temporal_strides = [1, 1, 1, 1]
    temporal_kernel_sizes = [(3, 3, 3), (3, 1, 3, 1), (3, 1, 3, 1, 3, 1),
                             (1, 3, 1)]

    tf.keras.backend.set_image_data_format('channels_last')

    backbone = backbones.ResNet3D(
        model_id=model_id,
        temporal_strides=temporal_strides,
        temporal_kernel_sizes=temporal_kernel_sizes,
        input_specs=input_specs,
        activation=activation)

    num_classes = 1000
    model = video_classification_model.VideoClassificationModel(
        backbone=backbone,
        num_classes=num_classes,
        input_specs=input_specs,
        dropout_rate=0.2,
    )

    inputs = np.random.rand(2, temporal_size, spatial_size, spatial_size, 3)
    logits = model(inputs)
    self.assertAllEqual([2, num_classes], logits.numpy().shape)

  def test_serialize_deserialize(self):
    """Validate the classification network can be serialized and deserialized."""
    model_id = 50
    temporal_strides = [1, 1, 1, 1]
    temporal_kernel_sizes = [(3, 3, 3), (3, 1, 3, 1), (3, 1, 3, 1, 3, 1),
                             (1, 3, 1)]
    backbone = backbones.ResNet3D(
        model_id=model_id,
        temporal_strides=temporal_strides,
        temporal_kernel_sizes=temporal_kernel_sizes)

    model = video_classification_model.VideoClassificationModel(
        backbone=backbone, num_classes=1000)

    config = model.get_config()
    new_model = video_classification_model.VideoClassificationModel.from_config(
        config)

    # Validate that the config can be forced to JSON.
    _ = new_model.to_json()

    # If the serialization was successful, the new config should match the old.
    self.assertAllEqual(model.get_config(), new_model.get_config())


if __name__ == '__main__':
  tf.test.main()

official/vision/beta/ops/anchor.py  (new file, mode 100644)

# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Anchor box and labeler definition."""
import collections
# Import libraries
import tensorflow as tf

from official.vision.beta.ops.experimental import anchor_generator
from official.vision.detection.utils.object_detection import argmax_matcher
from official.vision.detection.utils.object_detection import balanced_positive_negative_sampler
from official.vision.detection.utils.object_detection import box_list
from official.vision.detection.utils.object_detection import faster_rcnn_box_coder
from official.vision.detection.utils.object_detection import region_similarity_calculator
from official.vision.detection.utils.object_detection import target_assigner


class Anchor(object):
  """Anchor class for anchor-based object detectors."""

  def __init__(self,
               min_level,
               max_level,
               num_scales,
               aspect_ratios,
               anchor_size,
               image_size):
    """Constructs multiscale anchors.

    Args:
      min_level: integer number of minimum level of the output feature pyramid.
      max_level: integer number of maximum level of the output feature pyramid.
      num_scales: integer number representing intermediate scales added
        on each level. For instance, num_scales=2 adds one additional
        intermediate anchor scale [2^0, 2^0.5] on each level.
      aspect_ratios: list of float numbers representing the aspect ratio
        anchors added on each level. The number indicates the ratio of width to
        height. For instance, aspect_ratios=[1.0, 2.0, 0.5] adds three anchors
        on each scale level.
      anchor_size: float number representing the scale of size of the base
        anchor to the feature stride 2^level.
      image_size: a list of integer numbers or Tensors representing
        [height, width] of the input image size. The image_size should be
        divisible by the largest feature stride 2^max_level.
    """
    self.min_level = min_level
    self.max_level = max_level
    self.num_scales = num_scales
    self.aspect_ratios = aspect_ratios
    self.anchor_size = anchor_size
    self.image_size = image_size
    self.boxes = self._generate_boxes()

  def _generate_boxes(self):
    """Generates multiscale anchor boxes.

    Returns:
      a Tensor of shape [N, 4], representing anchor boxes of all levels
      concatenated together.
    """
    boxes_all = []
    for level in range(self.min_level, self.max_level + 1):
      boxes_l = []
      for scale in range(self.num_scales):
        for aspect_ratio in self.aspect_ratios:
          stride = 2**level
          intermediate_scale = 2**(scale / float(self.num_scales))
          base_anchor_size = self.anchor_size * stride * intermediate_scale
          aspect_x = aspect_ratio**0.5
          aspect_y = aspect_ratio**-0.5
          half_anchor_size_x = base_anchor_size * aspect_x / 2.0
          half_anchor_size_y = base_anchor_size * aspect_y / 2.0
          x = tf.range(stride / 2, self.image_size[1], stride)
          y = tf.range(stride / 2, self.image_size[0], stride)
          xv, yv = tf.meshgrid(x, y)
          xv = tf.cast(tf.reshape(xv, [-1]), dtype=tf.float32)
          yv = tf.cast(tf.reshape(yv, [-1]), dtype=tf.float32)
          # Tensor shape Nx4.
          boxes = tf.stack([
              yv - half_anchor_size_y, xv - half_anchor_size_x,
              yv + half_anchor_size_y, xv + half_anchor_size_x
          ], axis=1)
          boxes_l.append(boxes)
      # Concat anchors on the same level to tensor shape NxAx4.
      boxes_l = tf.stack(boxes_l, axis=1)
      boxes_l = tf.reshape(boxes_l, [-1, 4])
      boxes_all.append(boxes_l)
    return tf.concat(boxes_all, axis=0)

  def unpack_labels(self, labels):
    """Unpacks an array of labels into multiscales labels."""
    unpacked_labels = collections.OrderedDict()
    count = 0
    for level in range(self.min_level, self.max_level + 1):
      feat_size_y = tf.cast(self.image_size[0] / 2**level, tf.int32)
      feat_size_x = tf.cast(self.image_size[1] / 2**level, tf.int32)
      steps = feat_size_y * feat_size_x * self.anchors_per_location
      unpacked_labels[level] = tf.reshape(labels[count:count + steps],
                                          [feat_size_y, feat_size_x, -1])
      count += steps
    return unpacked_labels

  @property
  def anchors_per_location(self):
    return self.num_scales * len(self.aspect_ratios)

  @property
  def multilevel_boxes(self):
    return self.unpack_labels(self.boxes)


class AnchorLabeler(object):
  """Labeler for dense object detector."""

  def __init__(self, match_threshold=0.5, unmatched_threshold=0.5):
    """Constructs anchor labeler to assign labels to anchors.

    Args:
      match_threshold: a float number between 0 and 1 representing the
        lower-bound threshold to assign positive labels for anchors. An anchor
        with a score over the threshold is labeled positive.
      unmatched_threshold: a float number between 0 and 1 representing the
        upper-bound threshold to assign negative labels for anchors. An anchor
        with a score below the threshold is labeled negative.
    """
    similarity_calc = region_similarity_calculator.IouSimilarity()
    matcher = argmax_matcher.ArgMaxMatcher(
        match_threshold,
        unmatched_threshold=unmatched_threshold,
        negatives_lower_than_unmatched=True,
        force_match_for_each_row=True)
    box_coder = faster_rcnn_box_coder.FasterRcnnBoxCoder()

    self._target_assigner = target_assigner.TargetAssigner(
        similarity_calc, matcher, box_coder)
    self._match_threshold = match_threshold
    self._unmatched_threshold = unmatched_threshold

  def label_anchors(self, anchor_boxes, gt_boxes, gt_labels):
    """Labels anchors with ground truth inputs.

    Args:
      anchor_boxes: A float tensor with shape [N, 4] representing anchor boxes.
        For each row, it stores [y0, x0, y1, x1] for four corners of a box.
      gt_boxes: A float tensor with shape [N, 4] representing groundtruth
        boxes. For each row, it stores [y0, x0, y1, x1] for four corners of a
        box.
      gt_labels: An integer tensor with shape [N, 1] representing groundtruth
        classes.

    Returns:
      cls_targets_dict: ordered dictionary with keys
        [min_level, min_level+1, ..., max_level]. The values are tensor with
        shape [height_l, width_l, num_anchors_per_location]. The height_l and
        width_l represent the dimension of class logits at l-th level.
      box_targets_dict: ordered dictionary with keys
        [min_level, min_level+1, ..., max_level]. The values are tensor with
        shape [height_l, width_l, num_anchors_per_location * 4]. The height_l
        and width_l represent the dimension of bounding box regression output
        at l-th level.
      cls_weights: A flattened Tensor with shape [batch_size, num_anchors],
        that serves as masking / sample weight for classification loss. Its
        value is 1.0 for positive and negative matched anchors, and 0.0 for
        ignored anchors.
      box_weights: A flattened Tensor with shape [batch_size, num_anchors],
        that serves as masking / sample weight for regression loss. Its value
        is 1.0 for positive matched anchors, and 0.0 for negative and ignored
        anchors.
    """
    gt_box_list = box_list.BoxList(gt_boxes)

    flattened_anchor_boxes = []
    for anchors in anchor_boxes.values():
      flattened_anchor_boxes.append(tf.reshape(anchors, [-1, 4]))
    flattened_anchor_boxes = tf.concat(flattened_anchor_boxes, axis=0)
    anchor_box_list = box_list.BoxList(flattened_anchor_boxes)

    # The cls_weights, box_weights are not used.
    (cls_targets, cls_weights, box_targets, box_weights,
     matches) = self._target_assigner.assign(anchor_box_list, gt_box_list,
                                             gt_labels)

    # Labels definition in matches.match_results:
    # (1) match_results[i]>=0, meaning that column i is matched with row
    #     match_results[i].
    # (2) match_results[i]=-1, meaning that column i is not matched.
    # (3) match_results[i]=-2, meaning that column i is ignored.
    match_results = tf.expand_dims(matches.match_results, axis=1)
    cls_targets = tf.cast(cls_targets, tf.int32)
    cls_targets = tf.where(
        tf.equal(match_results, -1), -tf.ones_like(cls_targets), cls_targets)
    cls_targets = tf.where(
        tf.equal(match_results, -2), -2 * tf.ones_like(cls_targets),
        cls_targets)

    # Unpacks labels into multi-level representations.
    cls_targets_dict = unpack_targets(cls_targets, anchor_boxes)
    box_targets_dict = unpack_targets(box_targets, anchor_boxes)

    return cls_targets_dict, box_targets_dict, cls_weights, box_weights


class RpnAnchorLabeler(AnchorLabeler):
  """Labeler for Region Proposal Network."""

  def __init__(self,
               match_threshold=0.7,
               unmatched_threshold=0.3,
               rpn_batch_size_per_im=256,
               rpn_fg_fraction=0.5):
    AnchorLabeler.__init__(
        self, match_threshold=0.7, unmatched_threshold=0.3)
    self._rpn_batch_size_per_im = rpn_batch_size_per_im
    self._rpn_fg_fraction = rpn_fg_fraction

  def _get_rpn_samples(self, match_results):
    """Computes anchor labels.

    This function performs subsampling for foreground (fg) and background (bg)
    anchors.

    Args:
      match_results: An integer tensor with shape [N] representing the
        matching results of anchors. (1) match_results[i]>=0,
        meaning that column i is matched with row match_results[i].
        (2) match_results[i]=-1, meaning that column i is not matched.
        (3) match_results[i]=-2, meaning that column i is ignored.

    Returns:
      score_targets: an integer tensor with a shape of [N].
        (1) score_targets[i]=1, the anchor is a positive sample.
        (2) score_targets[i]=0, negative. (3) score_targets[i]=-1, the anchor
        is ignored (don't care).
    """
    sampler = (
        balanced_positive_negative_sampler.BalancedPositiveNegativeSampler(
            positive_fraction=self._rpn_fg_fraction, is_static=False))
    # indicator includes both positive and negative labels.
    # labels includes only positives labels.
    # positives = indicator & labels.
    # negatives = indicator & !labels.
    # ignore = !indicator.
    indicator = tf.greater(match_results, -2)
    labels = tf.greater(match_results, -1)

    samples = sampler.subsample(indicator, self._rpn_batch_size_per_im, labels)
    positive_labels = tf.where(
        tf.logical_and(samples, labels),
        tf.constant(2, dtype=tf.int32, shape=match_results.shape),
        tf.constant(0, dtype=tf.int32, shape=match_results.shape))
    negative_labels = tf.where(
        tf.logical_and(samples, tf.logical_not(labels)),
        tf.constant(1, dtype=tf.int32, shape=match_results.shape),
        tf.constant(0, dtype=tf.int32, shape=match_results.shape))
    ignore_labels = tf.fill(match_results.shape, -1)

    return (ignore_labels + positive_labels + negative_labels,
            positive_labels, negative_labels)

  def label_anchors(self, anchor_boxes, gt_boxes, gt_labels):
    """Labels anchors with ground truth inputs.

    Args:
      anchor_boxes: A float tensor with shape [N, 4] representing anchor boxes.
        For each row, it stores [y0, x0, y1, x1] for four corners of a box.
      gt_boxes: A float tensor with shape [N, 4] representing groundtruth
        boxes. For each row, it stores [y0, x0, y1, x1] for four corners of a
        box.
      gt_labels: An integer tensor with shape [N, 1] representing groundtruth
        classes.

    Returns:
      score_targets_dict: ordered dictionary with keys
        [min_level, min_level+1, ..., max_level]. The values are tensor with
        shape [height_l, width_l, num_anchors]. The height_l and width_l
        represent the dimension of class logits at l-th level.
      box_targets_dict: ordered dictionary with keys
        [min_level, min_level+1, ..., max_level]. The values are tensor with
        shape [height_l, width_l, num_anchors * 4]. The height_l and
        width_l represent the dimension of bounding box regression output at
        l-th level.
    """
    gt_box_list = box_list.BoxList(gt_boxes)

    flattened_anchor_boxes = []
    for anchors in anchor_boxes.values():
      flattened_anchor_boxes.append(tf.reshape(anchors, [-1, 4]))
    flattened_anchor_boxes = tf.concat(flattened_anchor_boxes, axis=0)
    anchor_box_list = box_list.BoxList(flattened_anchor_boxes)

    # cls_targets, cls_weights, box_weights are not used.
    _, _, box_targets, _, matches = self._target_assigner.assign(
        anchor_box_list, gt_box_list, gt_labels)

    # score_targets contains the subsampled positive and negative anchors.
    score_targets, _, _ = self._get_rpn_samples(matches.match_results)

    # Unpacks labels.
    score_targets_dict = unpack_targets(score_targets, anchor_boxes)
    box_targets_dict = unpack_targets(box_targets, anchor_boxes)

    return score_targets_dict, box_targets_dict


def build_anchor_generator(min_level, max_level, num_scales, aspect_ratios,
                           anchor_size):
  """Build anchor generator from levels."""
  anchor_sizes = collections.OrderedDict()
  strides = collections.OrderedDict()
  scales = []
  for scale in range(num_scales):
    scales.append(2**(scale / float(num_scales)))
  for level in range(min_level, max_level + 1):
    stride = 2**level
    strides[level] = stride
    anchor_sizes[level] = anchor_size * stride
  anchor_gen = anchor_generator.AnchorGenerator(
      anchor_sizes=anchor_sizes,
      scales=scales,
      aspect_ratios=aspect_ratios,
      strides=strides)
  return anchor_gen


def unpack_targets(targets, anchor_boxes_dict):
  """Unpacks an array of labels into multiscales labels."""
  unpacked_targets = collections.OrderedDict()
  count = 0
  for level, anchor_boxes in anchor_boxes_dict.items():
    feat_size_shape = anchor_boxes.shape.as_list()
    feat_size_y = feat_size_shape[0]
    feat_size_x = feat_size_shape[1]
    anchors_per_location = int(feat_size_shape[2] / 4)
    steps = feat_size_y * feat_size_x * anchors_per_location
    unpacked_targets[level] = tf.reshape(targets[count:count + steps],
                                         [feat_size_y, feat_size_x, -1])
    count += steps
  return unpacked_targets
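
Note (illustrative only, not part of this commit): a minimal sketch of the Anchor / AnchorLabeler API defined above. The level range, anchor size, and ground-truth box are arbitrary example values.

import tensorflow as tf

from official.vision.beta.ops import anchor

# Multilevel anchors for a 256x256 image, pyramid levels 3-7,
# 3 scales x 1 aspect ratio = 3 anchors per location.
anchors = anchor.Anchor(
    min_level=3, max_level=7, num_scales=3, aspect_ratios=[1.0],
    anchor_size=4, image_size=(256, 256))
multilevel = anchors.multilevel_boxes  # OrderedDict: level -> [H_l, W_l, 3*4]

# Assign classification and box-regression targets to every anchor.
labeler = anchor.AnchorLabeler(match_threshold=0.5, unmatched_threshold=0.5)
gt_boxes = tf.constant([[10., 10., 96., 96.]])
gt_classes = tf.constant([[2.]])
cls_targets, box_targets, cls_weights, box_weights = labeler.label_anchors(
    multilevel, gt_boxes, gt_classes)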

official/vision/beta/ops/anchor_test.py  (new file, mode 100644)

# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for anchor.py."""
# Import libraries
from absl.testing import parameterized
import numpy as np
import tensorflow as tf

from official.vision.beta.ops import anchor


class AnchorTest(parameterized.TestCase, tf.test.TestCase):

  # The set of parameters are tailored for the MLPerf configuration, where
  # the number of anchors is 495132, rpn_batch_size_per_im=256, and
  # rpn_fg_fraction=0.5.
  @parameterized.parameters(
      (512, 25, 25, 25, 25, (512, 512)),
      (512, 25, 25, 25, 25, (512, 640)),
      (512, 25, 25, 25, 25, (640, 512)),
      (495132, 100, 100, 100, 100, (512, 512)),
      (495132, 200, 100, 128, 100, (512, 512)),
      (495132, 100, 120, 100, 120, (512, 512)),
      (495132, 100, 200, 100, 156, (512, 512)),
      (495132, 200, 200, 128, 128, (512, 512)),
  )
  def testAnchorRpnSample(self, num_anchors, num_positives, num_negatives,
                          expected_positives, expected_negatives, image_size):
    match_results_np = np.empty([num_anchors])
    match_results_np.fill(-2)
    match_results_np[:num_positives] = 0
    match_results_np[num_positives:num_positives + num_negatives] = -1
    match_results = tf.convert_to_tensor(
        value=match_results_np, dtype=tf.int32)
    anchor_labeler = anchor.RpnAnchorLabeler(
        match_threshold=0.7,
        unmatched_threshold=0.3,
        rpn_batch_size_per_im=256,
        rpn_fg_fraction=0.5)
    rpn_sample_op = anchor_labeler._get_rpn_samples(match_results)
    labels = [v.numpy() for v in rpn_sample_op]
    self.assertLen(labels[0], num_anchors)
    positives = np.sum(np.array(labels[0]) == 1)
    negatives = np.sum(np.array(labels[0]) == 0)
    self.assertEqual(positives, expected_positives)
    self.assertEqual(negatives, expected_negatives)

  @parameterized.parameters(
      # Single scale anchor.
      (5, 5, 1, [1.0], 2.0,
       [[-16, -16, 48, 48], [-16, 16, 48, 80],
        [16, -16, 80, 48], [16, 16, 80, 80]]),
      # Multi scale anchor.
      (5, 6, 1, [1.0], 2.0,
       [[-16, -16, 48, 48], [-16, 16, 48, 80],
        [16, -16, 80, 48], [16, 16, 80, 80], [-32, -32, 96, 96]]),
      # Multi aspect ratio anchor.
      (6, 6, 1, [1.0, 4.0, 0.25], 2.0,
       [[-32, -32, 96, 96], [-0, -96, 64, 160], [-96, -0, 160, 64]]),
  )
  def testAnchorGeneration(self, min_level, max_level, num_scales,
                           aspect_ratios, anchor_size, expected_boxes):
    image_size = [64, 64]
    anchors = anchor.Anchor(min_level, max_level, num_scales, aspect_ratios,
                            anchor_size, image_size)
    boxes = anchors.boxes.numpy()
    self.assertEqual(expected_boxes, boxes.tolist())

  @parameterized.parameters(
      # Single scale anchor.
      (5, 5, 1, [1.0], 2.0,
       [[-16, -16, 48, 48], [-16, 16, 48, 80],
        [16, -16, 80, 48], [16, 16, 80, 80]]),
      # Multi scale anchor.
      (5, 6, 1, [1.0], 2.0,
       [[-16, -16, 48, 48], [-16, 16, 48, 80],
        [16, -16, 80, 48], [16, 16, 80, 80], [-32, -32, 96, 96]]),
      # Multi aspect ratio anchor.
      (6, 6, 1, [1.0, 4.0, 0.25], 2.0,
       [[-32, -32, 96, 96], [-0, -96, 64, 160], [-96, -0, 160, 64]]),
  )
  def testAnchorGenerationWithImageSizeAsTensor(self, min_level, max_level,
                                                num_scales, aspect_ratios,
                                                anchor_size, expected_boxes):
    image_size = tf.constant([64, 64], tf.int32)
    anchors = anchor.Anchor(min_level, max_level, num_scales, aspect_ratios,
                            anchor_size, image_size)
    boxes = anchors.boxes.numpy()
    self.assertEqual(expected_boxes, boxes.tolist())

  @parameterized.parameters(
      (3, 6, 2, [1.0], 2.0),
  )
  def testLabelAnchors(self, min_level, max_level, num_scales, aspect_ratios,
                       anchor_size):
    input_size = [512, 512]
    ground_truth_class_id = 2
    # The matched anchors are the anchors used as ground truth and the anchors
    # at the next octave scale on the same location.
    expected_anchor_locations = [[0, 0, 0], [0, 0, 1]]

    anchor_gen = anchor.build_anchor_generator(min_level, max_level,
                                               num_scales, aspect_ratios,
                                               anchor_size)
    anchor_boxes = anchor_gen(input_size)
    anchor_labeler = anchor.AnchorLabeler()

    # Uses the first anchors as ground truth. The ground truth should map to
    # two anchors with two intermediate scales at the same location.
    gt_boxes = anchor_boxes[3][0:1, 0, 0:4]
    gt_classes = tf.constant([[ground_truth_class_id]], dtype=tf.float32)
    (cls_targets, box_targets, _, box_weights) = anchor_labeler.label_anchors(
        anchor_boxes, gt_boxes, gt_classes)

    for k, v in cls_targets.items():
      cls_targets[k] = v.numpy()
    for k, v in box_targets.items():
      box_targets[k] = v.numpy()
    box_weights = box_weights.numpy()

    anchor_locations = np.vstack(
        np.where(cls_targets[min_level] > -1)).transpose()
    self.assertAllClose(expected_anchor_locations, anchor_locations)
    # Two anchor boxes on min_level got matched to the gt_boxes.
    self.assertAllClose(tf.reduce_sum(box_weights), 2)

  @parameterized.parameters(
      (3, 7, [.5, 1., 2.], 2, 8, (256, 256)),
      (3, 8, [1.], 3, 32, (512, 512)),
      (3, 3, [1.], 2, 4, (32, 32)),
  )
  def testEquivalentResult(self, min_level, max_level, aspect_ratios,
                           num_scales, anchor_size, image_size):
    anchor_gen = anchor.build_anchor_generator(
        min_level=min_level,
        max_level=max_level,
        num_scales=num_scales,
        aspect_ratios=aspect_ratios,
        anchor_size=anchor_size)
    anchors = anchor_gen(image_size)
    expected_anchor_gen = anchor.Anchor(min_level, max_level, num_scales,
                                        aspect_ratios, anchor_size, image_size)
    expected_anchors = expected_anchor_gen.multilevel_boxes
    for k in expected_anchors.keys():
      self.assertAllClose(expected_anchors[k], anchors[k])


if __name__ == '__main__':
  tf.test.main()

official/vision/beta/ops/box_ops.py  (new file, mode 100644)

# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Box related ops."""
# Import libraries
import
numpy
as
np
import
tensorflow
as
tf
EPSILON
=
1e-8
BBOX_XFORM_CLIP
=
np
.
log
(
1000.
/
16.
)
def
yxyx_to_xywh
(
boxes
):
"""Converts boxes from ymin, xmin, ymax, xmax to xmin, ymin, width, height.
Args:
boxes: a numpy array whose last dimension is 4 representing the coordinates
of boxes in ymin, xmin, ymax, xmax order.
Returns:
boxes: a numpy array whose shape is the same as `boxes` in new format.
Raises:
ValueError: If the last dimension of boxes is not 4.
"""
if
boxes
.
shape
[
-
1
]
!=
4
:
raise
ValueError
(
'boxes.shape[-1] is {:d}, but must be 4.'
.
format
(
boxes
.
shape
[
-
1
]))
boxes_ymin
=
boxes
[...,
0
]
boxes_xmin
=
boxes
[...,
1
]
boxes_width
=
boxes
[...,
3
]
-
boxes
[...,
1
]
boxes_height
=
boxes
[...,
2
]
-
boxes
[...,
0
]
new_boxes
=
np
.
stack
(
[
boxes_xmin
,
boxes_ymin
,
boxes_width
,
boxes_height
],
axis
=-
1
)
return
new_boxes
def
jitter_boxes
(
boxes
,
noise_scale
=
0.025
):
"""Jitter the box coordinates by some noise distribution.
Args:
boxes: a tensor whose last dimension is 4 representing the coordinates of
boxes in ymin, xmin, ymax, xmax order.
noise_scale: a python float which specifies the magnitude of noise. The rule
of thumb is to set this between (0, 0.1]. The default value is found to
mimic the noisy detections best empirically.
Returns:
jittered_boxes: a tensor whose shape is the same as `boxes` representing
the jittered boxes.
Raises:
ValueError: If the last dimension of boxes is not 4.
"""
if
boxes
.
shape
[
-
1
]
!=
4
:
raise
ValueError
(
'boxes.shape[-1] is {:d}, but must be 4.'
.
format
(
boxes
.
shape
[
-
1
]))
with
tf
.
name_scope
(
'jitter_boxes'
):
bbox_jitters
=
tf
.
random
.
normal
(
tf
.
shape
(
boxes
),
stddev
=
noise_scale
)
ymin
=
boxes
[...,
0
:
1
]
xmin
=
boxes
[...,
1
:
2
]
ymax
=
boxes
[...,
2
:
3
]
xmax
=
boxes
[...,
3
:
4
]
width
=
xmax
-
xmin
height
=
ymax
-
ymin
new_center_x
=
(
xmin
+
xmax
)
/
2.0
+
bbox_jitters
[...,
0
:
1
]
*
width
new_center_y
=
(
ymin
+
ymax
)
/
2.0
+
bbox_jitters
[...,
1
:
2
]
*
height
new_width
=
width
*
tf
.
math
.
exp
(
bbox_jitters
[...,
2
:
3
])
new_height
=
height
*
tf
.
math
.
exp
(
bbox_jitters
[...,
3
:
4
])
jittered_boxes
=
tf
.
concat
(
[
new_center_y
-
new_height
*
0.5
,
new_center_x
-
new_width
*
0.5
,
new_center_y
+
new_height
*
0.5
,
new_center_x
+
new_width
*
0.5
],
axis
=-
1
)
return
jittered_boxes
def
normalize_boxes
(
boxes
,
image_shape
):
"""Converts boxes to the normalized coordinates.
Args:
boxes: a tensor whose last dimension is 4 representing the coordinates
of boxes in ymin, xmin, ymax, xmax order.
image_shape: a list of two integers, a two-element vector or a tensor such
that all but the last dimensions are `broadcastable` to `boxes`. The last
dimension is 2, which represents [height, width].
Returns:
normalized_boxes: a tensor whose shape is the same as `boxes` representing
the normalized boxes.
Raises:
ValueError: If the last dimension of boxes is not 4.
"""
if
boxes
.
shape
[
-
1
]
!=
4
:
raise
ValueError
(
'boxes.shape[-1] is {:d}, but must be 4.'
.
format
(
boxes
.
shape
[
-
1
]))
with
tf
.
name_scope
(
'normalize_boxes'
):
if
isinstance
(
image_shape
,
list
)
or
isinstance
(
image_shape
,
tuple
):
height
,
width
=
image_shape
else
:
image_shape
=
tf
.
cast
(
image_shape
,
dtype
=
boxes
.
dtype
)
height
=
image_shape
[...,
0
:
1
]
width
=
image_shape
[...,
1
:
2
]
ymin
=
boxes
[...,
0
:
1
]
/
height
xmin
=
boxes
[...,
1
:
2
]
/
width
ymax
=
boxes
[...,
2
:
3
]
/
height
xmax
=
boxes
[...,
3
:
4
]
/
width
normalized_boxes
=
tf
.
concat
([
ymin
,
xmin
,
ymax
,
xmax
],
axis
=-
1
)
return
normalized_boxes


def denormalize_boxes(boxes, image_shape):
  """Converts boxes normalized by [height, width] to pixel coordinates.

  Args:
    boxes: a tensor whose last dimension is 4 representing the coordinates of
      boxes in ymin, xmin, ymax, xmax order.
    image_shape: a list of two integers, a two-element vector or a tensor such
      that all but the last dimensions are `broadcastable` to `boxes`. The
      last dimension is 2, which represents [height, width].

  Returns:
    denormalized_boxes: a tensor whose shape is the same as `boxes`
      representing the denormalized boxes.
  """
  with tf.name_scope('denormalize_boxes'):
    if isinstance(image_shape, (list, tuple)):
      height, width = image_shape
    else:
      image_shape = tf.cast(image_shape, dtype=boxes.dtype)
      height, width = tf.split(image_shape, 2, axis=-1)

    ymin, xmin, ymax, xmax = tf.split(boxes, 4, axis=-1)
    ymin = ymin * height
    xmin = xmin * width
    ymax = ymax * height
    xmax = xmax * width

    denormalized_boxes = tf.concat([ymin, xmin, ymax, xmax], axis=-1)
    return denormalized_boxes
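
# Example (illustrative only, not part of the original file): for a 50x100
# image, `normalize_boxes` and `denormalize_boxes` are inverses of each other:
#
#   boxes = tf.constant([[10.0, 30.0, 40.0, 90.0]])
#   normalized = normalize_boxes(boxes, [50, 100])      # [[0.2, 0.3, 0.8, 0.9]]
#   restored = denormalize_boxes(normalized, [50, 100])  # back to `boxes`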


def clip_boxes(boxes, image_shape):
  """Clips boxes to image boundaries.

  Args:
    boxes: a tensor whose last dimension is 4 representing the coordinates of
      boxes in ymin, xmin, ymax, xmax order.
    image_shape: a list of two integers, a two-element vector or a tensor such
      that all but the last dimensions are `broadcastable` to `boxes`. The
      last dimension is 2, which represents [height, width].

  Returns:
    clipped_boxes: a tensor whose shape is the same as `boxes` representing
      the clipped boxes.

  Raises:
    ValueError: If the last dimension of boxes is not 4.
  """
  if boxes.shape[-1] != 4:
    raise ValueError('boxes.shape[-1] is {:d}, but must be 4.'.format(
        boxes.shape[-1]))

  with tf.name_scope('clip_boxes'):
    if isinstance(image_shape, (list, tuple)):
      height, width = image_shape
      max_length = [height, width, height, width]
    else:
      image_shape = tf.cast(image_shape, dtype=boxes.dtype)
      height, width = tf.unstack(image_shape, axis=-1)
      max_length = tf.stack([height, width, height, width], axis=-1)

    clipped_boxes = tf.math.maximum(tf.math.minimum(boxes, max_length), 0.0)
    return clipped_boxes


def compute_outer_boxes(boxes, image_shape, scale=1.0):
  """Computes the outer box that encloses an object with a margin.

  Args:
    boxes: a tensor whose last dimension is 4 representing the coordinates of
      boxes in ymin, xmin, ymax, xmax order.
    image_shape: a list of two integers, a two-element vector or a tensor such
      that all but the last dimensions are `broadcastable` to `boxes`. The
      last dimension is 2, which represents [height, width].
    scale: a float number specifying the scale of output outer boxes to input
      `boxes`.

  Returns:
    outer_boxes: a tensor whose shape is the same as `boxes` representing the
      outer boxes.

  Raises:
    ValueError: If `scale` is less than 1.0.
  """
  if scale < 1.0:
    raise ValueError(
        'scale is {}, but outer box scale must be greater than 1.0.'.format(
            scale))
  centers_y = (boxes[..., 0] + boxes[..., 2]) / 2.0
  centers_x = (boxes[..., 1] + boxes[..., 3]) / 2.0
  box_height = (boxes[..., 2] - boxes[..., 0]) * scale
  box_width = (boxes[..., 3] - boxes[..., 1]) * scale
  outer_boxes = tf.stack(
      [centers_y - box_height / 2.0, centers_x - box_width / 2.0,
       centers_y + box_height / 2.0, centers_x + box_width / 2.0],
      axis=1)
  outer_boxes = clip_boxes(outer_boxes, image_shape)
  return outer_boxes
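
# Example (illustrative only, not part of the original file): with
# `scale=2.0`, a 10x10 box centered at (10, 10) grows to a 20x20 box around
# the same center and is then clipped to the image:
#
#   boxes = tf.constant([[5.0, 5.0, 15.0, 15.0]])
#   outer = compute_outer_boxes(boxes, [100, 100], scale=2.0)
#   # -> [[0.0, 0.0, 20.0, 20.0]]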


def encode_boxes(boxes, anchors, weights=None):
  """Encode boxes to targets.

  Args:
    boxes: a tensor whose last dimension is 4 representing the coordinates of
      boxes in ymin, xmin, ymax, xmax order.
    anchors: a tensor whose shape is the same as, or `broadcastable` to
      `boxes`, representing the coordinates of anchors in ymin, xmin, ymax,
      xmax order.
    weights: None or a list of four float numbers used to scale coordinates.

  Returns:
    encoded_boxes: a tensor whose shape is the same as `boxes` representing
      the encoded box targets.

  Raises:
    ValueError: If the last dimension of boxes is not 4.
  """
  if boxes.shape[-1] != 4:
    raise ValueError('boxes.shape[-1] is {:d}, but must be 4.'.format(
        boxes.shape[-1]))

  with tf.name_scope('encode_boxes'):
    boxes = tf.cast(boxes, dtype=anchors.dtype)
    ymin = boxes[..., 0:1]
    xmin = boxes[..., 1:2]
    ymax = boxes[..., 2:3]
    xmax = boxes[..., 3:4]
    box_h = ymax - ymin
    box_w = xmax - xmin
    box_yc = ymin + 0.5 * box_h
    box_xc = xmin + 0.5 * box_w

    anchor_ymin = anchors[..., 0:1]
    anchor_xmin = anchors[..., 1:2]
    anchor_ymax = anchors[..., 2:3]
    anchor_xmax = anchors[..., 3:4]
    anchor_h = anchor_ymax - anchor_ymin
    anchor_w = anchor_xmax - anchor_xmin
    anchor_yc = anchor_ymin + 0.5 * anchor_h
    anchor_xc = anchor_xmin + 0.5 * anchor_w

    encoded_dy = (box_yc - anchor_yc) / anchor_h
    encoded_dx = (box_xc - anchor_xc) / anchor_w
    encoded_dh = tf.math.log(box_h / anchor_h)
    encoded_dw = tf.math.log(box_w / anchor_w)
    if weights:
      encoded_dy *= weights[0]
      encoded_dx *= weights[1]
      encoded_dh *= weights[2]
      encoded_dw *= weights[3]

    encoded_boxes = tf.concat(
        [encoded_dy, encoded_dx, encoded_dh, encoded_dw], axis=-1)
    return encoded_boxes


def decode_boxes(encoded_boxes, anchors, weights=None):
  """Decode boxes.

  Args:
    encoded_boxes: a tensor whose last dimension is 4 representing the
      coordinates of encoded boxes in ymin, xmin, ymax, xmax order.
    anchors: a tensor whose shape is the same as, or `broadcastable` to
      `boxes`, representing the coordinates of anchors in ymin, xmin, ymax,
      xmax order.
    weights: None or a list of four float numbers used to scale coordinates.

  Returns:
    decoded_boxes: a tensor whose shape is the same as `boxes` representing
      the decoded box targets.

  Raises:
    ValueError: If the last dimension of encoded_boxes is not 4.
  """
  if encoded_boxes.shape[-1] != 4:
    raise ValueError('encoded_boxes.shape[-1] is {:d}, but must be 4.'.format(
        encoded_boxes.shape[-1]))

  with tf.name_scope('decode_boxes'):
    encoded_boxes = tf.cast(encoded_boxes, dtype=anchors.dtype)
    dy = encoded_boxes[..., 0:1]
    dx = encoded_boxes[..., 1:2]
    dh = encoded_boxes[..., 2:3]
    dw = encoded_boxes[..., 3:4]
    if weights:
      dy /= weights[0]
      dx /= weights[1]
      dh /= weights[2]
      dw /= weights[3]
    dh = tf.math.minimum(dh, BBOX_XFORM_CLIP)
    dw = tf.math.minimum(dw, BBOX_XFORM_CLIP)

    anchor_ymin = anchors[..., 0:1]
    anchor_xmin = anchors[..., 1:2]
    anchor_ymax = anchors[..., 2:3]
    anchor_xmax = anchors[..., 3:4]
    anchor_h = anchor_ymax - anchor_ymin
    anchor_w = anchor_xmax - anchor_xmin
    anchor_yc = anchor_ymin + 0.5 * anchor_h
    anchor_xc = anchor_xmin + 0.5 * anchor_w

    decoded_boxes_yc = dy * anchor_h + anchor_yc
    decoded_boxes_xc = dx * anchor_w + anchor_xc
    decoded_boxes_h = tf.math.exp(dh) * anchor_h
    decoded_boxes_w = tf.math.exp(dw) * anchor_w

    decoded_boxes_ymin = decoded_boxes_yc - 0.5 * decoded_boxes_h
    decoded_boxes_xmin = decoded_boxes_xc - 0.5 * decoded_boxes_w
    decoded_boxes_ymax = decoded_boxes_ymin + decoded_boxes_h
    decoded_boxes_xmax = decoded_boxes_xmin + decoded_boxes_w

    decoded_boxes = tf.concat(
        [decoded_boxes_ymin, decoded_boxes_xmin,
         decoded_boxes_ymax, decoded_boxes_xmax],
        axis=-1)
    return decoded_boxes
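
# Example (illustrative only, not part of the original file): encoding boxes
# against anchors and decoding the result recovers the original boxes. The
# weight values below are just an example, not a prescribed setting:
#
#   anchors = tf.constant([[[1.5, 2.5, 3.5, 4.5]]])
#   boxes = tf.constant([[[1.0, 2.0, 3.0, 4.0]]])
#   weights = [10.0, 10.0, 5.0, 5.0]
#   encoded = encode_boxes(boxes, anchors, weights)
#   decoded = decode_boxes(encoded, anchors, weights)  # ~= boxes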


def filter_boxes(boxes, scores, image_shape, min_size_threshold):
  """Filters and removes boxes that are too small or fall outside the image.

  Args:
    boxes: a tensor whose last dimension is 4 representing the coordinates of
      boxes in ymin, xmin, ymax, xmax order.
    scores: a tensor whose shape is the same as tf.shape(boxes)[:-1]
      representing the original scores of the boxes.
    image_shape: a tensor whose shape is the same as, or `broadcastable` to
      `boxes` except the last dimension, which is 2, representing [height,
      width] of the scaled image.
    min_size_threshold: a float representing the minimal box size in each side
      (w.r.t. the scaled image). Boxes whose sides are smaller than it will be
      filtered out.

  Returns:
    filtered_boxes: a tensor whose shape is the same as `boxes` but with
      the position of the filtered boxes filled with 0.
    filtered_scores: a tensor whose shape is the same as `scores` but with
      the position of the filtered boxes filled with 0.

  Raises:
    ValueError: If the last dimension of boxes is not 4.
  """
  if boxes.shape[-1] != 4:
    raise ValueError('boxes.shape[-1] is {:d}, but must be 4.'.format(
        boxes.shape[-1]))

  with tf.name_scope('filter_boxes'):
    if isinstance(image_shape, (list, tuple)):
      height, width = image_shape
    else:
      image_shape = tf.cast(image_shape, dtype=boxes.dtype)
      height = image_shape[..., 0]
      width = image_shape[..., 1]

    ymin = boxes[..., 0]
    xmin = boxes[..., 1]
    ymax = boxes[..., 2]
    xmax = boxes[..., 3]

    h = ymax - ymin
    w = xmax - xmin
    yc = ymin + 0.5 * h
    xc = xmin + 0.5 * w

    min_size = tf.cast(
        tf.math.maximum(min_size_threshold, 0.0), dtype=boxes.dtype)

    filtered_size_mask = tf.math.logical_and(
        tf.math.greater(h, min_size), tf.math.greater(w, min_size))
    filtered_center_mask = tf.logical_and(
        tf.math.logical_and(
            tf.math.greater(yc, 0.0), tf.math.less(yc, height)),
        tf.math.logical_and(
            tf.math.greater(xc, 0.0), tf.math.less(xc, width)))
    filtered_mask = tf.math.logical_and(
        filtered_size_mask, filtered_center_mask)

    filtered_scores = tf.where(filtered_mask, scores, tf.zeros_like(scores))
    filtered_boxes = tf.cast(
        tf.expand_dims(filtered_mask, axis=-1), dtype=boxes.dtype) * boxes

    return filtered_boxes, filtered_scores
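
# Example (illustrative only, not part of the original file): with a 2.0
# minimum side length on an 8x8 image, a 1x1 box is dropped (box and score
# zeroed out) while a large enough box is kept:
#
#   boxes = tf.constant([[[0.0, 0.0, 1.0, 1.0], [0.0, 0.0, 4.0, 4.0]]])
#   scores = tf.constant([[0.9, 0.8]])
#   filter_boxes(boxes, scores, [8, 8], 2.0)
#   # -> boxes [[[0, 0, 0, 0], [0, 0, 4, 4]]], scores [[0.0, 0.8]]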


def filter_boxes_by_scores(boxes, scores, min_score_threshold):
  """Filters and removes boxes whose scores are smaller than the threshold.

  Args:
    boxes: a tensor whose last dimension is 4 representing the coordinates of
      boxes in ymin, xmin, ymax, xmax order.
    scores: a tensor whose shape is the same as tf.shape(boxes)[:-1]
      representing the original scores of the boxes.
    min_score_threshold: a float representing the minimal box score threshold.
      Boxes whose score are smaller than it will be filtered out.

  Returns:
    filtered_boxes: a tensor whose shape is the same as `boxes` but with
      the position of the filtered boxes filled with 0.
    filtered_scores: a tensor whose shape is the same as `scores` but with
      the position of the filtered scores filled with -1.

  Raises:
    ValueError: If the last dimension of boxes is not 4.
  """
  if boxes.shape[-1] != 4:
    raise ValueError('boxes.shape[-1] is {:d}, but must be 4.'.format(
        boxes.shape[-1]))

  with tf.name_scope('filter_boxes_by_scores'):
    filtered_mask = tf.math.greater(scores, min_score_threshold)
    filtered_scores = tf.where(filtered_mask, scores, -tf.ones_like(scores))
    filtered_boxes = tf.cast(
        tf.expand_dims(filtered_mask, axis=-1), dtype=boxes.dtype) * boxes

    return filtered_boxes, filtered_scores


def gather_instances(selected_indices, instances, *aux_instances):
  """Gathers instances by indices.

  Args:
    selected_indices: a Tensor of shape [batch, K] which indicates the
      selected indices in instance dimension (2nd dimension).
    instances: a Tensor of shape [batch, N, ...] where the 2nd dimension is
      the instance dimension to be selected from.
    *aux_instances: the additional Tensors whose shapes are in [batch, N, ...]
      which are the tensors to be selected from using the `selected_indices`.

  Returns:
    selected_instances: the tensor of shape [batch, K, ...] which corresponds
      to the selected instances of the `instances` tensor.
    selected_aux_instances: the additional tensors of shape [batch, K, ...]
      which correspond to the selected instances of the `aux_instances`
      tensors.
  """
  batch_size = instances.shape[0]
  if batch_size == 1:
    selected_instances = tf.squeeze(
        tf.gather(instances, selected_indices, axis=1), axis=1)
    if aux_instances:
      selected_aux_instances = [
          tf.squeeze(tf.gather(a, selected_indices, axis=1), axis=1)
          for a in aux_instances
      ]
      return tuple([selected_instances] + selected_aux_instances)
    else:
      return selected_instances
  else:
    indices_shape = tf.shape(selected_indices)
    batch_indices = (
        tf.expand_dims(tf.range(indices_shape[0]), axis=-1) *
        tf.ones([1, indices_shape[-1]], dtype=tf.int32))
    gather_nd_indices = tf.stack(
        [batch_indices, selected_indices], axis=-1)
    selected_instances = tf.gather_nd(instances, gather_nd_indices)
    if aux_instances:
      selected_aux_instances = [
          tf.gather_nd(a, gather_nd_indices) for a in aux_instances
      ]
      return tuple([selected_instances] + selected_aux_instances)
    else:
      return selected_instances


def top_k_boxes(boxes, scores, k):
  """Sorts and selects top k boxes according to the scores.

  Args:
    boxes: a tensor of shape [batch_size, N, 4] representing the coordinates
      of the boxes. N is the number of boxes per image.
    scores: a tensor of shape [batch_size, N] representing the score of the
      boxes.
    k: an integer or a tensor indicating the top k number.

  Returns:
    selected_boxes: a tensor of shape [batch_size, k, 4] representing the
      selected top k box coordinates.
    selected_scores: a tensor of shape [batch_size, k] representing the
      selected top k box scores.
  """
  with tf.name_scope('top_k_boxes'):
    selected_scores, top_k_indices = tf.nn.top_k(scores, k=k, sorted=True)
    selected_boxes = gather_instances(top_k_indices, boxes)
    return selected_boxes, selected_scores


def get_non_empty_box_indices(boxes):
  """Gets indices for non-empty boxes."""
  # Selects indices of boxes whose height and width are both positive.
  height = boxes[:, 2] - boxes[:, 0]
  width = boxes[:, 3] - boxes[:, 1]
  indices = tf.where(
      tf.logical_and(tf.greater(height, 0), tf.greater(width, 0)))
  return indices[:, 0]


def bbox_overlap(boxes, gt_boxes):
  """Calculates the overlap between proposal and ground truth boxes.

  Some `boxes` or `gt_boxes` may have been padded. The returned `iou` tensor
  for these boxes will be -1.

  Args:
    boxes: a tensor with a shape of [batch_size, N, 4]. N is the number of
      proposals before groundtruth assignment (e.g., rpn_post_nms_topn). The
      last dimension is the pixel coordinates in [ymin, xmin, ymax, xmax]
      form.
    gt_boxes: a tensor with a shape of [batch_size, MAX_NUM_INSTANCES, 4].
      This tensor might have paddings with a negative value.

  Returns:
    iou: a tensor with a shape of [batch_size, N, MAX_NUM_INSTANCES].
  """
  with tf.name_scope('bbox_overlap'):
    bb_y_min, bb_x_min, bb_y_max, bb_x_max = tf.split(
        value=boxes, num_or_size_splits=4, axis=2)
    gt_y_min, gt_x_min, gt_y_max, gt_x_max = tf.split(
        value=gt_boxes, num_or_size_splits=4, axis=2)

    # Calculates the intersection area.
    i_xmin = tf.math.maximum(bb_x_min, tf.transpose(gt_x_min, [0, 2, 1]))
    i_xmax = tf.math.minimum(bb_x_max, tf.transpose(gt_x_max, [0, 2, 1]))
    i_ymin = tf.math.maximum(bb_y_min, tf.transpose(gt_y_min, [0, 2, 1]))
    i_ymax = tf.math.minimum(bb_y_max, tf.transpose(gt_y_max, [0, 2, 1]))
    i_area = (
        tf.math.maximum((i_xmax - i_xmin), 0) *
        tf.math.maximum((i_ymax - i_ymin), 0))

    # Calculates the union area.
    bb_area = (bb_y_max - bb_y_min) * (bb_x_max - bb_x_min)
    gt_area = (gt_y_max - gt_y_min) * (gt_x_max - gt_x_min)
    # Adds a small epsilon to avoid divide-by-zero.
    u_area = bb_area + tf.transpose(gt_area, [0, 2, 1]) - i_area + 1e-8

    # Calculates IoU.
    iou = i_area / u_area

    # Fills -1 for IoU entries between the padded ground truth boxes.
    gt_invalid_mask = tf.less(
        tf.reduce_max(gt_boxes, axis=-1, keepdims=True), 0.0)
    padding_mask = tf.logical_or(
        tf.zeros_like(bb_x_min, dtype=tf.bool),
        tf.transpose(gt_invalid_mask, [0, 2, 1]))
    iou = tf.where(padding_mask, -tf.ones_like(iou), iou)

    # Fills -1 for IoU entries of invalid (-1) boxes.
    boxes_invalid_mask = tf.less(
        tf.reduce_max(boxes, axis=-1, keepdims=True), 0.0)
    iou = tf.where(boxes_invalid_mask, -tf.ones_like(iou), iou)

    return iou
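
# Example (illustrative only, not part of the original file): two unit-area
# boxes overlapping on half of each give IoU = 0.5 / (1 + 1 - 0.5) ~= 0.333,
# and a padded (-1) groundtruth box yields -1:
#
#   boxes = tf.constant([[[0.0, 0.0, 1.0, 1.0]]])
#   gt_boxes = tf.constant([[[0.0, 0.5, 1.0, 1.5], [-1.0, -1.0, -1.0, -1.0]]])
#   bbox_overlap(boxes, gt_boxes)  # ~= [[[0.333, -1.0]]]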


def box_matching(boxes, gt_boxes, gt_classes):
  """Matches boxes to groundtruth boxes.

  Given the proposal boxes and the groundtruth boxes and classes, perform the
  groundtruth matching by taking the argmax of the IoU between boxes and
  groundtruth boxes.

  Args:
    boxes: a tensor of shape of [batch_size, N, 4] representing the box
      coordinates to be matched to groundtruth boxes.
    gt_boxes: a tensor of shape of [batch_size, MAX_INSTANCES, 4] representing
      the groundtruth box coordinates. It is padded with -1s to indicate the
      invalid boxes.
    gt_classes: [batch_size, MAX_INSTANCES] representing the groundtruth box
      classes. It is padded with -1s to indicate the invalid classes.

  Returns:
    matched_gt_boxes: a tensor of shape of [batch_size, N, 4], representing
      the matched groundtruth box coordinates for each input box. If the box
      does not overlap with any groundtruth boxes, the matched boxes of it
      will be set to all 0s.
    matched_gt_classes: a tensor of shape of [batch_size, N], representing
      the matched groundtruth classes for each input box. If the box does not
      overlap with any groundtruth boxes, the matched box classes of it will
      be set to 0, which corresponds to the background class.
    matched_gt_indices: a tensor of shape of [batch_size, N], representing
      the indices of the matched groundtruth boxes in the original gt_boxes
      tensor. If the box does not overlap with any groundtruth boxes, the
      index of the matched groundtruth will be set to -1.
    matched_iou: a tensor of shape of [batch_size, N], representing the IoU
      between the box and its matched groundtruth box. The matched IoU is the
      maximum IoU of the box and all the groundtruth boxes.
    iou: a tensor of shape of [batch_size, N, K], representing the IoU matrix
      between boxes and the groundtruth boxes. The IoU between a box and the
      invalid groundtruth boxes whose coordinates are [-1, -1, -1, -1] is -1.
  """
  # Compute IoU between boxes and gt_boxes.
  # iou <- [batch_size, N, K]
  iou = bbox_overlap(boxes, gt_boxes)

  # max_iou <- [batch_size, N]
  # 0.0 -> no match to gt, or -1.0 match to no gt
  matched_iou = tf.reduce_max(iou, axis=-1)

  # background_box_mask <- bool, [batch_size, N]
  background_box_mask = tf.less_equal(matched_iou, 0.0)

  argmax_iou_indices = tf.argmax(iou, axis=-1, output_type=tf.int32)

  matched_gt_boxes, matched_gt_classes = gather_instances(
      argmax_iou_indices, gt_boxes, gt_classes)
  matched_gt_boxes = tf.where(
      tf.tile(tf.expand_dims(background_box_mask, axis=-1), [1, 1, 4]),
      tf.zeros_like(matched_gt_boxes, dtype=matched_gt_boxes.dtype),
      matched_gt_boxes)
  matched_gt_classes = tf.where(
      background_box_mask,
      tf.zeros_like(matched_gt_classes),
      matched_gt_classes)

  matched_gt_indices = tf.where(
      background_box_mask,
      -tf.ones_like(argmax_iou_indices),
      argmax_iou_indices)

  return (matched_gt_boxes, matched_gt_classes, matched_gt_indices,
          matched_iou, iou)
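
# Example (illustrative only, not part of the original file): a proposal that
# exactly overlaps a valid groundtruth box gets that box, its class and index
# 0 back; a proposal overlapping nothing is treated as background (box 0s,
# class 0, index -1):
#
#   boxes = tf.constant([[[0.0, 0.0, 4.0, 4.0], [50.0, 50.0, 60.0, 60.0]]])
#   gt_boxes = tf.constant([[[0.0, 0.0, 4.0, 4.0]]])
#   gt_classes = tf.constant([[7]])
#   box_matching(boxes, gt_boxes, gt_classes)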
official/vision/beta/ops/box_ops_test.py
0 → 100644
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for box_ops.py."""
# Import libraries
import numpy as np
import tensorflow as tf

from official.vision.beta.ops import box_ops


def _transform_boxes_on_tpu_and_cpu(transform_fn, boxes, *args):
  # Runs on TPU.
  strategy = tf.distribute.experimental.TPUStrategy()
  with strategy.scope():
    transformed_op_tpu = transform_fn(boxes, *args)
    transformed_boxes_tpu = tf.nest.map_structure(
        lambda x: x.numpy(), transformed_op_tpu)

  # Runs on CPU.
  transformed_op_cpu = transform_fn(boxes, *args)
  transformed_boxes_cpu = tf.nest.map_structure(
      lambda x: x.numpy(), transformed_op_cpu)

  return transformed_boxes_tpu, transformed_boxes_cpu


class ConvertBoxesTest(tf.test.TestCase):

  def testConvertBoxes(self):
    # y1, x1, y2, x2.
    boxes = np.array([[0, 0, 1, 2], [0.2, 0.1, 1.2, 1.1]])
    # x1, y1, width, height.
    target = np.array([[0, 0, 2, 1], [0.1, 0.2, 1, 1]])
    outboxes = box_ops.yxyx_to_xywh(boxes)
    self.assertNDArrayNear(outboxes, target, 1e-7)


class JitterBoxesTest(tf.test.TestCase):

  def testJitterBoxes(self):
    boxes_data = [[0, 0, 1, 1],
                  [0, 0.1, 1, 1.1],
                  [0, 0.3, 1, 1.3],
                  [0, 0.5, 1, 1.5],
                  [0, 0.7, 1, 1.7],
                  [0, 1.9, 1, 1.9]]
    boxes_np = np.array(boxes_data, dtype=np.float32)
    max_size = max(
        np.amax(boxes_np[:, 3] - boxes_np[:, 1]),
        np.amax(boxes_np[:, 2] - boxes_np[:, 0]))
    noise_scale = 0.025
    boxes = tf.constant(boxes_np)

    def jitter_fn(input_boxes, arg_noise_scale):
      return box_ops.jitter_boxes(input_boxes, arg_noise_scale)

    jittered_boxes_tpu, jittered_boxes_cpu = _transform_boxes_on_tpu_and_cpu(
        jitter_fn, boxes, noise_scale)
    # Test that the jittered box is within 10 stds from the inputs.
    self.assertNDArrayNear(
        jittered_boxes_tpu, boxes_np, noise_scale * max_size * 10)
    self.assertNDArrayNear(
        jittered_boxes_cpu, boxes_np, noise_scale * max_size * 10)


class NormalizeBoxesTest(tf.test.TestCase):

  def testNormalizeBoxes1DWithImageShapeAsList(self):
    boxes = tf.constant([10, 30, 40, 90], tf.float32)
    image_shape = [50, 100]
    normalized_boxes_tpu, normalized_boxes_cpu = (
        _transform_boxes_on_tpu_and_cpu(
            box_ops.normalize_boxes, boxes, image_shape))
    self.assertNDArrayNear(normalized_boxes_tpu, normalized_boxes_cpu, 1e-5)
    self.assertNDArrayNear(normalized_boxes_tpu, [0.2, 0.3, 0.8, 0.9], 1e-5)

  def testNormalizeBoxes1DWithImageShapeAsTensor(self):
    boxes = tf.constant([10, 30, 40, 90], tf.float32)
    image_shape = tf.constant([50, 100], tf.int32)
    normalized_boxes_tpu, normalized_boxes_cpu = (
        _transform_boxes_on_tpu_and_cpu(
            box_ops.normalize_boxes, boxes, image_shape))
    self.assertNDArrayNear(normalized_boxes_tpu, normalized_boxes_cpu, 1e-5)
    self.assertNDArrayNear(normalized_boxes_tpu, [0.2, 0.3, 0.8, 0.9], 1e-5)

  def testNormalizeBoxes2DWithImageShapeAsList(self):
    boxes = tf.constant([[10, 30, 40, 90], [30, 10, 40, 50]], tf.float32)
    image_shape = [50, 100]
    normalized_boxes_tpu, normalized_boxes_cpu = (
        _transform_boxes_on_tpu_and_cpu(
            box_ops.normalize_boxes, boxes, image_shape))
    self.assertNDArrayNear(normalized_boxes_tpu, normalized_boxes_cpu, 1e-5)
    self.assertNDArrayNear(
        normalized_boxes_tpu,
        [[0.2, 0.3, 0.8, 0.9], [0.6, 0.1, 0.8, 0.5]], 1e-5)

  def testNormalizeBoxes2DWithImageShapeAsVector(self):
    boxes = tf.constant([[10, 30, 40, 90], [30, 10, 40, 50]], tf.float32)
    image_shape = tf.constant([50, 100], dtype=tf.int32)
    normalized_boxes_tpu, normalized_boxes_cpu = (
        _transform_boxes_on_tpu_and_cpu(
            box_ops.normalize_boxes, boxes, image_shape))
    self.assertNDArrayNear(normalized_boxes_tpu, normalized_boxes_cpu, 1e-5)
    self.assertNDArrayNear(
        normalized_boxes_tpu,
        [[0.2, 0.3, 0.8, 0.9], [0.6, 0.1, 0.8, 0.5]], 1e-5)

  def testNormalizeBoxes2DWithImageShapeAsBroadcastableTensor(self):
    boxes = tf.constant([[10, 30, 40, 90], [30, 10, 40, 50]], tf.float32)
    image_shape = tf.constant([[50, 100]], tf.int32)
    normalized_boxes_tpu, normalized_boxes_cpu = (
        _transform_boxes_on_tpu_and_cpu(
            box_ops.normalize_boxes, boxes, image_shape))
    self.assertNDArrayNear(normalized_boxes_tpu, normalized_boxes_cpu, 1e-5)
    self.assertNDArrayNear(
        normalized_boxes_tpu,
        [[0.2, 0.3, 0.8, 0.9], [0.6, 0.1, 0.8, 0.5]], 1e-5)

  def testNormalizeBoxes2DWithImageShapeAsSameShapeTensor(self):
    boxes = tf.constant([[10, 30, 40, 90], [30, 10, 40, 50]], tf.float32)
    image_shape = tf.constant([[50, 100], [50, 100]], tf.int32)
    normalized_boxes_tpu, normalized_boxes_cpu = (
        _transform_boxes_on_tpu_and_cpu(
            box_ops.normalize_boxes, boxes, image_shape))
    self.assertNDArrayNear(normalized_boxes_tpu, normalized_boxes_cpu, 1e-5)
    self.assertNDArrayNear(
        normalized_boxes_tpu,
        [[0.2, 0.3, 0.8, 0.9], [0.6, 0.1, 0.8, 0.5]], 1e-5)

  def testNormalizeBoxes3DWithImageShapeAsList(self):
    boxes = tf.constant([[[10, 30, 40, 90], [30, 10, 40, 50]],
                         [[20, 40, 50, 80], [30, 50, 40, 90]]], tf.float32)
    image_shape = [50, 100]
    normalized_boxes_tpu, normalized_boxes_cpu = (
        _transform_boxes_on_tpu_and_cpu(
            box_ops.normalize_boxes, boxes, image_shape))
    self.assertNDArrayNear(normalized_boxes_tpu, normalized_boxes_cpu, 1e-5)
    self.assertNDArrayNear(
        normalized_boxes_tpu,
        [[[0.2, 0.3, 0.8, 0.9], [0.6, 0.1, 0.8, 0.5]],
         [[0.4, 0.4, 1.0, 0.8], [0.6, 0.5, 0.8, 0.9]]], 1e-5)

  def testNormalizeBoxes3DWithImageShapeAsVector(self):
    boxes = tf.constant([[[10, 30, 40, 90], [30, 10, 40, 50]],
                         [[20, 40, 50, 80], [30, 50, 40, 90]]], tf.float32)
    image_shape = tf.constant([50, 100], tf.int32)
    normalized_boxes_tpu, normalized_boxes_cpu = (
        _transform_boxes_on_tpu_and_cpu(
            box_ops.normalize_boxes, boxes, image_shape))
    self.assertNDArrayNear(normalized_boxes_tpu, normalized_boxes_cpu, 1e-5)
    self.assertNDArrayNear(
        normalized_boxes_tpu,
        [[[0.2, 0.3, 0.8, 0.9], [0.6, 0.1, 0.8, 0.5]],
         [[0.4, 0.4, 1.0, 0.8], [0.6, 0.5, 0.8, 0.9]]], 1e-5)

  def testNormalizeBoxes3DWithImageShapeAsBroadcastableTensor(self):
    boxes = tf.constant([[[10, 30, 40, 90], [30, 10, 40, 50]],
                         [[20, 40, 50, 80], [30, 50, 40, 90]]], tf.float32)
    image_shape = tf.constant([[[50, 100]], [[500, 1000]]], tf.int32)
    normalized_boxes_tpu, normalized_boxes_cpu = (
        _transform_boxes_on_tpu_and_cpu(
            box_ops.normalize_boxes, boxes, image_shape))
    self.assertNDArrayNear(normalized_boxes_tpu, normalized_boxes_cpu, 1e-5)
    self.assertNDArrayNear(
        normalized_boxes_tpu,
        [[[0.2, 0.3, 0.8, 0.9], [0.6, 0.1, 0.8, 0.5]],
         [[0.04, 0.04, 0.1, 0.08], [0.06, 0.05, 0.08, 0.09]]], 1e-5)

  def testNormalizeBoxes3DWithImageShapeAsSameShapeTensor(self):
    boxes = tf.constant([[[10, 30, 40, 90], [30, 10, 40, 50]],
                         [[20, 40, 50, 80], [30, 50, 40, 90]]], tf.float32)
    image_shape = tf.constant(
        [[[50, 100], [50, 100]], [[500, 1000], [500, 1000]]], tf.int32)
    normalized_boxes_tpu, normalized_boxes_cpu = (
        _transform_boxes_on_tpu_and_cpu(
            box_ops.normalize_boxes, boxes, image_shape))
    self.assertNDArrayNear(normalized_boxes_tpu, normalized_boxes_cpu, 1e-5)
    self.assertNDArrayNear(
        normalized_boxes_tpu,
        [[[0.2, 0.3, 0.8, 0.9], [0.6, 0.1, 0.8, 0.5]],
         [[0.04, 0.04, 0.1, 0.08], [0.06, 0.05, 0.08, 0.09]]], 1e-5)


class DenormalizeBoxesTest(tf.test.TestCase):

  def testDenormalizeBoxes1DWithImageShapeAsList(self):
    boxes = tf.constant([0.2, 0.3, 0.8, 0.9], tf.float32)
    image_shape = [50, 100]
    normalized_boxes_tpu, normalized_boxes_cpu = (
        _transform_boxes_on_tpu_and_cpu(
            box_ops.denormalize_boxes, boxes, image_shape))
    self.assertNDArrayNear(normalized_boxes_tpu, normalized_boxes_cpu, 1e-5)
    self.assertNDArrayNear(normalized_boxes_tpu, [10, 30, 40, 90], 1e-5)

  def testDenormalizeBoxes1DWithImageShapeAsTensor(self):
    boxes = tf.constant([0.2, 0.3, 0.8, 0.9], tf.float32)
    image_shape = tf.constant([50, 100], tf.int32)
    normalized_boxes_tpu, normalized_boxes_cpu = (
        _transform_boxes_on_tpu_and_cpu(
            box_ops.denormalize_boxes, boxes, image_shape))
    self.assertNDArrayNear(normalized_boxes_tpu, normalized_boxes_cpu, 1e-5)
    self.assertNDArrayNear(normalized_boxes_tpu, [10, 30, 40, 90], 1e-5)

  def testDenormalizeBoxes2DWithImageShapeAsList(self):
    boxes = tf.constant([[0.2, 0.3, 0.8, 0.9], [0.6, 0.1, 0.8, 0.5]],
                        tf.float32)
    image_shape = [50, 100]
    normalized_boxes_tpu, normalized_boxes_cpu = (
        _transform_boxes_on_tpu_and_cpu(
            box_ops.denormalize_boxes, boxes, image_shape))
    self.assertNDArrayNear(normalized_boxes_tpu, normalized_boxes_cpu, 1e-5)
    self.assertNDArrayNear(
        normalized_boxes_tpu, [[10, 30, 40, 90], [30, 10, 40, 50]], 1e-5)

  def testDenormalizeBoxes2DWithImageShapeAsVector(self):
    boxes = tf.constant([[0.2, 0.3, 0.8, 0.9], [0.6, 0.1, 0.8, 0.5]],
                        tf.float32)
    image_shape = tf.constant([50, 100], dtype=tf.int32)
    normalized_boxes_tpu, normalized_boxes_cpu = (
        _transform_boxes_on_tpu_and_cpu(
            box_ops.denormalize_boxes, boxes, image_shape))
    self.assertNDArrayNear(normalized_boxes_tpu, normalized_boxes_cpu, 1e-5)
    self.assertNDArrayNear(
        normalized_boxes_tpu, [[10, 30, 40, 90], [30, 10, 40, 50]], 1e-5)

  def testDenormalizeBoxes2DWithImageShapeAsBroadcastableTensor(self):
    boxes = tf.constant([[0.2, 0.3, 0.8, 0.9], [0.6, 0.1, 0.8, 0.5]],
                        tf.float32)
    image_shape = tf.constant([[50, 100]], tf.int32)
    normalized_boxes_tpu, normalized_boxes_cpu = (
        _transform_boxes_on_tpu_and_cpu(
            box_ops.denormalize_boxes, boxes, image_shape))
    self.assertNDArrayNear(normalized_boxes_tpu, normalized_boxes_cpu, 1e-5)
    self.assertNDArrayNear(
        normalized_boxes_tpu, [[10, 30, 40, 90], [30, 10, 40, 50]], 1e-5)

  def testDenormalizeBoxes2DWithImageShapeAsSameShapeTensor(self):
    boxes = tf.constant([[0.2, 0.3, 0.8, 0.9], [0.6, 0.1, 0.8, 0.5]],
                        tf.float32)
    image_shape = tf.constant([[50, 100], [50, 100]], tf.int32)
    normalized_boxes_tpu, normalized_boxes_cpu = (
        _transform_boxes_on_tpu_and_cpu(
            box_ops.denormalize_boxes, boxes, image_shape))
    self.assertNDArrayNear(normalized_boxes_tpu, normalized_boxes_cpu, 1e-5)
    self.assertNDArrayNear(
        normalized_boxes_tpu, [[10, 30, 40, 90], [30, 10, 40, 50]], 1e-5)

  def testDenormalizeBoxes3DWithImageShapeAsList(self):
    boxes = tf.constant([[[0.2, 0.3, 0.8, 0.9], [0.6, 0.1, 0.8, 0.5]],
                         [[0.4, 0.4, 1.0, 0.8], [0.6, 0.5, 0.8, 0.9]]],
                        tf.float32)
    image_shape = [50, 100]
    normalized_boxes_tpu, normalized_boxes_cpu = (
        _transform_boxes_on_tpu_and_cpu(
            box_ops.denormalize_boxes, boxes, image_shape))
    self.assertNDArrayNear(normalized_boxes_tpu, normalized_boxes_cpu, 1e-5)
    self.assertNDArrayNear(
        normalized_boxes_tpu,
        [[[10, 30, 40, 90], [30, 10, 40, 50]],
         [[20, 40, 50, 80], [30, 50, 40, 90]]], 1e-5)

  def testDenormalizeBoxes3DWithImageShapeAsVector(self):
    boxes = tf.constant([[[0.2, 0.3, 0.8, 0.9], [0.6, 0.1, 0.8, 0.5]],
                         [[0.4, 0.4, 1.0, 0.8], [0.6, 0.5, 0.8, 0.9]]],
                        tf.float32)
    image_shape = tf.constant([50, 100], tf.int32)
    normalized_boxes_tpu, normalized_boxes_cpu = (
        _transform_boxes_on_tpu_and_cpu(
            box_ops.denormalize_boxes, boxes, image_shape))
    self.assertNDArrayNear(normalized_boxes_tpu, normalized_boxes_cpu, 1e-5)
    self.assertNDArrayNear(
        normalized_boxes_tpu,
        [[[10, 30, 40, 90], [30, 10, 40, 50]],
         [[20, 40, 50, 80], [30, 50, 40, 90]]], 1e-5)

  def testDenormalizeBoxes3DWithImageShapeAsBroadcastableTensor(self):
    boxes = tf.constant([[[0.2, 0.3, 0.8, 0.9], [0.6, 0.1, 0.8, 0.5]],
                         [[0.04, 0.04, 0.1, 0.08], [0.06, 0.05, 0.08, 0.09]]],
                        tf.float32)
    image_shape = tf.constant([[[50, 100]], [[500, 1000]]], tf.int32)
    normalized_boxes_tpu, normalized_boxes_cpu = (
        _transform_boxes_on_tpu_and_cpu(
            box_ops.denormalize_boxes, boxes, image_shape))
    self.assertNDArrayNear(normalized_boxes_tpu, normalized_boxes_cpu, 1e-5)
    self.assertNDArrayNear(
        normalized_boxes_tpu,
        [[[10, 30, 40, 90], [30, 10, 40, 50]],
         [[20, 40, 50, 80], [30, 50, 40, 90]]], 1e-5)

  def testDenormalizeBoxes3DWithImageShapeAsSameShapeTensor(self):
    boxes = tf.constant([[[0.2, 0.3, 0.8, 0.9], [0.6, 0.1, 0.8, 0.5]],
                         [[0.04, 0.04, 0.1, 0.08], [0.06, 0.05, 0.08, 0.09]]],
                        tf.float32)
    image_shape = tf.constant(
        [[[50, 100], [50, 100]], [[500, 1000], [500, 1000]]], tf.int32)
    normalized_boxes_tpu, normalized_boxes_cpu = (
        _transform_boxes_on_tpu_and_cpu(
            box_ops.denormalize_boxes, boxes, image_shape))
    self.assertNDArrayNear(normalized_boxes_tpu, normalized_boxes_cpu, 1e-5)
    self.assertNDArrayNear(
        normalized_boxes_tpu,
        [[[10, 30, 40, 90], [30, 10, 40, 50]],
         [[20, 40, 50, 80], [30, 50, 40, 90]]], 1e-5)


class ClipBoxesTest(tf.test.TestCase):

  def testClipBoxesImageShapeAsList(self):
    boxes_data = [[0, 0, 1, 1],
                  [0, 0.1, 1, 1.1],
                  [0, 0.3, 1, 1.3],
                  [0, 0.5, 1, 1.5],
                  [0, 0.7, 1, 1.7],
                  [0, 1.9, 1, 1.9]]
    image_shape = [3, 3]
    boxes = tf.constant(boxes_data)
    clipped_boxes_tpu, clipped_boxes_cpu = _transform_boxes_on_tpu_and_cpu(
        box_ops.clip_boxes, boxes, image_shape)
    self.assertAllClose(clipped_boxes_tpu, clipped_boxes_cpu)
    self.assertAllClose(clipped_boxes_tpu, boxes_data)

  def testClipBoxesImageShapeAsVector(self):
    boxes_data = [[0, 0, 1, 1],
                  [0, 0.1, 1, 1.1],
                  [0, 0.3, 1, 1.3],
                  [0, 0.5, 1, 1.5],
                  [0, 0.7, 1, 1.7],
                  [0, 1.9, 1, 1.9]]
    boxes = tf.constant(boxes_data)
    image_shape = np.array([3, 3], dtype=np.float32)
    clipped_boxes_tpu, clipped_boxes_cpu = _transform_boxes_on_tpu_and_cpu(
        box_ops.clip_boxes, boxes, image_shape)
    self.assertAllClose(clipped_boxes_tpu, clipped_boxes_cpu)
    self.assertAllClose(clipped_boxes_tpu, boxes_data)

  def testClipBoxesImageShapeAsTensor(self):
    boxes_data = [[0, 0, 1, 1],
                  [0, 0.1, 1, 1.1],
                  [0, 0.3, 1, 1.3],
                  [0, 0.5, 1, 1.5],
                  [0, 0.7, 1, 1.7],
                  [0, 1.9, 1, 1.9]]
    boxes = tf.constant(boxes_data)
    image_shape = tf.constant(
        [[3, 3], [3, 3], [3, 3], [3, 3], [3, 3], [3, 3]], dtype=tf.float32)
    clipped_boxes_tpu, clipped_boxes_cpu = _transform_boxes_on_tpu_and_cpu(
        box_ops.clip_boxes, boxes, image_shape)
    self.assertAllClose(clipped_boxes_tpu, clipped_boxes_cpu)
    self.assertAllClose(clipped_boxes_tpu, boxes_data)


class EncodeDecodeBoxesTest(tf.test.TestCase):

  def test_encode_decode_boxes(self):
    boxes_np = np.array([[[1.0, 2.0, 3.0, 4.0], [2.0, 3.0, 4.0, 5.0]],
                         [[4.0, 5.0, 6.0, 7.0], [5.0, 6.0, 7.0, 8.0]]])
    boxes = tf.constant(boxes_np, dtype=tf.float32)
    anchors = tf.constant([[[1.5, 2.5, 3.5, 4.5], [2.5, 3.5, 4.5, 5.5]],
                           [[1.5, 2.5, 3.5, 4.5], [2.5, 3.5, 4.5, 5.5]]],
                          dtype=tf.float32)
    weights = [1.0, 1.0, 1.0, 1.0]

    def test_fn(boxes, anchors):
      encoded_boxes = box_ops.encode_boxes(boxes, anchors, weights)
      decoded_boxes = box_ops.decode_boxes(encoded_boxes, anchors, weights)
      return decoded_boxes

    decoded_boxes_tpu, decoded_boxes_cpu = _transform_boxes_on_tpu_and_cpu(
        test_fn, boxes, anchors)
    self.assertNDArrayNear(decoded_boxes_tpu, decoded_boxes_cpu, 1e-5)
    self.assertNDArrayNear(decoded_boxes_tpu, boxes_np, 1e-5)

  def test_encode_decode_boxes_batch_broadcast(self):
    boxes_np = np.array([[[1.0, 2.0, 3.0, 4.0], [2.0, 3.0, 4.0, 5.0]],
                         [[4.0, 5.0, 6.0, 7.0], [5.0, 6.0, 7.0, 8.0]]])
    boxes = tf.constant(boxes_np, dtype=tf.float32)
    anchors = tf.constant([[[1.5, 2.5, 3.5, 4.5], [2.5, 3.5, 4.5, 5.5]]],
                          dtype=tf.float32)
    weights = [1.0, 1.0, 1.0, 1.0]

    def test_fn(boxes, anchors):
      encoded_boxes = box_ops.encode_boxes(boxes, anchors, weights)
      decoded_boxes = box_ops.decode_boxes(encoded_boxes, anchors, weights)
      return decoded_boxes

    decoded_boxes_tpu, decoded_boxes_cpu = _transform_boxes_on_tpu_and_cpu(
        test_fn, boxes, anchors)
    self.assertNDArrayNear(decoded_boxes_tpu, decoded_boxes_cpu, 1e-5)
    self.assertNDArrayNear(decoded_boxes_tpu, boxes_np, 1e-5)


class FilterBoxesTest(tf.test.TestCase):

  def test_filter_boxes_batch(self):
    # boxes -> [[small, good, outside], [outside, small, good]]
    boxes_np = np.array(
        [[[1.0, 2.0, 1.5, 2.5], [2.0, 3.0, 4.5, 5.5], [7.0, 4.0, 9.5, 6.5]],
         [[-2.0, 5.0, 0.0, 7.5], [5.0, 6.0, 5.1, 6.0], [4.0, 1.0, 7.0, 4.0]]])
    filtered_boxes_np = np.array(
        [[[0.0, 0.0, 0.0, 0.0], [2.0, 3.0, 4.5, 5.5], [0.0, 0.0, 0.0, 0.0]],
         [[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], [4.0, 1.0, 7.0, 4.0]]])
    boxes = tf.constant(boxes_np, dtype=tf.float32)
    scores_np = np.array([[0.9, 0.7, 0.5], [0.11, 0.22, 0.33]])
    filtered_scores_np = np.array([[0.0, 0.7, 0.0], [0.0, 0.0, 0.33]])
    scores = tf.constant(scores_np, dtype=tf.float32)
    image_shape = tf.expand_dims(
        tf.constant([[8, 8], [8, 8]], dtype=tf.int32), axis=1)
    min_size_threshold = 2.0

    def test_fn(boxes, scores, image_shape):
      filtered_boxes, filtered_scores = box_ops.filter_boxes(
          boxes, scores, image_shape, min_size_threshold)
      return filtered_boxes, filtered_scores

    filtered_results_tpu, filtered_results_cpu = (
        _transform_boxes_on_tpu_and_cpu(test_fn, boxes, scores, image_shape))
    filtered_boxes_tpu, filtered_scores_tpu = filtered_results_tpu
    filtered_boxes_cpu, filtered_scores_cpu = filtered_results_cpu
    self.assertNDArrayNear(filtered_boxes_tpu, filtered_boxes_cpu, 1e-5)
    self.assertNDArrayNear(filtered_scores_tpu, filtered_scores_cpu, 1e-5)
    self.assertNDArrayNear(filtered_boxes_tpu, filtered_boxes_np, 1e-5)
    self.assertNDArrayNear(filtered_scores_tpu, filtered_scores_np, 1e-5)


class FilterBoxesByScoresTest(tf.test.TestCase):

  def test_filter_boxes_by_scores_batch(self):
    # boxes -> [[small, good, outside], [outside, small, good]]
    boxes_np = np.array(
        [[[1.0, 2.0, 1.5, 2.5], [2.0, 3.0, 4.5, 5.5], [7.0, 4.0, 9.5, 6.5]],
         [[-2.0, 5.0, 0.0, 7.5], [5.0, 6.0, 5.1, 6.0], [4.0, 1.0, 7.0, 4.0]]])
    filtered_boxes_np = np.array(
        [[[0.0, 0.0, 0.0, 0.0], [2.0, 3.0, 4.5, 5.5], [7.0, 4.0, 9.5, 6.5]],
         [[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], [4.0, 1.0, 7.0, 4.0]]])
    boxes = tf.constant(boxes_np, dtype=tf.float32)
    scores_np = np.array([[0.1, 0.7, 0.6], [0.11, 0.22, 0.53]])
    filtered_scores_np = np.array([[-1.0, 0.7, 0.6], [-1.0, -1.0, 0.53]])
    scores = tf.constant(scores_np, dtype=tf.float32)
    min_score_threshold = 0.5

    def test_fn(boxes, scores):
      filtered_boxes, filtered_scores = box_ops.filter_boxes_by_scores(
          boxes, scores, min_score_threshold)
      return filtered_boxes, filtered_scores

    filtered_results_tpu, filtered_results_cpu = (
        _transform_boxes_on_tpu_and_cpu(test_fn, boxes, scores))
    filtered_boxes_tpu, filtered_scores_tpu = filtered_results_tpu
    filtered_boxes_cpu, filtered_scores_cpu = filtered_results_cpu
    self.assertNDArrayNear(filtered_boxes_tpu, filtered_boxes_cpu, 1e-5)
    self.assertNDArrayNear(filtered_scores_tpu, filtered_scores_cpu, 1e-5)
    self.assertNDArrayNear(filtered_boxes_tpu, filtered_boxes_np, 1e-5)
    self.assertNDArrayNear(filtered_scores_tpu, filtered_scores_np, 1e-5)


class GatherInstancesTest(tf.test.TestCase):

  def test_gather_instances(self):
    boxes_np = np.array(
        [[[1.0, 2.0, 1.5, 2.5], [2.0, 3.0, 4.5, 5.5], [7.0, 4.0, 9.5, 6.5]],
         [[-2.0, 5.0, 0.0, 7.5], [5.0, 6.0, 5.1, 6.0], [4.0, 1.0, 7.0, 4.0]]])
    indices_np = np.array([[2, 0], [0, 1]])
    boxes = tf.constant(boxes_np, dtype=tf.float32)
    indices = tf.constant(indices_np, dtype=tf.int32)
    selected_boxes = box_ops.gather_instances(indices, boxes)
    expected_selected_boxes = np.array(
        [[[7.0, 4.0, 9.5, 6.5], [1.0, 2.0, 1.5, 2.5]],
         [[-2.0, 5.0, 0.0, 7.5], [5.0, 6.0, 5.1, 6.0]]])
    self.assertNDArrayNear(expected_selected_boxes, selected_boxes, 1e-5)

  def test_gather_instances_with_multiple_inputs(self):
    boxes_np = np.array(
        [[[1.0, 2.0, 1.5, 2.5], [2.0, 3.0, 4.5, 5.5], [7.0, 4.0, 9.5, 6.5]],
         [[-2.0, 5.0, 0.0, 7.5], [5.0, 6.0, 5.1, 6.0], [4.0, 1.0, 7.0, 4.0]]])
    classes_np = np.array([[1, 2, 3], [20, 30, 40]])
    indices_np = np.array([[2, 0], [0, 1]])
    boxes = tf.constant(boxes_np, dtype=tf.float32)
    classes = tf.constant(classes_np, dtype=tf.int32)
    indices = tf.constant(indices_np, dtype=tf.int32)
    selected_boxes, selected_classes = box_ops.gather_instances(
        indices, boxes, classes)
    expected_selected_boxes = np.array(
        [[[7.0, 4.0, 9.5, 6.5], [1.0, 2.0, 1.5, 2.5]],
         [[-2.0, 5.0, 0.0, 7.5], [5.0, 6.0, 5.1, 6.0]]])
    expected_selected_classes = np.array([[3, 1], [20, 30]])
    self.assertNDArrayNear(expected_selected_boxes, selected_boxes, 1e-5)
    self.assertAllEqual(expected_selected_classes, selected_classes)


class TopKBoxesTest(tf.test.TestCase):

  def test_top_k_boxes_batch1(self):
    boxes_np = np.array(
        [[[1.0, 2.0, 1.5, 2.5], [2.0, 3.0, 4.5, 5.5], [7.0, 4.0, 9.5, 6.5]]])
    boxes = tf.constant(boxes_np, dtype=tf.float32)
    scores_np = np.array([[0.9, 0.5, 0.7]])
    scores = tf.constant(scores_np, dtype=tf.float32)
    top_k_boxes_np = np.array(
        [[[1.0, 2.0, 1.5, 2.5], [7.0, 4.0, 9.5, 6.5]]])
    top_k_scores_np = np.array([[0.9, 0.7]])

    def test_fn(boxes, scores):
      top_k_boxes, top_k_scores = box_ops.top_k_boxes(boxes, scores, k=2)
      return top_k_boxes, top_k_scores

    top_k_results_tpu, top_k_results_cpu = _transform_boxes_on_tpu_and_cpu(
        test_fn, boxes, scores)
    top_k_boxes_tpu, top_k_scores_tpu = top_k_results_tpu
    top_k_boxes_cpu, top_k_scores_cpu = top_k_results_cpu
    self.assertNDArrayNear(top_k_boxes_tpu, top_k_boxes_cpu, 1e-5)
    self.assertNDArrayNear(top_k_scores_tpu, top_k_scores_cpu, 1e-5)
    self.assertNDArrayNear(top_k_boxes_tpu, top_k_boxes_np, 1e-5)
    self.assertNDArrayNear(top_k_scores_tpu, top_k_scores_np, 1e-5)

  def test_top_k_boxes_batch2(self):
    boxes_np = np.array(
        [[[1.0, 2.0, 1.5, 2.5], [2.0, 3.0, 4.5, 5.5], [7.0, 4.0, 9.5, 6.5]],
         [[-2.0, 5.0, 0.0, 7.5], [5.0, 6.0, 5.1, 6.0], [4.0, 1.0, 7.0, 4.0]]])
    boxes = tf.constant(boxes_np, dtype=tf.float32)
    scores_np = np.array([[0.9, 0.7, 0.5], [0.11, 0.22, 0.33]])
    scores = tf.constant(scores_np, dtype=tf.float32)
    top_k_boxes_np = np.array(
        [[[1.0, 2.0, 1.5, 2.5], [2.0, 3.0, 4.5, 5.5]],
         [[4.0, 1.0, 7.0, 4.0], [5.0, 6.0, 5.1, 6.0]]])
    top_k_scores_np = np.array([[0.9, 0.7], [0.33, 0.22]])

    def test_fn(boxes, scores):
      top_k_boxes, top_k_scores = box_ops.top_k_boxes(boxes, scores, k=2)
      return top_k_boxes, top_k_scores

    top_k_results_tpu, top_k_results_cpu = _transform_boxes_on_tpu_and_cpu(
        test_fn, boxes, scores)
    top_k_boxes_tpu, top_k_scores_tpu = top_k_results_tpu
    top_k_boxes_cpu, top_k_scores_cpu = top_k_results_cpu
    self.assertNDArrayNear(top_k_boxes_tpu, top_k_boxes_cpu, 1e-5)
    self.assertNDArrayNear(top_k_scores_tpu, top_k_scores_cpu, 1e-5)
    self.assertNDArrayNear(top_k_boxes_tpu, top_k_boxes_np, 1e-5)
    self.assertNDArrayNear(top_k_scores_tpu, top_k_scores_np, 1e-5)


class BboxeOverlapTest(tf.test.TestCase):

  def testBBoxeOverlapOpCorrectness(self):
    boxes_data = [[[0, 0, 0.1, 1], [0, 0.2, 0.2, 1.2], [0, 0.3, 0.3, 1.3],
                   [0, 0.5, 0.4, 1.5], [0, 0.7, 0.5, 1.7], [0, 0.9, 0.6, 1.9],
                   [0, 0.1, 0.1, 1.1], [0, 0.3, 0.7, 1.3], [0, 0.9, 2, 1.9]],
                  [[0, 0, 1, 0.2], [0, 0.2, 0.5, 1.2], [0, 0.4, 0.9, 1.4],
                   [0, 0.6, 1.1, 1.6], [0, 0.8, 1.2, 1.8], [0, 1, 1.5, 2],
                   [0, 0.5, 1, 1], [0.5, 0.8, 1, 1.8], [-1, -1, -1, -1]]]
    boxes_np = np.array(boxes_data, dtype=np.float32)
    gt_boxes_data = [[[0, 0.1, 0.1, 1.1], [0, 0.3, 0.7, 1.3],
                      [0, 0.9, 2, 1.9]],
                     [[0, 0.5, 1, 1], [0.5, 0.8, 1, 1.8], [-1, -1, -1, -1]]]
    gt_boxes_np = np.array(gt_boxes_data, dtype=np.float32)

    # Runs on TPU.
    strategy = tf.distribute.experimental.TPUStrategy()
    with strategy.scope():
      boxes = tf.constant(boxes_np)
      gt_boxes = tf.constant(gt_boxes_np)
      iou = box_ops.bbox_overlap(boxes=boxes, gt_boxes=gt_boxes)

    iou = iou.numpy()
    self.assertEqual(iou.shape, (2, 9, 3))
    self.assertAllEqual(
        np.argmax(iou, axis=2),
        [[0, 0, 1, 1, 1, 2, 0, 1, 2], [0, 0, 0, 0, 1, 1, 0, 1, 0]])

  def testBBoxeOverlapOpCheckShape(self):
    batch_size = 2
    rpn_post_nms_topn = 2000
    gt_max_instances = 100
    boxes_np = np.random.rand(
        batch_size, rpn_post_nms_topn, 4).astype(np.float32)
    gt_boxes_np = np.random.rand(
        batch_size, gt_max_instances, 4).astype(np.float32)

    strategy = tf.distribute.experimental.TPUStrategy()
    with strategy.scope():
      boxes = tf.constant(boxes_np)
      gt_boxes = tf.constant(gt_boxes_np)
      iou = box_ops.bbox_overlap(boxes=boxes, gt_boxes=gt_boxes)

    iou = iou.numpy()
    self.assertEqual(iou.shape,
                     (batch_size, rpn_post_nms_topn, gt_max_instances))

  def testBBoxeOverlapOpCorrectnessWithNegativeData(self):
    boxes_data = [[[0, -0.01, 0.1, 1.1], [0, 0.2, 0.2, 5.0],
                   [0, -0.01, 0.1, 1.], [-1, -1, -1, -1]]]
    boxes_np = np.array(boxes_data, dtype=np.float32)
    gt_boxes_np = boxes_np

    strategy = tf.distribute.experimental.TPUStrategy()
    with strategy.scope():
      boxes = tf.constant(boxes_np)
      gt_boxes = tf.constant(gt_boxes_np)
      iou = box_ops.bbox_overlap(boxes=boxes, gt_boxes=gt_boxes)

    iou = iou.numpy()
    expected = np.array([[[0.99999994, 0.0917431, 0.9099099, -1.],
                          [0.0917431, 1., 0.08154944, -1.],
                          [0.9099099, 0.08154944, 1., -1.],
                          [-1., -1., -1., -1.]]])
    self.assertAllClose(expected, iou)


class BoxMatchingTest(tf.test.TestCase):

  def test_box_matching_single(self):
    boxes_np = np.array(
        [[[0, 0, 5, 5], [2.5, 2.5, 7.5, 7.5], [5, 5, 10, 10],
          [7.5, 7.5, 12.5, 12.5]]])
    boxes = tf.constant(boxes_np, dtype=tf.float32)
    gt_boxes_np = np.array(
        [[[10, 10, 15, 15], [2.5, 2.5, 7.5, 7.5], [-1, -1, -1, -1]]])
    gt_boxes = tf.constant(gt_boxes_np, dtype=tf.float32)
    gt_classes_np = np.array([[2, 10, -1]])
    gt_classes = tf.constant(gt_classes_np, dtype=tf.int32)

    matched_gt_boxes_np = np.array(
        [[[2.5, 2.5, 7.5, 7.5], [2.5, 2.5, 7.5, 7.5], [2.5, 2.5, 7.5, 7.5],
          [10, 10, 15, 15]]])
    matched_gt_classes_np = np.array([[10, 10, 10, 2]])
    matched_gt_indices_np = np.array([[1, 1, 1, 0]])
    matched_iou_np = np.array(
        [[0.142857142857, 1.0, 0.142857142857, 0.142857142857]])
    iou_np = np.array(
        [[[0, 0.142857142857, -1.0], [0, 1.0, -1.0],
          [0, 0.142857142857, -1.0], [0.142857142857, 0, -1.0]]])

    # Runs on TPU.
    strategy = tf.distribute.experimental.TPUStrategy()
    with strategy.scope():
      (matched_gt_boxes_tpu, matched_gt_classes_tpu, matched_gt_indices_tpu,
       matched_iou_tpu, iou_tpu) = (
           box_ops.box_matching(boxes, gt_boxes, gt_classes))

    # Runs on CPU.
    (matched_gt_boxes_cpu, matched_gt_classes_cpu, matched_gt_indices_cpu,
     matched_iou_cpu, iou_cpu) = (
         box_ops.box_matching(boxes, gt_boxes, gt_classes))

    # consistency.
    self.assertNDArrayNear(
        matched_gt_boxes_tpu.numpy(), matched_gt_boxes_cpu.numpy(), 1e-5)
    self.assertAllEqual(
        matched_gt_classes_tpu.numpy(), matched_gt_classes_cpu.numpy())
    self.assertAllEqual(
        matched_gt_indices_tpu.numpy(), matched_gt_indices_cpu.numpy())
    self.assertNDArrayNear(
        matched_iou_tpu.numpy(), matched_iou_cpu.numpy(), 1e-5)
    self.assertNDArrayNear(iou_tpu.numpy(), iou_cpu.numpy(), 1e-5)

    # correctness.
    self.assertNDArrayNear(
        matched_gt_boxes_tpu.numpy(), matched_gt_boxes_np, 1e-5)
    self.assertAllEqual(matched_gt_classes_tpu.numpy(), matched_gt_classes_np)
    self.assertAllEqual(matched_gt_indices_tpu.numpy(), matched_gt_indices_np)
    self.assertNDArrayNear(matched_iou_tpu.numpy(), matched_iou_np, 1e-5)
    self.assertNDArrayNear(iou_tpu.numpy(), iou_np, 1e-5)

  def test_box_matching_single_no_gt(self):
    boxes_np = np.array(
        [[[0, 0, 5, 5], [2.5, 2.5, 7.5, 7.5], [5, 5, 10, 10],
          [7.5, 7.5, 12.5, 12.5]]])
    boxes = tf.constant(boxes_np, dtype=tf.float32)
    gt_boxes_np = np.array(
        [[[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, -1, -1]]])
    gt_boxes = tf.constant(gt_boxes_np, dtype=tf.float32)
    gt_classes_np = np.array([[-1, -1, -1]])
    gt_classes = tf.constant(gt_classes_np, dtype=tf.int32)

    matched_gt_boxes_np = np.array(
        [[[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]])
    matched_gt_classes_np = np.array([[0, 0, 0, 0]])
    matched_gt_indices_np = np.array([[-1, -1, -1, -1]])
    matched_iou_np = np.array([[-1, -1, -1, -1]])
    iou_np = np.array(
        [[[-1, -1, -1], [-1, -1, -1], [-1, -1, -1], [-1, -1, -1]]])

    # Runs on TPU.
    strategy = tf.distribute.experimental.TPUStrategy()
    with strategy.scope():
      (matched_gt_boxes_tpu, matched_gt_classes_tpu, matched_gt_indices_tpu,
       matched_iou_tpu, iou_tpu) = (
           box_ops.box_matching(boxes, gt_boxes, gt_classes))

    # Runs on CPU.
    (matched_gt_boxes_cpu, matched_gt_classes_cpu, matched_gt_indices_cpu,
     matched_iou_cpu, iou_cpu) = (
         box_ops.box_matching(boxes, gt_boxes, gt_classes))

    # consistency.
    self.assertNDArrayNear(
        matched_gt_boxes_tpu.numpy(), matched_gt_boxes_cpu.numpy(), 1e-5)
    self.assertAllEqual(
        matched_gt_classes_tpu.numpy(), matched_gt_classes_cpu.numpy())
    self.assertAllEqual(
        matched_gt_indices_tpu.numpy(), matched_gt_indices_cpu.numpy())
    self.assertNDArrayNear(
        matched_iou_tpu.numpy(), matched_iou_cpu.numpy(), 1e-5)
    self.assertNDArrayNear(iou_tpu.numpy(), iou_cpu.numpy(), 1e-5)

    # correctness.
    self.assertNDArrayNear(
        matched_gt_boxes_tpu.numpy(), matched_gt_boxes_np, 1e-5)
    self.assertAllEqual(matched_gt_classes_tpu.numpy(), matched_gt_classes_np)
    self.assertAllEqual(matched_gt_indices_tpu.numpy(), matched_gt_indices_np)
    self.assertNDArrayNear(matched_iou_tpu.numpy(), matched_iou_np, 1e-5)
    self.assertNDArrayNear(iou_tpu.numpy(), iou_np, 1e-5)

  def test_box_matching_batch(self):
    boxes_np = np.array(
        [[[0, 0, 5, 5], [2.5, 2.5, 7.5, 7.5], [5, 5, 10, 10],
          [7.5, 7.5, 12.5, 12.5]],
         [[0, 0, 5, 5], [2.5, 2.5, 7.5, 7.5], [5, 5, 10, 10],
          [7.5, 7.5, 12.5, 12.5]]])
    boxes = tf.constant(boxes_np, dtype=tf.float32)
    gt_boxes_np = np.array(
        [[[10, 10, 15, 15], [2.5, 2.5, 7.5, 7.5], [-1, -1, -1, -1]],
         [[-1, -1, -1, -1], [-1, -1, -1, -1], [-1, -1, -1, -1]]])
    gt_boxes = tf.constant(gt_boxes_np, dtype=tf.float32)
    gt_classes_np = np.array([[2, 10, -1], [-1, -1, -1]])
    gt_classes = tf.constant(gt_classes_np, dtype=tf.int32)

    matched_gt_boxes_np = np.array(
        [[[2.5, 2.5, 7.5, 7.5], [2.5, 2.5, 7.5, 7.5], [2.5, 2.5, 7.5, 7.5],
          [10, 10, 15, 15]],
         [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]])
    matched_gt_classes_np = np.array([[10, 10, 10, 2], [0, 0, 0, 0]])
    matched_gt_indices_np = np.array([[1, 1, 1, 0], [-1, -1, -1, -1]])
    matched_iou_np = np.array(
        [[0.142857142857, 1.0, 0.142857142857, 0.142857142857],
         [-1, -1, -1, -1]])
    iou_np = np.array(
        [[[0, 0.142857142857, -1.0], [0, 1.0, -1.0],
          [0, 0.142857142857, -1.0], [0.142857142857, 0, -1.0]],
         [[-1, -1, -1], [-1, -1, -1], [-1, -1, -1], [-1, -1, -1]]])

    # Runs on TPU.
    strategy = tf.distribute.experimental.TPUStrategy()
    with strategy.scope():
      (matched_gt_boxes_tpu, matched_gt_classes_tpu, matched_gt_indices_tpu,
       matched_iou_tpu, iou_tpu) = (
           box_ops.box_matching(boxes, gt_boxes, gt_classes))

    # Runs on CPU.
    (matched_gt_boxes_cpu, matched_gt_classes_cpu, matched_gt_indices_cpu,
     matched_iou_cpu, iou_cpu) = (
         box_ops.box_matching(boxes, gt_boxes, gt_classes))

    # consistency.
    self.assertNDArrayNear(
        matched_gt_boxes_tpu.numpy(), matched_gt_boxes_cpu.numpy(), 1e-5)
    self.assertAllEqual(
        matched_gt_classes_tpu.numpy(), matched_gt_classes_cpu.numpy())
    self.assertAllEqual(
        matched_gt_indices_tpu.numpy(), matched_gt_indices_cpu.numpy())
    self.assertNDArrayNear(
        matched_iou_tpu.numpy(), matched_iou_cpu.numpy(), 1e-5)
    self.assertNDArrayNear(iou_tpu.numpy(), iou_cpu.numpy(), 1e-5)

    # correctness.
    self.assertNDArrayNear(
        matched_gt_boxes_tpu.numpy(), matched_gt_boxes_np, 1e-5)
    self.assertAllEqual(matched_gt_classes_tpu.numpy(), matched_gt_classes_np)
    self.assertAllEqual(matched_gt_indices_tpu.numpy(), matched_gt_indices_np)
    self.assertNDArrayNear(matched_iou_tpu.numpy(), matched_iou_np, 1e-5)
    self.assertNDArrayNear(iou_tpu.numpy(), iou_np, 1e-5)


if __name__ == '__main__':
  tf.test.main()
official/vision/beta/ops/experimental/anchor_generator.py
0 → 100644
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Multi scale anchor generator definition."""
import tensorflow as tf


# (TODO/tanzheny): consider having customized anchor offset.
class _SingleAnchorGenerator:
  """Utility to generate anchors for a single feature map.

  Example:
  ```python
  anchor_gen = _SingleAnchorGenerator(32, [.5, 1., 2.], stride=16)
  anchors = anchor_gen([512, 512, 3])
  ```
  """

  def __init__(self,
               anchor_size,
               scales,
               aspect_ratios,
               stride,
               clip_boxes=False):
    """Constructs single scale anchor.

    Args:
      anchor_size: A single int represents the base anchor size. The anchor
        height will be `anchor_size / sqrt(aspect_ratio)`, anchor width will be
        `anchor_size * sqrt(aspect_ratio)`.
      scales: A list/tuple, or a list/tuple of a list/tuple of positive
        floats representing the actual anchor size to the base `anchor_size`.
      aspect_ratios: a list/tuple of positive floats representing the ratio of
        anchor width to anchor height.
      stride: A single int represents the anchor stride size between center of
        each anchor.
      clip_boxes: Boolean to represent whether the anchor coordinates should be
        clipped to the image size. Defaults to `False`.

    Input shape: the size of the image, `[H, W, C]`
    Output shape: the size of anchors, `[(H / stride) * (W / stride), 4]`
    """
    self.anchor_size = anchor_size
    self.scales = scales
    self.aspect_ratios = aspect_ratios
    self.stride = stride
    self.clip_boxes = clip_boxes

  def __call__(self, image_size):
    image_height = tf.cast(image_size[0], tf.float32)
    image_width = tf.cast(image_size[1], tf.float32)

    k = len(self.scales) * len(self.aspect_ratios)
    aspect_ratios_sqrt = tf.cast(tf.sqrt(self.aspect_ratios), dtype=tf.float32)
    anchor_size = tf.cast(self.anchor_size, tf.float32)

    # [K]
    anchor_heights = []
    anchor_widths = []
    for scale in self.scales:
      anchor_size_t = anchor_size * scale
      anchor_height = anchor_size_t / aspect_ratios_sqrt
      anchor_width = anchor_size_t * aspect_ratios_sqrt
      anchor_heights.append(anchor_height)
      anchor_widths.append(anchor_width)
    anchor_heights = tf.concat(anchor_heights, axis=0)
    anchor_widths = tf.concat(anchor_widths, axis=0)
    half_anchor_heights = tf.reshape(0.5 * anchor_heights, [1, 1, k])
    half_anchor_widths = tf.reshape(0.5 * anchor_widths, [1, 1, k])

    stride = tf.cast(self.stride, tf.float32)
    # [W]
    cx = tf.range(0.5 * stride, image_width, stride)
    # [H]
    cy = tf.range(0.5 * stride, image_height, stride)
    # [H, W]
    cx_grid, cy_grid = tf.meshgrid(cx, cy)
    # [H, W, 1]
    cx_grid = tf.expand_dims(cx_grid, axis=-1)
    cy_grid = tf.expand_dims(cy_grid, axis=-1)

    # [H, W, K, 1]
    y_min = tf.expand_dims(cy_grid - half_anchor_heights, axis=-1)
    y_max = tf.expand_dims(cy_grid + half_anchor_heights, axis=-1)
    x_min = tf.expand_dims(cx_grid - half_anchor_widths, axis=-1)
    x_max = tf.expand_dims(cx_grid + half_anchor_widths, axis=-1)

    if self.clip_boxes:
      y_min = tf.maximum(tf.minimum(y_min, image_height), 0.)
      y_max = tf.maximum(tf.minimum(y_max, image_height), 0.)
      x_min = tf.maximum(tf.minimum(x_min, image_width), 0.)
      x_max = tf.maximum(tf.minimum(x_max, image_width), 0.)

    # [H, W, K, 4]
    result = tf.concat([y_min, x_min, y_max, x_max], axis=-1)
    shape = result.shape.as_list()
    # [H, W, K * 4]
    return tf.reshape(result, [shape[0], shape[1], shape[2] * shape[3]])


class AnchorGenerator():
  """Utility to generate anchors for multiple feature maps.

  Example:
  ```python
  anchor_gen = AnchorGenerator([32, 64], [.5, 1., 2.],
                               strides=[16, 32])
  anchors = anchor_gen([512, 512, 3])
  ```
  """

  def __init__(self,
               anchor_sizes,
               scales,
               aspect_ratios,
               strides,
               clip_boxes=False):
    """Constructs multiscale anchors.

    Args:
      anchor_sizes: A list of int represents the anchor size for each scale. The
        anchor height will be `anchor_size / sqrt(aspect_ratio)`, anchor width
        will be `anchor_size * sqrt(aspect_ratio)` for each scale.
      scales: A list/tuple, or a list/tuple of a list/tuple of positive
        floats representing the actual anchor size to the base `anchor_size`.
      aspect_ratios: A list/tuple, or a list/tuple of a list/tuple of positive
        floats representing the ratio of anchor width to anchor height.
      strides: A list/tuple of ints represent the anchor stride size between
        center of anchors at each scale.
      clip_boxes: Boolean to represent whether the anchor coordinates should be
        clipped to the image size. Defaults to `False`.

    Input shape: the size of the image, `[H, W, C]`
    Output shape: the size of anchors concat on each level, `[(H /
      strides) * (W / strides), K * 4]`
    """
    # aspect_ratio is a single list that is the same across all levels.
    aspect_ratios = maybe_map_structure_for_anchor(aspect_ratios, anchor_sizes)
    scales = maybe_map_structure_for_anchor(scales, anchor_sizes)
    if isinstance(anchor_sizes, dict):
      self.anchor_generators = {}
      for k in anchor_sizes.keys():
        self.anchor_generators[k] = _SingleAnchorGenerator(
            anchor_sizes[k], scales[k], aspect_ratios[k], strides[k],
            clip_boxes)
    elif isinstance(anchor_sizes, (list, tuple)):
      self.anchor_generators = []
      for anchor_size, scale_list, ar_list, stride in zip(
          anchor_sizes, scales, aspect_ratios, strides):
        self.anchor_generators.append(
            _SingleAnchorGenerator(anchor_size, scale_list, ar_list, stride,
                                   clip_boxes))

  def __call__(self, image_size):
    anchor_generators = tf.nest.flatten(self.anchor_generators)
    results = [anchor_gen(image_size) for anchor_gen in anchor_generators]
    return tf.nest.pack_sequence_as(self.anchor_generators, results)


def maybe_map_structure_for_anchor(params, anchor_sizes):
  """Broadcasts the params to match anchor_sizes."""
  if all(isinstance(param, (int, float)) for param in params):
    if isinstance(anchor_sizes, (tuple, list)):
      return [params] * len(anchor_sizes)
    elif isinstance(anchor_sizes, dict):
      return tf.nest.map_structure(lambda _: params, anchor_sizes)
    else:
      raise ValueError("the structure of `anchor_sizes` must be a tuple, "
                       "list, or dict, given {}".format(anchor_sizes))
  else:
    return params
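For readers skimming the diff, here is a minimal usage sketch of the multi-level generator above (not part of the commit). It mirrors the unit tests that follow: with one scale and one aspect ratio per level, each level yields an `[H/stride, W/stride, K*4]` anchor grid.

# Sketch (not in the commit): anchors for two levels of a 64x64 input.
from official.vision.beta.ops.experimental import anchor_generator

gen = anchor_generator.AnchorGenerator(
    anchor_sizes=[64, 128],        # base anchor size per level
    scales=[1.],                   # a flat list is broadcast to every level
    aspect_ratios=[[1.0], [1.0]],  # one aspect ratio per level
    strides=[32, 64])
per_level = gen([64, 64])          # list of [H/stride, W/stride, K*4] tensors
print([a.shape for a in per_level])
# -> [TensorShape([2, 2, 4]), TensorShape([1, 1, 4])]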
official/vision/beta/ops/experimental/anchor_generator_test.py
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for anchor_generator.py."""
from absl.testing import parameterized
import tensorflow as tf

from tensorflow.python.distribute import combinations
from tensorflow.python.distribute import strategy_combinations
from official.vision.beta.ops.experimental import anchor_generator


class AnchorGeneratorTest(parameterized.TestCase, tf.test.TestCase):

  @parameterized.parameters(
      # Single scale anchor.
      (5, [1.0],
       [[[-16., -16., 48., 48.], [-16., 16., 48., 80.]],
        [[16., -16., 80., 48.], [16., 16., 80., 80.]]]),
      # # Multi aspect ratio anchor.
      (6, [1.0, 4.0, 0.25],
       [[[-32., -32., 96., 96., 0., -96., 64., 160., -96., 0., 160., 64.]]]),
  )
  def testAnchorGeneration(self, level, aspect_ratios, expected_boxes):
    image_size = [64, 64]
    anchor_size = 2**(level + 1)
    stride = 2**level
    anchor_gen = anchor_generator._SingleAnchorGenerator(
        anchor_size=anchor_size,
        scales=[1.],
        aspect_ratios=aspect_ratios,
        stride=stride,
        clip_boxes=False)
    anchors = anchor_gen(image_size).numpy()
    self.assertAllClose(expected_boxes, anchors)

  @parameterized.parameters(
      # Single scale anchor.
      (5, [1.0],
       [[[0., 0., 48., 48.], [0., 16., 48., 64.]],
        [[16., 0., 64., 48.], [16., 16., 64., 64.]]]),
      # # Multi aspect ratio anchor.
      (6, [1.0, 4.0, 0.25],
       [[[0., 0., 64., 64., 0., 0., 64., 64., 0., 0., 64., 64.]]]),
  )
  def testAnchorGenerationClipped(self, level, aspect_ratios, expected_boxes):
    image_size = [64, 64]
    anchor_size = 2**(level + 1)
    stride = 2**level
    anchor_gen = anchor_generator._SingleAnchorGenerator(
        anchor_size=anchor_size,
        scales=[1.],
        aspect_ratios=aspect_ratios,
        stride=stride,
        clip_boxes=True)
    anchors = anchor_gen(image_size).numpy()
    self.assertAllClose(expected_boxes, anchors)

  @combinations.generate(
      combinations.combine(distribution=strategy_combinations.all_strategies))
  def testAnchorGenerationDistributed(self, distribution):
    image_size = [64, 64]
    anchor_size = 64
    stride = 32
    aspect_ratios = [1.0]
    with distribution.scope():
      anchor_gen = anchor_generator._SingleAnchorGenerator(
          anchor_size=anchor_size,
          scales=[1.],
          aspect_ratios=aspect_ratios,
          stride=stride,
          clip_boxes=False)
      anchors = anchor_gen(image_size).numpy()
      expected_boxes = [[[-16., -16., 48., 48.], [-16., 16., 48., 80.]],
                        [[16., -16., 80., 48.], [16., 16., 80., 80.]]]
    self.assertAllClose(expected_boxes, anchors)


class MultiScaleAnchorGeneratorTest(parameterized.TestCase, tf.test.TestCase):

  @parameterized.parameters(
      # Multi scale anchor.
      (5, 6, [[1.0], [1.0]], [[-16, -16, 48, 48], [-16, 16, 48, 80],
                              [16, -16, 80, 48], [16, 16, 80, 80],
                              [-32, -32, 96, 96]]),)
  def testAnchorGeneration(self, min_level, max_level, aspect_ratios,
                           expected_boxes):
    image_size = [64, 64]
    levels = range(min_level, max_level + 1)
    anchor_sizes = [2**(level + 1) for level in levels]
    strides = [2**level for level in levels]
    anchor_gen = anchor_generator.AnchorGenerator(
        anchor_sizes=anchor_sizes,
        scales=[1.],
        aspect_ratios=aspect_ratios,
        strides=strides)
    anchors = anchor_gen(image_size)
    anchors = [tf.reshape(anchor, [-1, 4]) for anchor in anchors]
    anchors = tf.concat(anchors, axis=0).numpy()
    self.assertAllClose(expected_boxes, anchors)

  @parameterized.parameters(
      # Multi scale anchor.
      (5, 6, [[1.0], [1.0]], [[-16, -16, 48, 48], [-16, 16, 48, 80],
                              [16, -16, 80, 48], [16, 16, 80, 80],
                              [-32, -32, 96, 96]]),)
  def testAnchorGenerationClipped(self, min_level, max_level, aspect_ratios,
                                  expected_boxes):
    image_size = [64, 64]
    levels = range(min_level, max_level + 1)
    anchor_sizes = [2**(level + 1) for level in levels]
    strides = [2**level for level in levels]
    anchor_gen = anchor_generator.AnchorGenerator(
        anchor_sizes=anchor_sizes,
        scales=[1.],
        aspect_ratios=aspect_ratios,
        strides=strides,
        clip_boxes=False)
    anchors = anchor_gen(image_size)
    anchors = [tf.reshape(anchor, [-1, 4]) for anchor in anchors]
    anchors = tf.concat(anchors, axis=0).numpy()
    self.assertAllClose(expected_boxes, anchors)

  @parameterized.parameters(
      # Multi scale anchor.
      (5, 6, [1.0], {
          5: [[[-16., -16., 48., 48.], [-16., 16., 48., 80.]],
              [[16., -16., 80., 48.], [16., 16., 80., 80.]]],
          6: [[[-32, -32, 96, 96]]]
      }),)
  def testAnchorGenerationDict(self, min_level, max_level, aspect_ratios,
                               expected_boxes):
    image_size = [64, 64]
    levels = range(min_level, max_level + 1)
    anchor_sizes = dict((level, 2**(level + 1)) for level in levels)
    strides = dict((level, 2**level) for level in levels)
    anchor_gen = anchor_generator.AnchorGenerator(
        anchor_sizes=anchor_sizes,
        scales=[1.],
        aspect_ratios=aspect_ratios,
        strides=strides,
        clip_boxes=False)
    anchors = anchor_gen(image_size)
    for k in expected_boxes.keys():
      self.assertAllClose(expected_boxes[k], anchors[k].numpy())


if __name__ == '__main__':
  tf.test.main()
official/vision/beta/ops/mask_ops.py
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Utility functions for segmentations."""
import math

# Import libraries

from cvx2 import latest as cv2
import numpy as np


def paste_instance_masks(masks, detected_boxes, image_height, image_width):
  """Paste instance masks to generate the image segmentation results.

  Args:
    masks: a numpy array of shape [N, mask_height, mask_width] representing the
      instance masks w.r.t. the `detected_boxes`.
    detected_boxes: a numpy array of shape [N, 4] representing the reference
      bounding boxes.
    image_height: an integer representing the height of the image.
    image_width: an integer representing the width of the image.

  Returns:
    segms: a numpy array of shape [N, image_height, image_width] representing
      the instance masks *pasted* on the image canvas.
  """

  def expand_boxes(boxes, scale):
    """Expands an array of boxes by a given scale."""
    # Reference: https://github.com/facebookresearch/Detectron/blob/master/detectron/utils/boxes.py#L227  # pylint: disable=line-too-long
    # The `boxes` in the reference implementation is in [x1, y1, x2, y2] form,
    # whereas `boxes` here is in [x1, y1, w, h] form
    w_half = boxes[:, 2] * .5
    h_half = boxes[:, 3] * .5
    x_c = boxes[:, 0] + w_half
    y_c = boxes[:, 1] + h_half

    w_half *= scale
    h_half *= scale

    boxes_exp = np.zeros(boxes.shape)
    boxes_exp[:, 0] = x_c - w_half
    boxes_exp[:, 2] = x_c + w_half
    boxes_exp[:, 1] = y_c - h_half
    boxes_exp[:, 3] = y_c + h_half

    return boxes_exp

  # Reference: https://github.com/facebookresearch/Detectron/blob/master/detectron/core/test.py#L812  # pylint: disable=line-too-long
  # To work around an issue with cv2.resize (it seems to automatically pad
  # with repeated border values), we manually zero-pad the masks by 1 pixel
  # prior to resizing back to the original image resolution. This prevents
  # "top hat" artifacts. We therefore need to expand the reference boxes by an
  # appropriate factor.
  _, mask_height, mask_width = masks.shape
  scale = max((mask_width + 2.0) / mask_width,
              (mask_height + 2.0) / mask_height)

  ref_boxes = expand_boxes(detected_boxes, scale)
  ref_boxes = ref_boxes.astype(np.int32)
  padded_mask = np.zeros((mask_height + 2, mask_width + 2), dtype=np.float32)
  segms = []
  for mask_ind, mask in enumerate(masks):
    im_mask = np.zeros((image_height, image_width), dtype=np.uint8)
    # Process mask inside bounding boxes.
    padded_mask[1:-1, 1:-1] = mask[:, :]

    ref_box = ref_boxes[mask_ind, :]
    w = ref_box[2] - ref_box[0] + 1
    h = ref_box[3] - ref_box[1] + 1
    w = np.maximum(w, 1)
    h = np.maximum(h, 1)

    mask = cv2.resize(padded_mask, (w, h))
    mask = np.array(mask > 0.5, dtype=np.uint8)

    x_0 = min(max(ref_box[0], 0), image_width)
    x_1 = min(max(ref_box[2] + 1, 0), image_width)
    y_0 = min(max(ref_box[1], 0), image_height)
    y_1 = min(max(ref_box[3] + 1, 0), image_height)

    im_mask[y_0:y_1, x_0:x_1] = mask[(y_0 - ref_box[1]):(y_1 - ref_box[1]),
                                     (x_0 - ref_box[0]):(x_1 - ref_box[0])]
    segms.append(im_mask)

  segms = np.array(segms)
  assert masks.shape[0] == segms.shape[0]
  return segms


def paste_instance_masks_v2(masks, detected_boxes, image_height, image_width):
  """Paste instance masks to generate the image segmentation (v2).

  Args:
    masks: a numpy array of shape [N, mask_height, mask_width] representing the
      instance masks w.r.t. the `detected_boxes`.
    detected_boxes: a numpy array of shape [N, 4] representing the reference
      bounding boxes.
    image_height: an integer representing the height of the image.
    image_width: an integer representing the width of the image.

  Returns:
    segms: a numpy array of shape [N, image_height, image_width] representing
      the instance masks *pasted* on the image canvas.
  """
  _, mask_height, mask_width = masks.shape

  segms = []
  for i, mask in enumerate(masks):
    box = detected_boxes[i, :]
    xmin = box[0]
    ymin = box[1]
    xmax = xmin + box[2]
    ymax = ymin + box[3]

    # Sample points of the cropped mask w.r.t. the image grid.
    # Note that these coordinates may fall beyond the image.
    # Pixel clipping will happen after warping.
    xmin_int = int(math.floor(xmin))
    xmax_int = int(math.ceil(xmax))
    ymin_int = int(math.floor(ymin))
    ymax_int = int(math.ceil(ymax))

    alpha = box[2] / (1.0 * mask_width)
    beta = box[3] / (1.0 * mask_height)
    # pylint: disable=invalid-name
    # Transformation from mask pixel indices to image coordinate.
    M_mask_to_image = np.array(
        [[alpha, 0, xmin],
         [0, beta, ymin],
         [0, 0, 1]],
        dtype=np.float32)
    # Transformation from image to cropped mask coordinate.
    M_image_to_crop = np.array(
        [[1, 0, -xmin_int],
         [0, 1, -ymin_int],
         [0, 0, 1]],
        dtype=np.float32)
    M = np.dot(M_image_to_crop, M_mask_to_image)
    # Compensate the half pixel offset that OpenCV has in the
    # warpPerspective implementation: the top-left pixel is sampled
    # at (0,0), but we want it to be at (0.5, 0.5).
    M = np.dot(
        np.dot(
            np.array([[1, 0, -0.5],
                      [0, 1, -0.5],
                      [0, 0, 1]], np.float32), M),
        np.array([[1, 0, 0.5],
                  [0, 1, 0.5],
                  [0, 0, 1]], np.float32))
    # pylint: enable=invalid-name
    cropped_mask = cv2.warpPerspective(
        mask.astype(np.float32), M,
        (xmax_int - xmin_int, ymax_int - ymin_int))
    cropped_mask = np.array(cropped_mask > 0.5, dtype=np.uint8)

    img_mask = np.zeros((image_height, image_width))
    x0 = max(min(xmin_int, image_width), 0)
    x1 = max(min(xmax_int, image_width), 0)
    y0 = max(min(ymin_int, image_height), 0)
    y1 = max(min(ymax_int, image_height), 0)

    img_mask[y0:y1, x0:x1] = cropped_mask[(y0 - ymin_int):(y1 - ymin_int),
                                          (x0 - xmin_int):(x1 - xmin_int)]
    segms.append(img_mask)

  segms = np.array(segms)
  return segms
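A small worked example of the pasting convention above (not part of the commit): `detected_boxes` are in [x, y, w, h] pixel form, so a 6x6 mask placed at (x=0, y=2) should land in rows 2..8 and columns 0..6 of the canvas, which is the same region the unit test below checks.

# Sketch (not in the commit): paste one 6x6 all-ones mask onto a 10x10 canvas.
import numpy as np
from official.vision.beta.ops import mask_ops

mask = np.ones((1, 6, 6), dtype=np.float32)   # [N, mask_height, mask_width]
boxes = np.array([[0.0, 2.0, 6.0, 6.0]])      # [x, y, w, h] in pixels
canvas = mask_ops.paste_instance_masks_v2(mask, boxes, 10, 10)
print(canvas.shape)                # (1, 10, 10)
print(canvas[0, 2:8, 0:6].min())   # the box region is fully covered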
official/vision/beta/ops/mask_ops_test.py
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for mask_ops.py."""
# Import libraries
import numpy as np
import tensorflow as tf

from official.vision.beta.ops import mask_ops


class MaskUtilsTest(tf.test.TestCase):

  def testPasteInstanceMasks(self):
    image_height = 10
    image_width = 10
    mask_height = 6
    mask_width = 6
    masks = np.random.randint(0, 255, (1, mask_height, mask_width))
    detected_boxes = np.array([[0.0, 2.0, mask_width, mask_height]])

    _ = mask_ops.paste_instance_masks(
        masks, detected_boxes, image_height, image_width)

  def testPasteInstanceMasksV2(self):
    image_height = 10
    image_width = 10
    mask_height = 6
    mask_width = 6
    masks = np.random.randint(0, 255, (1, mask_height, mask_width))
    detected_boxes = np.array([[0.0, 2.0, mask_width, mask_height]])

    image_masks = mask_ops.paste_instance_masks_v2(
        masks, detected_boxes, image_height, image_width)

    self.assertNDArrayNear(
        image_masks[:, 2:8, 0:6],
        np.array(masks > 0.5, dtype=np.uint8),
        1e-5)


if __name__ == '__main__':
  tf.test.main()
official/vision/beta/ops/nms.py
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tensorflow implementation of non max suppression."""
# Import libraries
import tensorflow as tf

from official.vision.beta.ops import box_ops

NMS_TILE_SIZE = 512


def _self_suppression(iou, _, iou_sum):
  batch_size = tf.shape(iou)[0]
  can_suppress_others = tf.cast(
      tf.reshape(tf.reduce_max(iou, 1) <= 0.5, [batch_size, -1, 1]), iou.dtype)
  iou_suppressed = tf.reshape(
      tf.cast(tf.reduce_max(can_suppress_others * iou, 1) <= 0.5, iou.dtype),
      [batch_size, -1, 1]) * iou
  iou_sum_new = tf.reduce_sum(iou_suppressed, [1, 2])
  return [
      iou_suppressed,
      tf.reduce_any(iou_sum - iou_sum_new > 0.5), iou_sum_new
  ]


def _cross_suppression(boxes, box_slice, iou_threshold, inner_idx):
  batch_size = tf.shape(boxes)[0]
  new_slice = tf.slice(boxes, [0, inner_idx * NMS_TILE_SIZE, 0],
                       [batch_size, NMS_TILE_SIZE, 4])
  iou = box_ops.bbox_overlap(new_slice, box_slice)
  ret_slice = tf.expand_dims(
      tf.cast(tf.reduce_all(iou < iou_threshold, [1]), box_slice.dtype),
      2) * box_slice
  return boxes, ret_slice, iou_threshold, inner_idx + 1


def _suppression_loop_body(boxes, iou_threshold, output_size, idx):
  """Process boxes in the range [idx*NMS_TILE_SIZE, (idx+1)*NMS_TILE_SIZE).

  Args:
    boxes: a tensor with a shape of [batch_size, anchors, 4].
    iou_threshold: a float representing the threshold for deciding whether boxes
      overlap too much with respect to IOU.
    output_size: an int32 tensor of size [batch_size]. Representing the number
      of selected boxes for each batch.
    idx: an integer scalar representing induction variable.

  Returns:
    boxes: updated boxes.
    iou_threshold: pass down iou_threshold to the next iteration.
    output_size: the updated output_size.
    idx: the updated induction variable.
  """
  num_tiles = tf.shape(boxes)[1] // NMS_TILE_SIZE
  batch_size = tf.shape(boxes)[0]

  # Iterates over tiles that can possibly suppress the current tile.
  box_slice = tf.slice(boxes, [0, idx * NMS_TILE_SIZE, 0],
                       [batch_size, NMS_TILE_SIZE, 4])
  _, box_slice, _, _ = tf.while_loop(
      lambda _boxes, _box_slice, _threshold, inner_idx: inner_idx < idx,
      _cross_suppression, [boxes, box_slice, iou_threshold, tf.constant(0)])

  # Iterates over the current tile to compute self-suppression.
  iou = box_ops.bbox_overlap(box_slice, box_slice)
  mask = tf.expand_dims(
      tf.reshape(tf.range(NMS_TILE_SIZE), [1, -1]) > tf.reshape(
          tf.range(NMS_TILE_SIZE), [-1, 1]), 0)
  iou *= tf.cast(tf.logical_and(mask, iou >= iou_threshold), iou.dtype)
  suppressed_iou, _, _ = tf.while_loop(
      lambda _iou, loop_condition, _iou_sum: loop_condition, _self_suppression,
      [iou, tf.constant(True), tf.reduce_sum(iou, [1, 2])])
  suppressed_box = tf.reduce_sum(suppressed_iou, 1) > 0
  box_slice *= tf.expand_dims(1.0 - tf.cast(suppressed_box, box_slice.dtype), 2)

  # Uses box_slice to update the input boxes.
  mask = tf.reshape(
      tf.cast(tf.equal(tf.range(num_tiles), idx), boxes.dtype), [1, -1, 1, 1])
  boxes = tf.tile(tf.expand_dims(box_slice, [1]),
                  [1, num_tiles, 1, 1]) * mask + tf.reshape(
                      boxes, [batch_size, num_tiles, NMS_TILE_SIZE, 4]) * (
                          1 - mask)
  boxes = tf.reshape(boxes, [batch_size, -1, 4])

  # Updates output_size.
  output_size += tf.reduce_sum(
      tf.cast(tf.reduce_any(box_slice > 0, [2]), tf.int32), [1])
  return boxes, iou_threshold, output_size, idx + 1


def sorted_non_max_suppression_padded(scores, boxes, max_output_size,
                                      iou_threshold):
  """A wrapper that handles non-maximum suppression.

  Assumption:
    * The boxes are sorted by scores unless the box is a dot (all coordinates
      are zero).
    * Boxes with higher scores can be used to suppress boxes with lower scores.

  The overall design of the algorithm is to handle boxes tile-by-tile:

  boxes = boxes.pad_to_multiply_of(tile_size)
  num_tiles = len(boxes) // tile_size
  output_boxes = []
  for i in range(num_tiles):
    box_tile = boxes[i*tile_size : (i+1)*tile_size]
    for j in range(i - 1):
      suppressing_tile = boxes[j*tile_size : (j+1)*tile_size]
      iou = bbox_overlap(box_tile, suppressing_tile)
      # if the box is suppressed in iou, clear it to a dot
      box_tile *= _update_boxes(iou)
    # Iteratively handle the diagonal tile.
    iou = _box_overlap(box_tile, box_tile)
    iou_changed = True
    while iou_changed:
      # boxes that are not suppressed by anything else
      suppressing_boxes = _get_suppressing_boxes(iou)
      # boxes that are suppressed by suppressing_boxes
      suppressed_boxes = _get_suppressed_boxes(iou, suppressing_boxes)
      # clear iou to 0 for boxes that are suppressed, as they cannot be used
      # to suppress other boxes any more
      new_iou = _clear_iou(iou, suppressed_boxes)
      iou_changed = (new_iou != iou)
      iou = new_iou
    # remaining boxes that can still suppress others, are selected boxes.
    output_boxes.append(_get_suppressing_boxes(iou))
    if len(output_boxes) >= max_output_size:
      break

  Args:
    scores: a tensor with a shape of [batch_size, anchors].
    boxes: a tensor with a shape of [batch_size, anchors, 4].
    max_output_size: a scalar integer `Tensor` representing the maximum number
      of boxes to be selected by non max suppression.
    iou_threshold: a float representing the threshold for deciding whether boxes
      overlap too much with respect to IOU.

  Returns:
    nms_scores: a tensor with a shape of [batch_size, anchors]. It has same
      dtype as input scores.
    nms_proposals: a tensor with a shape of [batch_size, anchors, 4]. It has
      same dtype as input boxes.
  """
  batch_size = tf.shape(boxes)[0]
  num_boxes = tf.shape(boxes)[1]
  pad = tf.cast(
      tf.math.ceil(tf.cast(num_boxes, tf.float32) / NMS_TILE_SIZE),
      tf.int32) * NMS_TILE_SIZE - num_boxes
  boxes = tf.pad(tf.cast(boxes, tf.float32), [[0, 0], [0, pad], [0, 0]])
  scores = tf.pad(
      tf.cast(scores, tf.float32), [[0, 0], [0, pad]], constant_values=-1)
  num_boxes += pad

  def _loop_cond(unused_boxes, unused_threshold, output_size, idx):
    return tf.logical_and(
        tf.reduce_min(output_size) < max_output_size,
        idx < num_boxes // NMS_TILE_SIZE)

  selected_boxes, _, output_size, _ = tf.while_loop(
      _loop_cond, _suppression_loop_body,
      [boxes, iou_threshold,
       tf.zeros([batch_size], tf.int32),
       tf.constant(0)])
  idx = num_boxes - tf.cast(
      tf.nn.top_k(
          tf.cast(tf.reduce_any(selected_boxes > 0, [2]), tf.int32) *
          tf.expand_dims(tf.range(num_boxes, 0, -1), 0), max_output_size)[0],
      tf.int32)
  idx = tf.minimum(idx, num_boxes - 1)
  idx = tf.reshape(
      idx + tf.reshape(tf.range(batch_size) * num_boxes, [-1, 1]), [-1])
  boxes = tf.reshape(
      tf.gather(tf.reshape(boxes, [-1, 4]), idx),
      [batch_size, max_output_size, 4])
  boxes = boxes * tf.cast(
      tf.reshape(tf.range(max_output_size), [1, -1, 1]) < tf.reshape(
          output_size, [-1, 1, 1]), boxes.dtype)
  scores = tf.reshape(
      tf.gather(tf.reshape(scores, [-1, 1]), idx),
      [batch_size, max_output_size])
  scores = scores * tf.cast(
      tf.reshape(tf.range(max_output_size), [1, -1]) < tf.reshape(
          output_size, [-1, 1]), scores.dtype)
  return scores, boxes
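A quick sketch of how the padded NMS above is meant to be called (not part of the commit). Scores must already be sorted per batch in descending order, and the outputs are fixed-size `[batch, max_output_size]` and `[batch, max_output_size, 4]` tensors padded with zeros, which is what makes the op friendly to TPU compilation.

# Sketch (not in the commit): two overlapping boxes plus one distinct box,
# already sorted by score, run through the padded NMS.
import tensorflow as tf
from official.vision.beta.ops import nms

boxes = tf.constant([[[0., 0., 1., 1.],     # highest score, kept
                      [0., 0.1, 1., 1.1],   # IoU > 0.5 with the first, dropped
                      [0., 5., 1., 6.]]])   # disjoint, kept
scores = tf.constant([[0.9, 0.8, 0.7]])
nms_scores, nms_boxes = nms.sorted_non_max_suppression_padded(
    scores=scores, boxes=boxes, max_output_size=3,
    iou_threshold=tf.constant(0.5, tf.float32))
print(nms_scores.numpy())  # approximately [[0.9, 0.7, 0.]]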
official/vision/beta/ops/nms_test.py
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for nms.py."""
# Import libraries
import numpy as np
import tensorflow as tf

from official.vision.beta.ops import nms


class SortedNonMaxSuppressionTest(tf.test.TestCase):

  def setUp(self):
    super(SortedNonMaxSuppressionTest, self).setUp()
    self.boxes_data = [[[0, 0, 1, 1], [0, 0.2, 1, 1.2], [0, 0.4, 1, 1.4],
                        [0, 0.6, 1, 1.6], [0, 0.8, 1, 1.8], [0, 2, 1, 2]],
                       [[0, 2, 1, 2], [0, 0.8, 1, 1.8], [0, 0.6, 1, 1.6],
                        [0, 0.4, 1, 1.4], [0, 0.2, 1, 1.2], [0, 0, 1, 1]]]
    self.scores_data = [[0.9, 0.7, 0.6, 0.5, 0.4, 0.3],
                        [0.8, 0.7, 0.6, 0.5, 0.4, 0.3]]
    self.max_output_size = 6
    self.iou_threshold = 0.5

  def testSortedNonMaxSuppressionOnTPU(self):
    boxes_np = np.array(self.boxes_data, dtype=np.float32)
    scores_np = np.array(self.scores_data, dtype=np.float32)
    iou_threshold_np = np.array(self.iou_threshold, dtype=np.float32)

    boxes = tf.constant(boxes_np)
    scores = tf.constant(scores_np)
    iou_threshold = tf.constant(iou_threshold_np)

    # Runs on TPU.
    strategy = tf.distribute.experimental.TPUStrategy()
    with strategy.scope():
      scores_tpu, boxes_tpu = nms.sorted_non_max_suppression_padded(
          boxes=boxes,
          scores=scores,
          max_output_size=self.max_output_size,
          iou_threshold=iou_threshold)

    self.assertEqual(boxes_tpu.numpy().shape, (2, self.max_output_size, 4))
    self.assertAllClose(scores_tpu.numpy(),
                        [[0.9, 0.6, 0.4, 0.3, 0., 0.],
                         [0.8, 0.7, 0.5, 0.3, 0., 0.]])

  def testSortedNonMaxSuppressionOnCPU(self):
    boxes_np = np.array(self.boxes_data, dtype=np.float32)
    scores_np = np.array(self.scores_data, dtype=np.float32)
    iou_threshold_np = np.array(self.iou_threshold, dtype=np.float32)

    boxes = tf.constant(boxes_np)
    scores = tf.constant(scores_np)
    iou_threshold = tf.constant(iou_threshold_np)

    # Runs on CPU.
    scores_cpu, boxes_cpu = nms.sorted_non_max_suppression_padded(
        boxes=boxes,
        scores=scores,
        max_output_size=self.max_output_size,
        iou_threshold=iou_threshold)

    self.assertEqual(boxes_cpu.numpy().shape, (2, self.max_output_size, 4))
    self.assertAllClose(scores_cpu.numpy(),
                        [[0.9, 0.6, 0.4, 0.3, 0., 0.],
                         [0.8, 0.7, 0.5, 0.3, 0., 0.]])

  def testSortedNonMaxSuppressionOnTPUSpeed(self):
    boxes_np = np.random.rand(2, 12000, 4).astype(np.float32)
    scores_np = np.random.rand(2, 12000).astype(np.float32)
    iou_threshold_np = np.array(0.7, dtype=np.float32)

    boxes = tf.constant(boxes_np)
    scores = tf.constant(scores_np)
    iou_threshold = tf.constant(iou_threshold_np)

    # Runs on TPU.
    strategy = tf.distribute.experimental.TPUStrategy()
    with strategy.scope():
      scores_tpu, boxes_tpu = nms.sorted_non_max_suppression_padded(
          boxes=boxes,
          scores=scores,
          max_output_size=2000,
          iou_threshold=iou_threshold)

    self.assertEqual(scores_tpu.numpy().shape, (2, 2000))
    self.assertEqual(boxes_tpu.numpy().shape, (2, 2000, 4))


if __name__ == '__main__':
  tf.test.main()
official/vision/beta/ops/preprocess_ops.py
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Preprocessing ops."""
import math

from six.moves import range
import tensorflow as tf

from official.vision.beta.ops import box_ops

CENTER_CROP_FRACTION = 0.875


def clip_or_pad_to_fixed_size(input_tensor, size, constant_values=0):
  """Pads data to a fixed length at the first dimension.

  Args:
    input_tensor: `Tensor` with any dimension.
    size: `int` number for the first dimension of output Tensor.
    constant_values: `int` value assigned to the paddings.

  Returns:
    `Tensor` with the first dimension padded to `size`.
  """
  input_shape = input_tensor.get_shape().as_list()
  padding_shape = []

  # Computes the padding length on the first dimension, clip input tensor if it
  # is longer than `size`.
  input_length = tf.shape(input_tensor)[0]
  input_length = tf.clip_by_value(input_length, 0, size)
  input_tensor = input_tensor[:input_length]

  padding_length = tf.maximum(0, size - input_length)
  padding_shape.append(padding_length)

  # Copies shapes of the rest of input shape dimensions.
  for i in range(1, len(input_shape)):
    padding_shape.append(tf.shape(input_tensor)[i])

  # Pads input tensor to the fixed first dimension.
  paddings = tf.cast(constant_values * tf.ones(padding_shape),
                     input_tensor.dtype)
  padded_tensor = tf.concat([input_tensor, paddings], axis=0)
  output_shape = input_shape
  output_shape[0] = size
  padded_tensor.set_shape(output_shape)
  return padded_tensor


def normalize_image(image,
                    offset=(0.485, 0.456, 0.406),
                    scale=(0.229, 0.224, 0.225)):
  """Normalizes the image to zero mean and unit variance."""
  with tf.name_scope('normalize_image'):
    image = tf.image.convert_image_dtype(image, dtype=tf.float32)
    offset = tf.constant(offset)
    offset = tf.expand_dims(offset, axis=0)
    offset = tf.expand_dims(offset, axis=0)
    image -= offset

    scale = tf.constant(scale)
    scale = tf.expand_dims(scale, axis=0)
    scale = tf.expand_dims(scale, axis=0)
    image /= scale
    return image


def compute_padded_size(desired_size, stride):
  """Compute the padded size given the desired size and the stride.

  The padded size will be the smallest rectangle, such that each dimension is
  the smallest multiple of the stride which is larger than the desired
  dimension. For example, if desired_size = (100, 200) and stride = 32,
  the output padded_size = (128, 224).

  Args:
    desired_size: a `Tensor` or `int` list/tuple of two elements representing
      [height, width] of the target output image size.
    stride: an integer, the stride of the backbone network.

  Returns:
    padded_size: a `Tensor` or `int` list/tuple of two elements representing
      [height, width] of the padded output image size.
  """
  if isinstance(desired_size, list) or isinstance(desired_size, tuple):
    padded_size = [int(math.ceil(d * 1.0 / stride) * stride)
                   for d in desired_size]
  else:
    padded_size = tf.cast(
        tf.math.ceil(tf.cast(desired_size, dtype=tf.float32) / stride) * stride,
        tf.int32)
  return padded_size


def resize_and_crop_image(image,
                          desired_size,
                          padded_size,
                          aug_scale_min=1.0,
                          aug_scale_max=1.0,
                          seed=1,
                          method=tf.image.ResizeMethod.BILINEAR):
  """Resizes the input image to output size (RetinaNet style).

  Resize and pad images given the desired output size of the image and
  stride size.

  Here are the preprocessing steps.
  1. For a given image, keep its aspect ratio and rescale the image to make it
     the largest rectangle to be bounded by the rectangle specified by the
     `desired_size`.
  2. Pad the rescaled image to the padded_size.

  Args:
    image: a `Tensor` of shape [height, width, 3] representing an image.
    desired_size: a `Tensor` or `int` list/tuple of two elements representing
      [height, width] of the desired actual output image size.
    padded_size: a `Tensor` or `int` list/tuple of two elements representing
      [height, width] of the padded output image size. Padding will be applied
      after scaling the image to the desired_size.
    aug_scale_min: a `float` with range between [0, 1.0] representing minimum
      random scale applied to desired_size for training scale jittering.
    aug_scale_max: a `float` with range between [1.0, inf] representing maximum
      random scale applied to desired_size for training scale jittering.
    seed: seed for random scale jittering.
    method: function to resize input image to scaled image.

  Returns:
    output_image: `Tensor` of shape [height, width, 3] where [height, width]
      equals to `output_size`.
    image_info: a 2D `Tensor` that encodes the information of the image and the
      applied preprocessing. It is in the format of
      [[original_height, original_width], [desired_height, desired_width],
       [y_scale, x_scale], [y_offset, x_offset]], where [desired_height,
      desired_width] is the actual scaled image size, and [y_scale, x_scale] is
      the scaling factor, which is the ratio of
      scaled dimension / original dimension.
  """
  with tf.name_scope('resize_and_crop_image'):
    image_size = tf.cast(tf.shape(image)[0:2], tf.float32)

    random_jittering = (aug_scale_min != 1.0 or aug_scale_max != 1.0)

    if random_jittering:
      random_scale = tf.random.uniform(
          [], aug_scale_min, aug_scale_max, seed=seed)
      scaled_size = tf.round(random_scale * desired_size)
    else:
      scaled_size = desired_size

    scale = tf.minimum(
        scaled_size[0] / image_size[0], scaled_size[1] / image_size[1])
    scaled_size = tf.round(image_size * scale)

    # Computes 2D image_scale.
    image_scale = scaled_size / image_size

    # Selects non-zero random offset (x, y) if scaled image is larger than
    # desired_size.
    if random_jittering:
      max_offset = scaled_size - desired_size
      max_offset = tf.where(
          tf.less(max_offset, 0), tf.zeros_like(max_offset), max_offset)
      offset = max_offset * tf.random.uniform([2,], 0, 1, seed=seed)
      offset = tf.cast(offset, tf.int32)
    else:
      offset = tf.zeros((2,), tf.int32)

    scaled_image = tf.image.resize(
        image, tf.cast(scaled_size, tf.int32), method=method)

    if random_jittering:
      scaled_image = scaled_image[
          offset[0]:offset[0] + desired_size[0],
          offset[1]:offset[1] + desired_size[1], :]

    output_image = tf.image.pad_to_bounding_box(
        scaled_image, 0, 0, padded_size[0], padded_size[1])

    image_info = tf.stack([
        image_size,
        tf.constant(desired_size, dtype=tf.float32),
        image_scale,
        tf.cast(offset, tf.float32)])
    return output_image, image_info


def resize_and_crop_image_v2(image,
                             short_side,
                             long_side,
                             padded_size,
                             aug_scale_min=1.0,
                             aug_scale_max=1.0,
                             seed=1,
                             method=tf.image.ResizeMethod.BILINEAR):
  """Resizes the input image to output size (Faster R-CNN style).

  Resize and pad images given the specified short / long side length and the
  stride size.

  Here are the preprocessing steps.
  1. For a given image, keep its aspect ratio and first try to rescale the short
     side of the original image to `short_side`.
  2. If the scaled image after 1 has a long side that exceeds `long_side`, keep
     the aspect ratio and rescale the long side of the image to `long_side`.
  3. Pad the rescaled image to the padded_size.

  Args:
    image: a `Tensor` of shape [height, width, 3] representing an image.
    short_side: a scalar `Tensor` or `int` representing the desired short side
      to be rescaled to.
    long_side: a scalar `Tensor` or `int` representing the desired long side to
      be rescaled to.
    padded_size: a `Tensor` or `int` list/tuple of two elements representing
      [height, width] of the padded output image size. Padding will be applied
      after scaling the image to the desired_size.
    aug_scale_min: a `float` with range between [0, 1.0] representing minimum
      random scale applied to desired_size for training scale jittering.
    aug_scale_max: a `float` with range between [1.0, inf] representing maximum
      random scale applied to desired_size for training scale jittering.
    seed: seed for random scale jittering.
    method: function to resize input image to scaled image.

  Returns:
    output_image: `Tensor` of shape [height, width, 3] where [height, width]
      equals to `output_size`.
    image_info: a 2D `Tensor` that encodes the information of the image and the
      applied preprocessing. It is in the format of
      [[original_height, original_width], [desired_height, desired_width],
       [y_scale, x_scale], [y_offset, x_offset]], where [desired_height,
      desired_width] is the actual scaled image size, and [y_scale, x_scale] is
      the scaling factor, which is the ratio of
      scaled dimension / original dimension.
  """
  with tf.name_scope('resize_and_crop_image_v2'):
    image_size = tf.cast(tf.shape(image)[0:2], tf.float32)

    scale_using_short_side = (
        short_side / tf.math.minimum(image_size[0], image_size[1]))
    scale_using_long_side = (
        long_side / tf.math.maximum(image_size[0], image_size[1]))

    scaled_size = tf.math.round(image_size * scale_using_short_side)
    scaled_size = tf.where(
        tf.math.greater(
            tf.math.maximum(scaled_size[0], scaled_size[1]), long_side),
        tf.math.round(image_size * scale_using_long_side),
        scaled_size)
    desired_size = scaled_size

    random_jittering = (aug_scale_min != 1.0 or aug_scale_max != 1.0)

    if random_jittering:
      random_scale = tf.random.uniform(
          [], aug_scale_min, aug_scale_max, seed=seed)
      scaled_size = tf.math.round(random_scale * scaled_size)

    # Computes 2D image_scale.
    image_scale = scaled_size / image_size

    # Selects non-zero random offset (x, y) if scaled image is larger than
    # desired_size.
    if random_jittering:
      max_offset = scaled_size - desired_size
      max_offset = tf.where(
          tf.math.less(max_offset, 0), tf.zeros_like(max_offset), max_offset)
      offset = max_offset * tf.random.uniform([2,], 0, 1, seed=seed)
      offset = tf.cast(offset, tf.int32)
    else:
      offset = tf.zeros((2,), tf.int32)

    scaled_image = tf.image.resize(
        image, tf.cast(scaled_size, tf.int32), method=method)

    if random_jittering:
      scaled_image = scaled_image[
          offset[0]:offset[0] + desired_size[0],
          offset[1]:offset[1] + desired_size[1], :]

    output_image = tf.image.pad_to_bounding_box(
        scaled_image, 0, 0, padded_size[0], padded_size[1])

    image_info = tf.stack([
        image_size,
        tf.cast(desired_size, dtype=tf.float32),
        image_scale,
        tf.cast(offset, tf.float32)])
    return output_image, image_info


def center_crop_image(image):
  """Center crop a square shape slice from the input image.

  It crops a square shape slice from the image. The side of the actual crop
  is 224 / 256 = 0.875 of the short side of the original image. References:
  [1] Very Deep Convolutional Networks for Large-Scale Image Recognition
      https://arxiv.org/abs/1409.1556
  [2] Deep Residual Learning for Image Recognition
      https://arxiv.org/abs/1512.03385

  Args:
    image: a Tensor of shape [height, width, 3] representing the input image.

  Returns:
    cropped_image: a Tensor representing the center cropped image.
  """
  with tf.name_scope('center_crop_image'):
    image_size = tf.cast(tf.shape(image)[:2], dtype=tf.float32)
    crop_size = (
        CENTER_CROP_FRACTION * tf.math.minimum(image_size[0], image_size[1]))
    crop_offset = tf.cast((image_size - crop_size) / 2.0, dtype=tf.int32)
    crop_size = tf.cast(crop_size, dtype=tf.int32)
    cropped_image = image[
        crop_offset[0]:crop_offset[0] + crop_size,
        crop_offset[1]:crop_offset[1] + crop_size, :]
    return cropped_image


def center_crop_image_v2(image_bytes, image_shape):
  """Center crop a square shape slice from the input image.

  It crops a square shape slice from the image. The side of the actual crop
  is 224 / 256 = 0.875 of the short side of the original image. References:
  [1] Very Deep Convolutional Networks for Large-Scale Image Recognition
      https://arxiv.org/abs/1409.1556
  [2] Deep Residual Learning for Image Recognition
      https://arxiv.org/abs/1512.03385

  This is a faster version of `center_crop_image` which takes the original
  image bytes and image size as the inputs, and partially decode the JPEG
  bytes according to the center crop.

  Args:
    image_bytes: a Tensor of type string representing the raw image bytes.
    image_shape: a Tensor specifying the shape of the raw image.

  Returns:
    cropped_image: a Tensor representing the center cropped image.
  """
  with tf.name_scope('center_image_crop_v2'):
    image_shape = tf.cast(image_shape, tf.float32)
    crop_size = (
        CENTER_CROP_FRACTION * tf.math.minimum(image_shape[0], image_shape[1]))
    crop_offset = tf.cast((image_shape - crop_size) / 2.0, dtype=tf.int32)
    crop_size = tf.cast(crop_size, dtype=tf.int32)
    crop_window = tf.stack(
        [crop_offset[0], crop_offset[1], crop_size, crop_size])
    cropped_image = tf.image.decode_and_crop_jpeg(
        image_bytes, crop_window, channels=3)
    return cropped_image


def random_crop_image(image,
                      aspect_ratio_range=(3. / 4., 4. / 3.),
                      area_range=(0.08, 1.0),
                      max_attempts=10,
                      seed=1):
  """Randomly crop an arbitrary shaped slice from the input image.

  Args:
    image: a Tensor of shape [height, width, 3] representing the input image.
    aspect_ratio_range: a list of floats. The cropped area of the image must
      have an aspect ratio = width / height within this range.
    area_range: a list of floats. The cropped area of the image must contain
      a fraction of the input image within this range.
    max_attempts: the number of attempts at generating a cropped region of the
      image of the specified constraints. After max_attempts failures, return
      the entire image.
    seed: the seed of the random generator.

  Returns:
    cropped_image: a Tensor representing the random cropped image. Can be the
      original image if max_attempts is exhausted.
  """
  with tf.name_scope('random_crop_image'):
    crop_offset, crop_size, _ = tf.image.sample_distorted_bounding_box(
        tf.shape(image),
        tf.constant([0.0, 0.0, 1.0, 1.0], dtype=tf.float32, shape=[1, 1, 4]),
        seed=seed,
        min_object_covered=area_range[0],
        aspect_ratio_range=aspect_ratio_range,
        area_range=area_range,
        max_attempts=max_attempts)

    cropped_image = tf.slice(image, crop_offset, crop_size)
    return cropped_image


def random_crop_image_v2(image_bytes,
                         image_shape,
                         aspect_ratio_range=(3. / 4., 4. / 3.),
                         area_range=(0.08, 1.0),
                         max_attempts=10,
                         seed=1):
  """Randomly crop an arbitrary shaped slice from the input image.

  This is a faster version of `random_crop_image` which takes the original
  image bytes and image size as the inputs, and partially decode the JPEG
  bytes according to the generated crop.

  Args:
    image_bytes: a Tensor of type string representing the raw image bytes.
    image_shape: a Tensor specifying the shape of the raw image.
    aspect_ratio_range: a list of floats. The cropped area of the image must
      have an aspect ratio = width / height within this range.
    area_range: a list of floats. The cropped area of the image must contain
      a fraction of the input image within this range.
    max_attempts: the number of attempts at generating a cropped region of the
      image of the specified constraints. After max_attempts failures, return
      the entire image.
    seed: the seed of the random generator.

  Returns:
    cropped_image: a Tensor representing the random cropped image. Can be the
      original image if max_attempts is exhausted.
  """
  with tf.name_scope('random_crop_image_v2'):
    crop_offset, crop_size, _ = tf.image.sample_distorted_bounding_box(
        image_shape,
        tf.constant([0.0, 0.0, 1.0, 1.0], dtype=tf.float32, shape=[1, 1, 4]),
        seed=seed,
        min_object_covered=area_range[0],
        aspect_ratio_range=aspect_ratio_range,
        area_range=area_range,
        max_attempts=max_attempts)
    offset_y, offset_x, _ = tf.unstack(crop_offset)
    crop_height, crop_width, _ = tf.unstack(crop_size)
    crop_window = tf.stack([offset_y, offset_x, crop_height, crop_width])
    cropped_image = tf.image.decode_and_crop_jpeg(
        image_bytes, crop_window, channels=3)
    return cropped_image


def resize_and_crop_boxes(boxes, image_scale, output_size, offset):
  """Resizes boxes to output size with scale and offset.

  Args:
    boxes: `Tensor` of shape [N, 4] representing ground truth boxes.
    image_scale: 2D float `Tensor` representing scale factors that apply to
      [height, width] of input image.
    output_size: 2D `Tensor` or `int` representing [height, width] of target
      output image size.
    offset: 2D `Tensor` representing top-left corner [y0, x0] to crop scaled
      boxes.

  Returns:
    boxes: `Tensor` of shape [N, 4] representing the scaled boxes.
  """
  with tf.name_scope('resize_and_crop_boxes'):
    # Adjusts box coordinates based on image_scale and offset.
    boxes *= tf.tile(tf.expand_dims(image_scale, axis=0), [1, 2])
    boxes -= tf.tile(tf.expand_dims(offset, axis=0), [1, 2])
    # Clips the boxes.
    boxes = box_ops.clip_boxes(boxes, output_size)
    return boxes


def resize_and_crop_masks(masks, image_scale, output_size, offset):
  """Resizes masks to output size with scale and offset.

  Args:
    masks: `Tensor` of shape [N, H, W, 1] representing ground truth masks.
    image_scale: 2D float `Tensor` representing scale factors that apply to
      [height, width] of input image.
    output_size: 2D `Tensor` or `int` representing [height, width] of target
      output image size.
    offset: 2D `Tensor` representing top-left corner [y0, x0] to crop scaled
      boxes.

  Returns:
    masks: `Tensor` of shape [N, H, W, 1] representing the scaled masks.
  """
  with tf.name_scope('resize_and_crop_masks'):
    mask_size = tf.cast(tf.shape(masks)[1:3], tf.float32)
    # Pad masks to avoid empty mask annotations.
    masks = tf.concat(
        [tf.zeros([1, mask_size[0], mask_size[1], 1]), masks], axis=0)

    scaled_size = tf.cast(image_scale * mask_size, tf.int32)
    scaled_masks = tf.image.resize(
        masks, scaled_size, method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
    offset = tf.cast(offset, tf.int32)
    scaled_masks = scaled_masks[
        :,
        offset[0]:offset[0] + output_size[0],
        offset[1]:offset[1] + output_size[1],
        :]

    output_masks = tf.image.pad_to_bounding_box(
        scaled_masks, 0, 0, output_size[0], output_size[1])
    # Remove padding.
    output_masks = output_masks[1::]
    return output_masks


def horizontal_flip_image(image):
  """Flips image horizontally."""
  return tf.image.flip_left_right(image)


def horizontal_flip_boxes(normalized_boxes):
  """Flips normalized boxes horizontally."""
  ymin, xmin, ymax, xmax = tf.split(
      value=normalized_boxes, num_or_size_splits=4, axis=1)
  flipped_xmin = tf.subtract(1.0, xmax)
  flipped_xmax = tf.subtract(1.0, xmin)
  flipped_boxes = tf.concat([ymin, flipped_xmin, ymax, flipped_xmax], 1)
  return flipped_boxes


def horizontal_flip_masks(masks):
  """Flips masks horizontally."""
  return masks[:, :, ::-1]


def random_horizontal_flip(image, normalized_boxes=None, masks=None, seed=1):
  """Randomly flips input image and bounding boxes."""
  with tf.name_scope('random_horizontal_flip'):
    do_flip = tf.greater(tf.random.uniform([], seed=seed), 0.5)

    image = tf.cond(
        do_flip, lambda: horizontal_flip_image(image), lambda: image)

    if normalized_boxes is not None:
      normalized_boxes = tf.cond(
          do_flip, lambda: horizontal_flip_boxes(normalized_boxes),
          lambda: normalized_boxes)

    if masks is not None:
      masks = tf.cond(
          do_flip, lambda: horizontal_flip_masks(masks), lambda: masks)

    return image, normalized_boxes, masks
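A short numeric sketch of the RetinaNet-style resize above (not part of the commit). A 400x600 image resized toward a 512x512 desired size keeps its aspect ratio, so the scale is min(512/400, 512/600) ≈ 0.853, the scaled image is about 341x512, and the result is then padded to the stride-aligned `padded_size`.

# Sketch (not in the commit): resize a 400x600 image for a stride-32 backbone.
import tensorflow as tf
from official.vision.beta.ops import preprocess_ops

image = tf.zeros([400, 600, 3], tf.float32)
desired_size = [512, 512]
padded_size = preprocess_ops.compute_padded_size(desired_size, stride=32)  # [512, 512]
output_image, image_info = preprocess_ops.resize_and_crop_image(
    image, desired_size, padded_size)
print(output_image.shape)      # (512, 512, 3)
print(image_info.numpy()[2])   # per-axis scale, roughly [0.8525, 0.8533]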
official/vision/beta/ops/preprocess_ops_3d.py
# Lint as: python3
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Utils for processing video dataset features."""
from
typing
import
Optional
import
tensorflow
as
tf
def
_sample_or_pad_sequence_indices
(
sequence
:
tf
.
Tensor
,
num_steps
:
int
,
stride
:
int
,
offset
:
tf
.
Tensor
)
->
tf
.
Tensor
:
"""Returns indices to take for sampling or padding sequences to fixed size."""
sequence_length
=
tf
.
shape
(
sequence
)[
0
]
sel_idx
=
tf
.
range
(
sequence_length
)
# Repeats sequence until num_steps are available in total.
max_length
=
num_steps
*
stride
+
offset
num_repeats
=
tf
.
math
.
floordiv
(
max_length
+
sequence_length
-
1
,
sequence_length
)
sel_idx
=
tf
.
tile
(
sel_idx
,
[
num_repeats
])
steps
=
tf
.
range
(
offset
,
offset
+
num_steps
*
stride
,
stride
)
return
tf
.
gather
(
sel_idx
,
steps
)
def
sample_linspace_sequence
(
sequence
:
tf
.
Tensor
,
num_windows
:
int
,
num_steps
:
int
,
stride
:
int
)
->
tf
.
Tensor
:
"""Samples `num_windows` segments from sequence with linearly spaced offsets.
The samples are concatenated in a single `tf.Tensor` in order to have the same
format structure per timestep (e.g. a single frame). If `num_steps` * `stride`
is bigger than the number of timesteps, the sequence is repeated. This
function can be used in evaluation in order to extract enough segments to span
the entire sequence.
Args:
sequence: Any tensor where the first dimension is timesteps.
num_windows: Number of windows retrieved from the sequence.
num_steps: Number of steps (e.g. frames) to take.
stride: Distance to sample between timesteps.
Returns:
A single `tf.Tensor` with first dimension `num_windows` * `num_steps`. The
tensor contains the concatenated list of `num_windows` tensors which offsets
have been linearly spaced from input.
"""
sequence_length
=
tf
.
shape
(
sequence
)[
0
]
max_offset
=
tf
.
maximum
(
0
,
sequence_length
-
num_steps
*
stride
)
offsets
=
tf
.
linspace
(
0.0
,
tf
.
cast
(
max_offset
,
tf
.
float32
),
num_windows
)
offsets
=
tf
.
cast
(
offsets
,
tf
.
int32
)
all_indices
=
[]
for
i
in
range
(
num_windows
):
all_indices
.
append
(
_sample_or_pad_sequence_indices
(
sequence
=
sequence
,
num_steps
=
num_steps
,
stride
=
stride
,
offset
=
offsets
[
i
]))
indices
=
tf
.
concat
(
all_indices
,
axis
=
0
)
indices
.
set_shape
((
num_windows
*
num_steps
,))
return
tf
.
gather
(
sequence
,
indices
)
def
sample_sequence
(
sequence
:
tf
.
Tensor
,
num_steps
:
int
,
random
:
bool
,
stride
:
int
,
seed
:
Optional
[
int
]
=
None
)
->
tf
.
Tensor
:
"""Samples a single segment of size `num_steps` from a given sequence.
If `random` is not `True`, this function will simply sample the central window
of the sequence. Otherwise, a random offset will be chosen in a way that the
desired `num_steps` might be extracted from the sequence.
Args:
sequence: Any tensor where the first dimension is timesteps.
num_steps: Number of steps (e.g. frames) to take.
random: A boolean indicating whether to random sample the single window. If
`True`, the offset is randomized. If `False`, the middle frame minus half
of `num_steps` is the first frame.
stride: Distance to sample between timesteps.
seed: A deterministic seed to use when sampling.
Returns:
A single `tf.Tensor` with first dimension `num_steps` with the sampled
segment.
"""
sequence_length
=
tf
.
shape
(
sequence
)[
0
]
if
random
:
sequence_length
=
tf
.
cast
(
sequence_length
,
tf
.
float32
)
max_offset
=
tf
.
cond
(
sequence_length
>
(
num_steps
-
1
)
*
stride
,
lambda
:
sequence_length
-
(
num_steps
-
1
)
*
stride
,
lambda
:
sequence_length
)
offset
=
tf
.
random
.
uniform
(
(),
maxval
=
tf
.
cast
(
max_offset
,
dtype
=
tf
.
int32
),
dtype
=
tf
.
int32
,
seed
=
seed
)
else
:
offset
=
(
sequence_length
-
num_steps
*
stride
)
//
2
offset
=
tf
.
maximum
(
0
,
offset
)
indices
=
_sample_or_pad_sequence_indices
(
sequence
=
sequence
,
num_steps
=
num_steps
,
stride
=
stride
,
offset
=
offset
)
indices
.
set_shape
((
num_steps
,))
return
tf
.
gather
(
sequence
,
indices
)
def
decode_jpeg
(
image_string
:
tf
.
Tensor
,
channels
:
int
=
0
)
->
tf
.
Tensor
:
"""Decodes JPEG raw bytes string into a RGB uint8 Tensor.
Args:
image_string: A `tf.Tensor` of type strings with the raw JPEG bytes where
the first dimension is timesteps.
channels: Number of channels of the JPEG image. Allowed values are 0, 1 and
3. If 0, the number of channels will be calculated at runtime and no
static shape is set.
Returns:
A Tensor of shape [T, H, W, C] of type uint8 with the decoded images.
"""
return
tf
.
map_fn
(
lambda
x
:
tf
.
image
.
decode_jpeg
(
x
,
channels
=
channels
),
image_string
,
back_prop
=
False
,
dtype
=
tf
.
uint8
)
def
crop_image
(
frames
:
tf
.
Tensor
,
height
:
int
,
width
:
int
,
random
:
bool
=
False
,
seed
:
Optional
[
int
]
=
None
)
->
tf
.
Tensor
:
"""Crops the image sequence of images.
If requested size is bigger than image size, image is padded with 0. If not
random cropping, a central crop is performed.
Args:
frames: A Tensor of dimension [timesteps, in_height, in_width, channels].
height: Cropped image height.
width: Cropped image width.
random: A boolean indicating if crop should be randomized.
seed: A deterministic seed to use when random cropping.
Returns:
A Tensor of shape [timesteps, out_height, out_width, channels] of type uint8
with the cropped images.
"""
if
random
:
# Random spatial crop.
shape
=
tf
.
shape
(
frames
)
# If a static_shape is available (e.g. when using this method from add_image
# method), it will be used to have an output tensor with static shape.
static_shape
=
frames
.
shape
.
as_list
()
seq_len
=
shape
[
0
]
if
static_shape
[
0
]
is
None
else
static_shape
[
0
]
channels
=
shape
[
3
]
if
static_shape
[
3
]
is
None
else
static_shape
[
3
]
frames
=
tf
.
image
.
random_crop
(
frames
,
(
seq_len
,
height
,
width
,
channels
),
seed
)
else
:
# Central crop or pad.
frames
=
tf
.
image
.
resize_with_crop_or_pad
(
frames
,
height
,
width
)
return
frames
def resize_smallest(frames: tf.Tensor, min_resize: int) -> tf.Tensor:
  """Resizes frames so that min(`height`, `width`) is equal to `min_resize`.

  This function does nothing if min(`height`, `width`) is already equal to
  `min_resize`, which saves compute time.

  Args:
    frames: A Tensor of dimension [timesteps, input_h, input_w, channels].
    min_resize: Minimum size of the final image dimensions.

  Returns:
    A Tensor of shape [timesteps, output_h, output_w, channels] of type
    frames.dtype where min(output_h, output_w) = min_resize.
  """
  shape = tf.shape(frames)
  input_h = shape[1]
  input_w = shape[2]

  output_h = tf.maximum(min_resize, (input_h * min_resize) // input_w)
  output_w = tf.maximum(min_resize, (input_w * min_resize) // input_h)

  def resize_fn():
    frames_resized = tf.image.resize(frames, (output_h, output_w))
    return tf.cast(frames_resized, frames.dtype)

  should_resize = tf.math.logical_or(tf.not_equal(input_w, output_w),
                                     tf.not_equal(input_h, output_h))
  frames = tf.cond(should_resize, resize_fn, lambda: frames)
  return frames
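The integer arithmetic above preserves the aspect ratio; a short worked example (editor's sketch, not part of the original file):

import tensorflow as tf
from official.vision.beta.ops import preprocess_ops_3d

frames = tf.zeros([8, 90, 120, 3], dtype=tf.uint8)
resized = preprocess_ops_3d.resize_smallest(frames, 45)
# output_h = max(45, (90 * 45) // 120) = 45 and output_w = max(45, (120 * 45) // 90) = 60,
# so `resized` has shape (8, 45, 60, 3); passing min_resize=90 would return the input unchanged.
print(resized.shape)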
def random_flip_left_right(frames: tf.Tensor,
                           seed: Optional[int] = None) -> tf.Tensor:
  """Flips all the frames with a probability of 50%.

  Args:
    frames: A Tensor of shape [timesteps, input_h, input_w, channels].
    seed: A seed to use for the random sampling.

  Returns:
    A Tensor of shape [timesteps, output_h, output_w, channels], possibly
    flipped left to right.
  """
  is_flipped = tf.random.uniform(
      (), minval=0, maxval=2, dtype=tf.int32, seed=seed)
  frames = tf.cond(
      tf.equal(is_flipped, 1),
      true_fn=lambda: tf.image.flip_left_right(frames),
      false_fn=lambda: frames)
  return frames
def normalize_image(frames: tf.Tensor,
                    zero_centering_image: bool,
                    dtype: tf.dtypes.DType = tf.float32) -> tf.Tensor:
  """Normalizes images.

  Args:
    frames: A Tensor of numbers.
    zero_centering_image: If True, results are in [-1, 1]; if False, results
      are in [0, 1].
    dtype: Type of output Tensor.

  Returns:
    A Tensor of the same shape as the input and of the given type.
  """
  frames = tf.cast(frames, dtype)
  if zero_centering_image:
    return frames * (2.0 / 255.0) - 1.0
  else:
    return frames / 255.0
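A tiny numeric check of the two normalization modes (editor's example):

import tensorflow as tf
from official.vision.beta.ops import preprocess_ops_3d

pixels = tf.constant([0, 128, 255], dtype=tf.uint8)
print(preprocess_ops_3d.normalize_image(pixels, zero_centering_image=False))  # ~[0.0, 0.502, 1.0]
print(preprocess_ops_3d.normalize_image(pixels, zero_centering_image=True))   # ~[-1.0, 0.004, 1.0]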
official/vision/beta/ops/preprocess_ops_3d_test.py
0 → 100644
# Lint as: python3
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import io
import itertools

import numpy as np
from PIL import Image
import tensorflow as tf

from official.vision.beta.ops import preprocess_ops_3d
class ParserUtilsTest(tf.test.TestCase):

  def setUp(self):
    super().setUp()
    # [[0, 1, ..., 119], [1, 2, ..., 120], ..., [89, 90, ..., 208]].
    self._frames = tf.stack([tf.range(i, i + 120) for i in range(90)])
    self._frames = tf.cast(self._frames, tf.uint8)
    self._frames = self._frames[tf.newaxis, :, :, tf.newaxis]
    self._frames = tf.broadcast_to(self._frames, (6, 90, 120, 3))

    # Create an equivalent numpy array for assertions.
    self._np_frames = np.array([range(i, i + 120) for i in range(90)])
    self._np_frames = self._np_frames[np.newaxis, :, :, np.newaxis]
    self._np_frames = np.broadcast_to(self._np_frames, (6, 90, 120, 3))
  def test_sample_linspace_sequence(self):
    sequence = tf.range(100)
    sampled_seq_1 = preprocess_ops_3d.sample_linspace_sequence(
        sequence, 10, 10, 1)
    sampled_seq_2 = preprocess_ops_3d.sample_linspace_sequence(
        sequence, 7, 10, 1)
    sampled_seq_3 = preprocess_ops_3d.sample_linspace_sequence(
        sequence, 7, 5, 2)
    sampled_seq_4 = preprocess_ops_3d.sample_linspace_sequence(
        sequence, 101, 1, 1)

    self.assertAllEqual(sampled_seq_1, range(100))
    # [0, 1, 2, 3, 4, ..., 8, 9, 15, 16, ..., 97, 98, 99]
    self.assertAllEqual(
        sampled_seq_2,
        [15 * i + j for i, j in itertools.product(range(7), range(10))])
    # [0, 2, 4, 6, 8, 15, 17, 19, ..., 96, 98]
    self.assertAllEqual(
        sampled_seq_3,
        [15 * i + 2 * j for i, j in itertools.product(range(7), range(5))])
    self.assertAllEqual(sampled_seq_4, [0] + list(range(100)))

  def test_sample_sequence(self):
    sequence = tf.range(100)
    sampled_seq_1 = preprocess_ops_3d.sample_sequence(sequence, 10, False, 1)
    sampled_seq_2 = preprocess_ops_3d.sample_sequence(sequence, 10, False, 2)
    sampled_seq_3 = preprocess_ops_3d.sample_sequence(sequence, 10, True, 1)

    self.assertAllEqual(sampled_seq_1, range(45, 55))
    self.assertAllEqual(sampled_seq_2, range(40, 60, 2))

    offset_3 = sampled_seq_3[0]
    self.assertBetween(offset_3, 0, 99)
    self.assertAllEqual(sampled_seq_3, range(offset_3, offset_3 + 10))

  def test_decode_jpeg(self):
    # Create a random RGB JPEG image.
    random_image = np.random.randint(
        0, 256, size=(263, 320, 3), dtype=np.uint8)
    random_image = Image.fromarray(random_image)
    with io.BytesIO() as buffer:
      random_image.save(buffer, format='JPEG')
      raw_image_bytes = buffer.getvalue()

    raw_image = tf.constant([raw_image_bytes, raw_image_bytes])
    decoded_image = preprocess_ops_3d.decode_jpeg(raw_image, 3)

    self.assertEqual(decoded_image.shape.as_list()[3], 3)
    self.assertAllEqual(decoded_image.shape, (2, 263, 320, 3))
  def test_crop_image(self):
    cropped_image_1 = preprocess_ops_3d.crop_image(self._frames, 50, 70)
    cropped_image_2 = preprocess_ops_3d.crop_image(self._frames, 200, 200)
    cropped_image_3 = preprocess_ops_3d.crop_image(self._frames, 50, 70, True)

    self.assertAllEqual(cropped_image_1.shape, (6, 50, 70, 3))
    self.assertAllEqual(cropped_image_1, self._np_frames[:, 20:70, 25:95, :])

    self.assertAllEqual(cropped_image_2.shape, (6, 200, 200, 3))
    expected = np.pad(
        self._np_frames, ((0, 0), (55, 55), (40, 40), (0, 0)), 'constant')
    self.assertAllEqual(cropped_image_2, expected)

    self.assertAllEqual(cropped_image_3.shape, (6, 50, 70, 3))
    offset = cropped_image_3[0, 0, 0, 0]
    expected = np.array([range(i, i + 70) for i in range(offset, offset + 50)])
    expected = expected[np.newaxis, :, :, np.newaxis]
    expected = np.broadcast_to(expected, (6, 50, 70, 3))
    self.assertAllEqual(cropped_image_3, expected)

  def test_resize_smallest(self):
    resized_frames_1 = preprocess_ops_3d.resize_smallest(self._frames, 180)
    resized_frames_2 = preprocess_ops_3d.resize_smallest(self._frames, 45)
    resized_frames_3 = preprocess_ops_3d.resize_smallest(self._frames, 90)
    resized_frames_4 = preprocess_ops_3d.resize_smallest(
        tf.transpose(self._frames, (0, 2, 1, 3)), 45)

    self.assertAllEqual(resized_frames_1.shape, (6, 180, 240, 3))
    self.assertAllEqual(resized_frames_2.shape, (6, 45, 60, 3))
    self.assertAllEqual(resized_frames_3.shape, (6, 90, 120, 3))
    self.assertAllEqual(resized_frames_4.shape, (6, 60, 45, 3))

  def test_random_flip_left_right(self):
    flipped_frames = preprocess_ops_3d.random_flip_left_right(self._frames)

    flipped = np.fliplr(self._np_frames[0, :, :, 0])
    flipped = flipped[np.newaxis, :, :, np.newaxis]
    flipped = np.broadcast_to(flipped, (6, 90, 120, 3))

    self.assertTrue((flipped_frames == self._np_frames).numpy().all() or
                    (flipped_frames == flipped).numpy().all())

  def test_normalize_image(self):
    normalized_images_1 = preprocess_ops_3d.normalize_image(
        self._frames, False, tf.float32)
    normalized_images_2 = preprocess_ops_3d.normalize_image(
        self._frames, True, tf.float32)

    self.assertAllClose(normalized_images_1, self._np_frames / 255)
    self.assertAllClose(normalized_images_2, self._np_frames * 2 / 255 - 1.0)


if __name__ == '__main__':
  tf.test.main()
official/vision/beta/ops/preprocess_ops_test.py
0 → 100644
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for preprocess_ops.py."""
import io

# Import libraries
from absl.testing import parameterized
import numpy as np
from PIL import Image
import tensorflow as tf

from official.vision.beta.ops import preprocess_ops


def _encode_image(image_array, fmt):
  image = Image.fromarray(image_array)
  with io.BytesIO() as output:
    image.save(output, format=fmt)
    return output.getvalue()
class InputUtilsTest(parameterized.TestCase, tf.test.TestCase):

  @parameterized.parameters(
      ([1], 10),
      ([1, 2], 10),
      ([1, 2, 3], 10),
      ([11], 10),
      ([12, 2], 10),
      ([13, 2, 3], 10),
  )
  def testPadToFixedSize(self, input_shape, output_size):
    # Copies input shape to padding shape.
    clip_shape = input_shape[:]
    clip_shape[0] = min(output_size, clip_shape[0])
    padding_shape = input_shape[:]
    padding_shape[0] = max(output_size - input_shape[0], 0)
    expected_outputs = np.concatenate(
        [np.ones(clip_shape), np.zeros(padding_shape)], axis=0)

    data = tf.ones(input_shape)
    output_data = preprocess_ops.clip_or_pad_to_fixed_size(
        data, output_size, constant_values=0)
    output_data = output_data.numpy()
    self.assertAllClose(output_size, output_data.shape[0])
    self.assertAllClose(expected_outputs, output_data)
  @parameterized.parameters(
      (100, 200, 100, 200, 32, 1.0, 1.0, 128, 224),
      (100, 256, 128, 256, 32, 1.0, 1.0, 128, 256),
      (200, 512, 200, 128, 32, 0.25, 0.25, 224, 128),
  )
  def testResizeAndCropImageRectangluarCase(self, input_height, input_width,
                                            desired_height, desired_width,
                                            stride, scale_y, scale_x,
                                            output_height, output_width):
    image = tf.convert_to_tensor(np.random.rand(input_height, input_width, 3))

    desired_size = (desired_height, desired_width)
    resized_image, image_info = preprocess_ops.resize_and_crop_image(
        image,
        desired_size=desired_size,
        padded_size=preprocess_ops.compute_padded_size(desired_size, stride))
    resized_image_shape = tf.shape(resized_image)

    self.assertAllEqual([output_height, output_width, 3],
                        resized_image_shape.numpy())
    self.assertNDArrayNear(
        [[input_height, input_width], [desired_height, desired_width],
         [scale_y, scale_x], [0.0, 0.0]],
        image_info.numpy(), 1e-5)

  @parameterized.parameters(
      (100, 200, 220, 220, 32, 1.1, 1.1, 224, 224),
      (512, 512, 1024, 1024, 32, 2.0, 2.0, 1024, 1024),
  )
  def testResizeAndCropImageSquareCase(self, input_height, input_width,
                                       desired_height, desired_width, stride,
                                       scale_y, scale_x, output_height,
                                       output_width):
    image = tf.convert_to_tensor(np.random.rand(input_height, input_width, 3))

    desired_size = (desired_height, desired_width)
    resized_image, image_info = preprocess_ops.resize_and_crop_image(
        image,
        desired_size=desired_size,
        padded_size=preprocess_ops.compute_padded_size(desired_size, stride))
    resized_image_shape = tf.shape(resized_image)

    self.assertAllEqual([output_height, output_width, 3],
                        resized_image_shape.numpy())
    self.assertNDArrayNear(
        [[input_height, input_width], [desired_height, desired_width],
         [scale_y, scale_x], [0.0, 0.0]],
        image_info.numpy(), 1e-5)
  @parameterized.parameters(
      (100, 200, 100, 300, 32, 1.0, 1.0, 100, 200, 128, 320),
      (200, 100, 100, 300, 32, 1.0, 1.0, 200, 100, 320, 128),
      (100, 200, 80, 100, 32, 0.5, 0.5, 50, 100, 96, 128),
      (200, 100, 80, 100, 32, 0.5, 0.5, 100, 50, 128, 96),
  )
  def testResizeAndCropImageV2(self, input_height, input_width, short_side,
                               long_side, stride, scale_y, scale_x,
                               desired_height, desired_width, output_height,
                               output_width):
    image = tf.convert_to_tensor(np.random.rand(input_height, input_width, 3))
    image_shape = tf.shape(image)[0:2]

    desired_size = tf.where(
        tf.greater(image_shape[0], image_shape[1]),
        tf.constant([long_side, short_side], dtype=tf.int32),
        tf.constant([short_side, long_side], dtype=tf.int32))
    resized_image, image_info = preprocess_ops.resize_and_crop_image_v2(
        image,
        short_side=short_side,
        long_side=long_side,
        padded_size=preprocess_ops.compute_padded_size(desired_size, stride))
    resized_image_shape = tf.shape(resized_image)

    self.assertAllEqual([output_height, output_width, 3],
                        resized_image_shape.numpy())
    self.assertNDArrayNear(
        [[input_height, input_width], [desired_height, desired_width],
         [scale_y, scale_x], [0.0, 0.0]],
        image_info.numpy(), 1e-5)
  @parameterized.parameters(
      (400, 600),
      (600, 400),
  )
  def testCenterCropImage(self, input_height, input_width):
    image = tf.convert_to_tensor(np.random.rand(input_height, input_width, 3))
    cropped_image = preprocess_ops.center_crop_image(image)
    cropped_image_shape = tf.shape(cropped_image)
    self.assertAllEqual([350, 350, 3], cropped_image_shape.numpy())

  @parameterized.parameters(
      (400, 600),
      (600, 400),
  )
  def testCenterCropImageV2(self, input_height, input_width):
    image_bytes = tf.constant(
        _encode_image(
            np.uint8(np.random.rand(input_height, input_width, 3) * 255),
            fmt='JPEG'),
        dtype=tf.string)
    cropped_image = preprocess_ops.center_crop_image_v2(
        image_bytes, tf.constant([input_height, input_width, 3], tf.int32))
    cropped_image_shape = tf.shape(cropped_image)
    self.assertAllEqual([350, 350, 3], cropped_image_shape.numpy())

  @parameterized.parameters(
      (400, 600),
      (600, 400),
  )
  def testRandomCropImage(self, input_height, input_width):
    image = tf.convert_to_tensor(np.random.rand(input_height, input_width, 3))
    _ = preprocess_ops.random_crop_image(image)

  @parameterized.parameters(
      (400, 600),
      (600, 400),
  )
  def testRandomCropImageV2(self, input_height, input_width):
    image_bytes = tf.constant(
        _encode_image(
            np.uint8(np.random.rand(input_height, input_width, 3) * 255),
            fmt='JPEG'),
        dtype=tf.string)
    _ = preprocess_ops.random_crop_image_v2(
        image_bytes, tf.constant([input_height, input_width, 3], tf.int32))


if __name__ == '__main__':
  tf.test.main()
official/vision/beta/ops/sampling_ops.py
0 → 100644
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Class to subsample minibatches by balancing positives and negatives.
Subsamples minibatches based on a pre-specified positive fraction in range
[0,1]. The class presumes there are many more negatives than positive examples:
if the desired batch_size cannot be achieved with the pre-specified positive
fraction, it fills the rest with negative examples. If this is not sufficient
for obtaining the desired batch_size, it returns fewer examples.
The main function to call is subsample(self, indicator, batch_size, labels). Unlike
the original TensorFlow Object Detection API implementation, this port is standalone
and does not expose SubsampleWeights from a minibatch_sampler base class.
When is_static is True, it implements a method that guarantees static shapes.
It also ensures the length of output of the subsample is always batch_size, even
when number of examples set to True in indicator is less than batch_size.
This is originally implemented in TensorFlow Object Detection API.
"""
# Import libraries
import tensorflow as tf
def combined_static_and_dynamic_shape(tensor):
  """Returns a list containing static and dynamic values for the dimensions.

  Returns a list of static and dynamic values for shape dimensions. This is
  useful to preserve static shapes when available in reshape operation.

  Args:
    tensor: A tensor of any type.

  Returns:
    A list of size tensor.shape.ndims containing integers or a scalar tensor.
  """
  static_tensor_shape = tensor.shape.as_list()
  dynamic_tensor_shape = tf.shape(input=tensor)
  combined_shape = []
  for index, dim in enumerate(static_tensor_shape):
    if dim is not None:
      combined_shape.append(dim)
    else:
      combined_shape.append(dynamic_tensor_shape[index])
  return combined_shape
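A brief sketch of where the mixed static/dynamic shape helps (editor's example; `_reshape_boxes` is a hypothetical helper, and the snippet relies on the `tf` import above):

@tf.function(input_signature=[tf.TensorSpec([None, 4], tf.float32)])
def _reshape_boxes(boxes):
  # At trace time dim 0 is unknown, so it comes back as a scalar tensor,
  # while dim 1 stays the Python int 4; the static part survives the reshape.
  shape = combined_static_and_dynamic_shape(boxes)
  return tf.reshape(boxes, [shape[0], 2, 2])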
def indices_to_dense_vector(indices,
                            size,
                            indices_value=1.,
                            default_value=0,
                            dtype=tf.float32):
  """Creates dense vector with indices set to specific value and rest to zeros.

  This function exists because it is unclear if it is safe to use
  tf.sparse_to_dense(indices, [size], 1, validate_indices=False)
  with indices which are not ordered.
  This function accepts a dynamic size (e.g. tf.shape(tensor)[0]).

  Args:
    indices: 1d Tensor with integer indices which are to be set to
      indices_values.
    size: scalar with size (integer) of output Tensor.
    indices_value: values of elements specified by indices in the output
      vector.
    default_value: values of other elements in the output vector.
    dtype: data type.

  Returns:
    dense 1D Tensor of shape [size] with indices set to indices_values and the
    rest set to default_value.
  """
  size = tf.cast(size, dtype=tf.int32)
  zeros = tf.ones([size], dtype=dtype) * default_value
  values = tf.ones_like(indices, dtype=dtype) * indices_value

  return tf.dynamic_stitch(
      [tf.range(size), tf.cast(indices, dtype=tf.int32)], [zeros, values])
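For example (editor's sketch):

dense = indices_to_dense_vector(tf.constant([1, 4, 2]), size=6)
# dense == [0., 1., 1., 0., 1., 0.]: the listed positions take `indices_value`,
# the rest take `default_value`.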
def matmul_gather_on_zeroth_axis(params, indices, scope=None):
  """Matrix multiplication based implementation of tf.gather on zeroth axis.

  TODO(rathodv, jonathanhuang): enable sparse matmul option.

  Args:
    params: A float32 Tensor. The tensor from which to gather values.
      Must be at least rank 1.
    indices: A Tensor. Must be one of the following types: int32, int64.
      Must be in range [0, params.shape[0]).
    scope: A name for the operation (optional).

  Returns:
    A Tensor. Has the same type as params. Values from params gathered
    from indices given by indices, with shape indices.shape + params.shape[1:].
  """
  scope = scope or 'MatMulGather'
  with tf.name_scope(scope):
    params_shape = combined_static_and_dynamic_shape(params)
    indices_shape = combined_static_and_dynamic_shape(indices)
    params2d = tf.reshape(params, [params_shape[0], -1])
    indicator_matrix = tf.one_hot(indices, params_shape[0])
    gathered_result_flattened = tf.matmul(indicator_matrix, params2d)
    return tf.reshape(gathered_result_flattened,
                      tf.stack(indices_shape + params_shape[1:]))
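And a quick equivalence check for the matmul-based gather (editor's example):

params = tf.constant([[1., 2.], [3., 4.], [5., 6.]])
matmul_result = matmul_gather_on_zeroth_axis(params, tf.constant([2, 0]))
gather_result = tf.gather(params, [2, 0])
# Both evaluate to [[5., 6.], [1., 2.]].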
class BalancedPositiveNegativeSampler:
  """Subsamples minibatches to a desired balance of positives and negatives."""

  def __init__(self, positive_fraction=0.5, is_static=False):
    """Constructs a minibatch sampler.

    Args:
      positive_fraction: desired fraction of positive examples (scalar in
        [0,1]) in the batch.
      is_static: If True, uses an implementation with static shape guarantees.

    Raises:
      ValueError: if positive_fraction < 0, or positive_fraction > 1
    """
    if positive_fraction < 0 or positive_fraction > 1:
      raise ValueError('positive_fraction should be in range [0,1]. '
                       'Received: %s.' % positive_fraction)
    self._positive_fraction = positive_fraction
    self._is_static = is_static

  @staticmethod
  def subsample_indicator(indicator, num_samples):
    """Subsample indicator vector.

    Given a boolean indicator vector with M elements set to `True`, the
    function assigns all but `num_samples` of these previously `True` elements
    to `False`. If `num_samples` is greater than M, the original indicator
    vector is returned.

    Args:
      indicator: a 1-dimensional boolean tensor indicating which elements
        are allowed to be sampled and which are not.
      num_samples: int32 scalar tensor

    Returns:
      a boolean tensor with the same shape as input (indicator) tensor
    """
    indices = tf.where(indicator)
    indices = tf.random.shuffle(indices)
    indices = tf.reshape(indices, [-1])

    num_samples = tf.minimum(tf.size(input=indices), num_samples)
    selected_indices = tf.slice(indices, [0], tf.reshape(num_samples, [1]))

    selected_indicator = indices_to_dense_vector(
        selected_indices, tf.shape(input=indicator)[0])

    return tf.equal(selected_indicator, 1)
  def _get_num_pos_neg_samples(self, sorted_indices_tensor, sample_size):
    """Counts the number of positive and negative examples to be sampled.

    Args:
      sorted_indices_tensor: A sorted int32 tensor of shape [N] which contains
        the signed indices of the examples where the sign is based on the label
        value. The examples that cannot be sampled are set to 0. It samples
        at most sample_size*positive_fraction positive examples and remaining
        from negative examples.
      sample_size: Size of subsamples.

    Returns:
      A tuple containing the number of positive and negative labels in the
      subsample.
    """
    input_length = tf.shape(input=sorted_indices_tensor)[0]
    valid_positive_index = tf.greater(sorted_indices_tensor,
                                      tf.zeros(input_length, tf.int32))
    num_sampled_pos = tf.reduce_sum(
        input_tensor=tf.cast(valid_positive_index, tf.int32))
    max_num_positive_samples = tf.constant(
        int(sample_size * self._positive_fraction), tf.int32)
    num_positive_samples = tf.minimum(max_num_positive_samples,
                                      num_sampled_pos)
    num_negative_samples = tf.constant(sample_size,
                                       tf.int32) - num_positive_samples

    return num_positive_samples, num_negative_samples
  def _get_values_from_start_and_end(self, input_tensor, num_start_samples,
                                     num_end_samples, total_num_samples):
    """Slices the first num_start_samples and last num_end_samples from input_tensor.

    Args:
      input_tensor: An int32 tensor of shape [N] to be sliced.
      num_start_samples: Number of examples to be sliced from the beginning
        of the input tensor.
      num_end_samples: Number of examples to be sliced from the end of the
        input tensor.
      total_num_samples: Sum of num_start_samples and num_end_samples. This
        should be a scalar.

    Returns:
      A tensor containing the first num_start_samples and last num_end_samples
      from input_tensor.
    """
    input_length = tf.shape(input=input_tensor)[0]
    start_positions = tf.less(tf.range(input_length), num_start_samples)
    end_positions = tf.greater_equal(
        tf.range(input_length), input_length - num_end_samples)
    selected_positions = tf.logical_or(start_positions, end_positions)
    selected_positions = tf.cast(selected_positions, tf.float32)
    indexed_positions = tf.multiply(tf.cumsum(selected_positions),
                                    selected_positions)
    one_hot_selector = tf.one_hot(
        tf.cast(indexed_positions, tf.int32) - 1,
        total_num_samples,
        dtype=tf.float32)
    return tf.cast(
        tf.tensordot(
            tf.cast(input_tensor, tf.float32), one_hot_selector, axes=[0, 0]),
        tf.int32)
  def _static_subsample(self, indicator, batch_size, labels):
    """Returns subsampled minibatch.

    Args:
      indicator: boolean tensor of shape [N] whose True entries can be
        sampled. N should be a compile time constant.
      batch_size: desired batch size. This scalar cannot be None.
      labels: boolean tensor of shape [N] denoting positive(=True) and
        negative (=False) examples. N should be a compile time constant.

    Returns:
      sampled_idx_indicator: boolean tensor of shape [N], True for entries
        which are sampled. It ensures the length of output of the subsample is
        always batch_size, even when number of examples set to True in
        indicator is less than batch_size.

    Raises:
      ValueError: if labels and indicator are not 1D boolean tensors.
    """
    # Check if indicator and labels have a static size.
    if not indicator.shape.is_fully_defined():
      raise ValueError('indicator must be static in shape when is_static is '
                       'True')
    if not labels.shape.is_fully_defined():
      raise ValueError('labels must be static in shape when is_static is '
                       'True')
    if not isinstance(batch_size, int):
      raise ValueError('batch_size has to be an integer when is_static is '
                       'True.')

    input_length = tf.shape(input=indicator)[0]

    # Set the number of examples set True in indicator to be at least
    # batch_size.
    num_true_sampled = tf.reduce_sum(
        input_tensor=tf.cast(indicator, tf.float32))
    additional_false_sample = tf.less_equal(
        tf.cumsum(tf.cast(tf.logical_not(indicator), tf.float32)),
        batch_size - num_true_sampled)
    indicator = tf.logical_or(indicator, additional_false_sample)

    # Shuffle indicator and label. Need to store the permutation to restore
    # the order post sampling.
    permutation = tf.random.shuffle(tf.range(input_length))
    indicator = matmul_gather_on_zeroth_axis(
        tf.cast(indicator, tf.float32), permutation)
    labels = matmul_gather_on_zeroth_axis(
        tf.cast(labels, tf.float32), permutation)

    # index (starting from 1) when indicator is True, 0 when False
    indicator_idx = tf.where(
        tf.cast(indicator, tf.bool), tf.range(1, input_length + 1),
        tf.zeros(input_length, tf.int32))

    # Replace -1 for negative, +1 for positive labels
    signed_label = tf.where(
        tf.cast(labels, tf.bool), tf.ones(input_length, tf.int32),
        tf.scalar_mul(-1, tf.ones(input_length, tf.int32)))
    # negative of index for negative label, positive index for positive label,
    # 0 when indicator is False.
    signed_indicator_idx = tf.multiply(indicator_idx, signed_label)
    sorted_signed_indicator_idx = tf.nn.top_k(
        signed_indicator_idx, input_length, sorted=True).values

    [num_positive_samples,
     num_negative_samples] = self._get_num_pos_neg_samples(
         sorted_signed_indicator_idx, batch_size)

    sampled_idx = self._get_values_from_start_and_end(
        sorted_signed_indicator_idx, num_positive_samples,
        num_negative_samples, batch_size)

    # Shift the indices to start from 0 and remove any samples that are set as
    # False.
    sampled_idx = tf.abs(sampled_idx) - tf.ones(batch_size, tf.int32)
    sampled_idx = tf.multiply(
        tf.cast(tf.greater_equal(sampled_idx, tf.constant(0)), tf.int32),
        sampled_idx)

    sampled_idx_indicator = tf.cast(
        tf.reduce_sum(
            input_tensor=tf.one_hot(sampled_idx, depth=input_length), axis=0),
        tf.bool)

    # project back the order based on stored permutations
    reprojections = tf.one_hot(
        permutation, depth=input_length, dtype=tf.float32)
    return tf.cast(
        tf.tensordot(
            tf.cast(sampled_idx_indicator, tf.float32),
            reprojections,
            axes=[0, 0]), tf.bool)
  def subsample(self, indicator, batch_size, labels, scope=None):
    """Returns subsampled minibatch.

    Args:
      indicator: boolean tensor of shape [N] whose True entries can be
        sampled.
      batch_size: desired batch size. If None, keeps all positive samples and
        randomly selects negative samples so that the positive sample fraction
        matches self._positive_fraction. It cannot be None if is_static is
        True.
      labels: boolean tensor of shape [N] denoting positive(=True) and
        negative (=False) examples.
      scope: name scope.

    Returns:
      sampled_idx_indicator: boolean tensor of shape [N], True for entries
        which are sampled.

    Raises:
      ValueError: if labels and indicator are not 1D boolean tensors.
    """
    if len(indicator.get_shape().as_list()) != 1:
      raise ValueError('indicator must be 1 dimensional, got a tensor of '
                       'shape %s' % indicator.get_shape())
    if len(labels.get_shape().as_list()) != 1:
      raise ValueError('labels must be 1 dimensional, got a tensor of '
                       'shape %s' % labels.get_shape())
    if labels.dtype != tf.bool:
      raise ValueError('labels should be of type bool. Received: %s' %
                       labels.dtype)
    if indicator.dtype != tf.bool:
      raise ValueError('indicator should be of type bool. Received: %s' %
                       indicator.dtype)
    scope = scope or 'BalancedPositiveNegativeSampler'
    with tf.name_scope(scope):
      if self._is_static:
        return self._static_subsample(indicator, batch_size, labels)
      else:
        # Only sample from indicated samples.
        negative_idx = tf.logical_not(labels)
        positive_idx = tf.logical_and(labels, indicator)
        negative_idx = tf.logical_and(negative_idx, indicator)

        # Sample positive and negative samples separately.
        if batch_size is None:
          max_num_pos = tf.reduce_sum(
              input_tensor=tf.cast(positive_idx, dtype=tf.int32))
        else:
          max_num_pos = int(self._positive_fraction * batch_size)
        sampled_pos_idx = self.subsample_indicator(positive_idx, max_num_pos)
        num_sampled_pos = tf.reduce_sum(
            input_tensor=tf.cast(sampled_pos_idx, tf.int32))
        if batch_size is None:
          negative_positive_ratio = (
              1 - self._positive_fraction) / self._positive_fraction
          max_num_neg = tf.cast(
              negative_positive_ratio *
              tf.cast(num_sampled_pos, dtype=tf.float32),
              dtype=tf.int32)
        else:
          max_num_neg = batch_size - num_sampled_pos
        sampled_neg_idx = self.subsample_indicator(negative_idx, max_num_neg)
        return tf.logical_or(sampled_pos_idx, sampled_neg_idx)
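Finally, an end-to-end sketch of the sampler itself (editor's example; the label values are illustrative):

import tensorflow as tf
from official.vision.beta.ops import sampling_ops

labels = tf.constant([True, False, False, True, False, False, False, False])
indicator = tf.ones_like(labels, dtype=tf.bool)  # every entry may be sampled
sampler = sampling_ops.BalancedPositiveNegativeSampler(positive_fraction=0.5)
sampled = sampler.subsample(indicator, batch_size=4, labels=labels)
# `sampled` is a length-8 boolean mask with 4 True entries: at most 2 positives
# (both available here) and the remaining slots filled with negatives.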