Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
ResNet50_tensorflow
Commits
cc748b2a
Commit
cc748b2a
authored
Sep 02, 2020
by
Abdullah Rashwan
Committed by
A. Unique TensorFlower
Sep 02, 2020
Browse files
Internal change
PiperOrigin-RevId: 329754787
parent
2f788e1d
Changes
110
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
4341 additions
and
0 deletions
+4341
-0
official/vision/beta/modeling/layers/box_matcher.py
official/vision/beta/modeling/layers/box_matcher.py
+141
-0
official/vision/beta/modeling/layers/box_matcher_test.py
official/vision/beta/modeling/layers/box_matcher_test.py
+109
-0
official/vision/beta/modeling/layers/box_sampler.py
official/vision/beta/modeling/layers/box_sampler.py
+92
-0
official/vision/beta/modeling/layers/box_sampler_test.py
official/vision/beta/modeling/layers/box_sampler_test.py
+69
-0
official/vision/beta/modeling/layers/detection_generator.py
official/vision/beta/modeling/layers/detection_generator.py
+635
-0
official/vision/beta/modeling/layers/detection_generator_test.py
...l/vision/beta/modeling/layers/detection_generator_test.py
+206
-0
official/vision/beta/modeling/layers/mask_sampler.py
official/vision/beta/modeling/layers/mask_sampler.py
+170
-0
official/vision/beta/modeling/layers/mask_sampler_test.py
official/vision/beta/modeling/layers/mask_sampler_test.py
+137
-0
official/vision/beta/modeling/layers/nn_blocks.py
official/vision/beta/modeling/layers/nn_blocks.py
+991
-0
official/vision/beta/modeling/layers/nn_blocks_3d.py
official/vision/beta/modeling/layers/nn_blocks_3d.py
+244
-0
official/vision/beta/modeling/layers/nn_blocks_3d_test.py
official/vision/beta/modeling/layers/nn_blocks_3d_test.py
+56
-0
official/vision/beta/modeling/layers/nn_blocks_test.py
official/vision/beta/modeling/layers/nn_blocks_test.py
+306
-0
official/vision/beta/modeling/layers/nn_layers.py
official/vision/beta/modeling/layers/nn_layers.py
+147
-0
official/vision/beta/modeling/layers/roi_aligner.py
official/vision/beta/modeling/layers/roi_aligner.py
+71
-0
official/vision/beta/modeling/layers/roi_aligner_test.py
official/vision/beta/modeling/layers/roi_aligner_test.py
+42
-0
official/vision/beta/modeling/layers/roi_generator.py
official/vision/beta/modeling/layers/roi_generator.py
+309
-0
official/vision/beta/modeling/layers/roi_generator_test.py
official/vision/beta/modeling/layers/roi_generator_test.py
+210
-0
official/vision/beta/modeling/layers/roi_sampler.py
official/vision/beta/modeling/layers/roi_sampler.py
+134
-0
official/vision/beta/modeling/layers/roi_sampler_test.py
official/vision/beta/modeling/layers/roi_sampler_test.py
+76
-0
official/vision/beta/modeling/maskrcnn_model.py
official/vision/beta/modeling/maskrcnn_model.py
+196
-0
No files found.
official/vision/beta/modeling/layers/box_matcher.py
0 → 100644
View file @
cc748b2a
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Box matcher."""
# Import libraries
import
tensorflow
as
tf
from
official.vision.beta.ops
import
box_ops
@tf.keras.utils.register_keras_serializable(package='Vision')
class BoxMatcher(tf.keras.layers.Layer):
  """Match boxes with groundtruth boxes."""

  def __init__(self,
               foreground_iou_threshold=0.5,
               background_iou_high_threshold=0.5,
               background_iou_low_threshold=0,
               **kwargs):
    """Initializes a box matcher.

    Args:
      foreground_iou_threshold: float, represent the IoU threshold for a box to
        be considered as positive (if > `foreground_iou_threshold`).
      background_iou_high_threshold: float, represent the IoU threshold for a
        box to be considered as negative (if overlap in
        [`background_iou_low_threshold`, `background_iou_high_threshold`)).
      background_iou_low_threshold: float, represent the IoU threshold for a box
        to be considered as negative (if overlap in
        [`background_iou_low_threshold`, `background_iou_high_threshold`)).
      **kwargs: other key word arguments passed to Layer.
    """
    self._config_dict = {
        'foreground_iou_threshold': foreground_iou_threshold,
        'background_iou_high_threshold': background_iou_high_threshold,
        'background_iou_low_threshold': background_iou_low_threshold,
    }
    super(BoxMatcher, self).__init__(**kwargs)

  def call(self, boxes, gt_boxes, gt_classes):
    """Match boxes to groundtruth boxes.

    Given the proposal boxes and the groundtruth boxes and classes, perform the
    groundtruth matching by taking the argmax of the IoU between boxes and
    groundtruth boxes.

    Args:
      boxes: a tensor of shape of [batch_size, N, 4] representing the box
        coordinates to be matched to groundtruth boxes.
      gt_boxes: a tensor of shape of [batch_size, MAX_INSTANCES, 4] representing
        the groundtruth box coordinates. It is padded with -1s to indicate the
        invalid boxes.
      gt_classes: [batch_size, MAX_INSTANCES] representing the groundtruth box
        classes. It is padded with -1s to indicate the invalid classes.

    Returns:
      matched_gt_boxes: a tensor of shape of [batch, N, 4], representing
        the matched groundtruth box coordinates for each input box. The box is
        considered to match to a groundtruth box only if the IoU overlap is
        greater than `foreground_iou_threshold`. If the box is a negative or
        an ignored match, the matched boxes will be set to all 0s.
      matched_gt_classes: a tensor of shape of [batch, N], representing
        the matched groundtruth classes for each input box. If the box is a
        negative or an ignored match, the matched classes of it will be set to
        0, which corresponds to the background class.
      matched_gt_indices: a tensor of shape of [batch, N], representing the
        indices of the matched groundtruth boxes in the original gt_boxes
        tensor. If the box is a negative or an ignored match, the index of the
        matched groundtruth will be set to -1.
      positive_matches: a bool tensor of shape of [batch, N], representing
        whether each box is a positive match or not. A positive match is the
        case where the matched IoU of a box is strictly greater than
        `foreground_iou_threshold`.
      negative_matches: a bool tensor of shape of [batch, N], representing
        whether each box is a negative match or not. A negative match is the
        case where the matched IoU of a box is greater than or equal to
        `background_iou_low_threshold` and less than
        `background_iou_high_threshold`.
      ignored_matches: a bool tensor of shape of [batch, N], representing
        whether each box is an ignored match or not. An ignored match is the
        match that is neither positive nor negative.
    """
    fg_threshold = self._config_dict['foreground_iou_threshold']
    bg_high_threshold = self._config_dict['background_iou_high_threshold']
    bg_low_threshold = self._config_dict['background_iou_low_threshold']

    matched_gt_boxes, matched_gt_classes, matched_gt_indices, matched_iou, _ = (
        box_ops.box_matching(boxes, gt_boxes, gt_classes))

    positive_matches = tf.greater(matched_iou, fg_threshold)
    negative_matches = tf.logical_and(
        tf.greater_equal(matched_iou, bg_low_threshold),
        tf.less(matched_iou, bg_high_threshold))
    # The previous formulation required `matched_iou < 0.0` AND
    # `matched_iou >= background_iou_high_threshold` at the same time, which can
    # never hold for a non-negative threshold, so no box was ever ignored.
    # Define "ignored" directly as the complement of the other two categories.
    ignored_matches = tf.logical_not(
        tf.logical_or(positive_matches, negative_matches))

    background_indicator = tf.logical_or(negative_matches, ignored_matches)

    # re-assign negatively matched boxes to the background class.
    matched_gt_boxes = tf.where(
        tf.tile(tf.expand_dims(background_indicator, -1), [1, 1, 4]),
        tf.zeros_like(matched_gt_boxes),
        matched_gt_boxes)
    matched_gt_classes = tf.where(
        background_indicator,
        tf.zeros_like(matched_gt_classes),
        matched_gt_classes)
    matched_gt_indices = tf.where(
        background_indicator,
        -tf.ones_like(matched_gt_indices),
        matched_gt_indices)

    return (matched_gt_boxes, matched_gt_classes, matched_gt_indices,
            positive_matches, negative_matches, ignored_matches)

  def get_config(self):
    """Returns the constructor arguments this layer was built with."""
    return self._config_dict

  @classmethod
  def from_config(cls, config):
    """Builds a `BoxMatcher` from a dict produced by `get_config`."""
    return cls(**config)
official/vision/beta/modeling/layers/box_matcher_test.py
0 → 100644
View file @
cc748b2a
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for box_matcher.py."""
# Import libraries
import
numpy
as
np
import
tensorflow
as
tf
from
official.vision.beta.modeling.layers
import
box_matcher
class BoxMatcherTest(tf.test.TestCase):
  """Tests the BoxMatcher layer outputs and its config round trip."""

  def test_box_matcher(self):
    """Checks expected CPU outputs and that TPU outputs agree with them."""
    boxes = tf.constant(
        np.array([[
            [0, 0, 1, 1],
            [5, 0, 10, 5],
        ]]),
        dtype=tf.float32)
    gt_boxes = tf.constant(
        np.array([[
            [0, 0, 5, 5],
            [0, 5, 5, 10],
            [5, 0, 10, 5],
            [5, 5, 10, 10],
        ]]),
        dtype=tf.float32)
    gt_classes = tf.constant(np.array([[2, 10, 3, -1]]), dtype=tf.int32)

    # Positional order: foreground, background-high, background-low threshold.
    matcher = box_matcher.BoxMatcher(0.5, 0.2, 0.0)

    # Runs on TPU.
    strategy = tf.distribute.experimental.TPUStrategy()
    with strategy.scope():
      (tpu_boxes, tpu_classes, tpu_indices,
       tpu_positive, tpu_negative, tpu_ignored) = (
           matcher(boxes, gt_boxes, gt_classes))

    # Runs on CPU.
    (cpu_boxes, cpu_classes, cpu_indices,
     cpu_positive, cpu_negative, cpu_ignored) = (
         matcher(boxes, gt_boxes, gt_classes))

    # correctness
    self.assertNDArrayNear(
        cpu_boxes.numpy(), [[[0, 0, 0, 0], [5, 0, 10, 5]]], 1e-4)
    expected_exact = (
        (cpu_classes, [[0, 3]]),
        (cpu_indices, [[-1, 2]]),
        (cpu_positive, [[False, True]]),
        (cpu_negative, [[True, False]]),
        (cpu_ignored, [[False, False]]),
    )
    for actual, expected in expected_exact:
      self.assertAllEqual(actual.numpy(), expected)

    # consistency.
    self.assertNDArrayNear(cpu_boxes.numpy(), tpu_boxes.numpy(), 1e-4)
    cpu_tpu_pairs = (
        (cpu_classes, tpu_classes),
        (cpu_indices, tpu_indices),
        (cpu_positive, tpu_positive),
        (cpu_negative, tpu_negative),
        (cpu_ignored, tpu_ignored),
    )
    for on_cpu, on_tpu in cpu_tpu_pairs:
      self.assertAllEqual(on_cpu.numpy(), on_tpu.numpy())

  def test_serialize_deserialize(self):
    """Checks get_config() echoes the kwargs and survives from_config()."""
    kwargs = {
        'foreground_iou_threshold': 0.5,
        'background_iou_high_threshold': 0.5,
        'background_iou_low_threshold': 0.5,
    }
    matcher = box_matcher.BoxMatcher(**kwargs)

    self.assertEqual(matcher.get_config(), dict(kwargs))

    restored = box_matcher.BoxMatcher.from_config(matcher.get_config())
    self.assertAllEqual(matcher.get_config(), restored.get_config())
# Script entry point: hand control to the TensorFlow test runner.
if __name__ == '__main__':
  tf.test.main()
official/vision/beta/modeling/layers/box_sampler.py
0 → 100644
View file @
cc748b2a
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Box sampler."""
# Import libraries
import
tensorflow
as
tf
from
official.vision.beta.ops
import
sampling_ops
@tf.keras.utils.register_keras_serializable(package='Vision')
class BoxSampler(tf.keras.layers.Layer):
  """Layer that samples a fixed number of positive and negative boxes."""

  def __init__(self, num_samples=512, foreground_fraction=0.25, **kwargs):
    """Initializes a box sampler.

    Args:
      num_samples: int, the number of sampled boxes per image.
      foreground_fraction: float in [0, 1], what percentage of boxes should be
        sampled from the positive examples.
      **kwargs: other key word arguments passed to Layer.
    """
    self._config_dict = dict(
        num_samples=num_samples,
        foreground_fraction=foreground_fraction,
    )
    super(BoxSampler, self).__init__(**kwargs)

  def call(self, positive_matches, negative_matches, ignored_matches):
    """Picks the indices of the boxes to keep for every image in the batch.

    Args:
      positive_matches: a `bool` tensor of shape of [batch, N] where N is the
        number of instances; `True` marks a positive example.
      negative_matches: a `bool` tensor of shape of [batch, N] where N is the
        number of instances; `True` marks a negative example.
      ignored_matches: a `bool` tensor of shape of [batch, N] where N is the
        number of instances; `True` marks an instance that must not be
        sampled.

    Returns:
      selected_indices: a tensor of shape of [batch_size, K], storing the
        indices of the sampled examples, where K is `num_samples`.
    """
    num_samples = self._config_dict['num_samples']

    # Only positive or negative boxes that are not ignored may be sampled.
    is_candidate = tf.logical_and(
        tf.logical_or(positive_matches, negative_matches),
        tf.logical_not(ignored_matches))

    sampler = sampling_ops.BalancedPositiveNegativeSampler(
        positive_fraction=self._config_dict['foreground_fraction'],
        is_static=True)

    # The batch dimension is unrolled in Python, so it must be static.
    per_image_indicators = [
        sampler.subsample(is_candidate[i], num_samples, positive_matches[i])
        for i in range(is_candidate.shape[0])
    ]
    indicators = tf.stack(per_image_indicators)

    # Turn the boolean indicators into indices via top_k on their 0/1 values.
    _, selected_indices = tf.nn.top_k(
        tf.cast(indicators, dtype=tf.int32), k=num_samples, sorted=True)
    return selected_indices

  def get_config(self):
    """Returns the constructor arguments this layer was built with."""
    return self._config_dict

  @classmethod
  def from_config(cls, config):
    """Builds a `BoxSampler` from a dict produced by `get_config`."""
    return cls(**config)
official/vision/beta/modeling/layers/box_sampler_test.py
0 → 100644
View file @
cc748b2a
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for box_sampler.py."""
# Import libraries
import
numpy
as
np
import
tensorflow
as
tf
from
official.vision.beta.modeling.layers
import
box_sampler
class BoxSamplerTest(tf.test.TestCase):
  """Tests the BoxSampler layer output shape and its config round trip."""

  def test_box_sampler(self):
    """Checks that `num_samples` indices come back on both TPU and CPU."""
    yes, no = True, False
    positive_matches = np.array(
        [[yes, no, no, no, yes, yes, no],
         [no, no, no, no, no, yes, yes]])
    negative_matches = np.array(
        [[no, yes, yes, yes, no, no, no],
         [yes, yes, yes, yes, no, no, no]])
    ignored_matches = np.array(
        [[no, no, no, no, no, no, yes],
         [no, no, no, no, yes, no, no]])

    sampler = box_sampler.BoxSampler(num_samples=2, foreground_fraction=0.5)

    # Runs on TPU.
    strategy = tf.distribute.experimental.TPUStrategy()
    with strategy.scope():
      indices_on_tpu = sampler(
          positive_matches, negative_matches, ignored_matches)
    self.assertEqual(2, tf.shape(indices_on_tpu)[1])

    # Runs on CPU.
    indices_on_cpu = sampler(
        positive_matches, negative_matches, ignored_matches)
    self.assertEqual(2, tf.shape(indices_on_cpu)[1])

  def test_serialize_deserialize(self):
    """Checks get_config() echoes the kwargs and survives from_config()."""
    kwargs = {
        'num_samples': 512,
        'foreground_fraction': 0.25,
    }
    sampler = box_sampler.BoxSampler(**kwargs)

    self.assertEqual(sampler.get_config(), dict(kwargs))

    restored = box_sampler.BoxSampler.from_config(sampler.get_config())
    self.assertAllEqual(sampler.get_config(), restored.get_config())
# Script entry point: hand control to the TensorFlow test runner.
if __name__ == '__main__':
  tf.test.main()
official/vision/beta/modeling/layers/detection_generator.py
0 → 100644
View file @
cc748b2a
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Generators to generate the final detections."""
# Import libraries
import
tensorflow
as
tf
from
official.vision.beta.ops
import
box_ops
from
official.vision.beta.ops
import
nms
def _generate_detections_v1(boxes,
                            scores,
                            pre_nms_top_k=5000,
                            pre_nms_score_threshold=0.05,
                            nms_iou_threshold=0.5,
                            max_num_detections=100):
  """Generate the final detections given the model outputs.

  The implementation unrolls the batch dimension and processes images one by
  one. It requires the batch dimension to be statically known and it is TPU
  compatible.

  Args:
    boxes: a tensor with shape [batch_size, N, num_classes, 4] or
      [batch_size, N, 1, 4], which box predictions on all feature levels. The N
      is the number of total anchors on all levels.
    scores: a tensor with shape [batch_size, N, num_classes], which
      stacks class probability on all feature levels. The N is the number of
      total anchors on all levels. The num_classes is the number of classes
      predicted by the model. Note that the class_outputs here is the raw score.
    pre_nms_top_k: an int number of top candidate detections per class
      before NMS.
    pre_nms_score_threshold: a float representing the threshold for deciding
      when to remove boxes based on score.
    nms_iou_threshold: a float representing the threshold for deciding whether
      boxes overlap too much with respect to IOU.
    max_num_detections: a scalar representing maximum number of boxes retained
      over all classes.

  Returns:
    nms_boxes: `float` Tensor of shape [batch_size, max_num_detections, 4]
      representing top detected boxes in [y1, x1, y2, x2].
    nms_scores: `float` Tensor of shape [batch_size, max_num_detections]
      representing sorted confidence scores for detected boxes. The values are
      between [0, 1].
    nms_classes: `int` Tensor of shape [batch_size, max_num_detections]
      representing classes for detected boxes.
    valid_detections: `int` Tensor of shape [batch_size] only the top
      `valid_detections` boxes are valid detections.
  """
  with tf.name_scope('generate_detections'):
    batch_size = scores.get_shape().as_list()[0]
    nmsed_boxes = []
    nmsed_classes = []
    nmsed_scores = []
    valid_detections = []
    for i in range(batch_size):
      # Pass the hyperparameters by keyword: the per-image helper declares them
      # in the order (pre_nms_top_k, pre_nms_score_threshold,
      # nms_iou_threshold, max_num_detections), so the previous positional call
      # (max_num_detections, nms_iou_threshold, pre_nms_score_threshold,
      # pre_nms_top_k) silently swapped all four values.
      (nmsed_boxes_i, nmsed_scores_i, nmsed_classes_i,
       valid_detections_i) = _generate_detections_per_image(
           boxes[i],
           scores[i],
           pre_nms_top_k=pre_nms_top_k,
           pre_nms_score_threshold=pre_nms_score_threshold,
           nms_iou_threshold=nms_iou_threshold,
           max_num_detections=max_num_detections)
      nmsed_boxes.append(nmsed_boxes_i)
      nmsed_scores.append(nmsed_scores_i)
      nmsed_classes.append(nmsed_classes_i)
      valid_detections.append(valid_detections_i)
    # Re-assemble the per-image results along a new leading batch axis.
    nmsed_boxes = tf.stack(nmsed_boxes, axis=0)
    nmsed_scores = tf.stack(nmsed_scores, axis=0)
    nmsed_classes = tf.stack(nmsed_classes, axis=0)
    valid_detections = tf.stack(valid_detections, axis=0)
  return nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections
def _generate_detections_per_image(boxes,
                                   scores,
                                   pre_nms_top_k=5000,
                                   pre_nms_score_threshold=0.05,
                                   nms_iou_threshold=0.5,
                                   max_num_detections=100):
  """Runs per-class NMS on a single image and keeps the best detections.

  Args:
    boxes: a tensor with shape [N, num_classes, 4] or [N, 1, 4], which box
      predictions on all feature levels. The N is the number of total anchors on
      all levels.
    scores: a tensor with shape [N, num_classes], which stacks class probability
      on all feature levels. The N is the number of total anchors on all levels.
      The num_classes is the number of classes predicted by the model. Note that
      the class_outputs here is the raw score.
    pre_nms_top_k: an int number of top candidate detections per class
      before NMS.
    pre_nms_score_threshold: a float representing the threshold for deciding
      when to remove boxes based on score.
    nms_iou_threshold: a float representing the threshold for deciding whether
      boxes overlap too much with respect to IOU.
    max_num_detections: a scalar representing maximum number of boxes retained
      over all classes.

  Returns:
    nms_boxes: `float` Tensor of shape [max_num_detections, 4] representing top
      detected boxes in [y1, x1, y2, x2].
    nms_scores: `float` Tensor of shape [max_num_detections] representing sorted
      confidence scores for detected boxes. Padded (invalid) entries carry a
      score of -1.
    nms_classes: `int` Tensor of shape [max_num_detections] representing classes
      for detected boxes.
    valid_detections: `int` Tensor counting the returned detections whose score
      is greater than -1; only the top `valid_detections` boxes are valid.
  """
  per_class_boxes = []
  per_class_scores = []
  per_class_labels = []
  box_class_dim = boxes.get_shape().as_list()[1]
  num_classes = scores.get_shape().as_list()[1]

  for class_id in range(num_classes):
    # With class-agnostic boxes (dimension of size 1) every class reuses the
    # single available slice.
    class_boxes = boxes[:, min(box_class_dim - 1, class_id)]
    class_scores = scores[:, class_id]

    # Obtains pre_nms_top_k before running NMS.
    num_candidates = tf.minimum(tf.shape(class_scores)[-1], pre_nms_top_k)
    class_scores, candidate_indices = tf.nn.top_k(
        class_scores, k=num_candidates)
    class_boxes = tf.gather(class_boxes, candidate_indices)

    kept_indices, num_kept = tf.image.non_max_suppression_padded(
        tf.cast(class_boxes, tf.float32),
        tf.cast(class_scores, tf.float32),
        max_num_detections,
        iou_threshold=nms_iou_threshold,
        score_threshold=pre_nms_score_threshold,
        pad_to_max_output_size=True,
        name='nms_detections_' + str(class_id))
    kept_boxes = tf.gather(class_boxes, kept_indices)
    kept_scores = tf.gather(class_scores, kept_indices)

    # Sets scores of invalid boxes to -1.
    is_valid = tf.less(tf.range(max_num_detections), [num_kept])
    kept_scores = tf.where(is_valid, kept_scores, -tf.ones_like(kept_scores))
    kept_labels = tf.fill([max_num_detections], class_id)

    per_class_boxes.append(kept_boxes)
    per_class_scores.append(kept_scores)
    per_class_labels.append(kept_labels)

  # Concats results from all classes and sort them.
  all_boxes = tf.concat(per_class_boxes, axis=0)
  all_scores = tf.concat(per_class_scores, axis=0)
  all_labels = tf.concat(per_class_labels, axis=0)

  top_scores, top_indices = tf.nn.top_k(
      all_scores, k=max_num_detections, sorted=True)
  top_boxes = tf.gather(all_boxes, top_indices)
  top_labels = tf.gather(all_labels, top_indices)

  # Padded entries were given score -1 above, so they are excluded here.
  valid_detections = tf.reduce_sum(
      tf.cast(tf.greater(top_scores, -1), tf.int32))
  return top_boxes, top_scores, top_labels, valid_detections
def _select_top_k_scores(scores_in, pre_nms_num_detections):
  """Picks the highest-scoring anchors independently for every class.

  Args:
    scores_in: a Tensor with shape [batch_size, N, num_classes], which stacks
      class logit outputs on all feature levels. The N is the number of total
      anchors on all levels. The num_classes is the number of classes predicted
      by the model.
    pre_nms_num_detections: Number of candidates before NMS.

  Returns:
    scores and indices: Tensors with shape [batch_size, pre_nms_num_detections,
      num_classes].
  """
  batch_size, num_anchors, num_class = scores_in.get_shape().as_list()

  # Bring the anchor axis last and fold batch and class together so a single
  # top_k call ranks the anchors of every (image, class) pair.
  class_major = tf.transpose(scores_in, perm=[0, 2, 1])
  flat_scores = tf.reshape(class_major, [-1, num_anchors])
  best_scores, best_indices = tf.nn.top_k(
      flat_scores, k=pre_nms_num_detections, sorted=True)

  unfolded_shape = [batch_size, num_class, pre_nms_num_detections]
  best_scores = tf.reshape(best_scores, unfolded_shape)
  best_indices = tf.reshape(best_indices, unfolded_shape)

  # Restore the [batch, candidates, classes] layout.
  return (tf.transpose(best_scores, [0, 2, 1]),
          tf.transpose(best_indices, [0, 2, 1]))
def _generate_detections_v2(boxes,
                            scores,
                            pre_nms_top_k=5000,
                            pre_nms_score_threshold=0.05,
                            nms_iou_threshold=0.5,
                            max_num_detections=100):
  """Generate the final detections given the model outputs.

  This implementation unrolls classes dimension while using the tf.while_loop
  to implement the batched NMS, so that it can be parallelized at the batch
  dimension. It should give better performance comparing to v1 implementation.
  It is TPU compatible.

  Args:
    boxes: a tensor with shape [batch_size, N, num_classes, 4] or [batch_size,
      N, 1, 4], which box predictions on all feature levels. The N is the number
      of total anchors on all levels.
    scores: a tensor with shape [batch_size, N, num_classes], which stacks class
      probability on all feature levels. The N is the number of total anchors on
      all levels. The num_classes is the number of classes predicted by the
      model. Note that the class_outputs here is the raw score.
    pre_nms_top_k: an int number of top candidate detections per class
      before NMS.
    pre_nms_score_threshold: a float representing the threshold for deciding
      when to remove boxes based on score.
    nms_iou_threshold: a float representing the threshold for deciding whether
      boxes overlap too much with respect to IOU.
    max_num_detections: a scalar representing maximum number of boxes retained
      over all classes.

  Returns:
    nms_boxes: `float` Tensor of shape [batch_size, max_num_detections, 4]
      representing top detected boxes in [y1, x1, y2, x2].
    nms_scores: `float` Tensor of shape [batch_size, max_num_detections]
      representing sorted confidence scores for detected boxes. The values are
      between [0, 1].
    nms_classes: `int` Tensor of shape [batch_size, max_num_detections]
      representing classes for detected boxes.
    valid_detections: `int` Tensor of shape [batch_size] only the top
      `valid_detections` boxes are valid detections.
  """
  with tf.name_scope('generate_detections'):
    per_class_boxes = []
    per_class_labels = []
    per_class_scores = []
    batch_size, _, num_classes_for_box, _ = boxes.get_shape().as_list()
    _, total_anchors, num_classes = scores.get_shape().as_list()

    # Selects top pre_nms_num scores and indices before NMS.
    scores, candidate_indices = _select_top_k_scores(
        scores, min(total_anchors, pre_nms_top_k))

    for class_id in range(num_classes):
      # With class-agnostic boxes (dimension of size 1) every class reuses the
      # single available slice.
      box_slice = min(num_classes_for_box - 1, class_id)
      class_boxes = boxes[:, :, box_slice, :]
      class_scores = scores[:, :, class_id]

      # Obtains pre_nms_top_k before running NMS.
      class_boxes = tf.gather(
          class_boxes, candidate_indices[:, :, class_id], batch_dims=1, axis=1)

      # Filter out scores.
      class_boxes, class_scores = box_ops.filter_boxes_by_scores(
          class_boxes, class_scores,
          min_score_threshold=pre_nms_score_threshold)

      kept_scores, kept_boxes = nms.sorted_non_max_suppression_padded(
          tf.cast(class_scores, tf.float32),
          tf.cast(class_boxes, tf.float32),
          max_num_detections,
          iou_threshold=nms_iou_threshold)
      kept_labels = tf.fill([batch_size, max_num_detections], class_id)

      per_class_boxes.append(kept_boxes)
      per_class_scores.append(kept_scores)
      per_class_labels.append(kept_labels)

  # Merge the per-class results along the detection axis, then keep the
  # overall best `max_num_detections` of every image.
  all_boxes = tf.concat(per_class_boxes, axis=1)
  all_scores = tf.concat(per_class_scores, axis=1)
  all_labels = tf.concat(per_class_labels, axis=1)

  top_scores, top_indices = tf.nn.top_k(
      all_scores, k=max_num_detections, sorted=True)
  top_boxes = tf.gather(all_boxes, top_indices, batch_dims=1, axis=1)
  top_labels = tf.gather(all_labels, top_indices, batch_dims=1)

  # A detection counts as valid when its score is greater than -1.
  valid_detections = tf.reduce_sum(
      input_tensor=tf.cast(tf.greater(top_scores, -1), tf.int32), axis=1)
  return top_boxes, top_scores, top_labels, valid_detections
def _generate_detections_batched(boxes,
                                 scores,
                                 pre_nms_score_threshold,
                                 nms_iou_threshold,
                                 max_num_detections):
  """Generates detected boxes with scores and classes for one-stage detector.

  The function takes output of multi-level ConvNets and anchor boxes and
  generates detected boxes. Note that this used batched nms, which is not
  supported on TPU currently.

  Args:
    boxes: a tensor with shape [batch_size, N, num_classes, 4] or
      [batch_size, N, 1, 4], which box predictions on all feature levels. The N
      is the number of total anchors on all levels.
    scores: a tensor with shape [batch_size, N, num_classes], which
      stacks class probability on all feature levels. The N is the number of
      total anchors on all levels. The num_classes is the number of classes
      predicted by the model. Note that the class_outputs here is the raw score.
    pre_nms_score_threshold: a float representing the threshold for deciding
      when to remove boxes based on score.
    nms_iou_threshold: a float representing the threshold for deciding whether
      boxes overlap too much with respect to IOU.
    max_num_detections: a scalar representing maximum number of boxes retained
      over all classes.

  Returns:
    nms_boxes: `float` Tensor of shape [batch_size, max_num_detections, 4]
      representing top detected boxes in [y1, x1, y2, x2].
    nms_scores: `float` Tensor of shape [batch_size, max_num_detections]
      representing sorted confidence scores for detected boxes. The values are
      between [0, 1].
    nms_classes: `int` Tensor of shape [batch_size, max_num_detections]
      representing classes for detected boxes.
    valid_detections: `int` Tensor of shape [batch_size] only the top
      `valid_detections` boxes are valid detections.
  """
  with tf.name_scope('generate_detections'):
    # `max_num_detections` bounds both the per-class and the total output size.
    combined = tf.image.combined_non_max_suppression(
        boxes,
        scores,
        max_output_size_per_class=max_num_detections,
        max_total_size=max_num_detections,
        iou_threshold=nms_iou_threshold,
        score_threshold=pre_nms_score_threshold,
        pad_per_class=False,
        clip_boxes=False)
    out_boxes, out_scores, out_classes, num_valid = combined
    return out_boxes, out_scores, out_classes, num_valid
@tf.keras.utils.register_keras_serializable(package='Vision')
class DetectionGenerator(tf.keras.layers.Layer):
  """Generates the final detected boxes with scores and classes."""

  def __init__(self,
               apply_nms=True,
               pre_nms_top_k=5000,
               pre_nms_score_threshold=0.05,
               nms_iou_threshold=0.5,
               max_num_detections=100,
               use_batched_nms=False,
               **kwargs):
    """Initializes a detection generator.

    Args:
      apply_nms: bool, whether or not apply non maximum suppression. If False,
        the decoded boxes and their scores are returned.
      pre_nms_top_k: int, the number of top scores proposals to be kept before
        applying NMS. Only used when `use_batched_nms` is False.
      pre_nms_score_threshold: float, the score threshold to apply before
        applying NMS. Proposals whose scores are below this threshold are
        thrown away.
      nms_iou_threshold: float in [0, 1], the NMS IoU threshold.
      max_num_detections: int, the final number of total detections to generate.
      use_batched_nms: bool, whether or not use
        `tf.image.combined_non_max_suppression`.
      **kwargs: other key word arguments passed to Layer.
    """
    self._config_dict = {
        'apply_nms': apply_nms,
        'pre_nms_top_k': pre_nms_top_k,
        'pre_nms_score_threshold': pre_nms_score_threshold,
        'nms_iou_threshold': nms_iou_threshold,
        'max_num_detections': max_num_detections,
        'use_batched_nms': use_batched_nms,
    }
    super(DetectionGenerator, self).__init__(**kwargs)

  # NOTE: this overrides `__call__` itself rather than implementing `call`, so
  # the base `Layer.__call__` wrapper is not invoked for this layer.
  def __call__(self, raw_boxes, raw_scores, anchor_boxes, image_shape):
    """Generate final detections.

    Args:
      raw_boxes: a tensor of shape of [batch_size, K, num_classes * 4]
        representing the class-specific box coordinates relative to anchors.
      raw_scores: a tensor of shape of [batch_size, K, num_classes]
        representing the class logits before applying score activation. Class
        0 is treated as background and dropped.
      anchor_boxes: a tensor of shape of [batch_size, K, 4] representing the
        corresponding anchor boxes w.r.t `raw_boxes`.
      image_shape: a tensor of shape of [batch_size, 2] storing the image height
        and width w.r.t. the scaled image, i.e. the same image space as
        `raw_boxes` and `anchor_boxes`.

    Returns:
      If `apply_nms` = True, the return is a dictionary with keys:
        `detection_boxes`: float Tensor of shape [batch, max_num_detections, 4]
          representing top detected boxes in [y1, x1, y2, x2].
        `detection_scores`: float Tensor of shape [batch, max_num_detections]
          representing sorted confidence scores for detected boxes. The values
          are between [0, 1].
        `detection_classes`: int Tensor of shape [batch, max_num_detections]
          representing classes for detected boxes (offset by 1 so that 0 stays
          reserved for background).
        `num_detections`: int Tensor of shape [batch] only the first
          `num_detections` boxes are valid detections
      If `apply_nms` = False, the return is a dictionary with keys:
        `decoded_boxes`: float Tensor of shape
          [batch, K, num_classes - 1, 4] representing all the decoded and
          clipped boxes, background class excluded.
        `decoded_box_scores`: float Tensor of shape [batch, K, num_classes - 1]
          representing the softmax scores of all the decoded boxes, background
          class excluded.
    """
    box_scores = tf.nn.softmax(raw_scores, axis=-1)

    # Removes the background class.
    box_scores_shape = tf.shape(box_scores)
    batch_size = box_scores_shape[0]
    num_locations = box_scores_shape[1]
    num_classes = box_scores_shape[-1]
    # Number of (location, foreground class) pairs once flattened.
    num_detections = num_locations * (num_classes - 1)

    box_scores = tf.slice(box_scores, [0, 0, 1], [-1, -1, -1])
    raw_boxes = tf.reshape(
        raw_boxes,
        tf.stack([batch_size, num_locations, num_classes, 4], axis=-1))
    raw_boxes = tf.slice(raw_boxes, [0, 0, 1, 0], [-1, -1, -1, -1])
    # Every foreground class at a location shares that location's anchor.
    anchor_boxes = tf.tile(
        tf.expand_dims(anchor_boxes, axis=2), [1, 1, num_classes - 1, 1])
    raw_boxes = tf.reshape(
        raw_boxes, tf.stack([batch_size, num_detections, 4], axis=-1))
    anchor_boxes = tf.reshape(
        anchor_boxes, tf.stack([batch_size, num_detections, 4], axis=-1))

    # Box decoding.
    decoded_boxes = box_ops.decode_boxes(
        raw_boxes, anchor_boxes, weights=[10.0, 10.0, 5.0, 5.0])

    # Box clipping.
    decoded_boxes = box_ops.clip_boxes(
        decoded_boxes, tf.expand_dims(image_shape, axis=1))

    # Restores the per-class axis that was flattened for decoding.
    decoded_boxes = tf.reshape(
        decoded_boxes,
        tf.stack([batch_size, num_locations, num_classes - 1, 4], axis=-1))

    if not self._config_dict['apply_nms']:
      return {
          'decoded_boxes': decoded_boxes,
          'decoded_box_scores': box_scores,
      }

    if self._config_dict['use_batched_nms']:
      nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections = (
          _generate_detections_batched(
              decoded_boxes,
              box_scores,
              self._config_dict['pre_nms_score_threshold'],
              self._config_dict['nms_iou_threshold'],
              self._config_dict['max_num_detections']))
    else:
      nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections = (
          _generate_detections_v2(
              decoded_boxes,
              box_scores,
              self._config_dict['pre_nms_top_k'],
              self._config_dict['pre_nms_score_threshold'],
              self._config_dict['nms_iou_threshold'],
              self._config_dict['max_num_detections']))

    # Adds 1 to offset the background class which has index 0.
    nmsed_classes += 1

    return {
        'num_detections': valid_detections,
        'detection_boxes': nmsed_boxes,
        'detection_classes': nmsed_classes,
        'detection_scores': nmsed_scores,
    }

  def get_config(self):
    """Returns the constructor arguments as a new dict.

    A copy is returned so that callers mutating the result cannot silently
    change the configuration this layer reads in `__call__`.
    """
    return dict(self._config_dict)

  @classmethod
  def from_config(cls, config):
    """Builds a layer from a dict produced by `get_config`."""
    return cls(**config)
@tf.keras.utils.register_keras_serializable(package='Vision')
class MultilevelDetectionGenerator(tf.keras.layers.Layer):
  """Generates detected boxes with scores and classes for one-stage detector."""

  def __init__(self,
               apply_nms=True,
               pre_nms_top_k=5000,
               pre_nms_score_threshold=0.05,
               nms_iou_threshold=0.5,
               max_num_detections=100,
               use_batched_nms=False,
               **kwargs):
    """Initializes a detection generator.

    Args:
      apply_nms: bool, whether or not apply non maximum suppression. If False,
        the decoded boxes and their scores are returned.
      pre_nms_top_k: int, the number of top scores proposals to be kept before
        applying NMS. Only used when `use_batched_nms` is False.
      pre_nms_score_threshold: float, the score threshold to apply before
        applying NMS. Proposals whose scores are below this threshold are
        thrown away.
      nms_iou_threshold: float in [0, 1], the NMS IoU threshold.
      max_num_detections: int, the final number of total detections to generate.
      use_batched_nms: bool, whether or not use
        `tf.image.combined_non_max_suppression`.
      **kwargs: other key word arguments passed to Layer.
    """
    self._config_dict = {
        'apply_nms': apply_nms,
        'pre_nms_top_k': pre_nms_top_k,
        'pre_nms_score_threshold': pre_nms_score_threshold,
        'nms_iou_threshold': nms_iou_threshold,
        'max_num_detections': max_num_detections,
        'use_batched_nms': use_batched_nms,
    }
    super(MultilevelDetectionGenerator, self).__init__(**kwargs)

  # NOTE: this overrides `__call__` itself rather than implementing `call`, so
  # the base `Layer.__call__` wrapper is not invoked for this layer.
  def __call__(self, raw_boxes, raw_scores, anchor_boxes, image_shape):
    """Generate final detections.

    Every integer level between the smallest and largest key of `raw_boxes`
    (inclusive) is looked up in `raw_boxes`, `raw_scores` and `anchor_boxes`.

    Args:
      raw_boxes: a dict with integer keys representing FPN levels and values
        representing box tensors of shape
        [batch, feature_h, feature_w, num_anchors * 4].
      raw_scores: a dict with integer keys representing FPN levels and values
        representing logit tensors of shape
        [batch, feature_h, feature_w, num_anchors * num_classes]. Class 0 is
        treated as background and dropped.
      anchor_boxes: a per-level container indexed by the same integer levels
        (presumably a dict; confirm against the caller). Each entry is
        reshaped to [batch_size, -1, 4] and must line up with the flattened
        boxes of that level.
      image_shape: a tensor of shape of [batch_size, 2] storing the image height
        and width w.r.t. the scaled image, i.e. the same image space as
        `raw_boxes` and `anchor_boxes`.

    Returns:
      If `apply_nms` = True, the return is a dictionary with keys:
        `detection_boxes`: float Tensor of shape [batch, max_num_detections, 4]
          representing top detected boxes in [y1, x1, y2, x2].
        `detection_scores`: float Tensor of shape [batch, max_num_detections]
          representing sorted confidence scores for detected boxes. The values
          are between [0, 1].
        `detection_classes`: int Tensor of shape [batch, max_num_detections]
          representing classes for detected boxes (offset by 1 so that 0 stays
          reserved for background).
        `num_detections`: int Tensor of shape [batch] only the first
          `num_detections` boxes are valid detections
      If `apply_nms` = False, the return is a dictionary with keys:
        `decoded_boxes`: float Tensor of shape [batch, N, 1, 4] representing
          all the decoded and clipped boxes, where N is the total number of
          anchors over all levels.
        `decoded_box_scores`: float Tensor of shape [batch, N, num_classes - 1]
          representing the sigmoid scores of all the decoded boxes, background
          class excluded.
    """
    # Collects outputs from all levels into a list.
    boxes = []
    scores = []
    levels = list(raw_boxes.keys())
    min_level = min(levels)
    max_level = max(levels)
    for i in range(min_level, max_level + 1):
      raw_boxes_i_shape = tf.shape(raw_boxes[i])
      batch_size = raw_boxes_i_shape[0]
      num_anchors_per_locations = raw_boxes_i_shape[-1] // 4
      num_classes = tf.shape(raw_scores[i])[-1] // num_anchors_per_locations

      # Applies score transformation and remove the implicit background class.
      scores_i = tf.sigmoid(
          tf.reshape(raw_scores[i], [batch_size, -1, num_classes]))
      scores_i = tf.slice(scores_i, [0, 0, 1], [-1, -1, -1])

      # Box decoding.
      # The anchor boxes are shared for all data in a batch.
      # One stage detector only supports class agnostic box regression.
      anchor_boxes_i = tf.reshape(anchor_boxes[i], [batch_size, -1, 4])
      raw_boxes_i = tf.reshape(raw_boxes[i], [batch_size, -1, 4])
      boxes_i = box_ops.decode_boxes(raw_boxes_i, anchor_boxes_i)

      # Box clipping.
      boxes_i = box_ops.clip_boxes(
          boxes_i, tf.expand_dims(image_shape, axis=1))

      boxes.append(boxes_i)
      scores.append(scores_i)
    boxes = tf.concat(boxes, axis=1)
    # Inserts a singleton class axis: boxes are class agnostic.
    boxes = tf.expand_dims(boxes, axis=2)
    scores = tf.concat(scores, axis=1)

    if not self._config_dict['apply_nms']:
      return {
          'decoded_boxes': boxes,
          'decoded_box_scores': scores,
      }

    if self._config_dict['use_batched_nms']:
      nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections = (
          _generate_detections_batched(
              boxes,
              scores,
              self._config_dict['pre_nms_score_threshold'],
              self._config_dict['nms_iou_threshold'],
              self._config_dict['max_num_detections']))
    else:
      nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections = (
          _generate_detections_v2(
              boxes,
              scores,
              self._config_dict['pre_nms_top_k'],
              self._config_dict['pre_nms_score_threshold'],
              self._config_dict['nms_iou_threshold'],
              self._config_dict['max_num_detections']))

    # Adds 1 to offset the background class which has index 0.
    nmsed_classes += 1

    return {
        'num_detections': valid_detections,
        'detection_boxes': nmsed_boxes,
        'detection_classes': nmsed_classes,
        'detection_scores': nmsed_scores,
    }

  def get_config(self):
    """Returns the constructor arguments as a new dict.

    A copy is returned so that callers mutating the result cannot silently
    change the configuration this layer reads in `__call__`.
    """
    return dict(self._config_dict)

  @classmethod
  def from_config(cls, config):
    """Builds a layer from a dict produced by `get_config`."""
    return cls(**config)
official/vision/beta/modeling/layers/detection_generator_test.py
0 → 100644
View file @
cc748b2a
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for detection_generator.py."""
# Import libraries
from
absl.testing
import
parameterized
import
numpy
as
np
import
tensorflow
as
tf
from
official.vision.beta.modeling.layers
import
detection_generator
from
official.vision.beta.ops
import
anchor
class SelectTopKScoresTest(tf.test.TestCase):
  """Checks `_select_top_k_scores` on a small hand-written score tensor."""

  def testSelectTopKScores(self):
    # One image, four candidate boxes, two classes.
    scores_in = tf.constant(
        [[[0.2, 0.2], [0.1, 0.9], [0.5, 0.1], [0.3, 0.5]]], dtype=tf.float32)

    selected_scores, selected_indices = (
        detection_generator._select_top_k_scores(
            scores_in, pre_nms_num_detections=2))

    # Per class column: the two highest scores and the boxes they came from.
    want_scores = np.array([[[0.5, 0.9], [0.3, 0.5]]], dtype=np.float32)
    want_indices = [[[2, 1], [3, 3]]]
    self.assertAllEqual(selected_scores.numpy(), want_scores)
    self.assertAllEqual(selected_indices.numpy(), want_indices)
class DetectionGeneratorTest(parameterized.TestCase, tf.test.TestCase):
  """Shape and serialization tests for `DetectionGenerator`."""

  @parameterized.parameters(True, False)
  def testDetectionsOutputShape(self, use_batched_nms):
    batch_size = 1
    num_locations = 84
    num_classes = 4
    max_num_detections = 100

    generator = detection_generator.DetectionGenerator(
        apply_nms=True,
        pre_nms_top_k=5000,
        pre_nms_score_threshold=0.01,
        nms_iou_threshold=0.5,
        max_num_detections=max_num_detections,
        use_batched_nms=use_batched_nms)

    def as_batched_tensor(values, depth):
      """Converts a [num_locations, depth] array to a [1, 84, depth] tensor."""
      return tf.reshape(
          tf.convert_to_tensor(values, dtype=tf.float32),
          [1, num_locations, depth])

    # Random logits in [-1.5, 1.5) for 84 locations x 4 classes.
    class_logits = (np.random.rand(num_locations, num_classes) - 0.5) * 3
    # Random class-specific box encodings for the 84 locations.
    box_encodings = np.random.rand(num_locations, 4 * num_classes)
    # Random anchors, one per location.
    anchors = np.random.rand(num_locations, 4)

    image_info = tf.constant(
        [[[1000, 1000], [100, 100], [0.1, 0.1], [0, 0]]], dtype=tf.float32)
    # Row 1 of `image_info` is handed to the generator as the image shape.
    image_shape = image_info[:, 1, :]

    results = generator(
        as_batched_tensor(box_encodings, 4 * num_classes),
        as_batched_tensor(class_logits, num_classes),
        as_batched_tensor(anchors, 4),
        image_shape)

    want_shapes = (
        ('detection_boxes', (batch_size, max_num_detections, 4)),
        ('detection_scores', (batch_size, max_num_detections,)),
        ('detection_classes', (batch_size, max_num_detections,)),
        ('num_detections', (batch_size,)),
    )
    for key, want_shape in want_shapes:
      self.assertEqual(results[key].numpy().shape, want_shape)

  def test_serialize_deserialize(self):
    init_args = dict(
        apply_nms=True,
        pre_nms_top_k=1000,
        pre_nms_score_threshold=0.1,
        nms_iou_threshold=0.5,
        max_num_detections=10,
        use_batched_nms=False,
    )
    generator = detection_generator.DetectionGenerator(**init_args)

    # The config must be exactly the constructor arguments.
    self.assertEqual(generator.get_config(), dict(init_args))

    # A layer rebuilt from its config must report the same config.
    restored = detection_generator.DetectionGenerator.from_config(
        generator.get_config())
    self.assertAllEqual(generator.get_config(), restored.get_config())
class MultilevelDetectionGeneratorTest(parameterized.TestCase,
                                       tf.test.TestCase):
  """Shape and serialization tests for `MultilevelDetectionGenerator`."""

  @parameterized.parameters(True, False)
  def testDetectionsOutputShape(self, use_batched_nms):
    batch_size = 1
    num_classes = 4
    max_num_detections = 100

    generator_args = dict(
        apply_nms=True,
        pre_nms_top_k=5000,
        pre_nms_score_threshold=0.01,
        nms_iou_threshold=0.5,
        max_num_detections=max_num_detections,
        use_batched_nms=use_batched_nms,
    )

    # Anchors for levels 4..6, two scales and two aspect ratios, 64x64 image.
    anchor_generator = anchor.build_anchor_generator(4, 6, 2, [1.0, 2.0], 2.0)
    anchor_boxes = anchor_generator([64, 64])

    # Random logits in [-1.5, 1.5) for 84 rows x 4 classes.
    all_class_logits = (np.random.rand(84, num_classes) - 0.5) * 3
    # Random box encodings for the same 84 rows.
    all_box_encodings = np.random.rand(84, 4)

    # (level, first row, end row, feature map side) for slicing the 84 rows.
    level_layout = (
        (4, 0, 64, 8),
        (5, 64, 80, 4),
        (6, 80, 84, 2),
    )

    def split_by_level(values, depth):
      """Slices `values` per level into [1, side, side, depth] tensors."""
      return {
          level: tf.reshape(
              tf.convert_to_tensor(values[begin:end], dtype=tf.float32),
              [1, side, side, depth])
          for level, begin, end, side in level_layout
      }

    class_outputs = split_by_level(all_class_logits, num_classes)
    box_outputs = split_by_level(all_box_encodings, 4)

    image_info = tf.constant(
        [[[1000, 1000], [100, 100], [0.1, 0.1], [0, 0]]], dtype=tf.float32)
    generator = detection_generator.MultilevelDetectionGenerator(
        **generator_args)
    # Row 1 of `image_info` is handed to the generator as the image shape.
    results = generator(
        box_outputs, class_outputs, anchor_boxes, image_info[:, 1, :])

    want_shapes = (
        ('detection_boxes', (batch_size, max_num_detections, 4)),
        ('detection_scores', (batch_size, max_num_detections,)),
        ('detection_classes', (batch_size, max_num_detections,)),
        ('num_detections', (batch_size,)),
    )
    for key, want_shape in want_shapes:
      self.assertEqual(results[key].numpy().shape, want_shape)

  def test_serialize_deserialize(self):
    init_args = dict(
        apply_nms=True,
        pre_nms_top_k=1000,
        pre_nms_score_threshold=0.1,
        nms_iou_threshold=0.5,
        max_num_detections=10,
        use_batched_nms=False,
    )
    generator = detection_generator.MultilevelDetectionGenerator(**init_args)

    # The config must be exactly the constructor arguments.
    self.assertEqual(generator.get_config(), dict(init_args))

    # A layer rebuilt from its config must report the same config.
    restored = detection_generator.MultilevelDetectionGenerator.from_config(
        generator.get_config())
    self.assertAllEqual(generator.get_config(), restored.get_config())
# Invokes the TensorFlow test runner when this file is executed as a script.
if __name__ == '__main__':
  tf.test.main()
official/vision/beta/modeling/layers/mask_sampler.py
0 → 100644
View file @
cc748b2a
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Mask sampler."""
# Import libraries
import
tensorflow
as
tf
from
official.vision.beta.ops
import
spatial_transform_ops
def _sample_and_crop_foreground_masks(candidate_rois,
                                      candidate_gt_boxes,
                                      candidate_gt_classes,
                                      candidate_gt_indices,
                                      gt_masks,
                                      num_sampled_masks=128,
                                      mask_target_size=28):
  """Picks foreground RoIs and crops their groundtruth masks for training.

  Args:
    candidate_rois: a tensor of shape of [batch_size, N, 4], where N is the
      number of candidate RoIs to be considered for mask sampling. It includes
      both positive and negative RoIs. `num_sampled_masks` of them are selected
      to create mask training targets.
    candidate_gt_boxes: a tensor of shape of [batch_size, N, 4], storing the
      corresponding groundtruth boxes to the `candidate_rois`.
    candidate_gt_classes: a tensor of shape of [batch_size, N], storing the
      corresponding groundtruth classes to the `candidate_rois`. 0 in the tensor
      corresponds to the background class, i.e. negative RoIs.
    candidate_gt_indices: a tensor of shape [batch_size, N], storing the
      corresponding groundtruth instance indices to the `candidate_gt_boxes`,
      i.e. gt_boxes[candidate_gt_indices[:, i]] = candidate_gt_boxes[:, i],
      where gt_boxes, of shape [batch_size, MAX_INSTANCES, 4], is the superset
      of candidate_gt_boxes. Entries equal to -1 are read as index 0.
    gt_masks: a tensor of [batch_size, MAX_INSTANCES, mask_height, mask_width]
      containing all the groundtruth masks which sample masks are drawn from.
    num_sampled_masks: an integer which specifies the number of masks
      to sample.
    mask_target_size: an integer which specifies the final cropped mask size
      after sampling. The output masks are resized w.r.t the sampled RoIs.

  Returns:
    foreground_rois: a tensor of shape of [batch_size, K, 4] storing the RoI
      that corresponds to the sampled foreground masks, where
      K = num_sampled_masks.
    foreground_classes: a tensor of shape of [batch_size, K] storing the classes
      corresponding to the sampled foreground masks.
    cropped_foreground_masks: a tensor of shape of
      [batch_size, K, mask_target_size, mask_target_size] storing the cropped
      foreground masks used for training.
  """

  def _with_batch_coordinate(per_image_indices):
    """Pairs each [batch, K] index with its batch id for `tf.gather_nd`."""
    dims = tf.shape(per_image_indices)
    batch_ids = (
        tf.expand_dims(tf.range(dims[0]), axis=-1) *
        tf.ones([1, dims[-1]], dtype=tf.int32))
    return tf.stack([batch_ids, per_image_indices], axis=-1)

  # Ranks a 0/1 foreground indicator, so positive RoIs are selected ahead of
  # background ones; only the selected positions are needed.
  is_foreground = tf.cast(tf.greater(candidate_gt_classes, 0), dtype=tf.int32)
  _, sampled_positions = tf.nn.top_k(is_foreground, k=num_sampled_masks)
  roi_coordinates = _with_batch_coordinate(sampled_positions)

  foreground_rois = tf.gather_nd(candidate_rois, roi_coordinates)
  foreground_boxes = tf.gather_nd(candidate_gt_boxes, roi_coordinates)
  foreground_classes = tf.gather_nd(candidate_gt_classes, roi_coordinates)
  sampled_gt_indices = tf.gather_nd(candidate_gt_indices, roi_coordinates)

  # Redirects the -1 placeholder to instance 0 so the mask gather stays valid.
  sampled_gt_indices = tf.where(
      tf.equal(sampled_gt_indices, -1),
      tf.zeros_like(sampled_gt_indices),
      sampled_gt_indices)
  foreground_masks = tf.gather_nd(
      gt_masks, _with_batch_coordinate(sampled_gt_indices))

  cropped_foreground_masks = spatial_transform_ops.crop_mask_in_target_box(
      foreground_masks,
      foreground_boxes,
      foreground_rois,
      mask_target_size,
      sample_offset=0.5)

  return foreground_rois, foreground_classes, cropped_foreground_masks
@tf.keras.utils.register_keras_serializable(package='Vision')
class MaskSampler(tf.keras.layers.Layer):
  """Samples and creates mask training targets."""

  def __init__(self, mask_target_size, num_sampled_masks, **kwargs):
    """Initializes a mask sampler.

    Args:
      mask_target_size: an integer, forwarded as `mask_target_size` to
        `_sample_and_crop_foreground_masks`; the side length of the cropped
        output masks.
      num_sampled_masks: an integer, forwarded as `num_sampled_masks` to
        `_sample_and_crop_foreground_masks`; the number of masks to sample.
      **kwargs: other key word arguments passed to Layer.
    """
    self._config_dict = {
        'mask_target_size': mask_target_size,
        'num_sampled_masks': num_sampled_masks,
    }
    super(MaskSampler, self).__init__(**kwargs)

  def call(self, candidate_rois, candidate_gt_boxes, candidate_gt_classes,
           candidate_gt_indices, gt_masks):
    """Sample and create mask targets for training.

    Args:
      candidate_rois: a tensor of shape of [batch_size, N, 4], where N is the
        number of candidate RoIs to be considered for mask sampling. It includes
        both positive and negative RoIs. `num_sampled_masks` of them are
        selected to create mask training targets.
      candidate_gt_boxes: a tensor of shape of [batch_size, N, 4], storing the
        corresponding groundtruth boxes to the `candidate_rois`.
      candidate_gt_classes: a tensor of shape of [batch_size, N], storing the
        corresponding groundtruth classes to the `candidate_rois`. 0 in the
        tensor corresponds to the background class, i.e. negative RoIs.
      candidate_gt_indices: a tensor of shape [batch_size, N], storing the
        corresponding groundtruth instance indices to the `candidate_gt_boxes`,
        i.e. gt_boxes[candidate_gt_indices[:, i]] = candidate_gt_boxes[:, i],
        where gt_boxes, of shape [batch_size, MAX_INSTANCES, 4], is the
        superset of candidate_gt_boxes.
      gt_masks: a tensor of [batch_size, MAX_INSTANCES, mask_height, mask_width]
        containing all the groundtruth masks which sample masks are drawn from.

    Returns:
      foreground_rois: a tensor of shape of [batch_size, K, 4] storing the RoI
        that corresponds to the sampled foreground masks, where
        K = num_sampled_masks.
      foreground_classes: a tensor of shape of [batch_size, K] storing the
        classes corresponding to the sampled foreground masks.
      cropped_foreground_masks: a tensor of shape of
        [batch_size, K, mask_target_size, mask_target_size] storing the
        cropped foreground masks used for training.
    """
    foreground_rois, foreground_classes, cropped_foreground_masks = (
        _sample_and_crop_foreground_masks(
            candidate_rois,
            candidate_gt_boxes,
            candidate_gt_classes,
            candidate_gt_indices,
            gt_masks,
            self._config_dict['num_sampled_masks'],
            self._config_dict['mask_target_size']))
    return foreground_rois, foreground_classes, cropped_foreground_masks

  def get_config(self):
    """Returns the constructor arguments as a new dict.

    A copy is returned so that callers mutating the result cannot silently
    change the configuration this layer reads in `call`.
    """
    return dict(self._config_dict)

  @classmethod
  def from_config(cls, config):
    """Builds a layer from a dict produced by `get_config`."""
    return cls(**config)
official/vision/beta/modeling/layers/mask_sampler_test.py
0 → 100644
View file @
cc748b2a
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for mask_sampler.py."""
# Import libraries
import
numpy
as
np
import
tensorflow
as
tf
from
official.vision.beta.modeling.layers
import
mask_sampler
class SampleAndCropForegroundMasksTest(tf.test.TestCase):
  """Compares `_sample_and_crop_foreground_masks` output shapes on TPU/CPU."""

  def test_sample_and_crop_foreground_masks(self):
    num_mask_samples_per_image = 2
    mask_target_size = 28

    # One image with four candidate RoIs; RoIs 0 and 3 are foreground.
    candidate_rois = tf.constant(
        np.array([[[0, 0, 0.5, 0.5], [0.5, 0.5, 1, 1], [2, 2, 4, 4],
                   [1, 1, 5, 5]]]),
        dtype=tf.float32)
    candidate_gt_boxes = tf.constant(
        np.array([[[0, 0, 0.6, 0.6], [0, 0, 0, 0], [1, 1, 3, 3],
                   [1, 1, 3, 3]]]),
        dtype=tf.float32)
    candidate_gt_classes = tf.constant(
        np.array([[4, 0, 0, 2]]), dtype=tf.float32)
    candidate_gt_indices = tf.constant(
        np.array([[10, -1, -1, 20]]), dtype=tf.int32)
    gt_masks = tf.constant(np.random.rand(1, 100, 32, 32), dtype=tf.float32)

    def sample_as_numpy():
      """Runs the sampler once and returns its three outputs as arrays."""
      rois, classes, masks = mask_sampler._sample_and_crop_foreground_masks(
          candidate_rois, candidate_gt_boxes, candidate_gt_classes,
          candidate_gt_indices, gt_masks, num_mask_samples_per_image,
          mask_target_size)
      return rois.numpy(), classes.numpy(), masks.numpy()

    # Runs on TPU.
    strategy = tf.distribute.experimental.TPUStrategy()
    with strategy.scope():
      rois_tpu, classes_tpu, masks_tpu = sample_as_numpy()

    # Runs again outside the strategy scope.
    rois_cpu, classes_cpu, masks_cpu = sample_as_numpy()

    # Consistency between the two runs.
    self.assertAllEqual(rois_tpu.shape, rois_cpu.shape)
    self.assertAllEqual(classes_tpu.shape, classes_cpu.shape)
    self.assertAllEqual(masks_tpu.shape, masks_cpu.shape)

    # Correctness of the output shapes.
    self.assertAllEqual(rois_tpu.shape, [1, 2, 4])
    self.assertAllEqual(classes_tpu.shape, [1, 2])
    self.assertAllEqual(masks_tpu.shape, [1, 2, 28, 28])
class MaskSamplerTest(tf.test.TestCase):
  """Shape and serialization tests for the `MaskSampler` layer."""

  def test_mask_sampler(self):
    # One image with four candidate RoIs; RoIs 0 and 3 are foreground.
    candidate_rois = tf.constant(
        np.array([[[0, 0, 0.5, 0.5], [0.5, 0.5, 1, 1], [2, 2, 4, 4],
                   [1, 1, 5, 5]]]),
        dtype=tf.float32)
    candidate_gt_boxes = tf.constant(
        np.array([[[0, 0, 0.6, 0.6], [0, 0, 0, 0], [1, 1, 3, 3],
                   [1, 1, 3, 3]]]),
        dtype=tf.float32)
    candidate_gt_classes = tf.constant(
        np.array([[4, 0, 0, 2]]), dtype=tf.float32)
    candidate_gt_indices = tf.constant(
        np.array([[10, -1, -1, 20]]), dtype=tf.int32)
    gt_masks = tf.constant(np.random.rand(1, 100, 32, 32), dtype=tf.float32)

    sampler = mask_sampler.MaskSampler(
        mask_target_size=28, num_sampled_masks=2)
    outputs = sampler(candidate_rois, candidate_gt_boxes,
                      candidate_gt_classes, candidate_gt_indices, gt_masks)
    foreground_rois, foreground_classes, cropped_foreground_masks = outputs

    # Correctness of the output shapes.
    self.assertAllEqual(foreground_rois.numpy().shape, [1, 2, 4])
    self.assertAllEqual(foreground_classes.numpy().shape, [1, 2])
    self.assertAllEqual(
        cropped_foreground_masks.numpy().shape, [1, 2, 28, 28])

  def test_serialize_deserialize(self):
    init_args = {
        'mask_target_size': 7,
        'num_sampled_masks': 10,
    }
    sampler = mask_sampler.MaskSampler(**init_args)

    # The config must be exactly the constructor arguments.
    self.assertEqual(sampler.get_config(), dict(init_args))

    # A layer rebuilt from its config must report the same config.
    restored = mask_sampler.MaskSampler.from_config(sampler.get_config())
    self.assertAllEqual(sampler.get_config(), restored.get_config())
# Invokes the TensorFlow test runner when this file is executed as a script.
if __name__ == '__main__':
  tf.test.main()
official/vision/beta/modeling/layers/nn_blocks.py
0 → 100644
View file @
cc748b2a
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains common building blocks for neural networks."""
from
typing
import
Any
,
Callable
,
Dict
,
List
,
Optional
,
Tuple
,
Union
# Import libraries
import
tensorflow
as
tf
from
official.modeling
import
tf_utils
from
official.vision.beta.modeling.layers
import
nn_layers
def _pad_strides(strides: int, axis: int) -> Tuple[int, int, int, int]:
  """Expands a scalar stride to the length-4 form `tf.nn.avg_pool` takes.

  Args:
    strides: the stride to place in the two middle or two trailing slots.
    axis: when 1, the stride goes in the last two slots; for any other value
      it goes in the two middle slots.

  Returns:
    `(1, 1, strides, strides)` if `axis == 1`, else
    `(1, strides, strides, 1)`.
  """
  channels_first = axis == 1
  return (1, 1, strides, strides) if channels_first else (1, strides, strides,
                                                          1)
def _maybe_downsample(x: tf.Tensor, out_filter: int, strides: int,
                      axis: int) -> tf.Tensor:
  """Downsamples feature map and 0-pads tensor if in_filter != out_filter.

  Args:
    x: the feature map to downsample; a 4-D tensor as required by
      `tf.nn.avg_pool`.
    out_filter: the desired size of the channel dimension after padding.
    strides: the pooling window and stride applied to both pooled dimensions.
    axis: index of the channel dimension. 1 selects the 'NCHW' data format;
      any other value selects 'NHWC', where the last dimension is padded.

  Returns:
    The average-pooled tensor. If it has fewer than `out_filter` channels it is
    zero-padded along the channel dimension up to exactly `out_filter`; a
    tensor that already has at least `out_filter` channels is not padded.
  """
  data_format = 'NCHW' if axis == 1 else 'NHWC'
  strides = _pad_strides(strides, axis=axis)

  # The same length-4 tuple serves as both the pooling window and the stride.
  x = tf.nn.avg_pool(x, strides, strides, 'VALID', data_format=data_format)

  in_filter = x.shape[axis]
  if in_filter < out_filter:
    # Pad on channel dimension with 0s: half on top, the rest on bottom. An
    # odd difference puts the extra channel on the bottom so that the result
    # always has exactly `out_filter` channels.
    total_pad = out_filter - in_filter
    pad_before = total_pad // 2
    pad_size = [pad_before, total_pad - pad_before]
    if axis == 1:
      x = tf.pad(x, [[0, 0], pad_size, [0, 0], [0, 0]])
    else:
      x = tf.pad(x, [[0, 0], [0, 0], [0, 0], pad_size])

  # NOTE(review): adding 0. presumably forces a fresh tensor instead of
  # returning the input object unchanged; confirm before removing.
  return x + 0.
@tf.keras.utils.register_keras_serializable(package='Vision')
class ResidualBlock(tf.keras.layers.Layer):
  """A residual block: two 3x3 conv + norm stages added to a shortcut."""

  def __init__(self,
               filters,
               strides,
               use_projection=False,
               stochastic_depth_drop_rate=None,
               kernel_initializer='VarianceScaling',
               kernel_regularizer=None,
               bias_regularizer=None,
               activation='relu',
               use_sync_bn=False,
               norm_momentum=0.99,
               norm_epsilon=0.001,
               **kwargs):
    """A residual block with BN after convolutions.

    Args:
      filters: `int` number of filters for both 3x3 convolutions and, when
        `use_projection` is True, for the 1x1 projection shortcut.
      strides: `int` block stride, applied by the first convolution and by the
        projection shortcut. If greater than 1, this block will ultimately
        downsample the input.
      use_projection: `bool` for whether this block should use a projection
        shortcut (1x1 convolution followed by normalization) versus the
        default identity shortcut.
      stochastic_depth_drop_rate: `float` or None. If set to a non-zero value,
        drop rate for the stochastic depth layer.
      kernel_initializer: kernel_initializer for convolutional layers.
      kernel_regularizer: tf.keras.regularizers.Regularizer object for Conv2D.
        Default to None.
      bias_regularizer: tf.keras.regularizers.Regularizer object for Conv2D.
        Default to None.
      activation: `str` name of the activation function.
      use_sync_bn: if True, use synchronized batch normalization.
      norm_momentum: `float` normalization momentum for the moving average.
      norm_epsilon: `float` small float added to variance to avoid dividing by
        zero.
      **kwargs: keyword arguments to be passed.
    """
    super().__init__(**kwargs)

    # Raw constructor arguments, kept verbatim so `get_config` can echo them.
    self._filters = filters
    self._strides = strides
    self._use_projection = use_projection
    self._use_sync_bn = use_sync_bn
    self._activation = activation
    self._stochastic_depth_drop_rate = stochastic_depth_drop_rate
    self._kernel_initializer = kernel_initializer
    self._norm_momentum = norm_momentum
    self._norm_epsilon = norm_epsilon
    self._kernel_regularizer = kernel_regularizer
    self._bias_regularizer = bias_regularizer

    # Normalization layer class (not an instance); instantiated in `build`.
    self._norm = (
        tf.keras.layers.experimental.SyncBatchNormalization
        if use_sync_bn else tf.keras.layers.BatchNormalization)
    channels_last = (
        tf.keras.backend.image_data_format() == 'channels_last')
    self._bn_axis = -1 if channels_last else 1
    self._activation_fn = tf_utils.get_activation(activation)

  def build(self, input_shape):
    # Keyword arguments shared by every convolution / normalization layer.
    conv_args = dict(
        use_bias=False,
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer)
    norm_args = dict(
        axis=self._bn_axis,
        momentum=self._norm_momentum,
        epsilon=self._norm_epsilon)

    if self._use_projection:
      self._shortcut = tf.keras.layers.Conv2D(
          filters=self._filters,
          kernel_size=1,
          strides=self._strides,
          **conv_args)
      self._norm0 = self._norm(**norm_args)

    self._conv1 = tf.keras.layers.Conv2D(
        filters=self._filters,
        kernel_size=3,
        strides=self._strides,
        padding='same',
        **conv_args)
    self._norm1 = self._norm(**norm_args)

    self._conv2 = tf.keras.layers.Conv2D(
        filters=self._filters,
        kernel_size=3,
        strides=1,
        padding='same',
        **conv_args)
    self._norm2 = self._norm(**norm_args)

    self._stochastic_depth = None
    if self._stochastic_depth_drop_rate:
      self._stochastic_depth = nn_layers.StochasticDepth(
          self._stochastic_depth_drop_rate)

    super().build(input_shape)

  def get_config(self):
    """Returns the base layer config extended with the constructor arguments."""
    block_config = {
        'filters': self._filters,
        'strides': self._strides,
        'use_projection': self._use_projection,
        'stochastic_depth_drop_rate': self._stochastic_depth_drop_rate,
        'kernel_initializer': self._kernel_initializer,
        'kernel_regularizer': self._kernel_regularizer,
        'bias_regularizer': self._bias_regularizer,
        'activation': self._activation,
        'use_sync_bn': self._use_sync_bn,
        'norm_momentum': self._norm_momentum,
        'norm_epsilon': self._norm_epsilon
    }
    # Block-specific entries win over base entries on key collisions.
    return {**super().get_config(), **block_config}

  def call(self, inputs, training=None):
    residual = inputs
    if self._use_projection:
      residual = self._norm0(self._shortcut(residual))

    x = self._activation_fn(self._norm1(self._conv1(inputs)))
    x = self._norm2(self._conv2(x))

    if self._stochastic_depth:
      x = self._stochastic_depth(x, training=training)

    return self._activation_fn(x + residual)
@tf.keras.utils.register_keras_serializable(package='Vision')
class BottleneckBlock(tf.keras.layers.Layer):
  """A standard bottleneck block: 1x1, 3x3 and 1x1 convs plus a shortcut."""

  def __init__(self,
               filters,
               strides,
               use_projection=False,
               stochastic_depth_drop_rate=None,
               kernel_initializer='VarianceScaling',
               kernel_regularizer=None,
               bias_regularizer=None,
               activation='relu',
               use_sync_bn=False,
               norm_momentum=0.99,
               norm_epsilon=0.001,
               **kwargs):
    """A standard bottleneck block with BN after convolutions.

    Args:
      filters: `int` number of filters for the first two convolutions. Note
        that the third and final convolution, as well as the projection
        shortcut, use 4 times as many filters.
      strides: `int` block stride, applied by the 3x3 convolution and by the
        projection shortcut. If greater than 1, this block will ultimately
        downsample the input.
      use_projection: `bool` for whether this block should use a projection
        shortcut (versus the default identity shortcut). This is usually
        `True` for the first block of a block group, which may change the
        number of filters and the resolution.
      stochastic_depth_drop_rate: `float` or None. If set to a non-zero value,
        drop rate for the stochastic depth layer.
      kernel_initializer: kernel_initializer for convolutional layers.
      kernel_regularizer: tf.keras.regularizers.Regularizer object for Conv2D.
        Default to None.
      bias_regularizer: tf.keras.regularizers.Regularizer object for Conv2D.
        Default to None.
      activation: `str` name of the activation function.
      use_sync_bn: if True, use synchronized batch normalization.
      norm_momentum: `float` normalization momentum for the moving average.
      norm_epsilon: `float` small float added to variance to avoid dividing by
        zero.
      **kwargs: keyword arguments to be passed.
    """
    super().__init__(**kwargs)

    # Raw constructor arguments, kept verbatim so `get_config` can echo them.
    self._filters = filters
    self._strides = strides
    self._use_projection = use_projection
    self._use_sync_bn = use_sync_bn
    self._activation = activation
    self._stochastic_depth_drop_rate = stochastic_depth_drop_rate
    self._kernel_initializer = kernel_initializer
    self._norm_momentum = norm_momentum
    self._norm_epsilon = norm_epsilon
    self._kernel_regularizer = kernel_regularizer
    self._bias_regularizer = bias_regularizer

    # Normalization layer class (not an instance); instantiated in `build`.
    self._norm = (
        tf.keras.layers.experimental.SyncBatchNormalization
        if use_sync_bn else tf.keras.layers.BatchNormalization)
    channels_last = (
        tf.keras.backend.image_data_format() == 'channels_last')
    self._bn_axis = -1 if channels_last else 1
    self._activation_fn = tf_utils.get_activation(activation)

  def build(self, input_shape):
    # Keyword arguments shared by every convolution / normalization layer.
    conv_args = dict(
        use_bias=False,
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer)
    norm_args = dict(
        axis=self._bn_axis,
        momentum=self._norm_momentum,
        epsilon=self._norm_epsilon)
    expanded_filters = self._filters * 4

    if self._use_projection:
      self._shortcut = tf.keras.layers.Conv2D(
          filters=expanded_filters,
          kernel_size=1,
          strides=self._strides,
          **conv_args)
      self._norm0 = self._norm(**norm_args)

    self._conv1 = tf.keras.layers.Conv2D(
        filters=self._filters,
        kernel_size=1,
        strides=1,
        **conv_args)
    self._norm1 = self._norm(**norm_args)

    self._conv2 = tf.keras.layers.Conv2D(
        filters=self._filters,
        kernel_size=3,
        strides=self._strides,
        padding='same',
        **conv_args)
    self._norm2 = self._norm(**norm_args)

    self._conv3 = tf.keras.layers.Conv2D(
        filters=expanded_filters,
        kernel_size=1,
        strides=1,
        **conv_args)
    self._norm3 = self._norm(**norm_args)

    self._stochastic_depth = None
    if self._stochastic_depth_drop_rate:
      self._stochastic_depth = nn_layers.StochasticDepth(
          self._stochastic_depth_drop_rate)

    super().build(input_shape)

  def get_config(self):
    """Returns the base layer config extended with the constructor arguments."""
    block_config = {
        'filters': self._filters,
        'strides': self._strides,
        'use_projection': self._use_projection,
        'stochastic_depth_drop_rate': self._stochastic_depth_drop_rate,
        'kernel_initializer': self._kernel_initializer,
        'kernel_regularizer': self._kernel_regularizer,
        'bias_regularizer': self._bias_regularizer,
        'activation': self._activation,
        'use_sync_bn': self._use_sync_bn,
        'norm_momentum': self._norm_momentum,
        'norm_epsilon': self._norm_epsilon
    }
    # Block-specific entries win over base entries on key collisions.
    return {**super().get_config(), **block_config}

  def call(self, inputs, training=None):
    residual = inputs
    if self._use_projection:
      residual = self._norm0(self._shortcut(residual))

    x = self._activation_fn(self._norm1(self._conv1(inputs)))
    x = self._activation_fn(self._norm2(self._conv2(x)))
    x = self._norm3(self._conv3(x))

    if self._stochastic_depth:
      x = self._stochastic_depth(x, training=training)

    return self._activation_fn(x + residual)
@tf.keras.utils.register_keras_serializable(package='Vision')
class InvertedBottleneckBlock(tf.keras.layers.Layer):
  """An inverted bottleneck block: expand, depthwise conv, then project."""

  def __init__(self,
               in_filters,
               out_filters,
               expand_ratio,
               strides,
               kernel_size=3,
               se_ratio=None,
               stochastic_depth_drop_rate=None,
               kernel_initializer='VarianceScaling',
               kernel_regularizer=None,
               bias_regularizer=None,
               activation='relu',
               use_sync_bn=False,
               norm_momentum=0.99,
               norm_epsilon=0.001,
               **kwargs):
    """An inverted bottleneck block with BN after convolutions.

    Args:
      in_filters: `int` number of filters of the input tensor.
      out_filters: `int` number of filters of the output tensor.
      expand_ratio: `int` expand_ratio for an inverted bottleneck block. The
        initial 1x1 expansion convolution is skipped when this equals 1.
      strides: `int` block stride, applied by the depthwise convolution. If
        greater than 1, this block will ultimately downsample the input.
      kernel_size: `int` kernel_size of the depthwise conv layer.
      se_ratio: `float` or None. A squeeze and excitation layer is created
        only when this is not None and lies in the interval (0, 1].
      stochastic_depth_drop_rate: `float` or None. If set to a non-zero value,
        drop rate for the stochastic depth layer.
      kernel_initializer: kernel_initializer for convolutional layers.
      kernel_regularizer: tf.keras.regularizers.Regularizer object for Conv2D.
        Default to None.
      bias_regularizer: tf.keras.regularizers.Regularizer object for Conv2D.
        Default to None.
      activation: `str` name of the activation function.
      use_sync_bn: if True, use synchronized batch normalization.
      norm_momentum: `float` normalization momentum for the moving average.
      norm_epsilon: `float` small float added to variance to avoid dividing by
        zero.
      **kwargs: keyword arguments to be passed.
    """
    super().__init__(**kwargs)

    # Raw constructor arguments, kept verbatim so `get_config` can echo them.
    self._in_filters = in_filters
    self._out_filters = out_filters
    self._expand_ratio = expand_ratio
    self._strides = strides
    self._kernel_size = kernel_size
    self._se_ratio = se_ratio
    self._stochastic_depth_drop_rate = stochastic_depth_drop_rate
    self._use_sync_bn = use_sync_bn
    self._activation = activation
    self._kernel_initializer = kernel_initializer
    self._norm_momentum = norm_momentum
    self._norm_epsilon = norm_epsilon
    self._kernel_regularizer = kernel_regularizer
    self._bias_regularizer = bias_regularizer

    # Normalization layer class (not an instance); instantiated in `build`.
    self._norm = (
        tf.keras.layers.experimental.SyncBatchNormalization
        if use_sync_bn else tf.keras.layers.BatchNormalization)
    channels_last = (
        tf.keras.backend.image_data_format() == 'channels_last')
    self._bn_axis = -1 if channels_last else 1
    self._activation_fn = tf_utils.get_activation(activation)

  def build(self, input_shape):
    # Keyword arguments shared by the 1x1 convolutions / normalization layers.
    pointwise_args = dict(
        kernel_size=1,
        strides=1,
        use_bias=False,
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer)
    norm_args = dict(
        axis=self._bn_axis,
        momentum=self._norm_momentum,
        epsilon=self._norm_epsilon)

    if self._expand_ratio != 1:
      # First 1x1 conv for channel expansion.
      self._conv0 = tf.keras.layers.Conv2D(
          filters=self._in_filters * self._expand_ratio, **pointwise_args)
      self._norm0 = self._norm(**norm_args)

    # Depthwise conv.
    self._conv1 = tf.keras.layers.DepthwiseConv2D(
        kernel_size=(self._kernel_size, self._kernel_size),
        strides=self._strides,
        padding='same',
        use_bias=False,
        depthwise_initializer=self._kernel_initializer,
        depthwise_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer)
    self._norm1 = self._norm(**norm_args)

    # Squeeze and excitation, only for a ratio inside (0, 1].
    self._squeeze_excitation = None
    if self._se_ratio is not None and 0 < self._se_ratio <= 1:
      self._squeeze_excitation = nn_layers.SqueezeExcitation(
          in_filters=self._in_filters,
          se_ratio=self._se_ratio,
          expand_ratio=self._expand_ratio,
          kernel_initializer=self._kernel_initializer,
          kernel_regularizer=self._kernel_regularizer,
          bias_regularizer=self._bias_regularizer)

    # Last 1x1 conv.
    self._conv2 = tf.keras.layers.Conv2D(
        filters=self._out_filters, **pointwise_args)
    self._norm2 = self._norm(**norm_args)

    self._stochastic_depth = None
    if self._stochastic_depth_drop_rate:
      self._stochastic_depth = nn_layers.StochasticDepth(
          self._stochastic_depth_drop_rate)

    super().build(input_shape)

  def get_config(self):
    """Returns the base layer config extended with the constructor arguments."""
    block_config = {
        'in_filters': self._in_filters,
        'out_filters': self._out_filters,
        'expand_ratio': self._expand_ratio,
        'strides': self._strides,
        'kernel_size': self._kernel_size,
        'se_ratio': self._se_ratio,
        'stochastic_depth_drop_rate': self._stochastic_depth_drop_rate,
        'kernel_initializer': self._kernel_initializer,
        'kernel_regularizer': self._kernel_regularizer,
        'bias_regularizer': self._bias_regularizer,
        'activation': self._activation,
        'use_sync_bn': self._use_sync_bn,
        'norm_momentum': self._norm_momentum,
        'norm_epsilon': self._norm_epsilon
    }
    # Block-specific entries win over base entries on key collisions.
    return {**super().get_config(), **block_config}

  def call(self, inputs, training=None):
    x = inputs
    if self._expand_ratio != 1:
      x = self._activation_fn(self._norm0(self._conv0(x)))

    x = self._activation_fn(self._norm1(self._conv1(x)))

    if self._squeeze_excitation:
      x = self._squeeze_excitation(x)

    x = self._norm2(self._conv2(x))

    # The residual connection is only used when input and output line up:
    # same channel count and no striding.
    has_residual = (
        self._in_filters == self._out_filters and self._strides == 1)
    if not has_residual:
      return x

    if self._stochastic_depth:
      x = self._stochastic_depth(x, training=training)
    return tf.add(x, inputs)
@tf.keras.utils.register_keras_serializable(package='Vision')
class ResidualInner(tf.keras.layers.Layer):
  """Single inner block of a residual.

  This corresponds to `F`/`G` functions in the RevNet paper:
  https://arxiv.org/pdf/1707.04585.pdf
  """

  def __init__(
      self,
      filters: int,
      strides: int,
      kernel_initializer: Union[
          str, Callable[..., tf.keras.initializers.Initializer]]
      = 'VarianceScaling',
      kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      activation: Union[str, Callable[..., tf.Tensor]] = 'relu',
      use_sync_bn: bool = False,
      norm_momentum: float = 0.99,
      norm_epsilon: float = 0.001,
      batch_norm_first: bool = True,
      **kwargs):
    """ResidualInner Initialization.

    Args:
      filters: `int` output filter size of both convolutions.
      strides: `int` stride size of the first convolution; the second
        convolution always uses stride 1.
      kernel_initializer: `str` or `tf.keras.initializers.Initializer` instance
        for convolutional layers.
      kernel_regularizer: `tf.keras.regularizers.Regularizer` for Conv2D.
      activation: `str` or `callable` instance of the activation function.
      use_sync_bn: `bool` if True, use synchronized batch normalization.
      norm_momentum: `float` normalization momentum for the moving average.
      norm_epsilon: `float` small float added to variance to avoid dividing by
        zero.
      batch_norm_first: `bool` whether to apply batch norm and activation
        before the first convolution.
      **kwargs: additional keyword arguments to be passed.
    """
    super().__init__(**kwargs)

    # Public attributes: read by other layers of this module.
    self.strides = strides
    self.filters = filters

    self._kernel_initializer = tf.keras.initializers.get(kernel_initializer)
    self._kernel_regularizer = kernel_regularizer
    self._activation = tf.keras.activations.get(activation)
    self._use_sync_bn = use_sync_bn
    self._norm_momentum = norm_momentum
    self._norm_epsilon = norm_epsilon
    self._batch_norm_first = batch_norm_first

    # Normalization layer class (not an instance); instantiated in `build`.
    self._norm = (
        tf.keras.layers.experimental.SyncBatchNormalization
        if use_sync_bn else tf.keras.layers.BatchNormalization)
    channels_last = (
        tf.keras.backend.image_data_format() == 'channels_last')
    self._bn_axis = -1 if channels_last else 1
    self._activation_fn = tf_utils.get_activation(activation)

  def build(self, input_shape: tf.TensorShape):
    # Keyword arguments shared by both convolutions / all norm layers.
    conv_args = dict(
        filters=self.filters,
        kernel_size=3,
        use_bias=False,
        padding='same',
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer)
    norm_args = dict(
        axis=self._bn_axis,
        momentum=self._norm_momentum,
        epsilon=self._norm_epsilon)

    if self._batch_norm_first:
      self._batch_norm_0 = self._norm(**norm_args)

    self._conv2d_1 = tf.keras.layers.Conv2D(strides=self.strides, **conv_args)
    self._batch_norm_1 = self._norm(**norm_args)
    self._conv2d_2 = tf.keras.layers.Conv2D(strides=1, **conv_args)

    super().build(input_shape)

  def get_config(self) -> Dict[str, Any]:
    """Returns the base layer config extended with this block's settings."""
    block_config = {
        'filters': self.filters,
        'strides': self.strides,
        'kernel_initializer': self._kernel_initializer,
        'kernel_regularizer': self._kernel_regularizer,
        'activation': self._activation,
        'use_sync_bn': self._use_sync_bn,
        'norm_momentum': self._norm_momentum,
        'norm_epsilon': self._norm_epsilon,
        'batch_norm_first': self._batch_norm_first,
    }
    # Block-specific entries win over base entries on key collisions.
    return {**super().get_config(), **block_config}

  def call(self,
           inputs: tf.Tensor,
           training: Optional[bool] = None) -> tf.Tensor:
    x = inputs
    if self._batch_norm_first:
      x = self._activation_fn(self._batch_norm_0(x, training=training))
    x = self._conv2d_1(x)

    x = self._activation_fn(self._batch_norm_1(x, training=training))
    return self._conv2d_2(x)
@tf.keras.utils.register_keras_serializable(package='Vision')
class BottleneckResidualInner(tf.keras.layers.Layer):
  """Single inner block of a bottleneck residual.

  This corresponds to `F`/`G` functions in the RevNet paper:
  https://arxiv.org/pdf/1707.04585.pdf
  """

  def __init__(
      self,
      filters: int,
      strides: int,
      kernel_initializer: Union[
          str, Callable[..., tf.keras.initializers.Initializer]]
      = 'VarianceScaling',
      kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      activation: Union[str, Callable[..., tf.Tensor]] = 'relu',
      use_sync_bn: bool = False,
      norm_momentum: float = 0.99,
      norm_epsilon: float = 0.001,
      batch_norm_first: bool = True,
      **kwargs):
    """BottleneckResidualInner Initialization.

    Args:
      filters: `int` number of filters for the first 2 convolutions. The last
        convolution, and thus the number of output channels from the
        bottleneck block, is `4*filters`.
      strides: `int` stride size of the first convolution; the remaining
        convolutions always use stride 1.
      kernel_initializer: `str` or `tf.keras.initializers.Initializer` instance
        for convolutional layers.
      kernel_regularizer: `tf.keras.regularizers.Regularizer` for Conv2D.
      activation: `str` or `callable` instance of the activation function.
      use_sync_bn: `bool` if True, use synchronized batch normalization.
      norm_momentum: `float` normalization momentum for the moving average.
      norm_epsilon: `float` small float added to variance to avoid dividing by
        zero.
      batch_norm_first: `bool` whether to apply batch norm and activation
        before the first convolution.
      **kwargs: additional keyword arguments to be passed.
    """
    super().__init__(**kwargs)

    # Public attributes: read by other layers of this module.
    self.strides = strides
    self.filters = filters

    self._kernel_initializer = tf.keras.initializers.get(kernel_initializer)
    self._kernel_regularizer = kernel_regularizer
    self._activation = tf.keras.activations.get(activation)
    self._use_sync_bn = use_sync_bn
    self._norm_momentum = norm_momentum
    self._norm_epsilon = norm_epsilon
    self._batch_norm_first = batch_norm_first

    # Normalization layer class (not an instance); instantiated in `build`.
    self._norm = (
        tf.keras.layers.experimental.SyncBatchNormalization
        if use_sync_bn else tf.keras.layers.BatchNormalization)
    channels_last = (
        tf.keras.backend.image_data_format() == 'channels_last')
    self._bn_axis = -1 if channels_last else 1
    self._activation_fn = tf_utils.get_activation(activation)

  def build(self, input_shape: tf.TensorShape):
    # Keyword arguments shared by all convolutions / all norm layers.
    conv_args = dict(
        use_bias=False,
        padding='same',
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer)
    norm_args = dict(
        axis=self._bn_axis,
        momentum=self._norm_momentum,
        epsilon=self._norm_epsilon)

    if self._batch_norm_first:
      self._batch_norm_0 = self._norm(**norm_args)

    self._conv2d_1 = tf.keras.layers.Conv2D(
        filters=self.filters,
        kernel_size=1,
        strides=self.strides,
        **conv_args)
    self._batch_norm_1 = self._norm(**norm_args)

    self._conv2d_2 = tf.keras.layers.Conv2D(
        filters=self.filters,
        kernel_size=3,
        strides=1,
        **conv_args)
    self._batch_norm_2 = self._norm(**norm_args)

    self._conv2d_3 = tf.keras.layers.Conv2D(
        filters=self.filters * 4,
        kernel_size=1,
        strides=1,
        **conv_args)

    super().build(input_shape)

  def get_config(self) -> Dict[str, Any]:
    """Returns the base layer config extended with this block's settings."""
    block_config = {
        'filters': self.filters,
        'strides': self.strides,
        'kernel_initializer': self._kernel_initializer,
        'kernel_regularizer': self._kernel_regularizer,
        'activation': self._activation,
        'use_sync_bn': self._use_sync_bn,
        'norm_momentum': self._norm_momentum,
        'norm_epsilon': self._norm_epsilon,
        'batch_norm_first': self._batch_norm_first,
    }
    # Block-specific entries win over base entries on key collisions.
    return {**super().get_config(), **block_config}

  def call(self,
           inputs: tf.Tensor,
           training: Optional[bool] = None) -> tf.Tensor:
    x = inputs
    if self._batch_norm_first:
      x = self._activation_fn(self._batch_norm_0(x, training=training))
    x = self._conv2d_1(x)

    x = self._activation_fn(self._batch_norm_1(x, training=training))
    x = self._conv2d_2(x)

    x = self._activation_fn(self._batch_norm_2(x, training=training))
    return self._conv2d_3(x)
@tf.keras.utils.register_keras_serializable(package='Vision')
class ReversibleLayer(tf.keras.layers.Layer):
  """A reversible layer.

  Computes z1 = x1 + f(x2), y2 = x2 + g(z1), y1 = z1, where [x1; x2] is the
  input split in two along the channel axis and f and g are layers such as
  `ResidualInner`. The output is [y1; y2] concatenated on the channel axis.
  """

  def __init__(self,
               f: tf.keras.layers.Layer,
               g: tf.keras.layers.Layer,
               manual_grads: bool = True,
               **kwargs):
    """ReversibleLayer Initialization.

    Args:
      f: `tf.keras.layers.Layer` f inner block referred to in paper. Each
        reversible layer consists of two inner functions. For example, in RevNet
        the reversible residual consists of two f/g inner (bottleneck) residual
        functions. Where the input to the reversible layer is x, the input gets
        partitioned in the channel dimension and the forward pass follows (eq8):
        x = [x1; x2], z1 = x1 + f(x2), y2 = x2 + g(z1), y1 = stop_gradient(z1).
        The layer must expose a `strides` attribute.
      g: `tf.keras.layers.Layer` g inner block referred to in paper. Detailed
        explanation same as above as `f` arg.
      manual_grads: `bool` [Testing Only] whether to manually take gradients
        as in Algorithm 1 or defer to autograd.
      **kwargs: additional keyword arguments to be passed.
    """
    super(ReversibleLayer, self).__init__(**kwargs)

    self._f = f
    self._g = g
    self._manual_grads = manual_grads

    # Channel axis along which the input is split and the output concatenated.
    if tf.keras.backend.image_data_format() == 'channels_last':
      self._axis = -1
    else:
      self._axis = 1

  def get_config(self) -> Dict[str, Any]:
    """Returns the base layer config plus `f`, `g` and `manual_grads`."""
    config = {
        'f': self._f,
        'g': self._g,
        'manual_grads': self._manual_grads,
    }
    base_config = super(ReversibleLayer, self).get_config()
    return dict(list(base_config.items()) + list(config.items()))

  def _ckpt_non_trainable_vars(self):
    """Snapshots the current values of f/g non-trainable variables."""
    self._f_non_trainable_vars = [
        v.read_value() for v in self._f.non_trainable_variables]
    self._g_non_trainable_vars = [
        v.read_value() for v in self._g.non_trainable_variables]

  def _load_ckpt_non_trainable_vars(self):
    """Writes the last snapshot back into f/g non-trainable variables."""
    for v, v_chkpt in zip(
        self._f.non_trainable_variables, self._f_non_trainable_vars):
      v.assign(v_chkpt)
    for v, v_chkpt in zip(
        self._g.non_trainable_variables, self._g_non_trainable_vars):
      v.assign(v_chkpt)

  def call(self,
           inputs: tf.Tensor,
           training: Optional[bool] = None) -> tf.Tensor:
    """Runs the reversible forward pass with a custom gradient attached."""

    @tf.custom_gradient
    def reversible(x: tf.Tensor) -> Tuple[
        tf.Tensor,
        Callable[[Any], Tuple[List[tf.Tensor], List[tf.Tensor]]]]:
      """Implements Algorithm 1 in RevNet paper.

      Paper: https://arxiv.org/pdf/1707.04585.pdf

      Args:
        x: input tensor.

      Returns:
        y: the output [y1; y2] in algorithm 1.
        grad_fn: callable function that computes the gradients.
      """
      with tf.GradientTape() as fwdtape:
        fwdtape.watch(x)
        x1, x2 = tf.split(x, num_or_size_splits=2, axis=self._axis)
        f_x2 = self._f(x2, training=training)
        x1_down = _maybe_downsample(
            x1, f_x2.shape[self._axis], self._f.strides, self._axis)
        z1 = f_x2 + x1_down
        g_z1 = self._g(z1, training=training)
        # NOTE(review): x2 is downsampled with f's stride; this lines up with
        # g(z1) only when g itself has stride 1 -- confirm that is intended.
        x2_down = _maybe_downsample(
            x2, g_z1.shape[self._axis], self._f.strides, self._axis)
        y2 = x2_down + g_z1

        # Equation 8: https://arxiv.org/pdf/1707.04585.pdf
        # Decouple y1 and z1 so that their derivatives are different.
        y1 = tf.identity(z1)
        y = tf.concat([y1, y2], axis=self._axis)

        # Inputs cannot be reconstructed from outputs when either inner block
        # strides or the channel count changes; autograd is used in that case.
        irreversible = (
            (self._f.strides != 1 or self._g.strides != 1)
            or (y.shape[self._axis] != inputs.shape[self._axis]))

        # Checkpointing moving mean/variance for batch normalization layers
        # as they shouldn't be updated during the custom gradient pass of f/g.
        self._ckpt_non_trainable_vars()

      def grad_fn(dy: tf.Tensor,
                  variables: Optional[List[tf.Variable]] = None,
                  ) -> Tuple[List[tf.Tensor], List[tf.Tensor]]:
        """Given dy calculate (dy/dx)|_{x_{input}} using f/g."""
        # `tf.custom_gradient` omits `variables` when the forward pass touched
        # none; treat that as an empty list instead of failing on `None`.
        variables = [] if variables is None else list(variables)

        if irreversible or not self._manual_grads:
          grads_combined = fwdtape.gradient(
              y, [x] + variables, output_gradients=dy)
          dx = grads_combined[0]
          grad_vars = grads_combined[1:]
        else:
          y1_nograd = tf.stop_gradient(y1)
          y2_nograd = tf.stop_gradient(y2)
          dy1, dy2 = tf.split(dy, num_or_size_splits=2, axis=self._axis)

          # Index mapping from self.f/g.trainable_variables to grad_fn
          # input `variables` kwarg so that we can reorder dwf + dwg
          # variable gradient list to match `variables` order.
          f_var_refs = [v.ref() for v in self._f.trainable_variables]
          g_var_refs = [v.ref() for v in self._g.trainable_variables]
          fg_var_refs = f_var_refs + g_var_refs
          self_to_var_index = [fg_var_refs.index(v.ref()) for v in variables]

          # Algorithm 1 in paper (line # documented in-line)
          z1 = y1_nograd  # line 2
          with tf.GradientTape() as gtape:
            gtape.watch(z1)
            g_z1 = self._g(z1, training=training)
          x2 = y2_nograd - g_z1  # line 3

          with tf.GradientTape() as ftape:
            ftape.watch(x2)
            f_x2 = self._f(x2, training=training)
          x1 = z1 - f_x2  # pylint: disable=unused-variable      # line 4

          # Compute gradients
          g_grads_combined = gtape.gradient(
              g_z1,
              [z1] + self._g.trainable_variables,
              output_gradients=dy2)
          dz1 = dy1 + g_grads_combined[0]  # line 5
          dwg = g_grads_combined[1:]  # line 9

          f_grads_combined = ftape.gradient(
              f_x2,
              [x2] + self._f.trainable_variables,
              output_gradients=dz1)
          dx2 = dy2 + f_grads_combined[0]  # line 6
          dwf = f_grads_combined[1:]  # line 8
          dx1 = dz1  # line 7

          # Pack the input and variable gradients.
          dx = tf.concat([dx1, dx2], axis=self._axis)
          grad_vars = dwf + dwg
          # Reorder gradients (trainable_variables to variables kwarg order)
          grad_vars = [grad_vars[i] for i in self_to_var_index]

          # Restore batch normalization moving mean/variance for correctness.
          self._load_ckpt_non_trainable_vars()

        return dx, grad_vars  # grad_fn end

      return y, grad_fn  # reversible end

    activations = reversible(inputs)
    return activations
official/vision/beta/modeling/layers/nn_blocks_3d.py
0 → 100644
View file @
cc748b2a
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains common building blocks for 3D networks."""
# Import libraries
import
tensorflow
as
tf
from
official.modeling
import
tf_utils
@tf.keras.utils.register_keras_serializable(package='Vision')
class SelfGating(tf.keras.layers.Layer):
  """Feature gating as used in S3D-G (https://arxiv.org/pdf/1712.04851.pdf).

  The input is globally average-pooled, passed through a 1x1x1 convolution and
  a sigmoid, and the resulting gate is multiplied back onto the input.
  """

  def __init__(self, filters, **kwargs):
    """Constructor.

    Args:
      filters: `int` number of filters for the convolutional layer.
      **kwargs: keyword arguments to be passed.
    """
    super(SelfGating, self).__init__(**kwargs)
    self._filters = filters

  def build(self, input_shape):
    """Creates the pooling layer and the 1x1x1 gating convolution."""
    self._spatial_temporal_average = tf.keras.layers.GlobalAveragePooling3D()

    # No BN and activation after conv.
    self._transformer_w = tf.keras.layers.Conv3D(
        filters=self._filters,
        kernel_size=[1, 1, 1],
        use_bias=True,
        kernel_initializer=tf.keras.initializers.TruncatedNormal(
            mean=0.0, stddev=0.01))

    super(SelfGating, self).build(input_shape)

  def get_config(self):
    """Returns the layer config so the registered layer can be re-created."""
    config = {'filters': self._filters}
    base_config = super(SelfGating, self).get_config()
    return dict(list(base_config.items()) + list(config.items()))

  def call(self, inputs):
    """Applies the self-gating to `inputs`.

    Args:
      inputs: the 5-D feature tensor to gate.

    Returns:
      The element-wise product of the sigmoid gate and `inputs`.
    """
    x = self._spatial_temporal_average(inputs)

    # Re-insert three singleton axes at positions 1, 2 and 3 so the pooled
    # features become 5-D again and can be fed to the Conv3D.
    # NOTE(review): this placement assumes a channels-last layout -- confirm
    # before using with `channels_first`.
    x = tf.expand_dims(x, 1)
    x = tf.expand_dims(x, 2)
    x = tf.expand_dims(x, 3)

    x = self._transformer_w(x)
    x = tf.nn.sigmoid(x)

    # The gate is broadcast against the input; presumably `filters` must match
    # the number of input channels for this to be valid -- verify at callers.
    return tf.math.multiply(x, inputs)
@tf.keras.utils.register_keras_serializable(package='Vision')
class BottleneckBlock3D(tf.keras.layers.Layer):
  """A 3D bottleneck block."""

  def __init__(self,
               filters,
               temporal_kernel_size,
               temporal_strides,
               spatial_strides,
               use_self_gating=False,
               kernel_initializer='VarianceScaling',
               kernel_regularizer=None,
               bias_regularizer=None,
               activation='relu',
               use_sync_bn=False,
               norm_momentum=0.99,
               norm_epsilon=0.001,
               **kwargs):
    """A 3D bottleneck block with BN after convolutions.

    Args:
      filters: `int` number of filters for the first two convolutions. Note that
        the third and final convolution will use 4 times as many filters.
      temporal_kernel_size: `int` kernel size for the temporal convolutional
        layer.
      temporal_strides: `int` temporal stride for the temporal convolutional
        layer.
      spatial_strides: `int` spatial stride for the spatial convolutional layer.
      use_self_gating: `bool` apply self-gating module or not.
      kernel_initializer: kernel_initializer for convolutional layers.
      kernel_regularizer: tf.keras.regularizers.Regularizer object for Conv3D.
        Default to None.
      bias_regularizer: tf.keras.regularizers.Regularizer object for Conv3D.
        Default to None.
      activation: `str` name of the activation function.
      use_sync_bn: if True, use synchronized batch normalization.
      norm_momentum: `float` normalization momentum for the moving average.
      norm_epsilon: `float` small float added to variance to avoid dividing by
        zero.
      **kwargs: keyword arguments to be passed.
    """
    super(BottleneckBlock3D, self).__init__(**kwargs)

    self._filters = filters
    self._temporal_kernel_size = temporal_kernel_size
    self._spatial_strides = spatial_strides
    self._temporal_strides = temporal_strides
    self._use_self_gating = use_self_gating
    self._use_sync_bn = use_sync_bn
    self._activation = activation
    self._kernel_initializer = kernel_initializer
    self._norm_momentum = norm_momentum
    self._norm_epsilon = norm_epsilon
    self._kernel_regularizer = kernel_regularizer
    self._bias_regularizer = bias_regularizer

    # Keep the normalization *class*; instances are created in `build`.
    if use_sync_bn:
      self._norm = tf.keras.layers.experimental.SyncBatchNormalization
    else:
      self._norm = tf.keras.layers.BatchNormalization
    if tf.keras.backend.image_data_format() == 'channels_last':
      self._bn_axis = -1
    else:
      self._bn_axis = 1
    self._activation_fn = tf_utils.get_activation(activation)

  def build(self, input_shape):
    """Creates the shortcut, temporal, spatial and expansion sub-layers."""
    # Shortcut used when only the resolution changes: a 1x1x1 max-pool that
    # subsamples with the block's strides.
    self._shortcut_maxpool = tf.keras.layers.MaxPool3D(
        pool_size=[1, 1, 1],
        strides=[
            self._temporal_strides, self._spatial_strides,
            self._spatial_strides
        ])

    # Shortcut used when the channel count changes: strided 1x1x1 projection
    # followed by `_norm0`.
    self._shortcut_conv = tf.keras.layers.Conv3D(
        filters=4 * self._filters,
        kernel_size=1,
        strides=[
            self._temporal_strides, self._spatial_strides,
            self._spatial_strides
        ],
        use_bias=False,
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer)
    self._norm0 = self._norm(
        axis=self._bn_axis,
        momentum=self._norm_momentum,
        epsilon=self._norm_epsilon)

    self._temporal_conv = tf.keras.layers.Conv3D(
        filters=self._filters,
        kernel_size=[self._temporal_kernel_size, 1, 1],
        strides=[self._temporal_strides, 1, 1],
        padding='same',
        use_bias=False,
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer)
    self._norm1 = self._norm(
        axis=self._bn_axis,
        momentum=self._norm_momentum,
        epsilon=self._norm_epsilon)

    self._spatial_conv = tf.keras.layers.Conv3D(
        filters=self._filters,
        kernel_size=[1, 3, 3],
        strides=[1, self._spatial_strides, self._spatial_strides],
        padding='same',
        use_bias=False,
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer)
    self._norm2 = self._norm(
        axis=self._bn_axis,
        momentum=self._norm_momentum,
        epsilon=self._norm_epsilon)

    self._expand_conv = tf.keras.layers.Conv3D(
        filters=4 * self._filters,
        kernel_size=[1, 1, 1],
        strides=[1, 1, 1],
        padding='same',
        use_bias=False,
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer)
    self._norm3 = self._norm(
        axis=self._bn_axis,
        momentum=self._norm_momentum,
        epsilon=self._norm_epsilon)

    if self._use_self_gating:
      self._self_gating = SelfGating(filters=4 * self._filters)
    else:
      self._self_gating = None

    super(BottleneckBlock3D, self).build(input_shape)

  def get_config(self):
    """Returns the constructor arguments merged with the base layer config."""
    config = {
        'filters': self._filters,
        'temporal_kernel_size': self._temporal_kernel_size,
        'temporal_strides': self._temporal_strides,
        'spatial_strides': self._spatial_strides,
        # This block has no `use_projection` argument or attribute; the
        # matching constructor argument is `use_self_gating`.
        'use_self_gating': self._use_self_gating,
        'kernel_initializer': self._kernel_initializer,
        'kernel_regularizer': self._kernel_regularizer,
        'bias_regularizer': self._bias_regularizer,
        'activation': self._activation,
        'use_sync_bn': self._use_sync_bn,
        'norm_momentum': self._norm_momentum,
        'norm_epsilon': self._norm_epsilon
    }
    base_config = super(BottleneckBlock3D, self).get_config()
    return dict(list(base_config.items()) + list(config.items()))

  def call(self, inputs, training=None):
    """Runs the bottleneck block.

    Args:
      inputs: the input feature tensor.
      training: accepted for the Keras call signature; not referenced
        explicitly in this method.

    Returns:
      The activated sum of the residual branch and the shortcut, optionally
      followed by self-gating.
    """
    # The channel count is read from the last axis.
    # NOTE(review): that matches a channels-last layout only -- confirm.
    in_filters = inputs.shape.as_list()[-1]
    if in_filters == 4 * self._filters:
      # Channels already match the block output: identity, or a strided
      # max-pool when the block downsamples.
      if self._temporal_strides == 1 and self._spatial_strides == 1:
        shortcut = inputs
      else:
        shortcut = self._shortcut_maxpool(inputs)
    else:
      shortcut = self._shortcut_conv(inputs)
      shortcut = self._norm0(shortcut)

    x = self._temporal_conv(inputs)
    x = self._norm1(x)
    x = self._activation_fn(x)

    x = self._spatial_conv(x)
    x = self._norm2(x)
    x = self._activation_fn(x)

    x = self._expand_conv(x)
    x = self._norm3(x)

    # Apply activation before additional modules.
    x = self._activation_fn(x + shortcut)

    if self._self_gating:
      x = self._self_gating(x)

    return x
official/vision/beta/modeling/layers/nn_blocks_3d_test.py
0 → 100644
View file @
cc748b2a
# Lint as: python3
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for nn_blocks_3d."""
# Import libraries
from
absl.testing
import
parameterized
import
tensorflow
as
tf
from
official.vision.beta.modeling.layers
import
nn_blocks_3d
class NNBlocksTest(parameterized.TestCase, tf.test.TestCase):
  """Shape checks for the 3D building blocks."""

  @parameterized.parameters(
      (nn_blocks_3d.BottleneckBlock3D, 1, 1, 2, True),
      (nn_blocks_3d.BottleneckBlock3D, 3, 2, 1, False),
  )
  def test_bottleneck_block_creation(self, block_fn, temporal_kernel_size,
                                     temporal_strides, spatial_strides,
                                     use_self_gating):
    """Builds the block on a symbolic input and checks the output shape."""
    num_frames, side, base_filters = 16, 128, 256
    out_channels = base_filters * 4

    clip = tf.keras.Input(
        shape=(num_frames, side, side, out_channels), batch_size=1)
    layer = block_fn(
        filters=base_filters,
        temporal_kernel_size=temporal_kernel_size,
        temporal_strides=temporal_strides,
        spatial_strides=spatial_strides,
        use_self_gating=use_self_gating)

    result_shape = layer(clip).shape.as_list()

    # Each strided axis shrinks by its stride; channels stay at 4x filters.
    out_frames = num_frames // temporal_strides
    out_side = side // spatial_strides
    self.assertAllEqual(
        [1, out_frames, out_side, out_side, out_channels], result_shape)
# Run the test cases in this module when the file is executed as a script.
if __name__ == '__main__':
  tf.test.main()
official/vision/beta/modeling/layers/nn_blocks_test.py
0 → 100644
View file @
cc748b2a
# Lint as: python3
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for nn_blocks."""
from
typing
import
Any
,
Iterable
,
Tuple
# Import libraries
from
absl.testing
import
parameterized
import
tensorflow
as
tf
from
tensorflow.python.distribute
import
combinations
from
tensorflow.python.distribute
import
strategy_combinations
from
official.vision.beta.modeling.layers
import
nn_blocks
def distribution_strategy_combinations() -> Iterable[Tuple[Any, ...]]:
  """Builds the distribution-strategy test combinations for this module."""
  # One combination per strategy: default, TPU and single-device GPU.
  strategies = [
      strategy_combinations.default_strategy,
      strategy_combinations.tpu_strategy,
      strategy_combinations.one_device_strategy_gpu,
  ]
  return combinations.combine(distribution=strategies)
class NNBlocksTest(parameterized.TestCase, tf.test.TestCase):
  """Output-shape checks for the 2D residual-style blocks."""

  @parameterized.parameters(
      (nn_blocks.ResidualBlock, 1, False, 0.0),
      (nn_blocks.ResidualBlock, 2, True, 0.2),
  )
  def test_residual_block_creation(self, block_fn, strides, use_projection,
                                   stochastic_depth_drop_rate):
    """A residual block keeps the channel count and divides H/W by stride."""
    side, channels = 128, 256
    images = tf.keras.Input(shape=(side, side, channels), batch_size=1)
    layer = block_fn(
        channels,
        strides,
        use_projection=use_projection,
        stochastic_depth_drop_rate=stochastic_depth_drop_rate,
    )

    result_shape = layer(images).shape.as_list()

    out_side = side // strides
    self.assertAllEqual([1, out_side, out_side, channels], result_shape)

  @parameterized.parameters(
      (nn_blocks.BottleneckBlock, 1, False, 0.0),
      (nn_blocks.BottleneckBlock, 2, True, 0.2),
  )
  def test_bottleneck_block_creation(self, block_fn, strides, use_projection,
                                     stochastic_depth_drop_rate):
    """A bottleneck block outputs 4x filters and divides H/W by stride."""
    side, base_filters = 128, 256
    wide_channels = base_filters * 4
    images = tf.keras.Input(shape=(side, side, wide_channels), batch_size=1)
    layer = block_fn(
        base_filters,
        strides,
        use_projection=use_projection,
        stochastic_depth_drop_rate=stochastic_depth_drop_rate)

    result_shape = layer(images).shape.as_list()

    out_side = side // strides
    self.assertAllEqual([1, out_side, out_side, wide_channels], result_shape)

  @parameterized.parameters(
      (nn_blocks.InvertedBottleneckBlock, 1, 1, None, None),
      (nn_blocks.InvertedBottleneckBlock, 6, 1, None, None),
      (nn_blocks.InvertedBottleneckBlock, 1, 2, None, None),
      (nn_blocks.InvertedBottleneckBlock, 1, 1, 0.2, None),
      (nn_blocks.InvertedBottleneckBlock, 1, 1, None, 0.2),
  )
  def test_invertedbottleneck_block_creation(self, block_fn, expand_ratio,
                                             strides, se_ratio,
                                             stochastic_depth_drop_rate):
    """An inverted bottleneck maps in_filters to out_filters channels."""
    side = 128
    channels_in, channels_out = 24, 40
    images = tf.keras.Input(shape=(side, side, channels_in), batch_size=1)
    layer = block_fn(
        in_filters=channels_in,
        out_filters=channels_out,
        expand_ratio=expand_ratio,
        strides=strides,
        se_ratio=se_ratio,
        stochastic_depth_drop_rate=stochastic_depth_drop_rate)

    result_shape = layer(images).shape.as_list()

    out_side = side // strides
    self.assertAllEqual([1, out_side, out_side, channels_out], result_shape)
class ResidualInnerTest(parameterized.TestCase, tf.test.TestCase):
  """Output-shape check for `nn_blocks.ResidualInner`."""

  @combinations.generate(distribution_strategy_combinations())
  def test_shape(self, distribution):
    """The layer yields `filters` channels at H/stride x W/stride."""
    batch, height, width, channels = 8, 32, 32, 32
    out_filters, stride = 64, 2

    images = tf.random.uniform(shape=[batch, height, width, channels])
    # The layer is constructed under the strategy scope.
    with distribution.scope():
      layer = nn_blocks.ResidualInner(out_filters, stride)

    actual_shape = layer(images).shape.as_list()
    self.assertEqual(
        [batch, height // stride, width // stride, out_filters], actual_shape)
class BottleneckResidualInnerTest(parameterized.TestCase, tf.test.TestCase):
  """Output-shape check for `nn_blocks.BottleneckResidualInner`."""

  @combinations.generate(distribution_strategy_combinations())
  def test_shape(self, distribution):
    """The layer yields 4 * `filters` channels at H/stride x W/stride."""
    batch, height, width, channels = 8, 32, 32, 32
    base_filters, stride = 64, 2

    images = tf.random.uniform(shape=[batch, height, width, channels])
    # The layer is constructed under the strategy scope.
    with distribution.scope():
      layer = nn_blocks.BottleneckResidualInner(base_filters, stride)

    actual_shape = layer(images).shape.as_list()
    self.assertEqual(
        [batch, height // stride, width // stride, base_filters * 4],
        actual_shape)
class ReversibleLayerTest(parameterized.TestCase, tf.test.TestCase):
  """Tests for `nn_blocks.ReversibleLayer` under several strategies.

  NOTE(review): statement nesting (in particular which statements sit inside
  `distribution.scope()`) was reconstructed from a flattened listing --
  confirm against the original file.
  """

  @combinations.generate(distribution_strategy_combinations())
  def test_downsampling_non_reversible_step(self, distribution):
    """Runs one training step with a strided `f` and checks the output shape."""
    bsz, h, w, c = 8, 32, 32, 32
    filters = 64
    strides = 2

    input_tensor = tf.random.uniform(shape=[bsz, h, w, c])
    with distribution.scope():
      # `f` and `g` each use filters // 2; the expected output below has
      # `filters` channels.
      f = nn_blocks.ResidualInner(
          filters=filters // 2, strides=strides, batch_norm_first=True)
      g = nn_blocks.ResidualInner(
          filters=filters // 2, strides=1, batch_norm_first=True)
      test_layer = nn_blocks.ReversibleLayer(f, g)
      test_layer.build(input_tensor.shape)
      optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)

    @tf.function
    def step_fn():
      # Forward pass is recorded on the tape; gradients are taken w.r.t. the
      # layer's trainable variables.
      with tf.GradientTape() as tape:
        output = test_layer(input_tensor, training=True)
      grads = tape.gradient(output, test_layer.trainable_variables)
      # Test applying gradients with optimizer works
      optimizer.apply_gradients(zip(grads, test_layer.trainable_variables))

      return output

    replica_output = distribution.run(step_fn)
    outputs = distribution.experimental_local_results(replica_output)

    # Assert forward pass shape
    expected_output_shape = [bsz, h // strides, w // strides, filters]
    for output in outputs:
      self.assertEqual(expected_output_shape, output.shape.as_list())

  @combinations.generate(distribution_strategy_combinations())
  def test_reversible_step(self, distribution):
    """Runs one training step and checks variables changed and output shape."""
    # Reversible layers satisfy: (a) strides = 1 (b) in_filter = out_filter
    bsz, h, w, c = 8, 32, 32, 32
    filters = c
    strides = 1

    input_tensor = tf.random.uniform(shape=[bsz, h, w, c])
    with distribution.scope():
      f = nn_blocks.ResidualInner(
          filters=filters // 2, strides=strides, batch_norm_first=False)
      g = nn_blocks.ResidualInner(
          filters=filters // 2, strides=1, batch_norm_first=False)
      test_layer = nn_blocks.ReversibleLayer(f, g)
      test_layer(input_tensor, training=False)  # init weights
      optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)

    @tf.function
    def step_fn():
      with tf.GradientTape() as tape:
        output = test_layer(input_tensor, training=True)
      grads = tape.gradient(output, test_layer.trainable_variables)
      # Test applying gradients with optimizer works
      optimizer.apply_gradients(zip(grads, test_layer.trainable_variables))

      return output

    @tf.function
    def fwd():
      test_layer(input_tensor)

    distribution.run(fwd)  # Initialize variables
    # Snapshot the variable values before the training step so the update
    # can be detected afterwards.
    prev_variables = tf.identity_n(test_layer.trainable_variables)
    replica_output = distribution.run(step_fn)
    outputs = distribution.experimental_local_results(replica_output)

    # Assert variables values have changed values
    for v0, v1 in zip(prev_variables, test_layer.trainable_variables):
      self.assertNotAllEqual(v0, v1)

    # Assert forward pass shape
    expected_output_shape = [bsz, h // strides, w // strides, filters]
    for output in outputs:
      self.assertEqual(expected_output_shape, output.shape.as_list())

  @combinations.generate(distribution_strategy_combinations())
  def test_manual_gradients_correctness(self, distribution):
    """Compares gradients of the default and `manual_grads=False` layers."""
    bsz, h, w, c = 8, 32, 32, 32
    filters = c
    strides = 1

    input_tensor = tf.random.uniform(shape=[bsz, h, w, c * 4])  # bottleneck
    with distribution.scope():
      f_manual = nn_blocks.BottleneckResidualInner(
          filters=filters // 2, strides=strides, batch_norm_first=False)
      g_manual = nn_blocks.BottleneckResidualInner(
          filters=filters // 2, strides=1, batch_norm_first=False)
      manual_grad_layer = nn_blocks.ReversibleLayer(f_manual, g_manual)
      manual_grad_layer(input_tensor, training=False)  # init weights

      f_auto = nn_blocks.BottleneckResidualInner(
          filters=filters // 2, strides=strides, batch_norm_first=False)
      g_auto = nn_blocks.BottleneckResidualInner(
          filters=filters // 2, strides=1, batch_norm_first=False)
      auto_grad_layer = nn_blocks.ReversibleLayer(
          f_auto, g_auto, manual_grads=False)
      auto_grad_layer(input_tensor)  # init weights
      # Clone all weights (tf.keras.layers.Layer has no .clone())
      auto_grad_layer._f.set_weights(manual_grad_layer._f.get_weights())
      auto_grad_layer._g.set_weights(manual_grad_layer._g.get_weights())

    @tf.function
    def manual_fn():
      with tf.GradientTape() as tape:
        output = manual_grad_layer(input_tensor, training=True)
      grads = tape.gradient(output, manual_grad_layer.trainable_variables)
      return grads

    @tf.function
    def auto_fn():
      with tf.GradientTape() as tape:
        output = auto_grad_layer(input_tensor, training=True)
      grads = tape.gradient(output, auto_grad_layer.trainable_variables)
      return grads

    manual_grads = distribution.run(manual_fn)
    auto_grads = distribution.run(auto_fn)

    # Assert gradients calculated manually are close to that from autograd
    for manual_grad, auto_grad in zip(manual_grads, auto_grads):
      self.assertAllClose(
          distribution.experimental_local_results(manual_grad),
          distribution.experimental_local_results(auto_grad),
          atol=5e-3,
          rtol=5e-3)

    # Verify that BN moving mean and variance is correct.
    for manual_var, auto_var in zip(manual_grad_layer.non_trainable_variables,
                                    auto_grad_layer.non_trainable_variables):
      self.assertAllClose(manual_var, auto_var)
# Run the test cases in this module when the file is executed as a script.
if __name__ == '__main__':
  tf.test.main()
official/vision/beta/modeling/layers/nn_layers.py
0 → 100644
View file @
cc748b2a
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains common building blocks for neural networks."""
# Import libraries
import
tensorflow
as
tf
from
official.modeling
import
tf_utils
@tf.keras.utils.register_keras_serializable(package='Vision')
class SqueezeExcitation(tf.keras.layers.Layer):
  """Squeeze and excitation layer."""

  def __init__(self,
               in_filters,
               se_ratio,
               expand_ratio,
               kernel_initializer='VarianceScaling',
               kernel_regularizer=None,
               bias_regularizer=None,
               activation='relu',
               **kwargs):
    """Implementation for squeeze and excitation.

    Args:
      in_filters: `int` number of filters of the input tensor.
      se_ratio: `float` se ratio for the squeeze and excitation layer. It is
        multiplied with `in_filters` in `build` to size the reduce convolution.
      expand_ratio: `int` expand_ratio for a MBConv block.
      kernel_initializer: kernel_initializer for convolutional layers.
      kernel_regularizer: tf.keras.regularizers.Regularizer object for Conv2D.
        Default to None.
      bias_regularizer: tf.keras.regularizers.Regularizer object for Conv2D.
        Default to None.
      activation: `str` name of the activation function.
      **kwargs: keyword arguments to be passed.
    """
    super(SqueezeExcitation, self).__init__(**kwargs)

    self._in_filters = in_filters
    self._se_ratio = se_ratio
    self._expand_ratio = expand_ratio
    self._activation = activation
    self._kernel_initializer = kernel_initializer
    self._kernel_regularizer = kernel_regularizer
    self._bias_regularizer = bias_regularizer
    # Axes that are averaged away in `call`.
    if tf.keras.backend.image_data_format() == 'channels_last':
      self._spatial_axis = [1, 2]
    else:
      self._spatial_axis = [2, 3]
    self._activation_fn = tf_utils.get_activation(activation)

  def build(self, input_shape):
    """Creates the reduce and expand 1x1 convolutions."""
    # At least one filter is kept even for very small ratios.
    num_reduced_filters = max(1, int(self._in_filters * self._se_ratio))

    self._se_reduce = tf.keras.layers.Conv2D(
        filters=num_reduced_filters,
        kernel_size=1,
        strides=1,
        padding='same',
        use_bias=True,
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer)

    self._se_expand = tf.keras.layers.Conv2D(
        filters=self._in_filters * self._expand_ratio,
        kernel_size=1,
        strides=1,
        padding='same',
        use_bias=True,
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer)

    super(SqueezeExcitation, self).build(input_shape)

  def get_config(self):
    """Returns the constructor arguments merged with the base layer config."""
    # Only real constructor arguments are listed: the layer has no `strides`
    # argument or attribute, so it must not appear here.
    config = {
        'in_filters': self._in_filters,
        'se_ratio': self._se_ratio,
        'expand_ratio': self._expand_ratio,
        'kernel_initializer': self._kernel_initializer,
        'kernel_regularizer': self._kernel_regularizer,
        'bias_regularizer': self._bias_regularizer,
        'activation': self._activation,
    }
    base_config = super(SqueezeExcitation, self).get_config()
    return dict(list(base_config.items()) + list(config.items()))

  def call(self, inputs):
    """Rescales `inputs` by a sigmoid gate computed from its spatial mean.

    Args:
      inputs: the input feature tensor.

    Returns:
      `inputs` multiplied by the sigmoid of the reduce -> activation -> expand
      output.
    """
    # Squeeze: average over the spatial axes, keeping them as size-1 dims.
    x = tf.reduce_mean(inputs, self._spatial_axis, keepdims=True)
    # Excite: reduce -> activation -> expand.
    x = self._se_expand(self._activation_fn(self._se_reduce(x)))
    return tf.sigmoid(x) * inputs
@tf.keras.utils.register_keras_serializable(package='Vision')
class StochasticDepth(tf.keras.layers.Layer):
  """Stochastic depth layer."""

  def __init__(self, stochastic_depth_drop_rate, **kwargs):
    """Initialize stochastic depth.

    Args:
      stochastic_depth_drop_rate: `float` drop rate. `None` or 0 disables the
        layer.
      **kwargs: keyword arguments to be passed.
    """
    super(StochasticDepth, self).__init__(**kwargs)
    self._drop_rate = stochastic_depth_drop_rate

  def get_config(self):
    """Returns the constructor arguments merged with the base layer config."""
    # The key must match the constructor argument name so that
    # `from_config(get_config())` can rebuild the layer.
    config = {'stochastic_depth_drop_rate': self._drop_rate}
    base_config = super(StochasticDepth, self).get_config()
    return dict(list(base_config.items()) + list(config.items()))

  def call(self, inputs, training=None):
    """Randomly zeroes whole examples during training.

    Args:
      inputs: the input tensor; the random mask is created with shape
        [batch_size, 1, 1, 1].
      training: `bool` or None. When None, the Keras learning phase is used.

    Returns:
      `inputs` unchanged when not training or when the drop rate is None or 0;
      otherwise `inputs / keep_prob` multiplied by a per-example binary mask.
    """
    # Resolve the mode first so an explicitly passed `training` value is
    # honoured.
    if training is None:
      training = tf.keras.backend.learning_phase()
    if not training or self._drop_rate is None or self._drop_rate == 0:
      return inputs

    keep_prob = 1.0 - self._drop_rate
    batch_size = tf.shape(inputs)[0]
    # keep_prob plus a uniform [0, 1) sample is floored to a 0/1 mask.
    random_tensor = keep_prob
    random_tensor += tf.random.uniform(
        [batch_size, 1, 1, 1], dtype=inputs.dtype)
    binary_tensor = tf.floor(random_tensor)
    # Divide by keep_prob before masking.
    output = tf.math.divide(inputs, keep_prob) * binary_tensor
    return output
official/vision/beta/modeling/layers/roi_aligner.py
0 → 100644
View file @
cc748b2a
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""ROI align."""
import
tensorflow
as
tf
from
official.vision.beta.ops
import
spatial_transform_ops
@tf.keras.utils.register_keras_serializable(package='Vision')
class MultilevelROIAligner(tf.keras.layers.Layer):
  """Crops and resizes multilevel features for the given boxes (ROIAlign)."""

  def __init__(self, crop_size=7, sample_offset=0.5, **kwargs):
    """Stores the aligner settings.

    Args:
      crop_size: int, the output size of the cropped features.
      sample_offset: float in [0, 1], the subpixel sample offset.
      **kwargs: other key word arguments passed to Layer.
    """
    # The settings live in one dict that doubles as the layer config.
    self._config_dict = dict(crop_size=crop_size, sample_offset=sample_offset)
    super(MultilevelROIAligner, self).__init__(**kwargs)

  def call(self, features, boxes, training=None):
    """Extracts one fixed-size feature crop per box.

    Args:
      features: A dictionary with key as pyramid level and value as features.
        The features are in shape of
        [batch_size, height_l, width_l, num_filters].
      boxes: A 3-D Tensor of shape [batch_size, num_boxes, 4]. Each row
        represents a box with [y1, x1, y2, x2] in un-normalized coordinates.
      training: bool, whether it is in training mode. Not used by this method.

    Returns:
      roi_features: A 5-D tensor representing feature crop of shape
        [batch_size, num_boxes, crop_size, crop_size, num_filters].
    """
    settings = self._config_dict
    return spatial_transform_ops.multilevel_crop_and_resize(
        features,
        boxes,
        output_size=settings['crop_size'],
        sample_offset=settings['sample_offset'])

  def get_config(self):
    """Returns the stored settings dict."""
    return self._config_dict

  @classmethod
  def from_config(cls, config, custom_objects=None):
    """Builds a layer from `config`; `custom_objects` is not used."""
    return cls(**config)
official/vision/beta/modeling/layers/roi_aligner_test.py
0 → 100644
View file @
cc748b2a
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for roi_aligner.py."""
# Import libraries
import
tensorflow
as
tf
from
official.vision.beta.modeling.layers
import
roi_aligner
class MultilevelROIAlignerTest(tf.test.TestCase):
  """Config round-trip test for `MultilevelROIAligner`."""

  def test_serialize_deserialize(self):
    """`get_config` echoes the kwargs and survives `from_config`."""
    init_args = {'crop_size': 7, 'sample_offset': 0.5}
    original = roi_aligner.MultilevelROIAligner(**init_args)

    # The config should contain exactly the constructor arguments.
    self.assertEqual(original.get_config(), dict(init_args))

    restored = roi_aligner.MultilevelROIAligner.from_config(
        original.get_config())

    self.assertAllEqual(original.get_config(), restored.get_config())
# Run the test cases in this module when the file is executed as a script.
if __name__ == '__main__':
  tf.test.main()
official/vision/beta/modeling/layers/roi_generator.py
0 → 100644
View file @
cc748b2a
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""ROI generator."""
# Import libraries
import
tensorflow
as
tf
from
official.vision.beta.ops
import
box_ops
from
official.vision.beta.ops
import
nms
def
_multilevel_propose_rois
(
raw_boxes
,
raw_scores
,
anchor_boxes
,
image_shape
,
pre_nms_top_k
=
2000
,
pre_nms_score_threshold
=
0.0
,
pre_nms_min_size_threshold
=
0.0
,
nms_iou_threshold
=
0.7
,
num_proposals
=
1000
,
use_batched_nms
=
False
,
decode_boxes
=
True
,
clip_boxes
=
True
,
apply_sigmoid_to_score
=
True
):
"""Proposes RoIs given a group of candidates from different FPN levels.
The following describes the steps:
1. For each individual level:
a. Apply sigmoid transform if specified.
b. Decode boxes if specified.
c. Clip boxes if specified.
d. Filter small boxes and those fall outside image if specified.
e. Apply pre-NMS filtering including pre-NMS top k and score thresholding.
f. Apply NMS.
2. Aggregate post-NMS boxes from each level.
3. Apply an overall top k to generate the final selected RoIs.
Args:
raw_boxes: a dict with keys representing FPN levels and values representing
box tensors of shape [batch_size, feature_h, feature_w, num_anchors * 4].
raw_scores: a dict with keys representing FPN levels and values representing
logit tensors of shape [batch_size, feature_h, feature_w, num_anchors].
anchor_boxes: a dict with keys representing FPN levels and values
representing anchor box tensors of shape
[batch_size, feature_h * feature_w * num_anchors, 4].
image_shape: a tensor of shape [batch_size, 2] where the last dimension are
[height, width] of the scaled image.
pre_nms_top_k: an integer of top scoring RPN proposals *per level* to
keep before applying NMS. Default: 2000.
pre_nms_score_threshold: a float between 0 and 1 representing the minimal
box score to keep before applying NMS. This is often used as a
pre-filtering step for better performance. Default: 0, no filtering is
applied.
pre_nms_min_size_threshold: a float representing the minimal box size in
each side (w.r.t. the scaled image) to keep before applying NMS. This is
often used as a pre-filtering step for better performance. Default: 0, no
filtering is applied.
nms_iou_threshold: a float between 0 and 1 representing the IoU threshold
used for NMS. If 0.0, no NMS is applied. Default: 0.7.
num_proposals: an integer of top scoring RPN proposals *in total* to
keep after applying NMS. Default: 1000.
use_batched_nms: a boolean indicating whether NMS is applied in batch using
`tf.image.combined_non_max_suppression`. Currently only available in
CPU/GPU. Default: False.
decode_boxes: a boolean indicating whether `raw_boxes` needs to be decoded
using `anchor_boxes`. If False, use `raw_boxes` directly and ignore
`anchor_boxes`. Default: True.
clip_boxes: a boolean indicating whether boxes are first clipped to the
scaled image size before appliying NMS. If False, no clipping is applied
and `image_shape` is ignored. Default: True.
apply_sigmoid_to_score: a boolean indicating whether apply sigmoid to
`raw_scores` before applying NMS. Default: True.
Returns:
selected_rois: a tensor of shape [batch_size, num_proposals, 4],
representing the box coordinates of the selected proposals w.r.t. the
scaled image.
selected_roi_scores: a tensor of shape [batch_size, num_proposals, 1],
representing the scores of the selected proposals.
"""
with
tf
.
name_scope
(
'multilevel_propose_rois'
):
rois
=
[]
roi_scores
=
[]
image_shape
=
tf
.
expand_dims
(
image_shape
,
axis
=
1
)
for
level
in
sorted
(
raw_scores
.
keys
()):
with
tf
.
name_scope
(
'level_%d'
%
level
):
_
,
feature_h
,
feature_w
,
num_anchors_per_location
=
(
raw_scores
[
level
].
get_shape
().
as_list
())
num_boxes
=
feature_h
*
feature_w
*
num_anchors_per_location
this_level_scores
=
tf
.
reshape
(
raw_scores
[
level
],
[
-
1
,
num_boxes
])
this_level_boxes
=
tf
.
reshape
(
raw_boxes
[
level
],
[
-
1
,
num_boxes
,
4
])
this_level_anchors
=
tf
.
cast
(
tf
.
reshape
(
anchor_boxes
[
level
],
[
-
1
,
num_boxes
,
4
]),
dtype
=
this_level_scores
.
dtype
)
if
apply_sigmoid_to_score
:
this_level_scores
=
tf
.
sigmoid
(
this_level_scores
)
if
decode_boxes
:
this_level_boxes
=
box_ops
.
decode_boxes
(
this_level_boxes
,
this_level_anchors
)
if
clip_boxes
:
this_level_boxes
=
box_ops
.
clip_boxes
(
this_level_boxes
,
image_shape
)
if
pre_nms_min_size_threshold
>
0.0
:
this_level_boxes
,
this_level_scores
=
box_ops
.
filter_boxes
(
this_level_boxes
,
this_level_scores
,
image_shape
,
pre_nms_min_size_threshold
)
this_level_pre_nms_top_k
=
min
(
num_boxes
,
pre_nms_top_k
)
this_level_post_nms_top_k
=
min
(
num_boxes
,
num_proposals
)
if
nms_iou_threshold
>
0.0
:
if
use_batched_nms
:
this_level_rois
,
this_level_roi_scores
,
_
,
_
=
(
tf
.
image
.
combined_non_max_suppression
(
tf
.
expand_dims
(
this_level_boxes
,
axis
=
2
),
tf
.
expand_dims
(
this_level_scores
,
axis
=-
1
),
max_output_size_per_class
=
this_level_pre_nms_top_k
,
max_total_size
=
this_level_post_nms_top_k
,
iou_threshold
=
nms_iou_threshold
,
score_threshold
=
pre_nms_score_threshold
,
pad_per_class
=
False
,
clip_boxes
=
False
))
else
:
if
pre_nms_score_threshold
>
0.0
:
this_level_boxes
,
this_level_scores
=
(
box_ops
.
filter_boxes_by_scores
(
this_level_boxes
,
this_level_scores
,
pre_nms_score_threshold
))
this_level_boxes
,
this_level_scores
=
box_ops
.
top_k_boxes
(
this_level_boxes
,
this_level_scores
,
k
=
this_level_pre_nms_top_k
)
this_level_roi_scores
,
this_level_rois
=
(
nms
.
sorted_non_max_suppression_padded
(
this_level_scores
,
this_level_boxes
,
max_output_size
=
this_level_post_nms_top_k
,
iou_threshold
=
nms_iou_threshold
))
else
:
this_level_rois
,
this_level_roi_scores
=
box_ops
.
top_k_boxes
(
this_level_boxes
,
this_level_scores
,
k
=
this_level_post_nms_top_k
)
rois
.
append
(
this_level_rois
)
roi_scores
.
append
(
this_level_roi_scores
)
all_rois
=
tf
.
concat
(
rois
,
axis
=
1
)
all_roi_scores
=
tf
.
concat
(
roi_scores
,
axis
=
1
)
with
tf
.
name_scope
(
'top_k_rois'
):
_
,
num_valid_rois
=
all_roi_scores
.
get_shape
().
as_list
()
overall_top_k
=
min
(
num_valid_rois
,
num_proposals
)
selected_rois
,
selected_roi_scores
=
box_ops
.
top_k_boxes
(
all_rois
,
all_roi_scores
,
k
=
overall_top_k
)
return
selected_rois
,
selected_roi_scores
@tf.keras.utils.register_keras_serializable(package='Vision')
class MultilevelROIGenerator(tf.keras.layers.Layer):
  """Proposes RoIs for the second stage processing.

  The layer stores two sets of proposal settings: one that is used when `call`
  receives a truthy `training` flag and one (the `test_*` arguments) that is
  used otherwise. The selected set is forwarded to
  `_multilevel_propose_rois`.
  """

  def __init__(self,
               pre_nms_top_k=2000,
               pre_nms_score_threshold=0.0,
               pre_nms_min_size_threshold=0.0,
               nms_iou_threshold=0.7,
               num_proposals=1000,
               test_pre_nms_top_k=1000,
               test_pre_nms_score_threshold=0.0,
               test_pre_nms_min_size_threshold=0.0,
               test_nms_iou_threshold=0.7,
               test_num_proposals=1000,
               use_batched_nms=False,
               **kwargs):
    """Initializes a ROI generator.

    The ROI generator transforms the raw predictions from RPN to ROIs.

    Args:
      pre_nms_top_k: int, how many top scoring proposals are kept before NMS
        in training.
      pre_nms_score_threshold: float, proposals scoring below this value are
        dropped before NMS in training.
      pre_nms_min_size_threshold: float, proposals with a side (w.r.t. the
        scaled image) below this value are dropped in training.
      nms_iou_threshold: float in [0, 1], the NMS IoU threshold in training.
      num_proposals: int, the final number of proposals to generate in
        training.
      test_pre_nms_top_k: int, same as `pre_nms_top_k` but used in testing.
      test_pre_nms_score_threshold: float, same as `pre_nms_score_threshold`
        but used in testing.
      test_pre_nms_min_size_threshold: float, same as
        `pre_nms_min_size_threshold` but used in testing.
      test_nms_iou_threshold: float in [0, 1], same as `nms_iou_threshold` but
        used in testing.
      test_num_proposals: int, same as `num_proposals` but used in testing.
      use_batched_nms: bool, whether or not use
        `tf.image.combined_non_max_suppression`.
      **kwargs: other key word arguments passed to Layer.
    """
    # The constructor arguments double as the serialized layer config.
    self._config_dict = dict(
        pre_nms_top_k=pre_nms_top_k,
        pre_nms_score_threshold=pre_nms_score_threshold,
        pre_nms_min_size_threshold=pre_nms_min_size_threshold,
        nms_iou_threshold=nms_iou_threshold,
        num_proposals=num_proposals,
        test_pre_nms_top_k=test_pre_nms_top_k,
        test_pre_nms_score_threshold=test_pre_nms_score_threshold,
        test_pre_nms_min_size_threshold=test_pre_nms_min_size_threshold,
        test_nms_iou_threshold=test_nms_iou_threshold,
        test_num_proposals=test_num_proposals,
        use_batched_nms=use_batched_nms,
    )
    super(MultilevelROIGenerator, self).__init__(**kwargs)

  def call(self,
           raw_boxes,
           raw_scores,
           anchor_boxes,
           image_shape,
           training=None):
    """Proposes RoIs given a group of candidates from different FPN levels.

    Boxes are always decoded against the anchors, clipped to the image and
    scored through a sigmoid; the remaining knobs come from the layer config.

    Args:
      raw_boxes: a dict with keys representing FPN levels and values
        representing box tensors of shape
        [batch, feature_h, feature_w, num_anchors * 4].
      raw_scores: a dict with keys representing FPN levels and values
        representing logit tensors of shape
        [batch, feature_h, feature_w, num_anchors].
      anchor_boxes: a dict with keys representing FPN levels and values
        representing anchor box tensors of shape
        [batch, feature_h * feature_w * num_anchors, 4].
      image_shape: a tensor of shape [batch, 2] where the last dimension are
        [height, width] of the scaled image.
      training: a bool indicating whether it is in training mode. Any falsy
        value, including None, selects the `test_*` settings.

    Returns:
      roi_boxes: [batch, num_proposals, 4], the proposed ROIs in the scaled
        image coordinate.
      roi_scores: [batch, num_proposals], scores of the proposed ROIs.
    """
    settings = self._config_dict
    # Inference-time values are stored under the same names with a `test_`
    # prefix, so one prefix picks the whole group.
    prefix = '' if training else 'test_'
    proposed_boxes, proposed_scores = _multilevel_propose_rois(
        raw_boxes,
        raw_scores,
        anchor_boxes,
        image_shape,
        pre_nms_top_k=settings[prefix + 'pre_nms_top_k'],
        pre_nms_score_threshold=settings[prefix + 'pre_nms_score_threshold'],
        pre_nms_min_size_threshold=settings[
            prefix + 'pre_nms_min_size_threshold'],
        nms_iou_threshold=settings[prefix + 'nms_iou_threshold'],
        num_proposals=settings[prefix + 'num_proposals'],
        use_batched_nms=settings['use_batched_nms'],
        decode_boxes=True,
        clip_boxes=True,
        apply_sigmoid_to_score=True)
    return proposed_boxes, proposed_scores

  def get_config(self):
    """Returns the dict of constructor arguments stored at init time."""
    return self._config_dict

  @classmethod
  def from_config(cls, config, custom_objects=None):
    """Builds a layer by passing `config` as keyword arguments to `cls`."""
    return cls(**config)
official/vision/beta/modeling/layers/roi_generator_test.py
0 → 100644
View file @
cc748b2a
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for roi_generator.py."""
# Import libraries
import
numpy
as
np
import
tensorflow
as
tf
from
official.vision.beta.modeling.layers
import
roi_generator
class MultilevelProposeRoisTest(tf.test.TestCase):
  """Checks `_multilevel_propose_rois` against hand-written expectations."""

  def _propose_and_verify(self, raw_boxes, raw_scores, anchor_boxes,
                          num_proposals, expected_rois, expected_scores):
    """Runs the proposal op in and out of a TPU scope and compares results.

    Args:
      raw_boxes: dict mapping level to a box tensor.
      raw_scores: dict mapping level to a score tensor.
      anchor_boxes: dict mapping level to an anchor tensor.
      num_proposals: int, total number of proposals requested.
      expected_rois: numpy array the selected boxes must match.
      expected_scores: numpy array the selected scores must match.
    """
    # Decoding, clipping and the sigmoid are all switched off so that the
    # inputs can be compared with the outputs directly.
    propose_kwargs = dict(
        anchor_boxes=anchor_boxes,
        image_shape=tf.constant([[20, 20], [20, 20]], dtype=tf.int32),
        pre_nms_top_k=4,
        pre_nms_score_threshold=0.0,
        pre_nms_min_size_threshold=0.0,
        nms_iou_threshold=0.5,
        num_proposals=num_proposals,
        use_batched_nms=False,
        decode_boxes=False,
        clip_boxes=False,
        apply_sigmoid_to_score=False)

    # Inside a TPUStrategy scope.
    tpu_strategy = tf.distribute.experimental.TPUStrategy()
    with tpu_strategy.scope():
      rois_tpu, scores_tpu = roi_generator._multilevel_propose_rois(
          raw_boxes, raw_scores, **propose_kwargs)

    # Outside of any strategy scope.
    rois_cpu, scores_cpu = roi_generator._multilevel_propose_rois(
        raw_boxes, raw_scores, **propose_kwargs)

    self.assertNDArrayNear(rois_tpu.numpy(), rois_cpu.numpy(), 1e-5)
    self.assertNDArrayNear(scores_tpu.numpy(), scores_cpu.numpy(), 1e-5)
    self.assertNDArrayNear(rois_tpu.numpy(), expected_rois, 1e-5)
    self.assertNDArrayNear(scores_tpu.numpy(), expected_scores, 1e-5)

  def test_multilevel_propose_rois_single_level(self):
    # A single level (key 2) holding a batch of two 2x2 maps with one box per
    # location. The anchors carry the same values as the raw boxes.
    level_boxes_np = np.array(
        [[[[0, 0, 10, 10], [0.01, 0.01, 9.9, 9.9]],
          [[5, 5, 10, 10], [2, 2, 8, 8]]],
         [[[2, 2, 4, 4], [3, 3, 6, 6]],
          [[3.1, 3.1, 6.1, 6.1], [1, 1, 8, 8]]]])
    level_scores_np = np.array(
        [[[[0.6], [0.9]], [[0.2], [0.3]]],
         [[[0.1], [0.8]], [[0.3], [0.5]]]])

    expected_rois = np.array(
        [[[0.01, 0.01, 9.9, 9.9], [2, 2, 8, 8], [5, 5, 10, 10], [0, 0, 0, 0]],
         [[3, 3, 6, 6], [1, 1, 8, 8], [2, 2, 4, 4], [0, 0, 0, 0]]])
    expected_scores = np.array(
        [[0.9, 0.3, 0.2, 0], [0.8, 0.5, 0.1, 0]])

    self._propose_and_verify(
        raw_boxes={2: tf.constant(level_boxes_np, dtype=tf.float32)},
        raw_scores={2: tf.constant(level_scores_np, dtype=tf.float32)},
        anchor_boxes={2: tf.constant(level_boxes_np, dtype=tf.float32)},
        num_proposals=4,
        expected_rois=expected_rois,
        expected_scores=expected_scores)

  def test_multilevel_propose_rois_two_levels(self):
    # Level 2 is a 2x2 map and level 3 a 1x1 map, both with a batch of two.
    # The anchors carry the same values as the raw boxes of their level.
    boxes_by_level = {
        2: np.array(
            [[[[0, 0, 10, 10], [0.01, 0.01, 9.99, 9.99]],
              [[5, 5, 10, 10], [2, 2, 8, 8]]],
             [[[2, 2, 2.5, 2.5], [3, 3, 6, 6]],
              [[3.1, 3.1, 6.1, 6.1], [1, 1, 8, 8]]]]),
        3: np.array(
            [[[[0, 0, 10.01, 10.01]]],
             [[[2, 2, 4.5, 4.5]]]]),
    }
    scores_by_level = {
        2: np.array(
            [[[[0.6], [0.9]], [[0.2], [0.3]]],
             [[[0.1], [0.8]], [[0.3], [0.5]]]]),
        3: np.array([[[[0.95]]], [[[0.99]]]]),
    }

    expected_rois = np.array(
        [[[0, 0, 10.01, 10.01], [0.01, 0.01, 9.99, 9.99]],
         [[2, 2, 4.5, 4.5], [3, 3, 6, 6]]])
    expected_scores = np.array([[0.95, 0.9], [0.99, 0.8]])

    self._propose_and_verify(
        raw_boxes={
            2: tf.constant(boxes_by_level[2], dtype=tf.float32),
            3: tf.constant(boxes_by_level[3], dtype=tf.float32),
        },
        raw_scores={
            2: tf.constant(scores_by_level[2], dtype=tf.float32),
            3: tf.constant(scores_by_level[3], dtype=tf.float32),
        },
        anchor_boxes={
            2: tf.constant(boxes_by_level[2], dtype=tf.float32),
            3: tf.constant(boxes_by_level[3], dtype=tf.float32),
        },
        num_proposals=2,
        expected_rois=expected_rois,
        expected_scores=expected_scores)
class MultilevelROIGeneratorTest(tf.test.TestCase):
  """Checks the config round trip of `MultilevelROIGenerator`."""

  def test_serialize_deserialize(self):
    """A layer rebuilt from `get_config()` reports the same config."""
    kwargs = dict(
        pre_nms_top_k=2000,
        pre_nms_score_threshold=0.0,
        pre_nms_min_size_threshold=0.0,
        nms_iou_threshold=0.7,
        num_proposals=1000,
        test_pre_nms_top_k=1000,
        test_pre_nms_score_threshold=0.0,
        test_pre_nms_min_size_threshold=0.0,
        test_nms_iou_threshold=0.7,
        test_num_proposals=1000,
        use_batched_nms=False,
    )
    generator = roi_generator.MultilevelROIGenerator(**kwargs)

    # The config must be exactly the constructor arguments.
    expected_config = dict(kwargs)
    self.assertEqual(generator.get_config(), expected_config)

    new_generator = roi_generator.MultilevelROIGenerator.from_config(
        generator.get_config())

    self.assertAllEqual(generator.get_config(), new_generator.get_config())


# Without this entry point, executing the file as a script runs no tests.
if __name__ == '__main__':
  tf.test.main()
official/vision/beta/modeling/layers/roi_sampler.py
0 → 100644
View file @
cc748b2a
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""ROI sampler."""
# Import libraries
import
tensorflow
as
tf
from
official.vision.beta.modeling.layers
import
box_matcher
from
official.vision.beta.modeling.layers
import
box_sampler
from
official.vision.beta.ops
import
box_ops
@tf.keras.utils.register_keras_serializable(package='Vision')
class ROISampler(tf.keras.layers.Layer):
  """Sample ROIs and assign targets to the sampled ROIs.

  Matching is delegated to a `box_matcher.BoxMatcher` and subsampling to a
  `box_sampler.BoxSampler`, both created in the constructor.
  """

  def __init__(self,
               mix_gt_boxes=True,
               num_sampled_rois=512,
               foreground_fraction=0.25,
               foreground_iou_threshold=0.5,
               background_iou_high_threshold=0.5,
               background_iou_low_threshold=0,
               **kwargs):
    """Initializes a ROI sampler.

    Args:
      mix_gt_boxes: bool, whether to mix the groundtruth boxes with proposed
        ROIs.
      num_sampled_rois: int, the number of sampled ROIs per image.
      foreground_fraction: float in [0, 1], what percentage of proposed ROIs
        should be sampled from the foreground boxes.
      foreground_iou_threshold: float, a box is considered positive if its
        IoU is >= `foreground_iou_threshold`.
      background_iou_high_threshold: float, upper end of the IoU range
        [`background_iou_low_threshold`, `background_iou_high_threshold`] in
        which a box is considered negative.
      background_iou_low_threshold: float, lower end of the IoU range
        [`background_iou_low_threshold`, `background_iou_high_threshold`] in
        which a box is considered negative.
      **kwargs: other key word arguments passed to Layer.
    """
    # The constructor arguments double as the serialized layer config.
    self._config_dict = dict(
        mix_gt_boxes=mix_gt_boxes,
        num_sampled_rois=num_sampled_rois,
        foreground_fraction=foreground_fraction,
        foreground_iou_threshold=foreground_iou_threshold,
        background_iou_high_threshold=background_iou_high_threshold,
        background_iou_low_threshold=background_iou_low_threshold,
    )

    self._matcher = box_matcher.BoxMatcher(
        foreground_iou_threshold,
        background_iou_high_threshold,
        background_iou_low_threshold)
    self._sampler = box_sampler.BoxSampler(
        num_sampled_rois, foreground_fraction)
    super(ROISampler, self).__init__(**kwargs)

  def call(self, boxes, gt_boxes, gt_classes):
    """Assigns the proposals with groundtruth classes and performs subsampling.

    Given `boxes`, `gt_boxes`, and `gt_classes`, the layer:
      1. Optionally appends `gt_boxes` to `boxes` (when `mix_gt_boxes` is set).
      2. Matches every candidate box against the groundtruth.
      3. Samples candidates from the positive/negative matches and gathers
         the boxes and targets at the sampled indices.

    Args:
      boxes: a tensor of shape of [batch_size, N, 4]. N is the number of
        proposals before groundtruth assignment. The last dimension is the
        box coordinates w.r.t. the scaled images in [ymin, xmin, ymax, xmax]
        format.
      gt_boxes: a tensor of shape of [batch_size, MAX_NUM_INSTANCES, 4].
        The coordinates of gt_boxes are in the pixel coordinates of the scaled
        image. This tensor might have padding of values -1 indicating the
        invalid box coordinates.
      gt_classes: a tensor with a shape of [batch_size, MAX_NUM_INSTANCES].
        This tensor might have paddings with values of -1 indicating the
        invalid classes.

    Returns:
      sampled_rois: a tensor of shape of [batch_size, K, 4], representing the
        coordinates of the sampled RoIs, where K is the number of the sampled
        RoIs, i.e. K = num_sampled_rois.
      sampled_gt_boxes: a tensor of shape of [batch_size, K, 4], storing the
        box coordinates of the matched groundtruth boxes of the sampled RoIs.
      sampled_gt_classes: a tensor of shape of [batch_size, K], storing the
        classes of the matched groundtruth boxes of the sampled RoIs.
      sampled_gt_indices: a tensor of shape of [batch_size, K], storing the
        indices of the sampled groundtruth boxes in the original `gt_boxes`
        tensor, i.e.
        gt_boxes[sampled_gt_indices[:, i]] = sampled_gt_boxes[:, i].
    """
    candidates = boxes
    if self._config_dict['mix_gt_boxes']:
      # Groundtruth boxes join the candidate pool along the box axis; they
      # are cast first so that both inputs of the concat share a dtype.
      gt_boxes = tf.cast(gt_boxes, dtype=boxes.dtype)
      candidates = tf.concat([boxes, gt_boxes], axis=1)

    match_results = self._matcher(candidates, gt_boxes, gt_classes)
    (matched_gt_boxes, matched_gt_classes, matched_gt_indices,
     positive_matches, negative_matches, ignored_matches) = match_results

    sampled_indices = self._sampler(
        positive_matches, negative_matches, ignored_matches)

    gathered = box_ops.gather_instances(
        sampled_indices,
        candidates,
        matched_gt_boxes,
        matched_gt_classes,
        matched_gt_indices)
    (sampled_rois, sampled_gt_boxes, sampled_gt_classes,
     sampled_gt_indices) = gathered

    return (sampled_rois, sampled_gt_boxes, sampled_gt_classes,
            sampled_gt_indices)

  def get_config(self):
    """Returns the dict of constructor arguments stored at init time."""
    return self._config_dict

  @classmethod
  def from_config(cls, config):
    """Builds a layer by passing `config` as keyword arguments to `cls`."""
    return cls(**config)
official/vision/beta/modeling/layers/roi_sampler_test.py
0 → 100644
View file @
cc748b2a
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for roi_sampler.py."""
# Import libraries
import
numpy
as
np
import
tensorflow
as
tf
from
official.vision.beta.modeling.layers
import
roi_sampler
class ROISamplerTest(tf.test.TestCase):
  """Smoke and serialization tests for `roi_sampler.ROISampler`."""

  def test_roi_sampler(self):
    """Calls the sampler in and out of a TPU scope; outputs are not checked."""
    boxes = tf.constant(
        np.array([[[0, 0, 5, 5],
                   [2.5, 2.5, 7.5, 7.5],
                   [5, 5, 10, 10],
                   [7.5, 7.5, 12.5, 12.5]]]),
        dtype=tf.float32)
    # The last groundtruth entry is -1 padding in both boxes and classes.
    gt_boxes = tf.constant(
        np.array([[[10, 10, 15, 15],
                   [2.5, 2.5, 7.5, 7.5],
                   [-1, -1, -1, -1]]]),
        dtype=tf.float32)
    gt_classes = tf.constant(np.array([[2, 10, -1]]), dtype=tf.int32)

    sampler = roi_sampler.ROISampler(
        mix_gt_boxes=True,
        num_sampled_rois=2,
        foreground_fraction=0.5,
        foreground_iou_threshold=0.5,
        background_iou_high_threshold=0.5,
        background_iou_low_threshold=0.0)

    # Inside a TPUStrategy scope.
    tpu_strategy = tf.distribute.experimental.TPUStrategy()
    with tpu_strategy.scope():
      _ = sampler(boxes, gt_boxes, gt_classes)

    # Outside of any strategy scope.
    _ = sampler(boxes, gt_boxes, gt_classes)

  def test_serialize_deserialize(self):
    """A layer rebuilt from `get_config()` reports the same config."""
    init_args = {
        'mix_gt_boxes': True,
        'num_sampled_rois': 512,
        'foreground_fraction': 0.25,
        'foreground_iou_threshold': 0.5,
        'background_iou_high_threshold': 0.5,
        'background_iou_low_threshold': 0.5,
    }
    sampler = roi_sampler.ROISampler(**init_args)

    # The config must be exactly the constructor arguments.
    self.assertEqual(sampler.get_config(), dict(init_args))

    restored = roi_sampler.ROISampler.from_config(sampler.get_config())

    self.assertAllEqual(sampler.get_config(), restored.get_config())
# Run the test cases of this module when the file is executed as a script.
if __name__ == '__main__':
  tf.test.main()
official/vision/beta/modeling/maskrcnn_model.py
0 → 100644
View file @
cc748b2a
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Mask R-CNN model."""
# Import libraries
import
tensorflow
as
tf
from
official.vision.beta.ops
import
box_ops
@tf.keras.utils.register_keras_serializable(package='Vision')
class MaskRCNNModel(tf.keras.Model):
  """The Mask R-CNN model.

  Wires together a backbone, an optional decoder, an RPN head, RoI
  generation/sampling/alignment, a detection head and, when a mask head is
  given, the mask branch. All components are supplied by the caller.
  """

  def __init__(self,
               backbone,
               decoder,
               rpn_head,
               detection_head,
               roi_generator,
               roi_sampler,
               roi_aligner,
               detection_generator,
               mask_head=None,
               mask_sampler=None,
               mask_roi_aligner=None,
               **kwargs):
    """Initializes the Mask R-CNN model.

    Args:
      backbone: `tf.keras.Model`, the backbone network.
      decoder: `tf.keras.Model`, the decoder network. May be None, in which
        case the backbone features are used directly.
      rpn_head: the RPN head.
      detection_head: the detection head.
      roi_generator: the ROI generator.
      roi_sampler: the ROI sampler.
      roi_aligner: the ROI aligner.
      detection_generator: the detection generator.
      mask_head: the mask head. Passing a non-None value enables the mask
        branch.
      mask_sampler: the mask sampler. Required when `mask_head` is given.
      mask_roi_aligner: the ROI aligner for mask prediction. Required when
        `mask_head` is given.
      **kwargs: keyword arguments to be passed.

    Raises:
      ValueError: if `mask_head` is given without `mask_sampler` or without
        `mask_roi_aligner`.
    """
    super(MaskRCNNModel, self).__init__(**kwargs)
    self._config_dict = {
        'backbone': backbone,
        'decoder': decoder,
        'rpn_head': rpn_head,
        'detection_head': detection_head,
        'roi_generator': roi_generator,
        'roi_sampler': roi_sampler,
        'roi_aligner': roi_aligner,
        'detection_generator': detection_generator,
        'mask_head': mask_head,
        'mask_sampler': mask_sampler,
        'mask_roi_aligner': mask_roi_aligner,
    }
    self.backbone = backbone
    self.decoder = decoder
    self.rpn_head = rpn_head
    self.detection_head = detection_head
    self.roi_generator = roi_generator
    self.roi_sampler = roi_sampler
    self.roi_aligner = roi_aligner
    self.detection_generator = detection_generator
    # The presence of a mask head is what turns the mask branch on.
    self._include_mask = mask_head is not None
    self.mask_head = mask_head
    if self._include_mask and mask_sampler is None:
      raise ValueError('`mask_sampler` is not provided in Mask R-CNN.')
    self.mask_sampler = mask_sampler
    if self._include_mask and mask_roi_aligner is None:
      raise ValueError('`mask_roi_aligner` is not provided in Mask R-CNN.')
    self.mask_roi_aligner = mask_roi_aligner

  def call(self,
           images,
           image_shape,
           anchor_boxes=None,
           gt_boxes=None,
           gt_classes=None,
           gt_masks=None,
           training=None):
    """Runs the model and collects its outputs in a dict.

    Args:
      images: the input passed to the backbone.
      image_shape: forwarded to the ROI generator and, at inference, to the
        detection generator.
      anchor_boxes: forwarded to the ROI generator.
      gt_boxes: groundtruth boxes forwarded to the ROI sampler. Required when
        `training` is truthy.
      gt_classes: groundtruth classes forwarded to the ROI sampler. Required
        when `training` is truthy.
      gt_masks: groundtruth masks forwarded to the mask sampler. Required
        when `training` is truthy and the model has a mask head.
      training: whether to run in training mode. Any falsy value, including
        None, selects the inference path.

    Returns:
      A dict that always holds `rpn_boxes` and `rpn_scores`. In training it
      also holds `class_targets`, `box_targets`, `class_outputs` and
      `box_outputs`, plus `mask_class_targets`, `mask_targets` and
      `mask_outputs` when the mask branch is enabled. At inference it holds
      `detection_boxes`, `detection_scores`, `detection_classes` and
      `num_detections`, plus `detection_masks` when the mask branch is
      enabled.

    Raises:
      ValueError: if a groundtruth input required in training mode is None.
    """
    # Fail early with a clear message instead of an unrelated error raised
    # from inside the samplers.
    if training:
      if gt_boxes is None or gt_classes is None:
        raise ValueError(
            '`gt_boxes` and `gt_classes` must be provided in training mode.')
      if self._include_mask and gt_masks is None:
        raise ValueError(
            '`gt_masks` must be provided in training mode when the mask '
            'branch is enabled.')

    model_outputs = {}

    # Feature extraction.
    features = self.backbone(images)
    # Compare against None explicitly: whether a decoder was supplied must
    # not depend on the truth value of the decoder object.
    if self.decoder is not None:
      features = self.decoder(features)

    # Region proposal network.
    rpn_scores, rpn_boxes = self.rpn_head(features)

    model_outputs.update({
        'rpn_boxes': rpn_boxes,
        'rpn_scores': rpn_scores
    })

    # Generate RoIs.
    rois, _ = self.roi_generator(
        rpn_boxes, rpn_scores, anchor_boxes, image_shape, training)

    if training:
      # Proposals are treated as constants by the second stage.
      rois = tf.stop_gradient(rois)

      rois, matched_gt_boxes, matched_gt_classes, matched_gt_indices = (
          self.roi_sampler(rois, gt_boxes, gt_classes))
      # Assign the box regression targets for the 2nd stage.
      box_targets = box_ops.encode_boxes(
          matched_gt_boxes, rois, weights=[10.0, 10.0, 5.0, 5.0])
      # If the target is background, the box target is set to all 0s.
      box_targets = tf.where(
          tf.tile(
              tf.expand_dims(tf.equal(matched_gt_classes, 0), axis=-1),
              [1, 1, 4]),
          tf.zeros_like(box_targets),
          box_targets)
      model_outputs.update({
          'class_targets': matched_gt_classes,
          'box_targets': box_targets,
      })

    # RoI align.
    roi_features = self.roi_aligner(features, rois)

    # Detection head.
    raw_scores, raw_boxes = self.detection_head(roi_features)

    if training:
      model_outputs.update({
          'class_outputs': raw_scores,
          'box_outputs': raw_boxes,
      })
    else:
      # Post-processing.
      detections = self.detection_generator(
          raw_boxes, raw_scores, rois, image_shape)
      model_outputs.update({
          'detection_boxes': detections['detection_boxes'],
          'detection_scores': detections['detection_scores'],
          'detection_classes': detections['detection_classes'],
          'num_detections': detections['num_detections'],
      })

    if not self._include_mask:
      return model_outputs

    if training:
      rois, roi_classes, roi_masks = self.mask_sampler(
          rois,
          matched_gt_boxes,
          matched_gt_classes,
          matched_gt_indices,
          gt_masks)
      # Mask targets are labels, so no gradient flows through them.
      roi_masks = tf.stop_gradient(roi_masks)

      model_outputs.update({
          'mask_class_targets': roi_classes,
          'mask_targets': roi_masks,
      })
    else:
      # At inference the masks are predicted for the final detections.
      rois = model_outputs['detection_boxes']
      roi_classes = model_outputs['detection_classes']

    # Mask RoI align.
    mask_roi_features = self.mask_roi_aligner(features, rois)

    # Mask head.
    raw_masks = self.mask_head([mask_roi_features, roi_classes])

    if training:
      model_outputs.update({
          'mask_outputs': raw_masks,
      })
    else:
      model_outputs.update({
          'detection_masks': tf.math.sigmoid(raw_masks),
      })
    return model_outputs

  def get_config(self):
    """Returns the dict of constructor arguments stored at init time."""
    return self._config_dict

  @classmethod
  def from_config(cls, config):
    """Builds a model by passing `config` as keyword arguments to `cls`."""
    return cls(**config)
Prev
1
2
3
4
5
6
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment