Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
ResNet50_tensorflow
Commits
d305396d
"docs/vscode:/vscode.git/clone" did not exist on "1994dbcb5e62bd8d0c60e5d5d6bf4b580653c74c"
Commit
d305396d
authored
May 20, 2020
by
A. Unique TensorFlower
Browse files
ShapeMask Heads and Losses.
PiperOrigin-RevId: 312624281
parent
eb49ae73
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
324 additions
and
366 deletions
+324
-366
official/vision/detection/evaluation/factory.py
official/vision/detection/evaluation/factory.py
+5
-0
official/vision/detection/modeling/architecture/factory.py
official/vision/detection/modeling/architecture/factory.py
+36
-1
official/vision/detection/modeling/architecture/heads.py
official/vision/detection/modeling/architecture/heads.py
+228
-359
official/vision/detection/modeling/architecture/nn_ops.py
official/vision/detection/modeling/architecture/nn_ops.py
+1
-2
official/vision/detection/modeling/losses.py
official/vision/detection/modeling/losses.py
+54
-4
No files found.
official/vision/detection/evaluation/factory.py
View file @
d305396d
...
@@ -29,6 +29,11 @@ def evaluator_generator(params):
...
@@ -29,6 +29,11 @@ def evaluator_generator(params):
elif
params
.
type
==
'box_and_mask'
:
elif
params
.
type
==
'box_and_mask'
:
evaluator
=
coco_evaluator
.
COCOEvaluator
(
evaluator
=
coco_evaluator
.
COCOEvaluator
(
annotation_file
=
params
.
val_json_file
,
include_mask
=
True
)
annotation_file
=
params
.
val_json_file
,
include_mask
=
True
)
elif
params
.
type
==
'shapemask_box_and_mask'
:
evaluator
=
coco_evaluator
.
ShapeMaskCOCOEvaluator
(
mask_eval_class
=
params
.
mask_eval_class
,
annotation_file
=
params
.
val_json_file
,
include_mask
=
True
)
else
:
else
:
raise
ValueError
(
'Evaluator %s is not supported.'
%
params
.
type
)
raise
ValueError
(
'Evaluator %s is not supported.'
%
params
.
type
)
...
...
official/vision/detection/modeling/architecture/factory.py
View file @
d305396d
...
@@ -85,8 +85,8 @@ def retinanet_head_generator(params):
...
@@ -85,8 +85,8 @@ def retinanet_head_generator(params):
def
rpn_head_generator
(
params
):
def
rpn_head_generator
(
params
):
head_params
=
params
.
rpn_head
"""Generator function for RPN head architecture."""
"""Generator function for RPN head architecture."""
head_params
=
params
.
rpn_head
return
heads
.
RpnHead
(
return
heads
.
RpnHead
(
params
.
architecture
.
min_level
,
params
.
architecture
.
min_level
,
params
.
architecture
.
max_level
,
params
.
architecture
.
max_level
,
...
@@ -126,3 +126,38 @@ def mask_rcnn_head_generator(params):
...
@@ -126,3 +126,38 @@ def mask_rcnn_head_generator(params):
params
.
norm_activation
.
activation
,
params
.
norm_activation
.
activation
,
head_params
.
use_batch_norm
,
head_params
.
use_batch_norm
,
norm_activation
=
norm_activation_generator
(
params
.
norm_activation
))
norm_activation
=
norm_activation_generator
(
params
.
norm_activation
))
def
shapeprior_head_generator
(
params
):
"""Generator function for shape prior head architecture."""
head_params
=
params
.
shapemask_head
return
heads
.
ShapemaskPriorHead
(
params
.
architecture
.
num_classes
,
head_params
.
num_downsample_channels
,
head_params
.
mask_crop_size
,
head_params
.
use_category_for_mask
,
head_params
.
shape_prior_path
)
def
coarsemask_head_generator
(
params
):
"""Generator function for ShapeMask coarse mask head architecture."""
head_params
=
params
.
shapemask_head
return
heads
.
ShapemaskCoarsemaskHead
(
params
.
architecture
.
num_classes
,
head_params
.
num_downsample_channels
,
head_params
.
mask_crop_size
,
head_params
.
use_category_for_mask
,
head_params
.
num_convs
,
norm_activation
=
norm_activation_generator
(
params
.
norm_activation
))
def
finemask_head_generator
(
params
):
"""Generator function for Shapemask fine mask head architecture."""
head_params
=
params
.
shapemask_head
return
heads
.
ShapemaskFinemaskHead
(
params
.
architecture
.
num_classes
,
head_params
.
num_downsample_channels
,
head_params
.
mask_crop_size
,
head_params
.
use_category_for_mask
,
head_params
.
num_convs
,
head_params
.
upsample_factor
)
official/vision/detection/modeling/architecture/heads.py
View file @
d305396d
...
@@ -19,9 +19,7 @@ from __future__ import division
...
@@ -19,9 +19,7 @@ from __future__ import division
from
__future__
import
print_function
from
__future__
import
print_function
import
functools
import
functools
import
pickle
from
absl
import
logging
import
numpy
as
np
import
numpy
as
np
import
tensorflow
as
tf
import
tensorflow
as
tf
from
tensorflow.python.keras
import
backend
from
tensorflow.python.keras
import
backend
...
@@ -56,6 +54,7 @@ class RpnHead(tf.keras.layers.Layer):
...
@@ -56,6 +54,7 @@ class RpnHead(tf.keras.layers.Layer):
intermediate conv layers.
intermediate conv layers.
use_separable_conv: `bool`, indicating whether the separable conv layers
use_separable_conv: `bool`, indicating whether the separable conv layers
is used.
is used.
activation: activation function. Support 'relu' and 'swish'.
use_batch_norm: 'bool', indicating whether batchnorm layers are added.
use_batch_norm: 'bool', indicating whether batchnorm layers are added.
norm_activation: an operation that includes a normalization layer
norm_activation: an operation that includes a normalization layer
followed by an optional activation layer.
followed by an optional activation layer.
...
@@ -165,6 +164,7 @@ class FastrcnnHead(tf.keras.layers.Layer):
...
@@ -165,6 +164,7 @@ class FastrcnnHead(tf.keras.layers.Layer):
predictions.
predictions.
fc_dims: `int` number that represents the number of dimension of the FC
fc_dims: `int` number that represents the number of dimension of the FC
layers.
layers.
activation: activation function. Support 'relu' and 'swish'.
use_batch_norm: 'bool', indicating whether batchnorm layers are added.
use_batch_norm: 'bool', indicating whether batchnorm layers are added.
norm_activation: an operation that includes a normalization layer
norm_activation: an operation that includes a normalization layer
followed by an optional activation layer.
followed by an optional activation layer.
...
@@ -296,6 +296,7 @@ class MaskrcnnHead(tf.keras.layers.Layer):
...
@@ -296,6 +296,7 @@ class MaskrcnnHead(tf.keras.layers.Layer):
intermediate conv layers.
intermediate conv layers.
use_separable_conv: `bool`, indicating whether the separable conv layers
use_separable_conv: `bool`, indicating whether the separable conv layers
is used.
is used.
activation: activation function. Support 'relu' and 'swish'.
use_batch_norm: 'bool', indicating whether batchnorm layers are added.
use_batch_norm: 'bool', indicating whether batchnorm layers are added.
norm_activation: an operation that includes a normalization layer
norm_activation: an operation that includes a normalization layer
followed by an optional activation layer.
followed by an optional activation layer.
...
@@ -566,8 +567,8 @@ class RetinanetHead(object):
...
@@ -566,8 +567,8 @@ class RetinanetHead(object):
with
self
.
_class_name_scope
:
with
self
.
_class_name_scope
:
for
i
in
range
(
self
.
_num_convs
):
for
i
in
range
(
self
.
_num_convs
):
features
=
self
.
_class_conv
[
i
](
features
)
features
=
self
.
_class_conv
[
i
](
features
)
# The convolution layers in the class net are shared among all levels,
but
# The convolution layers in the class net are shared among all levels,
# each level has its batch normlization to capture the statistical
#
but
each level has its batch normlization to capture the statistical
# difference among different levels.
# difference among different levels.
name
=
self
.
_class_net_batch_norm_name
(
i
,
level
)
name
=
self
.
_class_net_batch_norm_name
(
i
,
level
)
features
=
self
.
_class_norm_activation
[
name
](
features
=
self
.
_class_norm_activation
[
name
](
...
@@ -601,12 +602,7 @@ class ShapemaskPriorHead(object):
...
@@ -601,12 +602,7 @@ class ShapemaskPriorHead(object):
num_downsample_channels
,
num_downsample_channels
,
mask_crop_size
,
mask_crop_size
,
use_category_for_mask
,
use_category_for_mask
,
num_of_instances
,
shape_prior_path
):
min_mask_level
,
max_mask_level
,
num_clusters
,
temperature
,
shape_prior_path
=
None
):
"""Initialize params to build RetinaNet head.
"""Initialize params to build RetinaNet head.
Args:
Args:
...
@@ -614,30 +610,18 @@ class ShapemaskPriorHead(object):
...
@@ -614,30 +610,18 @@ class ShapemaskPriorHead(object):
num_downsample_channels: number of channels in mask branch.
num_downsample_channels: number of channels in mask branch.
mask_crop_size: feature crop size.
mask_crop_size: feature crop size.
use_category_for_mask: use class information in mask branch.
use_category_for_mask: use class information in mask branch.
num_of_instances: number of instances to sample in training time.
min_mask_level: minimum FPN level to crop mask feature from.
max_mask_level: maximum FPN level to crop mask feature from.
num_clusters: number of clusters to use in K-Means.
temperature: the temperature for shape prior learning.
shape_prior_path: the path to load shape priors.
shape_prior_path: the path to load shape priors.
"""
"""
self
.
_mask_num_classes
=
num_classes
self
.
_mask_num_classes
=
num_classes
if
use_category_for_mask
else
1
self
.
_num_downsample_channels
=
num_downsample_channels
self
.
_num_downsample_channels
=
num_downsample_channels
self
.
_mask_crop_size
=
mask_crop_size
self
.
_mask_crop_size
=
mask_crop_size
self
.
_use_category_for_mask
=
use_category_for_mask
self
.
_num_of_instances
=
num_of_instances
self
.
_min_mask_level
=
min_mask_level
self
.
_max_mask_level
=
max_mask_level
self
.
_num_clusters
=
num_clusters
self
.
_temperature
=
temperature
self
.
_shape_prior_path
=
shape_prior_path
self
.
_shape_prior_path
=
shape_prior_path
self
.
_use_category_for_mask
=
use_category_for_mask
self
.
_shape_prior_fc
=
tf
.
keras
.
layers
.
Dense
(
self
.
_num_downsample_channels
,
name
=
'shape-prior-fc'
)
def
__call__
(
self
,
def
__call__
(
self
,
fpn_features
,
boxes
,
outer_boxes
,
classes
,
is_training
):
fpn_features
,
boxes
,
outer_boxes
,
classes
,
is_training
=
None
):
"""Generate the detection priors from the box detections and FPN features.
"""Generate the detection priors from the box detections and FPN features.
This corresponds to the Fig. 4 of the ShapeMask paper at
This corresponds to the Fig. 4 of the ShapeMask paper at
...
@@ -654,221 +638,96 @@ class ShapemaskPriorHead(object):
...
@@ -654,221 +638,96 @@ class ShapemaskPriorHead(object):
is_training: training mode or not.
is_training: training mode or not.
Returns:
Returns:
crop
_features: a float Tensor of shape [batch_size * num_instances,
instance
_features: a float Tensor of shape [batch_size * num_instances,
mask_crop_size, mask_crop_size, num_downsample_channels]. This is the
mask_crop_size, mask_crop_size, num_downsample_channels]. This is the
instance feature crop.
instance feature crop.
detection_priors: A float Tensor of shape [batch_size * num_instances,
detection_priors: A float Tensor of shape [batch_size * num_instances,
mask_size, mask_size, 1].
mask_size, mask_size, 1].
"""
"""
with
backend
.
get_graph
().
as_default
():
with
backend
.
get_graph
().
as_default
(),
tf
.
name_scope
(
'prior_mask'
):
# loads class specific or agnostic shape priors
batch_size
,
num_instances
,
_
=
boxes
.
get_shape
().
as_list
()
if
self
.
_shape_prior_path
:
outer_boxes
=
tf
.
cast
(
outer_boxes
,
tf
.
float32
)
if
self
.
_use_category_for_mask
:
boxes
=
tf
.
cast
(
boxes
,
tf
.
float32
)
fid
=
tf
.
io
.
gfile
.
GFile
(
self
.
_shape_prior_path
,
'rb'
)
instance_features
=
spatial_transform_ops
.
multilevel_crop_and_resize
(
# The encoding='bytes' options is for incompatibility between python2
fpn_features
,
outer_boxes
,
output_size
=
self
.
_mask_crop_size
)
# and python3 pickle.
instance_features
=
self
.
_shape_prior_fc
(
instance_features
)
class_tups
=
pickle
.
load
(
fid
,
encoding
=
'bytes'
)
max_class_id
=
class_tups
[
-
1
][
0
]
+
1
shape_priors
=
self
.
_get_priors
()
class_masks
=
np
.
zeros
((
max_class_id
,
self
.
_num_clusters
,
self
.
_mask_crop_size
,
self
.
_mask_crop_size
),
# Get uniform priors for each outer box.
dtype
=
np
.
float32
)
uniform_priors
=
tf
.
ones
([
batch_size
,
num_instances
,
self
.
_mask_crop_size
,
for
cls_id
,
_
,
cls_mask
in
class_tups
:
self
.
_mask_crop_size
])
assert
cls_mask
.
shape
==
(
self
.
_num_clusters
,
self
.
_mask_crop_size
**
2
)
class_masks
[
cls_id
]
=
cls_mask
.
reshape
(
self
.
_num_clusters
,
self
.
_mask_crop_size
,
self
.
_mask_crop_size
)
self
.
class_priors
=
tf
.
convert_to_tensor
(
value
=
class_masks
,
dtype
=
tf
.
float32
)
else
:
npy_path
=
tf
.
io
.
gfile
.
GFile
(
self
.
_shape_prior_path
)
class_np_masks
=
np
.
load
(
npy_path
)
assert
class_np_masks
.
shape
==
(
self
.
_num_clusters
,
self
.
_mask_crop_size
,
self
.
_mask_crop_size
),
'Invalid priors!!!'
self
.
class_priors
=
tf
.
convert_to_tensor
(
value
=
class_np_masks
,
dtype
=
tf
.
float32
)
else
:
self
.
class_priors
=
tf
.
zeros
(
[
self
.
_num_clusters
,
self
.
_mask_crop_size
,
self
.
_mask_crop_size
],
tf
.
float32
)
batch_size
=
boxes
.
get_shape
()[
0
]
min_level_shape
=
fpn_features
[
self
.
_min_mask_level
].
get_shape
().
as_list
()
self
.
_max_feature_size
=
min_level_shape
[
1
]
detection_prior_levels
=
self
.
_compute_box_levels
(
boxes
)
level_outer_boxes
=
outer_boxes
/
tf
.
pow
(
2.
,
tf
.
expand_dims
(
detection_prior_levels
,
-
1
))
detection_prior_levels
=
tf
.
cast
(
detection_prior_levels
,
tf
.
int32
)
uniform_priors
=
spatial_transform_ops
.
crop_mask_in_target_box
(
uniform_priors
=
spatial_transform_ops
.
crop_mask_in_target_box
(
tf
.
ones
([
uniform_priors
,
boxes
,
outer_boxes
,
self
.
_mask_crop_size
)
batch_size
,
self
.
_num_of_instances
,
self
.
_mask_crop_size
,
self
.
_mask_crop_size
# Classify shape priors using uniform priors + instance features.
],
tf
.
float32
),
boxes
,
outer_boxes
,
self
.
_mask_crop_size
)
prior_distribution
=
self
.
_classify_shape_priors
(
tf
.
cast
(
instance_features
,
tf
.
float32
),
uniform_priors
,
classes
)
# Prepare crop features.
multi_level_features
=
self
.
_get_multilevel_features
(
fpn_features
)
instance_priors
=
tf
.
gather
(
shape_priors
,
classes
)
crop_features
=
spatial_transform_ops
.
single_level_feature_crop
(
instance_priors
*=
tf
.
expand_dims
(
tf
.
expand_dims
(
multi_level_features
,
level_outer_boxes
,
detection_prior_levels
,
tf
.
cast
(
prior_distribution
,
tf
.
float32
),
axis
=-
1
),
axis
=-
1
)
self
.
_min_mask_level
,
self
.
_mask_crop_size
)
instance_priors
=
tf
.
reduce_sum
(
instance_priors
,
axis
=
2
)
detection_priors
=
spatial_transform_ops
.
crop_mask_in_target_box
(
# Predict and fuse shape priors.
instance_priors
,
boxes
,
outer_boxes
,
self
.
_mask_crop_size
)
shape_weights
=
self
.
_classify_and_fuse_detection_priors
(
uniform_priors
,
classes
,
crop_features
)
return
instance_features
,
detection_priors
fused_shape_priors
=
self
.
_fuse_priors
(
shape_weights
,
classes
)
fused_shape_priors
=
tf
.
reshape
(
fused_shape_priors
,
[
def
_get_priors
(
self
):
batch_size
,
self
.
_num_of_instances
,
self
.
_mask_crop_size
,
"""Load shape priors from file."""
self
.
_mask_crop_size
# loads class specific or agnostic shape priors
])
if
self
.
_shape_prior_path
:
predicted_detection_priors
=
spatial_transform_ops
.
crop_mask_in_target_box
(
# Priors are loaded into shape [mask_num_classes, num_clusters, 32, 32].
fused_shape_priors
,
boxes
,
outer_boxes
,
self
.
_mask_crop_size
)
priors
=
np
.
load
(
tf
.
io
.
gfile
.
GFile
(
self
.
_shape_prior_path
,
'rb'
))
predicted_detection_priors
=
tf
.
reshape
(
priors
=
tf
.
convert_to_tensor
(
priors
,
dtype
=
tf
.
float32
)
predicted_detection_priors
,
self
.
_num_clusters
=
priors
.
get_shape
().
as_list
()[
1
]
[
-
1
,
self
.
_mask_crop_size
,
self
.
_mask_crop_size
,
1
])
else
:
# If prior path does not exist, do not use priors, i.e., pirors equal to
return
crop_features
,
predicted_detection_priors
# uniform empty 32x32 patch.
self
.
_num_clusters
=
1
def
_get_multilevel_features
(
self
,
fpn_features
):
priors
=
tf
.
zeros
([
self
.
_mask_num_classes
,
self
.
_num_clusters
,
"""Get multilevel features from FPN feature dictionary into one tensor.
self
.
_mask_crop_size
,
self
.
_mask_crop_size
])
return
priors
Args:
fpn_features: a dictionary of FPN features.
def
_classify_shape_priors
(
self
,
features
,
uniform_priors
,
classes
):
Returns:
features: a float tensor of shape [batch_size, num_levels,
max_feature_size, max_feature_size, num_downsample_channels].
"""
# TODO(yeqing): Recover reuse=tf.AUTO_REUSE logic.
with
tf
.
name_scope
(
'masknet'
):
mask_feats
=
{}
# Reduce the feature dimension at each FPN level by convolution.
for
feat_level
in
range
(
self
.
_min_mask_level
,
self
.
_max_mask_level
+
1
):
mask_feats
[
feat_level
]
=
tf
.
keras
.
layers
.
Conv2D
(
self
.
_num_downsample_channels
,
kernel_size
=
(
1
,
1
),
bias_initializer
=
tf
.
zeros_initializer
(),
kernel_initializer
=
tf
.
keras
.
initializers
.
RandomNormal
(
stddev
=
0.01
),
padding
=
'same'
,
name
=
'mask-downsample'
)(
fpn_features
[
feat_level
])
# Concat features through padding to the max size.
features
=
[
mask_feats
[
self
.
_min_mask_level
]]
for
feat_level
in
range
(
self
.
_min_mask_level
+
1
,
self
.
_max_mask_level
+
1
):
features
.
append
(
tf
.
image
.
pad_to_bounding_box
(
mask_feats
[
feat_level
],
0
,
0
,
self
.
_max_feature_size
,
self
.
_max_feature_size
))
features
=
tf
.
stack
(
features
,
axis
=
1
)
return
features
def
_compute_box_levels
(
self
,
boxes
):
"""Compute the box FPN levels.
Args:
boxes: a float tensor of shape [batch_size, num_instances, 4].
Returns:
levels: a int tensor of shape [batch_size, num_instances].
"""
object_sizes
=
tf
.
stack
([
boxes
[:,
:,
2
]
-
boxes
[:,
:,
0
],
boxes
[:,
:,
3
]
-
boxes
[:,
:,
1
],
],
axis
=
2
)
object_sizes
=
tf
.
reduce_max
(
input_tensor
=
object_sizes
,
axis
=
2
)
ratios
=
object_sizes
/
self
.
_mask_crop_size
levels
=
tf
.
math
.
ceil
(
tf
.
math
.
log
(
ratios
)
/
tf
.
math
.
log
(
2.
))
levels
=
tf
.
maximum
(
tf
.
minimum
(
levels
,
self
.
_max_mask_level
),
self
.
_min_mask_level
)
return
levels
def
_classify_and_fuse_detection_priors
(
self
,
uniform_priors
,
detection_prior_classes
,
crop_features
):
"""Classify the uniform prior by predicting the shape modes.
"""Classify the uniform prior by predicting the shape modes.
Classify the object crop features into K modes of the clusters for each
Classify the object crop features into K modes of the clusters for each
category.
category.
Args:
Args:
features: A float Tensor of shape [batch_size, num_instances,
mask_size, mask_size, num_channels].
uniform_priors: A float Tensor of shape [batch_size, num_instances,
uniform_priors: A float Tensor of shape [batch_size, num_instances,
mask_size, mask_size] representing the uniform detection priors.
mask_size, mask_size] representing the uniform detection priors.
detection_prior_
classes: A int Tensor of shape [batch_size, num_instances]
classes: A int Tensor of shape [batch_size, num_instances]
of detection class ids.
of detection class ids.
crop_features: A float Tensor of shape [batch_size * num_instances,
mask_size, mask_size, num_channels].
Returns:
Returns:
shape_weights
: A float Tensor of shape
prior_distribution
: A float Tensor of shape
[batch_size
*
num_instances, num_clusters] representing the classifier
[batch_size
,
num_instances, num_clusters] representing the classifier
output probability over all possible shapes.
output probability over all possible shapes.
"""
"""
location_detection_priors
=
tf
.
reshape
(
uniform_priors
,
[
-
1
,
self
.
_mask_crop_size
,
self
.
_mask_crop_size
,
1
])
# Generate image embedding to shape.
fused_shape_features
=
crop_features
*
location_detection_priors
shape_embedding
=
tf
.
reduce_mean
(
input_tensor
=
fused_shape_features
,
axis
=
(
1
,
2
))
if
not
self
.
_use_category_for_mask
:
# TODO(weicheng) use custom op for performance
shape_logits
=
tf
.
keras
.
layers
.
Dense
(
self
.
_num_clusters
,
kernel_initializer
=
tf
.
keras
.
initializers
.
RandomNormal
(
stddev
=
0.01
))(
shape_embedding
)
shape_logits
=
tf
.
reshape
(
shape_logits
,
[
-
1
,
self
.
_num_clusters
])
/
self
.
_temperature
shape_weights
=
tf
.
nn
.
softmax
(
shape_logits
,
name
=
'shape_prior_weights'
)
else
:
shape_logits
=
tf
.
keras
.
layers
.
Dense
(
self
.
_mask_num_classes
*
self
.
_num_clusters
,
kernel_initializer
=
tf
.
keras
.
initializers
.
RandomNormal
(
stddev
=
0.01
))(
shape_embedding
)
shape_logits
=
tf
.
reshape
(
shape_logits
,
[
-
1
,
self
.
_mask_num_classes
,
self
.
_num_clusters
])
training_classes
=
tf
.
reshape
(
detection_prior_classes
,
[
-
1
])
class_idx
=
tf
.
stack
(
[
tf
.
range
(
tf
.
size
(
input
=
training_classes
)),
training_classes
-
1
],
axis
=
1
)
shape_logits
=
tf
.
gather_nd
(
shape_logits
,
class_idx
)
/
self
.
_temperature
shape_weights
=
tf
.
nn
.
softmax
(
shape_logits
,
name
=
'shape_prior_weights'
)
return
shape_weights
def
_fuse_priors
(
self
,
shape_weights
,
detection_prior_classes
):
"""Fuse shape priors by the predicted shape probability.
Args:
shape_weights: A float Tensor of shape [batch_size * num_instances,
num_clusters] of predicted shape probability distribution.
detection_prior_classes: A int Tensor of shape [batch_size, num_instances]
of detection class ids.
Returns:
batch_size
,
num_instances
,
_
,
_
,
_
=
features
.
get_shape
().
as_list
()
detection_priors: A float Tensor of shape [batch_size * num_instances,
features
*=
tf
.
expand_dims
(
uniform_priors
,
axis
=-
1
)
mask_size, mask_size, 1].
# Reduce spatial dimension of features. The features have shape
"""
# [batch_size, num_instances, num_channels].
features
=
tf
.
reduce_mean
(
features
,
axis
=
(
2
,
3
))
logits
=
tf
.
keras
.
layers
.
Dense
(
self
.
_mask_num_classes
*
self
.
_num_clusters
,
kernel_initializer
=
tf
.
random_normal_initializer
(
stddev
=
0.01
))(
features
)
logits
=
tf
.
reshape
(
logits
,
[
batch_size
,
num_instances
,
self
.
_mask_num_classes
,
self
.
_num_clusters
])
if
self
.
_use_category_for_mask
:
if
self
.
_use_category_for_mask
:
object_class_priors
=
tf
.
gather
(
logits
=
tf
.
gather
(
logits
,
tf
.
expand_dims
(
classes
,
axis
=-
1
),
batch_dims
=
2
)
self
.
class_priors
,
detection_prior_classes
)
logits
=
tf
.
squeeze
(
logits
,
axis
=
2
)
else
:
else
:
num_batch_instances
=
shape_weights
.
get_shape
()[
0
]
logits
=
logits
[:,
:,
0
,
:]
object_class_priors
=
tf
.
tile
(
tf
.
expand_dims
(
self
.
class_priors
,
0
),
distribution
=
tf
.
nn
.
softmax
(
logits
,
name
=
'shape_prior_weights'
)
[
num_batch_instances
,
1
,
1
,
1
])
return
distribution
vector_class_priors
=
tf
.
reshape
(
object_class_priors
,
[
-
1
,
self
.
_num_clusters
,
self
.
_mask_crop_size
*
self
.
_mask_crop_size
])
detection_priors
=
tf
.
matmul
(
tf
.
expand_dims
(
shape_weights
,
1
),
vector_class_priors
)[:,
0
,
:]
detection_priors
=
tf
.
reshape
(
detection_priors
,
[
-
1
,
self
.
_mask_crop_size
,
self
.
_mask_crop_size
,
1
])
return
detection_priors
class
ShapemaskCoarsemaskHead
(
object
):
class
ShapemaskCoarsemaskHead
(
object
):
...
@@ -879,7 +738,8 @@ class ShapemaskCoarsemaskHead(object):
...
@@ -879,7 +738,8 @@ class ShapemaskCoarsemaskHead(object):
num_downsample_channels
,
num_downsample_channels
,
mask_crop_size
,
mask_crop_size
,
use_category_for_mask
,
use_category_for_mask
,
num_convs
):
num_convs
,
norm_activation
=
nn_ops
.
norm_activation_builder
()):
"""Initialize params to build ShapeMask coarse and fine prediction head.
"""Initialize params to build ShapeMask coarse and fine prediction head.
Args:
Args:
...
@@ -889,118 +749,106 @@ class ShapemaskCoarsemaskHead(object):
...
@@ -889,118 +749,106 @@ class ShapemaskCoarsemaskHead(object):
use_category_for_mask: use class information in mask branch.
use_category_for_mask: use class information in mask branch.
num_convs: `int` number of stacked convolution before the last prediction
num_convs: `int` number of stacked convolution before the last prediction
layer.
layer.
norm_activation: an operation that includes a normalization layer
followed by an optional activation layer.
"""
"""
self
.
_mask_num_classes
=
num_classes
self
.
_mask_num_classes
=
num_classes
if
use_category_for_mask
else
1
self
.
_use_category_for_mask
=
use_category_for_mask
self
.
_num_downsample_channels
=
num_downsample_channels
self
.
_num_downsample_channels
=
num_downsample_channels
self
.
_mask_crop_size
=
mask_crop_size
self
.
_mask_crop_size
=
mask_crop_size
self
.
_use_category_for_mask
=
use_category_for_mask
self
.
_num_convs
=
num_convs
self
.
_num_convs
=
num_convs
if
not
use_category_for_mask
:
self
.
_norm_activation
=
norm_activation
assert
num_classes
==
1
self
.
_coarse_mask_fc
=
tf
.
keras
.
layers
.
Dense
(
def
__call__
(
self
,
self
.
_num_downsample_channels
,
name
=
'coarse-mask-fc'
)
crop_features
,
detection_priors
,
self
.
_class_conv
=
[]
inst_classes
,
self
.
_class_norm_activation
=
[]
is_training
=
None
):
for
i
in
range
(
self
.
_num_convs
):
self
.
_class_conv
.
append
(
tf
.
keras
.
layers
.
Conv2D
(
self
.
_num_downsample_channels
,
kernel_size
=
(
3
,
3
),
bias_initializer
=
tf
.
zeros_initializer
(),
kernel_initializer
=
tf
.
keras
.
initializers
.
RandomNormal
(
stddev
=
0.01
),
padding
=
'same'
,
name
=
'coarse-mask-class-%d'
%
i
))
self
.
_class_norm_activation
.
append
(
norm_activation
(
name
=
'coarse-mask-class-%d-bn'
%
i
))
self
.
_class_predict
=
tf
.
keras
.
layers
.
Conv2D
(
self
.
_mask_num_classes
,
kernel_size
=
(
1
,
1
),
# Focal loss bias initialization to have foreground 0.01 probability.
bias_initializer
=
tf
.
constant_initializer
(
-
np
.
log
((
1
-
0.01
)
/
0.01
)),
kernel_initializer
=
tf
.
keras
.
initializers
.
RandomNormal
(
stddev
=
0.01
),
padding
=
'same'
,
name
=
'coarse-mask-class-predict'
)
def
__call__
(
self
,
features
,
detection_priors
,
classes
,
is_training
):
"""Generate instance masks from FPN features and detection priors.
"""Generate instance masks from FPN features and detection priors.
This corresponds to the Fig. 5-6 of the ShapeMask paper at
This corresponds to the Fig. 5-6 of the ShapeMask paper at
https://arxiv.org/pdf/1904.03239.pdf
https://arxiv.org/pdf/1904.03239.pdf
Args:
Args:
crop_
features: a float Tensor of shape [batch_size
*
num_instances,
features: a float Tensor of shape [batch_size
,
num_instances,
mask_crop_size, mask_crop_size, num_downsample_channels]. This is the
mask_crop_size, mask_crop_size, num_downsample_channels]. This is the
instance feature crop.
instance feature crop.
detection_priors: a float Tensor of shape [batch_size
*
num_instances,
detection_priors: a float Tensor of shape [batch_size
,
num_instances,
mask_crop_size, mask_crop_size, 1]. This is the detection prior for
mask_crop_size, mask_crop_size, 1]. This is the detection prior for
the instance.
the instance.
inst_
classes: a int Tensor of shape [batch_size, num_instances]
classes: a int Tensor of shape [batch_size, num_instances]
of instance classes.
of instance classes.
is_training: a bool indicating whether in training mode.
is_training: a bool indicating whether in training mode.
Returns:
Returns:
mask_outputs: instance mask prediction as a float Tensor of shape
mask_outputs: instance mask prediction as a float Tensor of shape
[batch_size
*
num_instances, mask_size, mask_size
, num_classes
].
[batch_size
,
num_instances, mask_size, mask_size].
"""
"""
# Embed the anchor map into some feature space for anchor conditioning.
with
backend
.
get_graph
().
as_default
(),
tf
.
name_scope
(
'coarse_mask'
):
detection_prior_features
=
tf
.
keras
.
layers
.
Conv2D
(
# Transform detection priors to have the same dimension as features.
self
.
_num_downsample_channels
,
detection_priors
=
tf
.
expand_dims
(
detection_priors
,
axis
=-
1
)
kernel_size
=
(
1
,
1
),
detection_priors
=
self
.
_coarse_mask_fc
(
detection_priors
)
bias_initializer
=
tf
.
zeros_initializer
(),
kernel_initializer
=
tf
.
keras
.
initializers
.
RandomNormal
(
features
+=
detection_priors
mean
=
0.
,
stddev
=
0.01
),
mask_logits
=
self
.
decoder_net
(
features
,
is_training
)
padding
=
'same'
,
# Gather the logits with right input class.
name
=
'anchor-conv'
)(
if
self
.
_use_category_for_mask
:
detection_priors
)
mask_logits
=
tf
.
transpose
(
mask_logits
,
[
0
,
1
,
4
,
2
,
3
])
mask_logits
=
tf
.
gather
(
mask_logits
,
tf
.
expand_dims
(
classes
,
-
1
),
prior_conditioned_features
=
crop_features
+
detection_prior_features
batch_dims
=
2
)
coarse_output_features
=
self
.
coarsemask_decoder_net
(
mask_logits
=
tf
.
squeeze
(
mask_logits
,
axis
=
2
)
prior_conditioned_features
,
is_training
)
else
:
mask_logits
=
mask_logits
[...,
0
]
coarse_mask_classes
=
tf
.
keras
.
layers
.
Conv2D
(
self
.
_mask_num_classes
,
kernel_size
=
(
1
,
1
),
# Focal loss bias initialization to have foreground 0.01 probability.
bias_initializer
=
tf
.
constant_initializer
(
-
np
.
log
((
1
-
0.01
)
/
0.01
)),
kernel_initializer
=
tf
.
keras
.
initializers
.
RandomNormal
(
mean
=
0
,
stddev
=
0.01
),
padding
=
'same'
,
name
=
'class-predict'
)(
coarse_output_features
)
if
self
.
_use_category_for_mask
:
inst_classes
=
tf
.
cast
(
tf
.
reshape
(
inst_classes
,
[
-
1
]),
tf
.
int32
)
coarse_mask_classes_t
=
tf
.
transpose
(
a
=
coarse_mask_classes
,
perm
=
(
0
,
3
,
1
,
2
))
# pylint: disable=g-long-lambda
coarse_mask_logits
=
tf
.
cond
(
pred
=
tf
.
size
(
input
=
inst_classes
)
>
0
,
true_fn
=
lambda
:
tf
.
gather_nd
(
coarse_mask_classes_t
,
tf
.
stack
(
[
tf
.
range
(
tf
.
size
(
input
=
inst_classes
)),
inst_classes
-
1
],
axis
=
1
)),
false_fn
=
lambda
:
coarse_mask_classes_t
[:,
0
,
:,
:])
# pylint: enable=g-long-lambda
coarse_mask_logits
=
tf
.
expand_dims
(
coarse_mask_logits
,
-
1
)
else
:
coarse_mask_logits
=
coarse_mask_classes
coarse_class_probs
=
tf
.
nn
.
sigmoid
(
coarse_mask_logits
)
class_probs
=
tf
.
cast
(
coarse_class_probs
,
prior_conditioned_features
.
dtype
)
return
coarse_mask_classes
,
class_probs
,
prior_conditioned_feature
s
return
mask_logit
s
def
coarsemask_decoder_net
(
self
,
def
decoder_net
(
self
,
features
,
is_training
=
False
):
images
,
is_training
=
None
,
norm_activation
=
nn_ops
.
norm_activation_builder
()):
"""Coarse mask decoder network architecture.
"""Coarse mask decoder network architecture.
Args:
Args:
imag
es: A tensor of size [batch, height_in, width_in, channels_in].
featur
es: A tensor of size [batch, height_in, width_in, channels_in].
is_training: Whether batch_norm layers are in training mode.
is_training: Whether batch_norm layers are in training mode.
norm_activation: an operation that includes a batch normalization layer
followed by a relu layer(optional).
Returns:
Returns:
images: A feature tensor of size [batch, output_size, output_size,
images: A feature tensor of size [batch, output_size, output_size,
num_channels]
num_channels]
"""
"""
(
batch_size
,
num_instances
,
height
,
width
,
num_channels
)
=
features
.
get_shape
().
as_list
()
features
=
tf
.
reshape
(
features
,
[
batch_size
*
num_instances
,
height
,
width
,
num_channels
])
for
i
in
range
(
self
.
_num_convs
):
for
i
in
range
(
self
.
_num_convs
):
images
=
tf
.
keras
.
layers
.
Conv2D
(
features
=
self
.
_class_conv
[
i
](
features
)
self
.
_num_downsample_channels
,
features
=
self
.
_class_norm_activation
[
i
](
features
,
kernel_size
=
(
3
,
3
),
is_training
=
is_training
)
bias_initializer
=
tf
.
zeros_initializer
(),
kernel_initializer
=
tf
.
keras
.
initializers
.
RandomNormal
(
stddev
=
0.01
),
activation
=
None
,
padding
=
'same'
,
name
=
'coarse-class-%d'
%
i
)(
images
)
images
=
norm_activation
(
name
=
'coarse-class-%d-bn'
%
i
)(
images
,
is_training
=
is_training
)
return
images
mask_logits
=
self
.
_class_predict
(
features
)
mask_logits
=
tf
.
reshape
(
mask_logits
,
[
batch_size
,
num_instances
,
height
,
width
,
self
.
_mask_num_classes
])
return
mask_logits
class
ShapemaskFinemaskHead
(
object
):
class
ShapemaskFinemaskHead
(
object
):
...
@@ -1010,9 +858,9 @@ class ShapemaskFinemaskHead(object):
...
@@ -1010,9 +858,9 @@ class ShapemaskFinemaskHead(object):
num_classes
,
num_classes
,
num_downsample_channels
,
num_downsample_channels
,
mask_crop_size
,
mask_crop_size
,
use_category_for_mask
,
num_convs
,
num_convs
,
coarse_mask_thr
,
upsample_factor
,
gt_upsample_scale
,
norm_activation
=
nn_ops
.
norm_activation_builder
()):
norm_activation
=
nn_ops
.
norm_activation_builder
()):
"""Initialize params to build ShapeMask coarse and fine prediction head.
"""Initialize params to build ShapeMask coarse and fine prediction head.
...
@@ -1020,33 +868,29 @@ class ShapemaskFinemaskHead(object):
...
@@ -1020,33 +868,29 @@ class ShapemaskFinemaskHead(object):
num_classes: `int` number of mask classification categories.
num_classes: `int` number of mask classification categories.
num_downsample_channels: `int` number of filters at mask head.
num_downsample_channels: `int` number of filters at mask head.
mask_crop_size: feature crop size.
mask_crop_size: feature crop size.
use_category_for_mask: use class information in mask branch.
num_convs: `int` number of stacked convolution before the last prediction
num_convs: `int` number of stacked convolution before the last prediction
layer.
layer.
coarse_mask_thr: the threshold for suppressing noisy coarse prediction.
upsample_factor: `int` number of fine mask upsampling factor.
gt_upsample_scale: scale for upsampling groundtruths.
norm_activation: an operation that includes a batch normalization layer
norm_activation: an operation that includes a batch normalization layer
followed by a relu layer(optional).
followed by a relu layer(optional).
"""
"""
self
.
_mask_num_classes
=
num_classes
self
.
_use_category_for_mask
=
use_category_for_mask
self
.
_mask_num_classes
=
num_classes
if
use_category_for_mask
else
1
self
.
_num_downsample_channels
=
num_downsample_channels
self
.
_num_downsample_channels
=
num_downsample_channels
self
.
_mask_crop_size
=
mask_crop_size
self
.
_mask_crop_size
=
mask_crop_size
self
.
_num_convs
=
num_convs
self
.
_num_convs
=
num_convs
self
.
_coarse_mask_thr
=
coarse_mask_thr
self
.
up_sample_factor
=
upsample_factor
self
.
_gt_upsample_scale
=
gt_upsample_scale
self
.
_fine_mask_fc
=
tf
.
keras
.
layers
.
Dense
(
self
.
_num_downsample_channels
,
name
=
'fine-mask-fc'
)
self
.
_class_predict_conv
=
tf
.
keras
.
layers
.
Conv2D
(
self
.
_mask_num_classes
,
kernel_size
=
(
1
,
1
),
# Focal loss bias initialization to have foreground 0.01 probability.
bias_initializer
=
tf
.
constant_initializer
(
-
np
.
log
((
1
-
0.01
)
/
0.01
)),
kernel_initializer
=
tf
.
keras
.
initializers
.
RandomNormal
(
mean
=
0
,
stddev
=
0.01
),
padding
=
'same'
,
name
=
'affinity-class-predict'
)
self
.
_upsample_conv
=
tf
.
keras
.
layers
.
Conv2DTranspose
(
self
.
_upsample_conv
=
tf
.
keras
.
layers
.
Conv2DTranspose
(
self
.
_num_downsample_channels
//
2
,
self
.
_num_downsample_channels
,
(
self
.
_gt_upsample_scale
,
self
.
_gt_upsample_scale
),
(
self
.
up_sample_factor
,
self
.
up_sample_factor
),
(
self
.
_gt_upsample_scale
,
self
.
_gt_upsample_scale
))
(
self
.
up_sample_factor
,
self
.
up_sample_factor
),
name
=
'fine-mask-conv2d-tran'
)
self
.
_fine_class_conv
=
[]
self
.
_fine_class_conv
=
[]
self
.
_fine_class_bn
=
[]
self
.
_fine_class_bn
=
[]
for
i
in
range
(
self
.
_num_convs
):
for
i
in
range
(
self
.
_num_convs
):
...
@@ -1059,60 +903,73 @@ class ShapemaskFinemaskHead(object):
...
@@ -1059,60 +903,73 @@ class ShapemaskFinemaskHead(object):
stddev
=
0.01
),
stddev
=
0.01
),
activation
=
None
,
activation
=
None
,
padding
=
'same'
,
padding
=
'same'
,
name
=
'fine-class-%d'
%
i
))
name
=
'fine-mask-class-%d'
%
i
))
self
.
_fine_class_bn
.
append
(
norm_activation
(
name
=
'fine-class-%d-bn'
%
i
))
self
.
_fine_class_bn
.
append
(
norm_activation
(
name
=
'fine-mask-class-%d-bn'
%
i
))
self
.
_class_predict_conv
=
tf
.
keras
.
layers
.
Conv2D
(
self
.
_mask_num_classes
,
kernel_size
=
(
1
,
1
),
# Focal loss bias initialization to have foreground 0.01 probability.
bias_initializer
=
tf
.
constant_initializer
(
-
np
.
log
((
1
-
0.01
)
/
0.01
)),
kernel_initializer
=
tf
.
keras
.
initializers
.
RandomNormal
(
stddev
=
0.01
),
padding
=
'same'
,
name
=
'fine-mask-class-predict'
)
def __call__(self, features, mask_logits, classes, is_training):
  """Generate instance masks from FPN features and detection priors.

  This corresponds to the Fig. 5-6 of the ShapeMask paper at
  https://arxiv.org/pdf/1904.03239.pdf

  Args:
    features: a float Tensor of shape
      [batch_size, num_instances, mask_crop_size, mask_crop_size,
      num_downsample_channels]. This is the instance feature crop.
    mask_logits: a float Tensor of shape
      [batch_size, num_instances, mask_crop_size, mask_crop_size] indicating
      predicted mask logits.
    classes: a int Tensor of shape [batch_size, num_instances]
      of instance classes.
    is_training: a bool indicating whether in training mode.

  Returns:
    mask_outputs: instance mask prediction as a float Tensor of shape
      [batch_size, num_instances, mask_size, mask_size].
  """
  # Extract the foreground mean features
  with backend.get_graph().as_default(), tf.name_scope('fine_mask'):
    mask_probs = tf.nn.sigmoid(mask_logits)
    # Compute instance embedding for hard average: threshold the coarse mask
    # at 0.5 and average the crop features over the foreground pixels only.
    binary_mask = tf.cast(tf.greater(mask_probs, 0.5), features.dtype)
    instance_embedding = tf.reduce_sum(
        features * tf.expand_dims(binary_mask, axis=-1), axis=(2, 3))
    # 1e-20 guards against division by zero when a mask has no foreground.
    instance_embedding /= tf.expand_dims(
        tf.reduce_sum(binary_mask, axis=(2, 3)) + 1e-20, axis=-1)
    # Take the difference between crop features and mean instance features.
    # The double expand_dims broadcasts the [B, N, C] embedding back over the
    # two spatial axes of the [B, N, H, W, C] crop.
    features -= tf.expand_dims(
        tf.expand_dims(instance_embedding, axis=2), axis=2)

    # Re-inject the coarse mask probability as an additive feature.
    features += self._fine_mask_fc(tf.expand_dims(mask_probs, axis=-1))

    # Decoder to generate upsampled segmentation mask.
    mask_logits = self.decoder_net(features, is_training)
    if self._use_category_for_mask:
      # Select each instance's own class channel: move classes to axis 2,
      # gather with batch_dims=2 over [batch, instance], then drop the
      # singleton gather axis.
      mask_logits = tf.transpose(mask_logits, [0, 1, 4, 2, 3])
      mask_logits = tf.gather(
          mask_logits, tf.expand_dims(classes, -1), batch_dims=2)
      mask_logits = tf.squeeze(mask_logits, axis=2)
    else:
      # Class-agnostic head: decoder produced a single channel.
      mask_logits = mask_logits[..., 0]

  return mask_logits
def decoder_net(self, features, is_training=False):
  """Fine mask decoder network architecture.

  Args:
    features: A tensor of size [batch, num_instances, height_in, width_in,
      channels_in] holding the prior-conditioned instance feature crops.
    is_training: Whether batch_norm layers are in training mode.

  Returns:
    A tensor of size [batch, num_instances, height_in * up_sample_factor,
      width_in * up_sample_factor, num_mask_classes] of fine mask logits.
  """
  batch, instances, crop_h, crop_w, channels = features.get_shape().as_list()
  # Merge batch and instance axes so the conv stack runs per crop.
  net = tf.reshape(features, [batch * instances, crop_h, crop_w, channels])
  # _fine_class_conv / _fine_class_bn each hold self._num_convs layers.
  for conv_layer, bn_layer in zip(self._fine_class_conv, self._fine_class_bn):
    net = conv_layer(net)
    net = bn_layer(net, is_training=is_training)

  # Optional transposed-conv upsampling of the decoded features.
  if self.up_sample_factor > 1:
    net = self._upsample_conv(net)

  # Predict per-class instance masks.
  logits = self._class_predict_conv(net)

  # Split batch/instance axes back out at the upsampled resolution.
  return tf.reshape(
      logits,
      [batch, instances, crop_h * self.up_sample_factor,
       crop_w * self.up_sample_factor, self._mask_num_classes])
official/vision/detection/modeling/architecture/nn_ops.py
View file @
d305396d
...
@@ -19,7 +19,6 @@ from __future__ import division
...
@@ -19,7 +19,6 @@ from __future__ import division
from
__future__
import
print_function
from
__future__
import
print_function
import
functools
import
functools
from
absl
import
logging
import
tensorflow
as
tf
import
tensorflow
as
tf
...
@@ -105,5 +104,5 @@ def norm_activation_builder(momentum=0.997,
...
@@ -105,5 +104,5 @@ def norm_activation_builder(momentum=0.997,
momentum
=
momentum
,
momentum
=
momentum
,
epsilon
=
epsilon
,
epsilon
=
epsilon
,
trainable
=
trainable
,
trainable
=
trainable
,
activation
=
'relu'
,
activation
=
activation
,
**
kwargs
)
**
kwargs
)
official/vision/detection/modeling/losses.py
View file @
d305396d
...
@@ -12,7 +12,7 @@
...
@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
# ==============================================================================
# ==============================================================================
"""Losses used for
Mask-RCNN
."""
"""Losses used for
detection models
."""
from
__future__
import
absolute_import
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
division
...
@@ -479,12 +479,62 @@ class RetinanetBoxLoss(object):
...
@@ -479,12 +479,62 @@ class RetinanetBoxLoss(object):
class ShapemaskMseLoss(object):
  """ShapeMask mask Mean Squared Error loss function wrapper."""

  def __call__(self, probs, labels, valid_mask):
    """Compute instance segmentation loss.

    Args:
      probs: A Tensor of shape [batch_size * num_points, height, width,
        num_classes]. The logits are not necessarily between 0 and 1.
      labels: A float32/float16 Tensor of shape [batch_size, num_instances,
        mask_size, mask_size], where mask_size =
        mask_crop_size * gt_upsample_scale for fine mask, or mask_crop_size
        for coarse masks and shape priors.
      valid_mask: a binary mask indicating valid training masks.

    Returns:
      loss: an float tensor representing total mask classification loss.
    """
    with tf.name_scope('shapemask_prior_loss'):
      batch, instances = valid_mask.get_shape().as_list()[:2]
      # Per-pixel residual in float32 regardless of the input dtypes.
      residual = (tf.cast(labels, dtype=tf.float32) -
                  tf.cast(probs, dtype=tf.float32))
      # Zero out instances that are not valid training targets.
      keep = tf.cast(
          tf.reshape(valid_mask, [batch, instances, 1, 1]), tf.float32)
      residual *= keep
      # Adding 0.001 in the denominator to avoid division by zero.
      loss = tf.nn.l2_loss(residual) / (tf.reduce_sum(labels) + 0.001)
    return loss
class ShapemaskLoss(object):
  """ShapeMask mask loss function wrapper."""

  def __init__(self):
    # NOTE(review): Reduction.SUM makes the Keras loss collapse to a scalar
    # before `valid_mask` is applied in __call__, so the mask multiply below
    # broadcasts a scalar instead of zeroing invalid instances per element.
    # Presumably Reduction.NONE was intended here — confirm against training
    # results before relying on per-instance masking.
    self._binary_crossentropy = tf.keras.losses.BinaryCrossentropy(
        reduction=tf.keras.losses.Reduction.SUM, from_logits=True)

  def __call__(self, logits, labels, valid_mask):
    """ShapeMask mask cross entropy loss function wrapper.

    Args:
      logits: A Tensor of shape [batch_size * num_instances, height, width,
        num_classes]. The logits are not necessarily between 0 and 1.
      labels: A float16/float32 Tensor of shape [batch_size, num_instances,
        mask_size, mask_size], where mask_size =
        mask_crop_size * gt_upsample_scale for fine mask, or mask_crop_size
        for coarse masks and shape priors.
      valid_mask: a binary mask of shape [batch_size, num_instances]
        indicating valid training masks.

    Returns:
      loss: an float tensor representing total mask classification loss.
    """
    with tf.name_scope('shapemask_loss'):
      batch_size, num_instances = valid_mask.get_shape().as_list()[:2]
      # Cast both operands to float32 so mixed-precision inputs match.
      labels = tf.cast(labels, tf.float32)
      logits = tf.cast(logits, tf.float32)
      loss = self._binary_crossentropy(labels, logits)
      # Mask out invalid instances (see NOTE in __init__ about reduction).
      loss *= tf.cast(tf.reshape(
          valid_mask, [batch_size, num_instances, 1, 1]), loss.dtype)
      # Adding 0.001 in the denominator to avoid division by zero.
      loss = tf.reduce_sum(loss) / (tf.reduce_sum(labels) + 0.001)
    return loss
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment