Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
ResNet50_tensorflow
Commits
e7de233b
Commit
e7de233b
authored
Oct 27, 2017
by
Vivek Rathod
Browse files
updates changes in object_detecion/cores directory.
parent
edcd29f2
Changes
14
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
14 changed files
with
2135 additions
and
327 deletions
+2135
-327
research/object_detection/core/BUILD
research/object_detection/core/BUILD
+5
-0
research/object_detection/core/box_list_ops.py
research/object_detection/core/box_list_ops.py
+20
-11
research/object_detection/core/box_predictor.py
research/object_detection/core/box_predictor.py
+31
-12
research/object_detection/core/data_parser.py
research/object_detection/core/data_parser.py
+41
-0
research/object_detection/core/keypoint_ops.py
research/object_detection/core/keypoint_ops.py
+51
-0
research/object_detection/core/keypoint_ops_test.py
research/object_detection/core/keypoint_ops_test.py
+32
-0
research/object_detection/core/losses.py
research/object_detection/core/losses.py
+72
-2
research/object_detection/core/losses_test.py
research/object_detection/core/losses_test.py
+313
-0
research/object_detection/core/model.py
research/object_detection/core/model.py
+20
-6
research/object_detection/core/preprocessor.py
research/object_detection/core/preprocessor.py
+763
-156
research/object_detection/core/preprocessor_test.py
research/object_detection/core/preprocessor_test.py
+680
-129
research/object_detection/core/standard_fields.py
research/object_detection/core/standard_fields.py
+58
-3
research/object_detection/core/target_assigner.py
research/object_detection/core/target_assigner.py
+14
-8
research/object_detection/core/target_assigner_test.py
research/object_detection/core/target_assigner_test.py
+35
-0
No files found.
research/object_detection/core/BUILD
View file @
e7de233b
...
...
@@ -264,6 +264,11 @@ py_library(
srcs
=
[
"data_decoder.py"
],
)
py_library
(
name
=
"data_parser"
,
srcs
=
[
"data_parser.py"
],
)
py_library
(
name
=
"box_predictor"
,
srcs
=
[
"box_predictor.py"
],
...
...
research/object_detection/core/box_list_ops.py
View file @
e7de233b
...
...
@@ -584,7 +584,8 @@ def sort_by_field(boxlist, field, order=SortOrder.descend, scope=None):
[
'Incorrect field size: actual vs expected.'
,
num_entries
,
num_boxes
])
with
tf
.
control_dependencies
([
length_assert
]):
# TODO: Remove with tf.device when top_k operation runs correctly on GPU.
# TODO: Remove with tf.device when top_k operation runs
# correctly on GPU.
with
tf
.
device
(
'/cpu:0'
):
_
,
sorted_indices
=
tf
.
nn
.
top_k
(
field_to_sort
,
num_boxes
,
sorted
=
True
)
...
...
@@ -655,7 +656,7 @@ def filter_greater_than(boxlist, thresh, scope=None):
This op keeps the collection of boxes whose corresponding scores are
greater than the input threshold.
TODO: Change function name to
F
ilter
S
cores
G
reater
T
han
TODO: Change function name to
f
ilter
_s
cores
_g
reater
_t
han
Args:
boxlist: BoxList holding N boxes. Must contain a 'scores' field
...
...
@@ -772,18 +773,25 @@ def to_normalized_coordinates(boxlist, height, width,
return
scale
(
boxlist
,
1
/
height
,
1
/
width
)
def
to_absolute_coordinates
(
boxlist
,
height
,
width
,
check_range
=
True
,
scope
=
None
):
def
to_absolute_coordinates
(
boxlist
,
height
,
width
,
check_range
=
True
,
maximum_normalized_coordinate
=
1.01
,
scope
=
None
):
"""Converts normalized box coordinates to absolute pixel coordinates.
This function raises an assertion failed error when the maximum box coordinate
value is larger than 1.01 (in which case coordinates are already absolute).
value is larger than maximum_normalized_coordinate (in which case coordinates
are already absolute).
Args:
boxlist: BoxList with coordinates in range [0, 1].
height: Maximum value for height of absolute box coordinates.
width: Maximum value for width of absolute box coordinates.
check_range: If True, checks if the coordinates are normalized or not.
maximum_normalized_coordinate: Maximum coordinate value to be considered
as normalized, default to 1.01.
scope: name scope.
Returns:
...
...
@@ -797,9 +805,10 @@ def to_absolute_coordinates(boxlist, height, width,
# Ensure range of input boxes is correct.
if
check_range
:
box_maximum
=
tf
.
reduce_max
(
boxlist
.
get
())
max_assert
=
tf
.
Assert
(
tf
.
greater_equal
(
1.01
,
box_maximum
),
max_assert
=
tf
.
Assert
(
tf
.
greater_equal
(
maximum_normalized_coordinate
,
box_maximum
),
[
'maximum box coordinate value is larger '
'than 1.01: '
,
box_maximum
])
'than %f: '
%
maximum_normalized_coordinate
,
box_maximum
])
with
tf
.
control_dependencies
([
max_assert
]):
width
=
tf
.
identity
(
width
)
...
...
@@ -927,9 +936,9 @@ def box_voting(selected_boxes, pool_boxes, iou_thresh=0.5):
iou_
=
iou
(
selected_boxes
,
pool_boxes
)
match_indicator
=
tf
.
to_float
(
tf
.
greater
(
iou_
,
iou_thresh
))
num_matches
=
tf
.
reduce_sum
(
match_indicator
,
1
)
# TODO: Handle the case where some boxes in selected_boxes do not
match to any
# boxes in pool_boxes. For such boxes without any matches, we
should return
# the original boxes without voting.
# TODO: Handle the case where some boxes in selected_boxes do not
#
match to any
boxes in pool_boxes. For such boxes without any matches, we
#
should return
the original boxes without voting.
match_assert
=
tf
.
Assert
(
tf
.
reduce_all
(
tf
.
greater
(
num_matches
,
0
)),
[
'Each box in selected_boxes must match with at least one box '
...
...
research/object_detection/core/box_predictor.py
View file @
e7de233b
...
...
@@ -278,6 +278,8 @@ class MaskRCNNBoxPredictor(BoxPredictor):
box_code_size
,
conv_hyperparams
=
None
,
predict_instance_masks
=
False
,
mask_height
=
14
,
mask_width
=
14
,
mask_prediction_conv_depth
=
256
,
predict_keypoints
=
False
):
"""Constructor.
...
...
@@ -300,6 +302,8 @@ class MaskRCNNBoxPredictor(BoxPredictor):
ops.
predict_instance_masks: Whether to predict object masks inside detection
boxes.
mask_height: Desired output mask height. The default value is 14.
mask_width: Desired output mask width. The default value is 14.
mask_prediction_conv_depth: The depth for the first conv2d_transpose op
applied to the image_features in the mask prediciton branch.
predict_keypoints: Whether to predict keypoints insde detection boxes.
...
...
@@ -315,10 +319,10 @@ class MaskRCNNBoxPredictor(BoxPredictor):
self
.
_dropout_keep_prob
=
dropout_keep_prob
self
.
_conv_hyperparams
=
conv_hyperparams
self
.
_predict_instance_masks
=
predict_instance_masks
self
.
_mask_height
=
mask_height
self
.
_mask_width
=
mask_width
self
.
_mask_prediction_conv_depth
=
mask_prediction_conv_depth
self
.
_predict_keypoints
=
predict_keypoints
if
self
.
_predict_instance_masks
:
raise
ValueError
(
'Mask prediction is unimplemented.'
)
if
self
.
_predict_keypoints
:
raise
ValueError
(
'Keypoint prediction is unimplemented.'
)
if
((
self
.
_predict_instance_masks
or
self
.
_predict_keypoints
)
and
...
...
@@ -339,6 +343,11 @@ class MaskRCNNBoxPredictor(BoxPredictor):
have been folded into the batch dimension. Thus we output 1 for the
anchors dimension.
Also optionally predicts instance masks.
The mask prediction head is based on the Mask RCNN paper with the following
modifications: We replace the deconvolution layer with a bilinear resize
and a convolution.
Args:
image_features: A float tensor of shape [batch_size, height, width,
channels] containing features for a batch of images.
...
...
@@ -397,15 +406,18 @@ class MaskRCNNBoxPredictor(BoxPredictor):
if
self
.
_predict_instance_masks
:
with
slim
.
arg_scope
(
self
.
_conv_hyperparams
):
upsampled_features
=
slim
.
conv2d_transpose
(
upsampled_features
=
tf
.
image
.
resize_bilinear
(
image_features
,
[
self
.
_mask_height
,
self
.
_mask_width
],
align_corners
=
True
)
upsampled_features
=
slim
.
conv2d
(
upsampled_features
,
num_outputs
=
self
.
_mask_prediction_conv_depth
,
kernel_size
=
[
2
,
2
],
stride
=
2
)
kernel_size
=
[
2
,
2
])
mask_predictions
=
slim
.
conv2d
(
upsampled_features
,
num_outputs
=
self
.
num_classes
,
activation_fn
=
None
,
kernel_size
=
[
1
,
1
])
kernel_size
=
[
3
,
3
])
instance_masks
=
tf
.
expand_dims
(
tf
.
transpose
(
mask_predictions
,
perm
=
[
0
,
3
,
1
,
2
]),
axis
=
1
,
...
...
@@ -437,7 +449,8 @@ class ConvolutionalBoxPredictor(BoxPredictor):
dropout_keep_prob
,
kernel_size
,
box_code_size
,
apply_sigmoid_to_scores
=
False
):
apply_sigmoid_to_scores
=
False
,
class_prediction_bias_init
=
0.0
):
"""Constructor.
Args:
...
...
@@ -464,6 +477,8 @@ class ConvolutionalBoxPredictor(BoxPredictor):
box_code_size: Size of encoding for each box.
apply_sigmoid_to_scores: if True, apply the sigmoid on the output
class_predictions.
class_prediction_bias_init: constant value to initialize bias of the last
conv2d layer before class prediction.
Raises:
ValueError: if min_depth > max_depth.
...
...
@@ -480,6 +495,7 @@ class ConvolutionalBoxPredictor(BoxPredictor):
self
.
_box_code_size
=
box_code_size
self
.
_dropout_keep_prob
=
dropout_keep_prob
self
.
_apply_sigmoid_to_scores
=
apply_sigmoid_to_scores
self
.
_class_prediction_bias_init
=
class_prediction_bias_init
def
_predict
(
self
,
image_features
,
num_predictions_per_location
):
"""Computes encoded object locations and corresponding confidences.
...
...
@@ -499,15 +515,16 @@ class ConvolutionalBoxPredictor(BoxPredictor):
[batch_size, num_anchors, num_classes + 1] representing the class
predictions for the proposals.
"""
features_depth
=
static_shape
.
get_depth
(
image_features
.
get_shape
())
depth
=
max
(
min
(
features_depth
,
self
.
_max_depth
),
self
.
_min_depth
)
# Add a slot for the background class.
num_class_slots
=
self
.
num_classes
+
1
net
=
image_features
with
slim
.
arg_scope
(
self
.
_conv_hyperparams
),
\
slim
.
arg_scope
([
slim
.
dropout
],
is_training
=
self
.
_is_training
):
# Add additional conv layers before the predictor.
# Add additional conv layers before the class predictor.
features_depth
=
static_shape
.
get_depth
(
image_features
.
get_shape
())
depth
=
max
(
min
(
features_depth
,
self
.
_max_depth
),
self
.
_min_depth
)
tf
.
logging
.
info
(
'depth of additional conv before box predictor: {}'
.
format
(
depth
))
if
depth
>
0
and
self
.
_num_layers_before_predictor
>
0
:
for
i
in
range
(
self
.
_num_layers_before_predictor
):
net
=
slim
.
conv2d
(
...
...
@@ -522,7 +539,9 @@ class ConvolutionalBoxPredictor(BoxPredictor):
net
=
slim
.
dropout
(
net
,
keep_prob
=
self
.
_dropout_keep_prob
)
class_predictions_with_background
=
slim
.
conv2d
(
net
,
num_predictions_per_location
*
num_class_slots
,
[
self
.
_kernel_size
,
self
.
_kernel_size
],
scope
=
'ClassPredictor'
)
[
self
.
_kernel_size
,
self
.
_kernel_size
],
scope
=
'ClassPredictor'
,
biases_initializer
=
tf
.
constant_initializer
(
self
.
_class_prediction_bias_init
))
if
self
.
_apply_sigmoid_to_scores
:
class_predictions_with_background
=
tf
.
sigmoid
(
class_predictions_with_background
)
...
...
research/object_detection/core/data_parser.py
0 → 100644
View file @
e7de233b
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Interface for data parsers.
Data parser parses input data and returns a dictionary of numpy arrays
keyed by the entries in standard_fields.py. Since the parser parses records
to numpy arrays (materialized tensors) directly, it is used to read data for
evaluation/visualization; to parse the data during training, DataDecoder should
be used.
"""
from
abc
import
ABCMeta
from
abc
import
abstractmethod
class
DataToNumpyParser
(
object
):
__metaclass__
=
ABCMeta
@
abstractmethod
def
parse
(
self
,
input_data
):
"""Parses input and returns a numpy array or a dictionary of numpy arrays.
Args:
input_data: an input data
Returns:
A numpy array or a dictionary of numpy arrays or None, if input
cannot be parsed.
"""
pass
research/object_detection/core/keypoint_ops.py
View file @
e7de233b
...
...
@@ -229,3 +229,54 @@ def flip_horizontal(keypoints, flip_point, flip_permutation, scope=None):
new_keypoints
=
tf
.
concat
([
v
,
u
],
2
)
new_keypoints
=
tf
.
transpose
(
new_keypoints
,
[
1
,
0
,
2
])
return
new_keypoints
def
flip_vertical
(
keypoints
,
flip_point
,
flip_permutation
,
scope
=
None
):
"""Flips the keypoints vertically around the flip_point.
This operation flips the y coordinate for each keypoint around the flip_point
and also permutes the keypoints in a manner specified by flip_permutation.
Args:
keypoints: a tensor of shape [num_instances, num_keypoints, 2]
flip_point: (float) scalar tensor representing the y coordinate to flip the
keypoints around.
flip_permutation: rank 1 int32 tensor containing the keypoint flip
permutation. This specifies the mapping from original keypoint indices
to the flipped keypoint indices. This is used primarily for keypoints
that are not reflection invariant. E.g. Suppose there are 3 keypoints
representing ['head', 'right_eye', 'left_eye'], then a logical choice for
flip_permutation might be [0, 2, 1] since we want to swap the 'left_eye'
and 'right_eye' after a horizontal flip.
scope: name scope.
Returns:
new_keypoints: a tensor of shape [num_instances, num_keypoints, 2]
"""
with
tf
.
name_scope
(
scope
,
'FlipVertical'
):
keypoints
=
tf
.
transpose
(
keypoints
,
[
1
,
0
,
2
])
keypoints
=
tf
.
gather
(
keypoints
,
flip_permutation
)
v
,
u
=
tf
.
split
(
value
=
keypoints
,
num_or_size_splits
=
2
,
axis
=
2
)
v
=
flip_point
*
2.0
-
v
new_keypoints
=
tf
.
concat
([
v
,
u
],
2
)
new_keypoints
=
tf
.
transpose
(
new_keypoints
,
[
1
,
0
,
2
])
return
new_keypoints
def
rot90
(
keypoints
,
scope
=
None
):
"""Rotates the keypoints counter-clockwise by 90 degrees.
Args:
keypoints: a tensor of shape [num_instances, num_keypoints, 2]
scope: name scope.
Returns:
new_keypoints: a tensor of shape [num_instances, num_keypoints, 2]
"""
with
tf
.
name_scope
(
scope
,
'Rot90'
):
keypoints
=
tf
.
transpose
(
keypoints
,
[
1
,
0
,
2
])
v
,
u
=
tf
.
split
(
value
=
keypoints
[:,
:,
::
-
1
],
num_or_size_splits
=
2
,
axis
=
2
)
v
=
1.0
-
v
new_keypoints
=
tf
.
concat
([
v
,
u
],
2
)
new_keypoints
=
tf
.
transpose
(
new_keypoints
,
[
1
,
0
,
2
])
return
new_keypoints
research/object_detection/core/keypoint_ops_test.py
View file @
e7de233b
...
...
@@ -163,6 +163,38 @@ class KeypointOpsTest(tf.test.TestCase):
output_
,
expected_keypoints_
=
sess
.
run
([
output
,
expected_keypoints
])
self
.
assertAllClose
(
output_
,
expected_keypoints_
)
def
test_flip_vertical
(
self
):
keypoints
=
tf
.
constant
([
[[
0.1
,
0.1
],
[
0.2
,
0.2
],
[
0.3
,
0.3
]],
[[
0.4
,
0.4
],
[
0.5
,
0.5
],
[
0.6
,
0.6
]]
])
flip_permutation
=
[
0
,
2
,
1
]
expected_keypoints
=
tf
.
constant
([
[[
0.9
,
0.1
],
[
0.7
,
0.3
],
[
0.8
,
0.2
]],
[[
0.6
,
0.4
],
[
0.4
,
0.6
],
[
0.5
,
0.5
]],
])
output
=
keypoint_ops
.
flip_vertical
(
keypoints
,
0.5
,
flip_permutation
)
with
self
.
test_session
()
as
sess
:
output_
,
expected_keypoints_
=
sess
.
run
([
output
,
expected_keypoints
])
self
.
assertAllClose
(
output_
,
expected_keypoints_
)
def
test_rot90
(
self
):
keypoints
=
tf
.
constant
([
[[
0.1
,
0.1
],
[
0.2
,
0.2
],
[
0.3
,
0.3
]],
[[
0.4
,
0.6
],
[
0.5
,
0.6
],
[
0.6
,
0.7
]]
])
expected_keypoints
=
tf
.
constant
([
[[
0.9
,
0.1
],
[
0.8
,
0.2
],
[
0.7
,
0.3
]],
[[
0.4
,
0.4
],
[
0.4
,
0.5
],
[
0.3
,
0.6
]],
])
output
=
keypoint_ops
.
rot90
(
keypoints
)
with
self
.
test_session
()
as
sess
:
output_
,
expected_keypoints_
=
sess
.
run
([
output
,
expected_keypoints
])
self
.
assertAllClose
(
output_
,
expected_keypoints_
)
if
__name__
==
'__main__'
:
tf
.
test
.
main
()
research/object_detection/core/losses.py
View file @
e7de233b
...
...
@@ -72,7 +72,7 @@ class Loss(object):
@
abstractmethod
def
_compute_loss
(
self
,
prediction_tensor
,
target_tensor
,
**
params
):
"""Method to be overriden by implementations.
"""Method to be overrid
d
en by implementations.
Args:
prediction_tensor: a tensor representing predicted quantities
...
...
@@ -238,17 +238,85 @@ class WeightedSigmoidClassificationLoss(Loss):
return
tf
.
reduce_sum
(
per_entry_cross_ent
*
weights
)
class
SigmoidFocalClassificationLoss
(
Loss
):
"""Sigmoid focal cross entropy loss.
Focal loss down-weights well classified examples and focusses on the hard
examples. See https://arxiv.org/pdf/1708.02002.pdf for the loss definition.
"""
def
__init__
(
self
,
anchorwise_output
=
False
,
gamma
=
2.0
,
alpha
=
0.25
):
"""Constructor.
Args:
anchorwise_output: Outputs loss per anchor. (default False)
gamma: exponent of the modulating factor (1 - p_t) ^ gamma.
alpha: optional alpha weighting factor to balance positives vs negatives.
"""
self
.
_anchorwise_output
=
anchorwise_output
self
.
_alpha
=
alpha
self
.
_gamma
=
gamma
def
_compute_loss
(
self
,
prediction_tensor
,
target_tensor
,
weights
,
class_indices
=
None
):
"""Compute loss function.
Args:
prediction_tensor: A float tensor of shape [batch_size, num_anchors,
num_classes] representing the predicted logits for each class
target_tensor: A float tensor of shape [batch_size, num_anchors,
num_classes] representing one-hot encoded classification targets
weights: a float tensor of shape [batch_size, num_anchors]
class_indices: (Optional) A 1-D integer tensor of class indices.
If provided, computes loss only for the specified class indices.
Returns:
loss: a (scalar) tensor representing the value of the loss function
or a float tensor of shape [batch_size, num_anchors]
"""
weights
=
tf
.
expand_dims
(
weights
,
2
)
if
class_indices
is
not
None
:
weights
*=
tf
.
reshape
(
ops
.
indices_to_dense_vector
(
class_indices
,
tf
.
shape
(
prediction_tensor
)[
2
]),
[
1
,
1
,
-
1
])
per_entry_cross_ent
=
(
tf
.
nn
.
sigmoid_cross_entropy_with_logits
(
labels
=
target_tensor
,
logits
=
prediction_tensor
))
prediction_probabilities
=
tf
.
sigmoid
(
prediction_tensor
)
p_t
=
((
target_tensor
*
prediction_probabilities
)
+
((
1
-
target_tensor
)
*
(
1
-
prediction_probabilities
)))
modulating_factor
=
1.0
if
self
.
_gamma
:
modulating_factor
=
tf
.
pow
(
1.0
-
p_t
,
self
.
_gamma
)
alpha_weight_factor
=
1.0
if
self
.
_alpha
is
not
None
:
alpha_weight_factor
=
(
target_tensor
*
self
.
_alpha
+
(
1
-
target_tensor
)
*
(
1
-
self
.
_alpha
))
focal_cross_entropy_loss
=
(
modulating_factor
*
alpha_weight_factor
*
per_entry_cross_ent
)
if
self
.
_anchorwise_output
:
return
tf
.
reduce_sum
(
focal_cross_entropy_loss
*
weights
,
2
)
return
tf
.
reduce_sum
(
focal_cross_entropy_loss
*
weights
)
class
WeightedSoftmaxClassificationLoss
(
Loss
):
"""Softmax loss function."""
def
__init__
(
self
,
anchorwise_output
=
False
):
def
__init__
(
self
,
anchorwise_output
=
False
,
logit_scale
=
1.0
):
"""Constructor.
Args:
anchorwise_output: Whether to output loss per anchor (default False)
logit_scale: When this value is high, the prediction is "diffused" and
when this value is low, the prediction is made peakier.
(default 1.0)
"""
self
.
_anchorwise_output
=
anchorwise_output
self
.
_logit_scale
=
logit_scale
def
_compute_loss
(
self
,
prediction_tensor
,
target_tensor
,
weights
):
"""Compute loss function.
...
...
@@ -264,6 +332,8 @@ class WeightedSoftmaxClassificationLoss(Loss):
loss: a (scalar) tensor representing the value of the loss function
"""
num_classes
=
prediction_tensor
.
get_shape
().
as_list
()[
-
1
]
prediction_tensor
=
tf
.
divide
(
prediction_tensor
,
self
.
_logit_scale
,
name
=
'scale_logit'
)
per_row_cross_ent
=
(
tf
.
nn
.
softmax_cross_entropy_with_logits
(
labels
=
tf
.
reshape
(
target_tensor
,
[
-
1
,
num_classes
]),
logits
=
tf
.
reshape
(
prediction_tensor
,
[
-
1
,
num_classes
])))
...
...
research/object_detection/core/losses_test.py
View file @
e7de233b
...
...
@@ -225,6 +225,286 @@ class WeightedSigmoidClassificationLossTest(tf.test.TestCase):
self
.
assertAllClose
(
loss_output
,
exp_loss
)
def
_logit
(
probability
):
return
math
.
log
(
probability
/
(
1.
-
probability
))
class
SigmoidFocalClassificationLossTest
(
tf
.
test
.
TestCase
):
def
testEasyExamplesProduceSmallLossComparedToSigmoidXEntropy
(
self
):
prediction_tensor
=
tf
.
constant
([[[
_logit
(
0.97
)],
[
_logit
(
0.90
)],
[
_logit
(
0.73
)],
[
_logit
(
0.27
)],
[
_logit
(
0.09
)],
[
_logit
(
0.03
)]]],
tf
.
float32
)
target_tensor
=
tf
.
constant
([[[
1
],
[
1
],
[
1
],
[
0
],
[
0
],
[
0
]]],
tf
.
float32
)
weights
=
tf
.
constant
([[
1
,
1
,
1
,
1
,
1
,
1
]],
tf
.
float32
)
focal_loss_op
=
losses
.
SigmoidFocalClassificationLoss
(
anchorwise_output
=
True
,
gamma
=
2.0
,
alpha
=
None
)
sigmoid_loss_op
=
losses
.
WeightedSigmoidClassificationLoss
(
anchorwise_output
=
True
)
focal_loss
=
focal_loss_op
(
prediction_tensor
,
target_tensor
,
weights
=
weights
)
sigmoid_loss
=
sigmoid_loss_op
(
prediction_tensor
,
target_tensor
,
weights
=
weights
)
with
self
.
test_session
()
as
sess
:
sigmoid_loss
,
focal_loss
=
sess
.
run
([
sigmoid_loss
,
focal_loss
])
order_of_ratio
=
np
.
power
(
10
,
np
.
floor
(
np
.
log10
(
sigmoid_loss
/
focal_loss
)))
self
.
assertAllClose
(
order_of_ratio
,
[[
1000
,
100
,
10
,
10
,
100
,
1000
]])
def
testHardExamplesProduceLossComparableToSigmoidXEntropy
(
self
):
prediction_tensor
=
tf
.
constant
([[[
_logit
(
0.55
)],
[
_logit
(
0.52
)],
[
_logit
(
0.50
)],
[
_logit
(
0.48
)],
[
_logit
(
0.45
)]]],
tf
.
float32
)
target_tensor
=
tf
.
constant
([[[
1
],
[
1
],
[
1
],
[
0
],
[
0
]]],
tf
.
float32
)
weights
=
tf
.
constant
([[
1
,
1
,
1
,
1
,
1
]],
tf
.
float32
)
focal_loss_op
=
losses
.
SigmoidFocalClassificationLoss
(
anchorwise_output
=
True
,
gamma
=
2.0
,
alpha
=
None
)
sigmoid_loss_op
=
losses
.
WeightedSigmoidClassificationLoss
(
anchorwise_output
=
True
)
focal_loss
=
focal_loss_op
(
prediction_tensor
,
target_tensor
,
weights
=
weights
)
sigmoid_loss
=
sigmoid_loss_op
(
prediction_tensor
,
target_tensor
,
weights
=
weights
)
with
self
.
test_session
()
as
sess
:
sigmoid_loss
,
focal_loss
=
sess
.
run
([
sigmoid_loss
,
focal_loss
])
order_of_ratio
=
np
.
power
(
10
,
np
.
floor
(
np
.
log10
(
sigmoid_loss
/
focal_loss
)))
self
.
assertAllClose
(
order_of_ratio
,
[[
1.
,
1.
,
1.
,
1.
,
1.
]])
def
testNonAnchorWiseOutputComparableToSigmoidXEntropy
(
self
):
prediction_tensor
=
tf
.
constant
([[[
_logit
(
0.55
)],
[
_logit
(
0.52
)],
[
_logit
(
0.50
)],
[
_logit
(
0.48
)],
[
_logit
(
0.45
)]]],
tf
.
float32
)
target_tensor
=
tf
.
constant
([[[
1
],
[
1
],
[
1
],
[
0
],
[
0
]]],
tf
.
float32
)
weights
=
tf
.
constant
([[
1
,
1
,
1
,
1
,
1
]],
tf
.
float32
)
focal_loss_op
=
losses
.
SigmoidFocalClassificationLoss
(
anchorwise_output
=
False
,
gamma
=
2.0
,
alpha
=
None
)
sigmoid_loss_op
=
losses
.
WeightedSigmoidClassificationLoss
(
anchorwise_output
=
False
)
focal_loss
=
focal_loss_op
(
prediction_tensor
,
target_tensor
,
weights
=
weights
)
sigmoid_loss
=
sigmoid_loss_op
(
prediction_tensor
,
target_tensor
,
weights
=
weights
)
with
self
.
test_session
()
as
sess
:
sigmoid_loss
,
focal_loss
=
sess
.
run
([
sigmoid_loss
,
focal_loss
])
order_of_ratio
=
np
.
power
(
10
,
np
.
floor
(
np
.
log10
(
sigmoid_loss
/
focal_loss
)))
self
.
assertAlmostEqual
(
order_of_ratio
,
1.
)
def
testIgnoreNegativeExampleLossViaAlphaMultiplier
(
self
):
prediction_tensor
=
tf
.
constant
([[[
_logit
(
0.55
)],
[
_logit
(
0.52
)],
[
_logit
(
0.50
)],
[
_logit
(
0.48
)],
[
_logit
(
0.45
)]]],
tf
.
float32
)
target_tensor
=
tf
.
constant
([[[
1
],
[
1
],
[
1
],
[
0
],
[
0
]]],
tf
.
float32
)
weights
=
tf
.
constant
([[
1
,
1
,
1
,
1
,
1
]],
tf
.
float32
)
focal_loss_op
=
losses
.
SigmoidFocalClassificationLoss
(
anchorwise_output
=
True
,
gamma
=
2.0
,
alpha
=
1.0
)
sigmoid_loss_op
=
losses
.
WeightedSigmoidClassificationLoss
(
anchorwise_output
=
True
)
focal_loss
=
focal_loss_op
(
prediction_tensor
,
target_tensor
,
weights
=
weights
)
sigmoid_loss
=
sigmoid_loss_op
(
prediction_tensor
,
target_tensor
,
weights
=
weights
)
with
self
.
test_session
()
as
sess
:
sigmoid_loss
,
focal_loss
=
sess
.
run
([
sigmoid_loss
,
focal_loss
])
self
.
assertAllClose
(
focal_loss
[
0
][
3
:],
[
0.
,
0.
])
order_of_ratio
=
np
.
power
(
10
,
np
.
floor
(
np
.
log10
(
sigmoid_loss
[
0
][:
3
]
/
focal_loss
[
0
][:
3
])))
self
.
assertAllClose
(
order_of_ratio
,
[
1.
,
1.
,
1.
])
def
testIgnorePositiveExampleLossViaAlphaMultiplier
(
self
):
prediction_tensor
=
tf
.
constant
([[[
_logit
(
0.55
)],
[
_logit
(
0.52
)],
[
_logit
(
0.50
)],
[
_logit
(
0.48
)],
[
_logit
(
0.45
)]]],
tf
.
float32
)
target_tensor
=
tf
.
constant
([[[
1
],
[
1
],
[
1
],
[
0
],
[
0
]]],
tf
.
float32
)
weights
=
tf
.
constant
([[
1
,
1
,
1
,
1
,
1
]],
tf
.
float32
)
focal_loss_op
=
losses
.
SigmoidFocalClassificationLoss
(
anchorwise_output
=
True
,
gamma
=
2.0
,
alpha
=
0.0
)
sigmoid_loss_op
=
losses
.
WeightedSigmoidClassificationLoss
(
anchorwise_output
=
True
)
focal_loss
=
focal_loss_op
(
prediction_tensor
,
target_tensor
,
weights
=
weights
)
sigmoid_loss
=
sigmoid_loss_op
(
prediction_tensor
,
target_tensor
,
weights
=
weights
)
with
self
.
test_session
()
as
sess
:
sigmoid_loss
,
focal_loss
=
sess
.
run
([
sigmoid_loss
,
focal_loss
])
self
.
assertAllClose
(
focal_loss
[
0
][:
3
],
[
0.
,
0.
,
0.
])
order_of_ratio
=
np
.
power
(
10
,
np
.
floor
(
np
.
log10
(
sigmoid_loss
[
0
][
3
:]
/
focal_loss
[
0
][
3
:])))
self
.
assertAllClose
(
order_of_ratio
,
[
1.
,
1.
])
def
testSimilarToSigmoidXEntropyWithHalfAlphaAndZeroGammaUpToAScale
(
self
):
prediction_tensor
=
tf
.
constant
([[[
-
100
,
100
,
-
100
],
[
100
,
-
100
,
-
100
],
[
100
,
0
,
-
100
],
[
-
100
,
-
100
,
100
]],
[[
-
100
,
0
,
100
],
[
-
100
,
100
,
-
100
],
[
100
,
100
,
100
],
[
0
,
0
,
-
1
]]],
tf
.
float32
)
target_tensor
=
tf
.
constant
([[[
0
,
1
,
0
],
[
1
,
0
,
0
],
[
1
,
0
,
0
],
[
0
,
0
,
1
]],
[[
0
,
0
,
1
],
[
0
,
1
,
0
],
[
1
,
1
,
1
],
[
1
,
0
,
0
]]],
tf
.
float32
)
weights
=
tf
.
constant
([[
1
,
1
,
1
,
1
],
[
1
,
1
,
1
,
0
]],
tf
.
float32
)
focal_loss_op
=
losses
.
SigmoidFocalClassificationLoss
(
anchorwise_output
=
True
,
alpha
=
0.5
,
gamma
=
0.0
)
sigmoid_loss_op
=
losses
.
WeightedSigmoidClassificationLoss
(
anchorwise_output
=
True
)
focal_loss
=
focal_loss_op
(
prediction_tensor
,
target_tensor
,
weights
=
weights
)
sigmoid_loss
=
sigmoid_loss_op
(
prediction_tensor
,
target_tensor
,
weights
=
weights
)
with
self
.
test_session
()
as
sess
:
sigmoid_loss
,
focal_loss
=
sess
.
run
([
sigmoid_loss
,
focal_loss
])
self
.
assertAllClose
(
sigmoid_loss
,
focal_loss
*
2
)
def
testSameAsSigmoidXEntropyWithNoAlphaAndZeroGamma
(
self
):
prediction_tensor
=
tf
.
constant
([[[
-
100
,
100
,
-
100
],
[
100
,
-
100
,
-
100
],
[
100
,
0
,
-
100
],
[
-
100
,
-
100
,
100
]],
[[
-
100
,
0
,
100
],
[
-
100
,
100
,
-
100
],
[
100
,
100
,
100
],
[
0
,
0
,
-
1
]]],
tf
.
float32
)
target_tensor
=
tf
.
constant
([[[
0
,
1
,
0
],
[
1
,
0
,
0
],
[
1
,
0
,
0
],
[
0
,
0
,
1
]],
[[
0
,
0
,
1
],
[
0
,
1
,
0
],
[
1
,
1
,
1
],
[
1
,
0
,
0
]]],
tf
.
float32
)
weights
=
tf
.
constant
([[
1
,
1
,
1
,
1
],
[
1
,
1
,
1
,
0
]],
tf
.
float32
)
focal_loss_op
=
losses
.
SigmoidFocalClassificationLoss
(
anchorwise_output
=
True
,
alpha
=
None
,
gamma
=
0.0
)
sigmoid_loss_op
=
losses
.
WeightedSigmoidClassificationLoss
(
anchorwise_output
=
True
)
focal_loss
=
focal_loss_op
(
prediction_tensor
,
target_tensor
,
weights
=
weights
)
sigmoid_loss
=
sigmoid_loss_op
(
prediction_tensor
,
target_tensor
,
weights
=
weights
)
with
self
.
test_session
()
as
sess
:
sigmoid_loss
,
focal_loss
=
sess
.
run
([
sigmoid_loss
,
focal_loss
])
self
.
assertAllClose
(
sigmoid_loss
,
focal_loss
)
def
testExpectedLossWithAlphaOneAndZeroGamma
(
self
):
# All zeros correspond to 0.5 probability.
prediction_tensor
=
tf
.
constant
([[[
0
,
0
,
0
],
[
0
,
0
,
0
],
[
0
,
0
,
0
],
[
0
,
0
,
0
]],
[[
0
,
0
,
0
],
[
0
,
0
,
0
],
[
0
,
0
,
0
],
[
0
,
0
,
0
]]],
tf
.
float32
)
target_tensor
=
tf
.
constant
([[[
0
,
1
,
0
],
[
1
,
0
,
0
],
[
1
,
0
,
0
],
[
0
,
0
,
1
]],
[[
0
,
0
,
1
],
[
0
,
1
,
0
],
[
1
,
0
,
0
],
[
1
,
0
,
0
]]],
tf
.
float32
)
weights
=
tf
.
constant
([[
1
,
1
,
1
,
1
],
[
1
,
1
,
1
,
1
]],
tf
.
float32
)
focal_loss_op
=
losses
.
SigmoidFocalClassificationLoss
(
anchorwise_output
=
False
,
alpha
=
1.0
,
gamma
=
0.0
)
focal_loss
=
focal_loss_op
(
prediction_tensor
,
target_tensor
,
weights
=
weights
)
with
self
.
test_session
()
as
sess
:
focal_loss
=
sess
.
run
(
focal_loss
)
self
.
assertAllClose
(
(
-
math
.
log
(.
5
)
*
# x-entropy per class per anchor
1.0
*
# alpha
8
),
# positives from 8 anchors
focal_loss
)
def
testExpectedLossWithAlpha75AndZeroGamma
(
self
):
# All zeros correspond to 0.5 probability.
prediction_tensor
=
tf
.
constant
([[[
0
,
0
,
0
],
[
0
,
0
,
0
],
[
0
,
0
,
0
],
[
0
,
0
,
0
]],
[[
0
,
0
,
0
],
[
0
,
0
,
0
],
[
0
,
0
,
0
],
[
0
,
0
,
0
]]],
tf
.
float32
)
target_tensor
=
tf
.
constant
([[[
0
,
1
,
0
],
[
1
,
0
,
0
],
[
1
,
0
,
0
],
[
0
,
0
,
1
]],
[[
0
,
0
,
1
],
[
0
,
1
,
0
],
[
1
,
0
,
0
],
[
1
,
0
,
0
]]],
tf
.
float32
)
weights
=
tf
.
constant
([[
1
,
1
,
1
,
1
],
[
1
,
1
,
1
,
1
]],
tf
.
float32
)
focal_loss_op
=
losses
.
SigmoidFocalClassificationLoss
(
anchorwise_output
=
False
,
alpha
=
0.75
,
gamma
=
0.0
)
focal_loss
=
focal_loss_op
(
prediction_tensor
,
target_tensor
,
weights
=
weights
)
with
self
.
test_session
()
as
sess
:
focal_loss
=
sess
.
run
(
focal_loss
)
self
.
assertAllClose
(
(
-
math
.
log
(.
5
)
*
# x-entropy per class per anchor.
((
0.75
*
# alpha for positives.
8
)
+
# positives from 8 anchors.
(
0.25
*
# alpha for negatives.
8
*
2
))),
# negatives from 8 anchors for two classes.
focal_loss
)
class
WeightedSoftmaxClassificationLossTest
(
tf
.
test
.
TestCase
):
def
testReturnsCorrectLoss
(
self
):
...
...
@@ -282,6 +562,39 @@ class WeightedSoftmaxClassificationLossTest(tf.test.TestCase):
loss_output
=
sess
.
run
(
loss
)
self
.
assertAllClose
(
loss_output
,
exp_loss
)
def
testReturnsCorrectAnchorWiseLossWithHighLogitScaleSetting
(
self
):
"""At very high logit_scale, all predictions will be ~0.33."""
# TODO(yonib): Also test logit_scale with anchorwise=False.
logit_scale
=
10e16
prediction_tensor
=
tf
.
constant
([[[
-
100
,
100
,
-
100
],
[
100
,
-
100
,
-
100
],
[
0
,
0
,
-
100
],
[
-
100
,
-
100
,
100
]],
[[
-
100
,
0
,
0
],
[
-
100
,
100
,
-
100
],
[
-
100
,
100
,
-
100
],
[
100
,
-
100
,
-
100
]]],
tf
.
float32
)
target_tensor
=
tf
.
constant
([[[
0
,
1
,
0
],
[
1
,
0
,
0
],
[
1
,
0
,
0
],
[
0
,
0
,
1
]],
[[
0
,
0
,
1
],
[
0
,
1
,
0
],
[
0
,
1
,
0
],
[
1
,
0
,
0
]]],
tf
.
float32
)
weights
=
tf
.
constant
([[
1
,
1
,
1
,
1
],
[
1
,
1
,
1
,
1
]],
tf
.
float32
)
loss_op
=
losses
.
WeightedSoftmaxClassificationLoss
(
anchorwise_output
=
True
,
logit_scale
=
logit_scale
)
loss
=
loss_op
(
prediction_tensor
,
target_tensor
,
weights
=
weights
)
uniform_distribution_loss
=
-
math
.
log
(.
33333333333
)
exp_loss
=
np
.
matrix
([[
uniform_distribution_loss
]
*
4
,
[
uniform_distribution_loss
]
*
4
])
with
self
.
test_session
()
as
sess
:
loss_output
=
sess
.
run
(
loss
)
self
.
assertAllClose
(
loss_output
,
exp_loss
)
class
BootstrappedSigmoidClassificationLossTest
(
tf
.
test
.
TestCase
):
...
...
research/object_detection/core/model.py
View file @
e7de233b
...
...
@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Abstract detection model.
This file defines a generic base class for detection models. Programs that are
...
...
@@ -87,6 +86,18 @@ class DetectionModel(object):
raise
RuntimeError
(
'Groundtruth tensor %s has not been provided'
,
field
)
return
self
.
_groundtruth_lists
[
field
]
def groundtruth_has_field(self, field):
  """Determines whether the groundtruth includes the given field.

  Args:
    field: a string key, options are
      fields.BoxListFields.{boxes,classes,masks,keypoints}

  Returns:
    True if the groundtruth includes the given field, False otherwise.
  """
  known_groundtruth_fields = self._groundtruth_lists
  return field in known_groundtruth_fields
@
abstractmethod
def
preprocess
(
self
,
inputs
):
"""Input preprocessing.
...
...
@@ -148,7 +159,8 @@ class DetectionModel(object):
Outputs adhere to the following conventions:
* Classes are integers in [0, num_classes); background classes are removed
and the first non-background class is mapped to 0.
and the first non-background class is mapped to 0. If the model produces
class-agnostic detections, then no output is produced for classes.
* Boxes are to be interpreted as being in [y_min, x_min, y_max, x_max]
format and normalized relative to the image window.
* `num_detections` is provided for settings where detections are padded to a
...
...
@@ -168,6 +180,8 @@ class DetectionModel(object):
detection_boxes: [batch, max_detections, 4]
detection_scores: [batch, max_detections]
detection_classes: [batch, max_detections]
(If a model is producing class-agnostic detections, this field may be
missing)
instance_masks: [batch, max_detections, image_height, image_width]
(optional)
keypoints: [batch, max_detections, num_keypoints, 2] (optional)
...
...
@@ -207,13 +221,13 @@ class DetectionModel(object):
groundtruth_classes_list: a list of 2-D tf.float32 one-hot (or k-hot)
tensors of shape [num_boxes, num_classes] containing the class targets
with the 0th index assumed to map to the first non-background class.
groundtruth_masks_list: a list of
2
-D tf.float32 tensors of
shape [
max_detection
s, height_in, width_in] containing instance
groundtruth_masks_list: a list of
3
-D tf.float32 tensors of
shape [
num_boxe
s, height_in, width_in] containing instance
masks with values in {0, 1}. If None, no masks are provided.
Mask resolution `height_in`x`width_in` must agree with the resolution
of the input image tensor provided to the `preprocess` function.
groundtruth_keypoints_list: a list of
2
-D tf.float32 tensors of
shape [
batch, max_detection
s, num_keypoints, 2] containing keypoints.
groundtruth_keypoints_list: a list of
3
-D tf.float32 tensors of
shape [
num_boxe
s, num_keypoints, 2] containing keypoints.
Keypoints are assumed to be provided in normalized coordinates and
missing keypoints should be encoded as NaN.
"""
...
...
research/object_detection/core/preprocessor.py
View file @
e7de233b
This diff is collapsed.
Click to expand it.
research/object_detection/core/preprocessor_test.py
View file @
e7de233b
This diff is collapsed.
Click to expand it.
research/object_detection/core/standard_fields.py
View file @
e7de233b
...
...
@@ -18,6 +18,7 @@
Specifies:
InputDataFields: standard fields used by reader/preprocessor/batcher.
DetectionResultFields: standard fields returned by object detector.
BoxListFields: standard field used by BoxList
TfExampleFields: standard fields for tf-example data format (go/tf-example).
"""
...
...
@@ -41,12 +42,17 @@ class InputDataFields(object):
groundtruth_boxes: coordinates of the ground truth boxes in the image.
groundtruth_classes: box-level class labels.
groundtruth_label_types: box-level label types (e.g. explicit negative).
groundtruth_is_crowd: is the groundtruth a single object or a crowd.
groundtruth_is_crowd: [DEPRECATED, use groundtruth_group_of instead]
is the groundtruth a single object or a crowd.
groundtruth_area: area of a groundtruth segment.
groundtruth_difficult: is a `difficult` object
groundtruth_group_of: is a `group_of` objects, e.g. multiple objects of the
same class, forming a connected group, where instances are heavily
occluding each other.
proposal_boxes: coordinates of object proposal boxes.
proposal_objectness: objectness score of each proposal.
groundtruth_instance_masks: ground truth instance masks.
groundtruth_instance_boundaries: ground truth instance boundaries.
groundtruth_instance_classes: instance mask-level class labels.
groundtruth_keypoints: ground truth keypoints.
groundtruth_keypoint_visibilities: ground truth keypoint visibilities.
...
...
@@ -64,15 +70,43 @@ class InputDataFields(object):
groundtruth_is_crowd
=
'groundtruth_is_crowd'
groundtruth_area
=
'groundtruth_area'
groundtruth_difficult
=
'groundtruth_difficult'
groundtruth_group_of
=
'groundtruth_group_of'
proposal_boxes
=
'proposal_boxes'
proposal_objectness
=
'proposal_objectness'
groundtruth_instance_masks
=
'groundtruth_instance_masks'
groundtruth_instance_boundaries
=
'groundtruth_instance_boundaries'
groundtruth_instance_classes
=
'groundtruth_instance_classes'
groundtruth_keypoints
=
'groundtruth_keypoints'
groundtruth_keypoint_visibilities
=
'groundtruth_keypoint_visibilities'
groundtruth_label_scores
=
'groundtruth_label_scores'
class DetectionResultFields(object):
  """Naming conventions for storing the output of the detector.

  Attributes:
    source_id: source of the original image.
    key: unique key corresponding to image.
    detection_boxes: coordinates of the detection boxes in the image.
    detection_scores: detection scores for the detection boxes in the image.
    detection_classes: detection-level class labels.
    detection_masks: contains a segmentation mask for each detection box.
    detection_boundaries: contains an object boundary for each detection box.
    detection_keypoints: contains detection keypoints for each detection box.
    num_detections: number of detections in the batch.
  """

  source_id = 'source_id'
  key = 'key'
  detection_boxes = 'detection_boxes'
  detection_scores = 'detection_scores'
  detection_classes = 'detection_classes'
  detection_masks = 'detection_masks'
  detection_boundaries = 'detection_boundaries'
  detection_keypoints = 'detection_keypoints'
  num_detections = 'num_detections'
class
BoxListFields
(
object
):
"""Naming conventions for BoxLists.
...
...
@@ -83,6 +117,7 @@ class BoxListFields(object):
weights: sample weights per bounding box.
objectness: objectness score per bounding box.
masks: masks per bounding box.
boundaries: boundaries per bounding box.
keypoints: keypoints per bounding box.
keypoint_heatmaps: keypoint heatmaps per bounding box.
"""
...
...
@@ -92,6 +127,7 @@ class BoxListFields(object):
weights
=
'weights'
objectness
=
'objectness'
masks
=
'masks'
boundaries
=
'boundaries'
keypoints
=
'keypoints'
keypoint_heatmaps
=
'keypoint_heatmaps'
...
...
@@ -112,7 +148,7 @@ class TfExampleFields(object):
width: width of image in pixels, e.g. 581
source_id: original source of the image
object_class_text: labels in text format, e.g. ["person", "cat"]
object_class_
text
: labels in numbers, e.g. [16, 8]
object_class_
label
: labels in numbers, e.g. [16, 8]
object_bbox_xmin: xmin coordinates of groundtruth box, e.g. 10, 30
object_bbox_xmax: xmax coordinates of groundtruth box, e.g. 50, 40
object_bbox_ymin: ymin coordinates of groundtruth box, e.g. 40, 50
...
...
@@ -121,10 +157,20 @@ class TfExampleFields(object):
object_truncated: is object truncated, e.g. [true, false]
object_occluded: is object occluded, e.g. [true, false]
object_difficult: is object difficult, e.g. [true, false]
object_is_crowd: is the object a single object or a crowd
object_group_of: is object a single object or a group of objects
object_depiction: is object a depiction
object_is_crowd: [DEPRECATED, use object_group_of instead]
is the object a single object or a crowd
object_segment_area: the area of the segment.
instance_masks: instance segmentation masks.
instance_boundaries: instance boundaries.
instance_classes: Classes for each instance segmentation mask.
detection_class_label: class label in numbers.
detection_bbox_ymin: ymin coordinates of a detection box.
detection_bbox_xmin: xmin coordinates of a detection box.
detection_bbox_ymax: ymax coordinates of a detection box.
detection_bbox_xmax: xmax coordinates of a detection box.
detection_score: detection score for the class label and box.
"""
image_encoded
=
'image/encoded'
image_format
=
'image/format'
# format is reserved keyword
...
...
@@ -144,7 +190,16 @@ class TfExampleFields(object):
object_truncated
=
'image/object/truncated'
object_occluded
=
'image/object/occluded'
object_difficult
=
'image/object/difficult'
object_group_of
=
'image/object/group_of'
object_depiction
=
'image/object/depiction'
object_is_crowd
=
'image/object/is_crowd'
object_segment_area
=
'image/object/segment/area'
instance_masks
=
'image/segmentation/object'
instance_boundaries
=
'image/boundaries/object'
instance_classes
=
'image/segmentation/object/class'
detection_class_label
=
'image/detection/label'
detection_bbox_ymin
=
'image/detection/bbox/ymin'
detection_bbox_xmin
=
'image/detection/bbox/xmin'
detection_bbox_ymax
=
'image/detection/bbox/ymax'
detection_bbox_xmax
=
'image/detection/bbox/xmax'
detection_score
=
'image/detection/score'
research/object_detection/core/target_assigner.py
View file @
e7de233b
...
...
@@ -50,7 +50,7 @@ class TargetAssigner(object):
def
__init__
(
self
,
similarity_calc
,
matcher
,
box_coder
,
positive_class_weight
=
1.0
,
negative_class_weight
=
1.0
,
unmatched_cls_target
=
None
):
"""Construct
Multibox
Target Assigner.
"""Construct
Object Detection
Target Assigner.
Args:
similarity_calc: a RegionSimilarityCalculator
...
...
@@ -108,7 +108,7 @@ class TargetAssigner(object):
Args:
anchors: a BoxList representing N anchors
groundtruth_boxes: a BoxList representing M groundtruth boxes
groundtruth_labels: a tensor of shape [
num_gt_boxes
, d_1, ... d_k]
groundtruth_labels: a tensor of shape [
M
, d_1, ... d_k]
with labels for each of the ground_truth boxes. The subshape
[d_1, ... d_k] can be empty (corresponding to scalar inputs). When set
to None, groundtruth_labels assumes a binary problem where all
...
...
@@ -140,10 +140,16 @@ class TargetAssigner(object):
groundtruth_labels
=
tf
.
ones
(
tf
.
expand_dims
(
groundtruth_boxes
.
num_boxes
(),
0
))
groundtruth_labels
=
tf
.
expand_dims
(
groundtruth_labels
,
-
1
)
shape_assert
=
tf
.
assert_equal
(
tf
.
shape
(
groundtruth_labels
)[
1
:],
tf
.
shape
(
self
.
_unmatched_cls_target
))
with
tf
.
control_dependencies
([
shape_assert
]):
unmatched_shape_assert
=
tf
.
assert_equal
(
tf
.
shape
(
groundtruth_labels
)[
1
:],
tf
.
shape
(
self
.
_unmatched_cls_target
),
message
=
'Unmatched class target shape incompatible '
'with groundtruth labels shape!'
)
labels_and_box_shapes_assert
=
tf
.
assert_equal
(
tf
.
shape
(
groundtruth_labels
)[
0
],
groundtruth_boxes
.
num_boxes
(),
message
=
'Groundtruth boxes and labels have incompatible shapes!'
)
with
tf
.
control_dependencies
(
[
unmatched_shape_assert
,
labels_and_box_shapes_assert
]):
match_quality_matrix
=
self
.
_similarity_calc
.
compare
(
groundtruth_boxes
,
anchors
)
match
=
self
.
_matcher
.
match
(
match_quality_matrix
,
**
params
)
...
...
@@ -316,8 +322,8 @@ class TargetAssigner(object):
return
self
.
_box_coder
# TODO: This method pulls in all the implementation dependencies into
core.
# Therefore its best to have this factory method outside of core.
# TODO: This method pulls in all the implementation dependencies into
#
core.
Therefore its best to have this factory method outside of core.
def
create_target_assigner
(
reference
,
stage
=
None
,
positive_class_weight
=
1.0
,
negative_class_weight
=
1.0
,
...
...
research/object_detection/core/target_assigner_test.py
View file @
e7de233b
...
...
@@ -327,6 +327,41 @@ class TargetAssignerTest(tf.test.TestCase):
self
.
assertEquals
(
reg_weights_out
.
dtype
,
np
.
float32
)
self
.
assertEquals
(
matching_anchors_out
.
dtype
,
np
.
int32
)
def test_raises_error_on_incompatible_groundtruth_boxes_and_labels(self):
  """Checks assign() raises when box count and label row count disagree.

  Builds 4 groundtruth boxes but only 3 label rows; the runtime shape
  assertion in TargetAssigner.assign should fire with the expected message.
  """
  similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
  matcher = bipartite_matcher.GreedyBipartiteMatcher()
  box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
  # 7-class unmatched target; label rows below must match this width.
  unmatched_cls_target = tf.constant([1, 0, 0, 0, 0, 0, 0], tf.float32)
  target_assigner = targetassigner.TargetAssigner(
      similarity_calc, matcher, box_coder,
      unmatched_cls_target=unmatched_cls_target)

  # 4 anchors in [ymin, xmin, ymax, xmax] form, with uniform stddev 0.1.
  prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5],
                             [0.5, 0.5, 1.0, 0.8],
                             [0, 0.5, .5, 1.0],
                             [.75, 0, 1.0, .25]])
  prior_stddevs = tf.constant(4 * [4 * [.1]])
  priors = box_list.BoxList(prior_means)
  priors.add_field('stddev', prior_stddevs)

  # 4 groundtruth boxes ...
  box_corners = [[0.0, 0.0, 0.5, 0.5],
                 [0.0, 0.0, 0.5, 0.8],
                 [0.5, 0.5, 0.9, 0.9],
                 [.75, 0, .95, .27]]
  boxes = box_list.BoxList(tf.constant(box_corners))
  # ... but only 3 one-hot label rows: the deliberate mismatch under test.
  groundtruth_labels = tf.constant([[0, 1, 0, 0, 0, 0, 0],
                                    [0, 0, 0, 0, 0, 1, 0],
                                    [0, 0, 0, 1, 0, 0, 0]], tf.float32)
  # Graph construction succeeds; the error only surfaces at sess.run time
  # because the shape check is a runtime tf.assert_equal.
  result = target_assigner.assign(priors, boxes, groundtruth_labels,
                                  num_valid_rows=3)
  (cls_targets, cls_weights, reg_targets, reg_weights, _) = result
  with self.test_session() as sess:
    with self.assertRaisesWithPredicateMatch(
        tf.errors.InvalidArgumentError,
        'Groundtruth boxes and labels have incompatible shapes!'):
      sess.run([cls_targets, cls_weights, reg_targets, reg_weights])
def
test_raises_error_on_invalid_groundtruth_labels
(
self
):
similarity_calc
=
region_similarity_calculator
.
NegSqDistSimilarity
()
matcher
=
bipartite_matcher
.
GreedyBipartiteMatcher
()
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment