ModelZoo / ResNet50_tensorflow · Commits

Commit 3b158095
Authored May 07, 2018 by Ilya Mironov
Merge branch 'master' of https://github.com/ilyamironov/models
Parents: a90db800, be659c2f

Changes: 121. Showing 20 changed files with 1119 additions and 134 deletions (+1119 / -134).
research/object_detection/core/losses.py                                        +49   -0
research/object_detection/core/losses_test.py                                   +105  -0
research/object_detection/core/preprocessor.py                                  +2    -2
research/object_detection/core/preprocessor_test.py                             +13   -5
research/object_detection/data/ava_label_map_v2.1.pbtxt                         +240  -0
research/object_detection/data_decoders/tf_example_decoder.py                   +43   -3
research/object_detection/data_decoders/tf_example_decoder_test.py              +44   -0
research/object_detection/g3doc/detection_model_zoo.md                          +8    -1
research/object_detection/g3doc/installation.md                                 +1    -1
research/object_detection/meta_architectures/faster_rcnn_meta_arch.py           +70   -43
research/object_detection/meta_architectures/faster_rcnn_meta_arch_test.py      +40   -0
research/object_detection/meta_architectures/faster_rcnn_meta_arch_test_lib.py  +100  -0
research/object_detection/metrics/coco_evaluation.py                            +77   -26
research/object_detection/metrics/coco_evaluation_test.py                       +224  -0
research/object_detection/model_lib.py                                          +18   -19
research/object_detection/model_main.py                                         +21   -11
research/object_detection/model_tpu_main.py                                     +8    -2
research/object_detection/models/faster_rcnn_mobilenet_v1_feature_extractor.py  +42   -20
research/object_detection/protos/box_predictor.proto                            +11   -1
research/object_detection/protos/input_reader.proto                             +3    -0
research/object_detection/core/losses.py · View file @ 3b158095

@@ -23,6 +23,7 @@ Localization losses:
 Classification losses:
  * WeightedSigmoidClassificationLoss
  * WeightedSoftmaxClassificationLoss
+ * WeightedSoftmaxClassificationAgainstLogitsLoss
  * BootstrappedSigmoidClassificationLoss
 """
 from abc import ABCMeta
@@ -317,6 +318,54 @@ class WeightedSoftmaxClassificationLoss(Loss):
     return tf.reshape(per_row_cross_ent, tf.shape(weights)) * weights


+class WeightedSoftmaxClassificationAgainstLogitsLoss(Loss):
+  """Softmax loss function against logits.
+
+  Targets are expected to be provided in logits space instead of "one hot" or
+  "probability distribution" space.
+  """
+
+  def __init__(self, logit_scale=1.0):
+    """Constructor.
+
+    Args:
+      logit_scale: When this value is high, the target is "diffused" and
+        when this value is low, the target is made peakier. (default 1.0)
+    """
+    self._logit_scale = logit_scale
+
+  def _scale_and_softmax_logits(self, logits):
+    """Scale logits then apply softmax."""
+    scaled_logits = tf.divide(logits, self._logit_scale, name='scale_logits')
+    return tf.nn.softmax(scaled_logits, name='convert_scores')
+
+  def _compute_loss(self, prediction_tensor, target_tensor, weights):
+    """Compute loss function.
+
+    Args:
+      prediction_tensor: A float tensor of shape [batch_size, num_anchors,
+        num_classes] representing the predicted logits for each class
+      target_tensor: A float tensor of shape [batch_size, num_anchors,
+        num_classes] representing logit classification targets
+      weights: a float tensor of shape [batch_size, num_anchors]
+
+    Returns:
+      loss: a float tensor of shape [batch_size, num_anchors]
+        representing the value of the loss function.
+    """
+    num_classes = prediction_tensor.get_shape().as_list()[-1]
+    target_tensor = self._scale_and_softmax_logits(target_tensor)
+    prediction_tensor = tf.divide(prediction_tensor, self._logit_scale,
+                                  name='scale_logits')
+
+    per_row_cross_ent = (tf.nn.softmax_cross_entropy_with_logits(
+        labels=tf.reshape(target_tensor, [-1, num_classes]),
+        logits=tf.reshape(prediction_tensor, [-1, num_classes])))
+    return tf.reshape(per_row_cross_ent, tf.shape(weights)) * weights
+
+
 class BootstrappedSigmoidClassificationLoss(Loss):
   """Bootstrapped sigmoid cross entropy classification loss function.
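For context, a minimal usage sketch of the new loss class (not part of the diff; the call pattern mirrors the tests below, and `losses` refers to object_detection.core.losses):

    import tensorflow as tf
    from object_detection.core import losses

    # One image, one anchor, three classes; targets are given in logits space.
    prediction_tensor = tf.constant([[[-100., 100., -100.]]], tf.float32)
    target_tensor = tf.constant([[[-100., 100., -100.]]], tf.float32)
    weights = tf.constant([[1.0]], tf.float32)

    loss_op = losses.WeightedSoftmaxClassificationAgainstLogitsLoss(logit_scale=1.0)
    # Returns a per-anchor loss tensor of shape [batch_size, num_anchors].
    loss = loss_op(prediction_tensor, target_tensor, weights=weights)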
research/object_detection/core/losses_test.py · View file @ 3b158095

@@ -576,6 +576,111 @@ class WeightedSoftmaxClassificationLossTest(tf.test.TestCase):
       self.assertAllClose(loss_output, exp_loss)


+class WeightedSoftmaxClassificationAgainstLogitsLossTest(tf.test.TestCase):
+
+  def testReturnsCorrectLoss(self):
+    prediction_tensor = tf.constant([[[-100, 100, -100],
+                                      [100, -100, -100],
+                                      [0, 0, -100],
+                                      [-100, -100, 100]],
+                                     [[-100, 0, 0],
+                                      [-100, 100, -100],
+                                      [-100, 100, -100],
+                                      [100, -100, -100]]], tf.float32)
+    target_tensor = tf.constant([[[-100, 100, -100],
+                                  [100, -100, -100],
+                                  [100, -100, -100],
+                                  [-100, -100, 100]],
+                                 [[-100, -100, 100],
+                                  [-100, 100, -100],
+                                  [-100, 100, -100],
+                                  [100, -100, -100]]], tf.float32)
+    weights = tf.constant([[1, 1, .5, 1],
+                           [1, 1, 1, 1]], tf.float32)
+    loss_op = losses.WeightedSoftmaxClassificationAgainstLogitsLoss()
+    loss = loss_op(prediction_tensor, target_tensor, weights=weights)
+    loss = tf.reduce_sum(loss)
+
+    exp_loss = - 1.5 * math.log(.5)
+    with self.test_session() as sess:
+      loss_output = sess.run(loss)
+      self.assertAllClose(loss_output, exp_loss)
+
+  def testReturnsCorrectAnchorWiseLoss(self):
+    prediction_tensor = tf.constant([[[-100, 100, -100],
+                                      [100, -100, -100],
+                                      [0, 0, -100],
+                                      [-100, -100, 100]],
+                                     [[-100, 0, 0],
+                                      [-100, 100, -100],
+                                      [-100, 100, -100],
+                                      [100, -100, -100]]], tf.float32)
+    target_tensor = tf.constant([[[-100, 100, -100],
+                                  [100, -100, -100],
+                                  [100, -100, -100],
+                                  [-100, -100, 100]],
+                                 [[-100, -100, 100],
+                                  [-100, 100, -100],
+                                  [-100, 100, -100],
+                                  [100, -100, -100]]], tf.float32)
+    weights = tf.constant([[1, 1, .5, 1],
+                           [1, 1, 1, 0]], tf.float32)
+    loss_op = losses.WeightedSoftmaxClassificationAgainstLogitsLoss()
+    loss = loss_op(prediction_tensor, target_tensor, weights=weights)
+
+    exp_loss = np.matrix([[0, 0, - 0.5 * math.log(.5), 0],
+                          [-math.log(.5), 0, 0, 0]])
+    with self.test_session() as sess:
+      loss_output = sess.run(loss)
+      self.assertAllClose(loss_output, exp_loss)
+
+  def testReturnsCorrectAnchorWiseLossWithLogitScaleSetting(self):
+    logit_scale = 100.
+    prediction_tensor = tf.constant([[[-100, 100, -100],
+                                      [100, -100, -100],
+                                      [0, 0, -100],
+                                      [-100, -100, 100]],
+                                     [[-100, 0, 0],
+                                      [-100, 100, -100],
+                                      [-100, 100, -100],
+                                      [100, -100, -100]]], tf.float32)
+    target_tensor = tf.constant([[[-100, 100, -100],
+                                  [100, -100, -100],
+                                  [0, 0, -100],
+                                  [-100, -100, 100]],
+                                 [[-100, 0, 0],
+                                  [-100, 100, -100],
+                                  [-100, 100, -100],
+                                  [100, -100, -100]]], tf.float32)
+    weights = tf.constant([[1, 1, .5, 1],
+                           [1, 1, 1, 0]], tf.float32)
+    loss_op = losses.WeightedSoftmaxClassificationAgainstLogitsLoss(
+        logit_scale=logit_scale)
+    loss = loss_op(prediction_tensor, target_tensor, weights=weights)
+
+    # find softmax of the two prediction types above
+    softmax_pred1 = [np.exp(-1), np.exp(-1), np.exp(1)]
+    softmax_pred1 /= sum(softmax_pred1)
+    softmax_pred2 = [np.exp(0), np.exp(0), np.exp(-1)]
+    softmax_pred2 /= sum(softmax_pred2)
+
+    # compute the expected cross entropy for perfect matches
+    exp_entropy1 = sum([-x * np.log(x) for x in softmax_pred1])
+    exp_entropy2 = sum([-x * np.log(x) for x in softmax_pred2])
+
+    # weighted expected losses
+    exp_loss = np.matrix(
+        [[exp_entropy1, exp_entropy1, exp_entropy2 * .5, exp_entropy1],
+         [exp_entropy2, exp_entropy1, exp_entropy1, 0.]])
+
+    with self.test_session() as sess:
+      loss_output = sess.run(loss)
+      self.assertAllClose(loss_output, exp_loss)
+
+
 class BootstrappedSigmoidClassificationLossTest(tf.test.TestCase):

   def testReturnsCorrectLossSoftBootstrapping(self):
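As a quick sanity check on `exp_loss = -1.5 * math.log(.5)` in the first test above (a standalone recomputation, not part of the diff): only two anchors disagree with their targets, and each contributes a cross entropy of -log(0.5), one of them down-weighted by 0.5:

    import math

    # image 1, anchor 3: prediction softmax ~ [.5, .5, 0], target is class 0, weight .5
    # image 2, anchor 1: prediction softmax ~ [0, .5, .5], target is class 2, weight 1
    exp_loss = 0.5 * -math.log(0.5) + 1.0 * -math.log(0.5)
    assert abs(exp_loss - (-1.5 * math.log(0.5))) < 1e-12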
research/object_detection/core/preprocessor.py · View file @ 3b158095

@@ -1000,8 +1000,8 @@ def random_adjust_saturation(image,
 def random_distort_color(image, color_ordering=0, preprocess_vars_cache=None):
   """Randomly distorts color.

   Randomly distorts color using a combination of brightness, hue, contrast and
   saturation changes. Makes sure the output image is still between 0 and 255.

   Args:
     image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
research/object_detection/core/preprocessor_test.py · View file @ 3b158095

@@ -2620,16 +2620,24 @@ class PreprocessorTest(tf.test.TestCase):
     distorted_images_rank = tf.rank(distorted_images)
     boxes_rank = tf.rank(boxes)
     distorted_boxes_rank = tf.rank(distorted_boxes)
     multiclass_scores_rank = tf.rank(multiclass_scores)
     distorted_multiclass_scores_rank = tf.rank(distorted_multiclass_scores)

     with self.test_session() as sess:
-      (boxes_rank_, distorted_boxes_rank_, images_rank_,
-       distorted_images_rank_, multiclass_scores_,
-       distorted_multiclass_scores_) = sess.run([
-           boxes_rank, distorted_boxes_rank, images_rank,
-           distorted_images_rank, multiclass_scores,
-           distorted_multiclass_scores])
+      (boxes_rank_, distorted_boxes_, distorted_boxes_rank_, images_rank_,
+       distorted_images_rank_, multiclass_scores_rank_,
+       distorted_multiclass_scores_,
+       distorted_multiclass_scores_rank_) = sess.run([
+           boxes_rank, distorted_boxes, distorted_boxes_rank, images_rank,
+           distorted_images_rank, multiclass_scores_rank,
+           distorted_multiclass_scores, distorted_multiclass_scores_rank])
       self.assertAllEqual(boxes_rank_, distorted_boxes_rank_)
       self.assertAllEqual(images_rank_, distorted_images_rank_)
-      self.assertAllEqual(multiclass_scores_, distorted_multiclass_scores_)
+      self.assertAllEqual(multiclass_scores_rank_,
+                          distorted_multiclass_scores_rank_)
+      self.assertAllEqual(distorted_boxes_.shape[0],
+                          distorted_multiclass_scores_.shape[0])

   def testSSDRandomCropPad(self):
     images = self.createTestImages()
research/object_detection/data/ava_label_map_v2.1.pbtxt · new file (mode 100644) · View file @ 3b158095

item {
name: "bend/bow (at the waist)"
id: 1
}
item {
name: "crouch/kneel"
id: 3
}
item {
name: "dance"
id: 4
}
item {
name: "fall down"
id: 5
}
item {
name: "get up"
id: 6
}
item {
name: "jump/leap"
id: 7
}
item {
name: "lie/sleep"
id: 8
}
item {
name: "martial art"
id: 9
}
item {
name: "run/jog"
id: 10
}
item {
name: "sit"
id: 11
}
item {
name: "stand"
id: 12
}
item {
name: "swim"
id: 13
}
item {
name: "walk"
id: 14
}
item {
name: "answer phone"
id: 15
}
item {
name: "carry/hold (an object)"
id: 17
}
item {
name: "climb (e.g., a mountain)"
id: 20
}
item {
name: "close (e.g., a door, a box)"
id: 22
}
item {
name: "cut"
id: 24
}
item {
name: "dress/put on clothing"
id: 26
}
item {
name: "drink"
id: 27
}
item {
name: "drive (e.g., a car, a truck)"
id: 28
}
item {
name: "eat"
id: 29
}
item {
name: "enter"
id: 30
}
item {
name: "hit (an object)"
id: 34
}
item {
name: "lift/pick up"
id: 36
}
item {
name: "listen (e.g., to music)"
id: 37
}
item {
name: "open (e.g., a window, a car door)"
id: 38
}
item {
name: "play musical instrument"
id: 41
}
item {
name: "point to (an object)"
id: 43
}
item {
name: "pull (an object)"
id: 45
}
item {
name: "push (an object)"
id: 46
}
item {
name: "put down"
id: 47
}
item {
name: "read"
id: 48
}
item {
name: "ride (e.g., a bike, a car, a horse)"
id: 49
}
item {
name: "sail boat"
id: 51
}
item {
name: "shoot"
id: 52
}
item {
name: "smoke"
id: 54
}
item {
name: "take a photo"
id: 56
}
item {
name: "text on/look at a cellphone"
id: 57
}
item {
name: "throw"
id: 58
}
item {
name: "touch (an object)"
id: 59
}
item {
name: "turn (e.g., a screwdriver)"
id: 60
}
item {
name: "watch (e.g., TV)"
id: 61
}
item {
name: "work on a computer"
id: 62
}
item {
name: "write"
id: 63
}
item {
name: "fight/hit (a person)"
id: 64
}
item {
name: "give/serve (an object) to (a person)"
id: 65
}
item {
name: "grab (a person)"
id: 66
}
item {
name: "hand clap"
id: 67
}
item {
name: "hand shake"
id: 68
}
item {
name: "hand wave"
id: 69
}
item {
name: "hug (a person)"
id: 70
}
item {
name: "kiss (a person)"
id: 72
}
item {
name: "lift (a person)"
id: 73
}
item {
name: "listen to (a person)"
id: 74
}
item {
name: "push (another person)"
id: 76
}
item {
name: "sing to (e.g., self, a person, a group)"
id: 77
}
item {
name: "take (an object) from (a person)"
id: 78
}
item {
name: "talk to (e.g., self, a person, a group)"
id: 79
}
item {
name: "watch (a person)"
id: 80
}
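A minimal sketch of consuming the new label map (the helper calls below are the API's usual label_map_util utilities, assumed here rather than shown in this commit):

    from object_detection.utils import label_map_util

    # Path relative to the research/ directory.
    label_map = label_map_util.load_labelmap(
        'object_detection/data/ava_label_map_v2.1.pbtxt')
    # The highest id in this file is 80.
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=80, use_display_name=False)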
research/object_detection/data_decoders/tf_example_decoder.py · View file @ 3b158095

@@ -111,7 +111,8 @@ class TfExampleDecoder(data_decoder.DataDecoder):
                instance_mask_type=input_reader_pb2.NUMERICAL_MASKS,
                label_map_proto_file=None,
                use_display_name=False,
-               dct_method=''):
+               dct_method='',
+               num_keypoints=0):
     """Constructor sets keys_to_features and items_to_handlers.

     Args:
@@ -131,6 +132,7 @@ class TfExampleDecoder(data_decoder.DataDecoder):
         algorithm used for jpeg decompression. Currently valid values
         are ['INTEGER_FAST', 'INTEGER_ACCURATE']. The hint may be ignored, for
         example, the jpeg library does not have that specific option.
+      num_keypoints: the number of keypoints per object.

     Raises:
       ValueError: If `instance_mask_type` option is not one of
@@ -149,9 +151,9 @@ class TfExampleDecoder(data_decoder.DataDecoder):
         'image/source_id':
             tf.FixedLenFeature((), tf.string, default_value=''),
         'image/height':
-            tf.FixedLenFeature((), tf.int64, 1),
+            tf.FixedLenFeature((), tf.int64, default_value=1),
         'image/width':
-            tf.FixedLenFeature((), tf.int64, 1),
+            tf.FixedLenFeature((), tf.int64, default_value=1),
         # Object boxes and classes.
         'image/object/bbox/xmin':
             tf.VarLenFeature(tf.float32),
@@ -209,6 +211,16 @@ class TfExampleDecoder(data_decoder.DataDecoder):
         fields.InputDataFields.groundtruth_weights: (
             slim_example_decoder.Tensor('image/object/weight')),
     }
+    self._num_keypoints = num_keypoints
+    if num_keypoints > 0:
+      self.keys_to_features['image/object/keypoint/x'] = (
+          tf.VarLenFeature(tf.float32))
+      self.keys_to_features['image/object/keypoint/y'] = (
+          tf.VarLenFeature(tf.float32))
+      self.items_to_handlers[fields.InputDataFields.groundtruth_keypoints] = (
+          slim_example_decoder.ItemHandlerCallback(
+              ['image/object/keypoint/y', 'image/object/keypoint/x'],
+              self._reshape_keypoints))
     if load_instance_masks:
       if instance_mask_type in (input_reader_pb2.DEFAULT,
                                 input_reader_pb2.NUMERICAL_MASKS):
@@ -286,6 +298,9 @@ class TfExampleDecoder(data_decoder.DataDecoder):
         [None] indicating if the boxes represent `difficult` instances.
       fields.InputDataFields.groundtruth_group_of - 1D bool tensor of shape
         [None] indicating if the boxes represent `group_of` instances.
+      fields.InputDataFields.groundtruth_keypoints - 3D float32 tensor of
+        shape [None, None, 2] containing keypoints, where the coordinates of
+        the keypoints are ordered (y, x).
       fields.InputDataFields.groundtruth_instance_masks - 3D float32 tensor of
         shape [None, None, None] containing instance masks.
     """
@@ -314,6 +329,31 @@ class TfExampleDecoder(data_decoder.DataDecoder):
           default_groundtruth_weights)
     return tensor_dict

+  def _reshape_keypoints(self, keys_to_tensors):
+    """Reshape keypoints.
+
+    The keypoints are reshaped to [num_instances, num_keypoints, 2].
+
+    Args:
+      keys_to_tensors: a dictionary from keys to tensors.
+
+    Returns:
+      A 3-D float tensor of shape [num_instances, num_keypoints, 2] with the
+      keypoint coordinates ordered (y, x).
+    """
+    y = keys_to_tensors['image/object/keypoint/y']
+    if isinstance(y, tf.SparseTensor):
+      y = tf.sparse_tensor_to_dense(y)
+    y = tf.expand_dims(y, 1)
+    x = keys_to_tensors['image/object/keypoint/x']
+    if isinstance(x, tf.SparseTensor):
+      x = tf.sparse_tensor_to_dense(x)
+    x = tf.expand_dims(x, 1)
+    keypoints = tf.concat([y, x], 1)
+    keypoints = tf.reshape(keypoints, [-1, self._num_keypoints, 2])
+    return keypoints
+
   def _reshape_instance_masks(self, keys_to_tensors):
     """Reshape instance segmentation masks.
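The (y, x) interleave-and-reshape semantics of `_reshape_keypoints` can be seen in a standalone numpy sketch (values taken from testDecodeKeypoint below; not part of the diff):

    import numpy as np

    keypoint_ys = [0.0, 1.0, 2.0, 3.0, 4.0, 5.0]
    keypoint_xs = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]
    # Pair each (y, x), then group num_keypoints=3 keypoints per instance.
    keypoints = np.vstack([keypoint_ys, keypoint_xs]).transpose().reshape((2, 3, 2))
    # keypoints[0] == [[0., 1.], [1., 2.], [2., 3.]]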
research/object_detection/data_decoders/tf_example_decoder_test.py · View file @ 3b158095

@@ -304,6 +304,50 @@ class TfExampleDecoderTest(tf.test.TestCase):
       self.assertAllEqual(
           2, tensor_dict[fields.InputDataFields.num_groundtruth_boxes])

+  def testDecodeKeypoint(self):
+    image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
+    encoded_jpeg = self._EncodeImage(image_tensor)
+    bbox_ymins = [0.0, 4.0]
+    bbox_xmins = [1.0, 5.0]
+    bbox_ymaxs = [2.0, 6.0]
+    bbox_xmaxs = [3.0, 7.0]
+    keypoint_ys = [0.0, 1.0, 2.0, 3.0, 4.0, 5.0]
+    keypoint_xs = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]
+    example = tf.train.Example(features=tf.train.Features(feature={
+        'image/encoded': self._BytesFeature(encoded_jpeg),
+        'image/format': self._BytesFeature('jpeg'),
+        'image/object/bbox/ymin': self._FloatFeature(bbox_ymins),
+        'image/object/bbox/xmin': self._FloatFeature(bbox_xmins),
+        'image/object/bbox/ymax': self._FloatFeature(bbox_ymaxs),
+        'image/object/bbox/xmax': self._FloatFeature(bbox_xmaxs),
+        'image/object/keypoint/y': self._FloatFeature(keypoint_ys),
+        'image/object/keypoint/x': self._FloatFeature(keypoint_xs),
+    })).SerializeToString()
+
+    example_decoder = tf_example_decoder.TfExampleDecoder(num_keypoints=3)
+    tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
+
+    self.assertAllEqual(
+        (tensor_dict[fields.InputDataFields.groundtruth_boxes]
+         .get_shape().as_list()), [None, 4])
+    self.assertAllEqual(
+        (tensor_dict[fields.InputDataFields.groundtruth_keypoints]
+         .get_shape().as_list()), [None, 3, 2])
+    with self.test_session() as sess:
+      tensor_dict = sess.run(tensor_dict)
+      expected_boxes = np.vstack([bbox_ymins, bbox_xmins,
+                                  bbox_ymaxs, bbox_xmaxs]).transpose()
+      self.assertAllEqual(
+          expected_boxes,
+          tensor_dict[fields.InputDataFields.groundtruth_boxes])
+      self.assertAllEqual(
+          2, tensor_dict[fields.InputDataFields.num_groundtruth_boxes])
+      expected_keypoints = (
+          np.vstack([keypoint_ys, keypoint_xs]).transpose().reshape((2, 3, 2)))
+      self.assertAllEqual(
+          expected_keypoints,
+          tensor_dict[fields.InputDataFields.groundtruth_keypoints])
+
   def testDecodeDefaultGroundtruthWeights(self):
     image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
     encoded_jpeg = self._EncodeImage(image_tensor)
research/object_detection/g3doc/detection_model_zoo.md · View file @ 3b158095

@@ -91,7 +91,7 @@ Some remarks on frozen inference graphs:

 ## Kitti-trained models {#kitti-models}

-Model name | Speed (ms) | Pascal mAP@0.5 (ms) | Outputs
+Model name | Speed (ms) | Pascal mAP@0.5 | Outputs
 --- | :---: | :-------------: | :-----:
 [faster_rcnn_resnet101_kitti](http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet101_kitti_2018_01_28.tar.gz) | 79 | 87 | Boxes

@@ -103,6 +103,13 @@ Model name
 [faster_rcnn_inception_resnet_v2_atrous_lowproposals_oid](http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_resnet_v2_atrous_lowproposals_oid_2018_01_28.tar.gz) | 347 | | Boxes

+## AVA v2.1 trained models {#ava-models}
+
+Model name | Speed (ms) | Pascal mAP@0.5 | Outputs
+--- | :---: | :-------------: | :-----:
+[faster_rcnn_resnet101_ava_v2.1](http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet101_ava_v2.1_2018_04_30.tar.gz) | 93 | 11 | Boxes
+
 [^1]: See [MSCOCO evaluation protocol](http://cocodataset.org/#detections-eval).
 [^2]: This is PASCAL mAP with a slightly different way of true positives computation: see [Open Images evaluation protocol](evaluation_protocols.md#open-images).
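The new AVA model can be fetched and unpacked like the other zoo tarballs (a sketch; the archive layout is assumed to match the zoo's usual frozen-graph packaging):

    wget http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet101_ava_v2.1_2018_04_30.tar.gz
    tar -xzf faster_rcnn_resnet101_ava_v2.1_2018_04_30.tar.gz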
research/object_detection/g3doc/installation.md · View file @ 3b158095

@@ -4,7 +4,7 @@
 Tensorflow Object Detection API depends on the following libraries:

-*   Protobuf 2.6
+*   Protobuf 3+
 *   Python-tk
 *   Pillow 1.0
 *   lxml
research/object_detection/meta_architectures/faster_rcnn_meta_arch.py · View file @ 3b158095

@@ -599,9 +599,11 @@ class FasterRCNNMetaArch(model.DetectionModel):
         (and if number_of_stages > 1):
         7) refined_box_encodings: a 3-D tensor with shape
-          [total_num_proposals, num_classes, 4] representing predicted
-          (final) refined box encodings, where
-          total_num_proposals=batch_size*self._max_num_proposals
+          [total_num_proposals, num_classes, self._box_coder.code_size]
+          representing predicted (final) refined box encodings, where
+          total_num_proposals=batch_size*self._max_num_proposals. If using
+          a shared box across classes the shape will instead be
+          [total_num_proposals, 1, self._box_coder.code_size].
         8) class_predictions_with_background: a 3-D tensor with shape
           [total_num_proposals, num_classes + 1] containing class
           predictions (logits) for each of the anchors, where
@@ -712,9 +714,11 @@ class FasterRCNNMetaArch(model.DetectionModel):
     Returns:
       prediction_dict: a dictionary holding "raw" prediction tensors:
         1) refined_box_encodings: a 3-D tensor with shape
-          [total_num_proposals, num_classes, 4] representing predicted
-          (final) refined box encodings, where
-          total_num_proposals=batch_size*self._max_num_proposals
+          [total_num_proposals, num_classes, self._box_coder.code_size]
+          representing predicted (final) refined box encodings, where
+          total_num_proposals=batch_size*self._max_num_proposals. If using a
+          shared box across classes the shape will instead be
+          [total_num_proposals, 1, self._box_coder.code_size].
         2) class_predictions_with_background: a 3-D tensor with shape
           [total_num_proposals, num_classes + 1] containing class
           predictions (logits) for each of the anchors, where
@@ -791,9 +795,11 @@ class FasterRCNNMetaArch(model.DetectionModel):
     Args:
       prediction_dict: a dictionary holding "raw" prediction tensors:
         1) refined_box_encodings: a 3-D tensor with shape
-          [total_num_proposals, num_classes, 4] representing predicted
-          (final) refined box encodings, where
-          total_num_proposals=batch_size*self._max_num_proposals
+          [total_num_proposals, num_classes, self._box_coder.code_size]
+          representing predicted (final) refined box encodings, where
+          total_num_proposals=batch_size*self._max_num_proposals. If using a
+          shared box across classes the shape will instead be
+          [total_num_proposals, 1, self._box_coder.code_size].
         2) class_predictions_with_background: a 3-D tensor with shape
           [total_num_proposals, num_classes + 1] containing class
           predictions (logits) for each of the anchors, where
@@ -823,13 +829,13 @@ class FasterRCNNMetaArch(model.DetectionModel):
       if self._is_training:
         curr_box_classifier_features = prediction_dict['box_classifier_features']
        detection_classes = prediction_dict['class_predictions_with_background']
-        box_predictions = self._mask_rcnn_box_predictor.predict(
+        mask_predictions = self._mask_rcnn_box_predictor.predict(
            [curr_box_classifier_features],
            num_predictions_per_location=[1],
            scope=self.second_stage_box_predictor_scope,
            predict_boxes_and_classes=False,
            predict_auxiliary_outputs=True)
-        prediction_dict['mask_predictions'] = tf.squeeze(box_predictions[
+        prediction_dict['mask_predictions'] = tf.squeeze(mask_predictions[
            box_predictor.MASK_PREDICTIONS], axis=1)
      else:
        detections_dict = self._postprocess_box_classifier(
@@ -854,14 +860,14 @@ class FasterRCNNMetaArch(model.DetectionModel):
                 flattened_detected_feature_maps,
                 scope=self.second_stage_feature_extractor_scope))
-        box_predictions = self._mask_rcnn_box_predictor.predict(
+        mask_predictions = self._mask_rcnn_box_predictor.predict(
             [curr_box_classifier_features],
             num_predictions_per_location=[1],
             scope=self.second_stage_box_predictor_scope,
             predict_boxes_and_classes=False,
             predict_auxiliary_outputs=True)
-        detection_masks = tf.squeeze(box_predictions[
+        detection_masks = tf.squeeze(mask_predictions[
             box_predictor.MASK_PREDICTIONS], axis=1)

         _, num_classes, mask_height, mask_width = (
@@ -1098,8 +1104,10 @@ class FasterRCNNMetaArch(model.DetectionModel):
         tf.to_float(num_proposals),
     }
     # TODO(jrru): Remove mask_predictions from _post_process_box_classifier.
     with tf.name_scope('SecondStagePostprocessor'):
-      if self._number_of_stages == 2:
+      if (self._number_of_stages == 2 or
+          (self._number_of_stages == 3 and self._is_training)):
         mask_predictions = prediction_dict.get(box_predictor.MASK_PREDICTIONS)
         detections_dict = self._postprocess_box_classifier(
             prediction_dict['refined_box_encodings'],
@@ -1438,8 +1446,10 @@ class FasterRCNNMetaArch(model.DetectionModel):
     Args:
       refined_box_encodings: a 3-D float tensor with shape
-        [total_num_padded_proposals, num_classes, 4] representing predicted
-        (final) refined box encodings.
+        [total_num_padded_proposals, num_classes, self._box_coder.code_size]
+        representing predicted (final) refined box encodings. If using a shared
+        box across classes the shape will instead be
+        [total_num_padded_proposals, 1, 4]
       class_predictions_with_background: a 3-D tensor float with shape
         [total_num_padded_proposals, num_classes + 1] containing class
         predictions (logits) for each of the proposals. Note that this tensor
@@ -1466,10 +1476,12 @@ class FasterRCNNMetaArch(model.DetectionModel):
       that a pixel-wise sigmoid score converter is applied to the detection
       masks.
     """
-    refined_box_encodings_batch = tf.reshape(refined_box_encodings,
-                                             [-1, self.max_num_proposals,
-                                              self.num_classes,
-                                              self._box_coder.code_size])
+    refined_box_encodings_batch = tf.reshape(
+        refined_box_encodings,
+        [-1,
+         self.max_num_proposals,
+         refined_box_encodings.shape[1],
+         self._box_coder.code_size])
     class_predictions_with_background_batch = tf.reshape(
         class_predictions_with_background,
         [-1, self.max_num_proposals, self.num_classes + 1]
@@ -1517,13 +1529,18 @@ class FasterRCNNMetaArch(model.DetectionModel):
       box_encodings: a 4-D tensor with shape
         [batch_size, num_anchors, num_classes, self._box_coder.code_size]
         representing box encodings.
-      anchor_boxes: [batch_size, num_anchors, 4] representing
-        decoded bounding boxes.
+      anchor_boxes: [batch_size, num_anchors, self._box_coder.code_size]
+        representing decoded bounding boxes. If using a shared box across
+        classes the shape will instead be
+        [total_num_proposals, 1, self._box_coder.code_size].

     Returns:
-      decoded_boxes: a [batch_size, num_anchors, num_classes, 4]
-        float tensor representing bounding box predictions
-        (for each image in batch, proposal and class).
+      decoded_boxes: a
+        [batch_size, num_anchors, num_classes, self._box_coder.code_size]
+        float tensor representing bounding box predictions (for each image in
+        batch, proposal and class). If using a shared box across classes the
+        shape will instead be
+        [batch_size, num_anchors, 1, self._box_coder.code_size].
     """
     combined_shape = shape_utils.combined_static_and_dynamic_shape(
         box_encodings)
@@ -1697,7 +1714,9 @@ class FasterRCNNMetaArch(model.DetectionModel):
     Args:
       refined_box_encodings: a 3-D tensor with shape
         [total_num_proposals, num_classes, box_coder.code_size] representing
-        predicted (final) refined box encodings.
+        predicted (final) refined box encodings. If using a shared box across
+        classes this will instead have shape
+        [total_num_proposals, 1, box_coder.code_size].
       class_predictions_with_background: a 2-D tensor with shape
         [total_num_proposals, num_classes + 1] containing class
         predictions (logits) for each of the anchors. Note that this tensor
@@ -1748,31 +1767,39 @@ class FasterRCNNMetaArch(model.DetectionModel):
           self._detector_target_assigner, proposal_boxlists,
           groundtruth_boxlists, groundtruth_classes_with_background_list)

-      # We only predict refined location encodings for the non background
-      # classes, but we now pad it to make it compatible with the class
-      # predictions
-      class_predictions_with_background = tf.reshape(
-          class_predictions_with_background,
-          [batch_size, self.max_num_proposals, -1])
       flat_cls_targets_with_background = tf.reshape(
           batch_cls_targets_with_background,
           [batch_size * self.max_num_proposals, -1])
-      refined_box_encodings_with_background = tf.pad(
-          refined_box_encodings, [[0, 0], [1, 0], [0, 0]])
-      # For anchors with multiple labels, picks refined_location_encodings
-      # for just one class to avoid over-counting for regression loss and
-      # (optionally) mask loss.
       one_hot_flat_cls_targets_with_background = tf.argmax(
           flat_cls_targets_with_background, axis=1)
       one_hot_flat_cls_targets_with_background = tf.one_hot(
           one_hot_flat_cls_targets_with_background,
           flat_cls_targets_with_background.get_shape()[1])
-      refined_box_encodings_masked_by_class_targets = tf.boolean_mask(
-          refined_box_encodings_with_background,
-          tf.greater(one_hot_flat_cls_targets_with_background, 0))
-      reshaped_refined_box_encodings = tf.reshape(
-          refined_box_encodings_masked_by_class_targets,
-          [batch_size, -1, 4])
+      class_predictions_with_background = tf.reshape(
+          class_predictions_with_background,
+          [batch_size, self.max_num_proposals, -1])
+      # If using a shared box across classes use directly
+      if refined_box_encodings.shape[1] == 1:
+        reshaped_refined_box_encodings = tf.reshape(
+            refined_box_encodings,
+            [batch_size, self.max_num_proposals, self._box_coder.code_size])
+      # For anchors with multiple labels, picks refined_location_encodings
+      # for just one class to avoid over-counting for regression loss and
+      # (optionally) mask loss.
+      else:
+        # We only predict refined location encodings for the non background
+        # classes, but we now pad it to make it compatible with the class
+        # predictions
+        refined_box_encodings_with_background = tf.pad(
+            refined_box_encodings, [[0, 0], [1, 0], [0, 0]])
+        refined_box_encodings_masked_by_class_targets = tf.boolean_mask(
+            refined_box_encodings_with_background,
+            tf.greater(one_hot_flat_cls_targets_with_background, 0))
+        reshaped_refined_box_encodings = tf.reshape(
+            refined_box_encodings_masked_by_class_targets,
+            [batch_size, self.max_num_proposals, self._box_coder.code_size])

       second_stage_loc_losses = self._second_stage_localization_loss(
           reshaped_refined_box_encodings,
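The shape logic of the new shared-box branch, as a standalone numpy sketch (names illustrative, not from the diff): shared boxes skip per-class selection and reshape directly, while per-class boxes first need one row selected per proposal.

    import numpy as np

    batch_size, max_num_proposals, num_classes, code_size = 2, 8, 3, 4
    total = batch_size * max_num_proposals
    shared = np.zeros((total, 1, code_size))              # one box per proposal
    per_class = np.zeros((total, num_classes, code_size))
    # Shared boxes reshape directly to [batch, proposals, code_size] ...
    assert shared.reshape(batch_size, max_num_proposals, code_size).shape == (2, 8, 4)
    # ... whereas per-class boxes are first masked down to one box per proposal
    # (the tf.boolean_mask over one-hot class targets) before the same reshape.
    selected = per_class[np.arange(total), 0]             # stand-in for that mask
    assert selected.reshape(batch_size, max_num_proposals, code_size).shape == (2, 8, 4)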
research/object_detection/meta_architectures/faster_rcnn_meta_arch_test.py · View file @ 3b158095

@@ -85,6 +85,46 @@ class FasterRCNNMetaArchTest(
       self.assertTrue(np.amax(detections_out['detection_masks'] <= 1.0))
       self.assertTrue(np.amin(detections_out['detection_masks'] >= 0.0))

+  def test_postprocess_second_stage_only_inference_mode_with_shared_boxes(self):
+    model = self._build_model(
+        is_training=False, number_of_stages=2, second_stage_batch_size=6)
+    batch_size = 2
+    total_num_padded_proposals = batch_size * model.max_num_proposals
+    proposal_boxes = tf.constant(
+        [[[1, 1, 2, 3],
+          [0, 0, 1, 1],
+          [.5, .5, .6, .6],
+          4*[0], 4*[0], 4*[0], 4*[0], 4*[0]],
+         [[2, 3, 6, 8],
+          [1, 2, 5, 3],
+          4*[0], 4*[0], 4*[0], 4*[0], 4*[0], 4*[0]]], dtype=tf.float32)
+    num_proposals = tf.constant([3, 2], dtype=tf.int32)
+
+    # This has 1 box instead of one for each class.
+    refined_box_encodings = tf.zeros(
+        [total_num_padded_proposals, 1, 4], dtype=tf.float32)
+    class_predictions_with_background = tf.ones(
+        [total_num_padded_proposals, model.num_classes+1], dtype=tf.float32)
+    image_shape = tf.constant([batch_size, 36, 48, 3], dtype=tf.int32)
+
+    _, true_image_shapes = model.preprocess(tf.zeros(image_shape))
+    detections = model.postprocess({
+        'refined_box_encodings': refined_box_encodings,
+        'class_predictions_with_background': class_predictions_with_background,
+        'num_proposals': num_proposals,
+        'proposal_boxes': proposal_boxes,
+        'image_shape': image_shape,
+    }, true_image_shapes)
+    with self.test_session() as sess:
+      detections_out = sess.run(detections)
+      self.assertAllEqual(detections_out['detection_boxes'].shape, [2, 5, 4])
+      self.assertAllClose(detections_out['detection_scores'],
+                          [[1, 1, 1, 1, 1], [1, 1, 1, 1, 0]])
+      self.assertAllClose(detections_out['detection_classes'],
+                          [[0, 0, 0, 1, 1], [0, 0, 1, 1, 0]])
+      self.assertAllClose(detections_out['num_detections'], [5, 4])
+
   @parameterized.parameters(
       {'masks_are_class_agnostic': False},
       {'masks_are_class_agnostic': True},
research/object_detection/meta_architectures/faster_rcnn_meta_arch_test_lib.py · View file @ 3b158095

@@ -1284,6 +1284,106 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
       self.assertAllClose(
           loss_dict_out['Loss/BoxClassifierLoss/classification_loss'], 0)

+  def test_loss_full_with_shared_boxes(self):
+    model = self._build_model(
+        is_training=True, number_of_stages=2, second_stage_batch_size=6)
+    batch_size = 2
+    anchors = tf.constant(
+        [[0, 0, 16, 16],
+         [0, 16, 16, 32],
+         [16, 0, 32, 16],
+         [16, 16, 32, 32]], dtype=tf.float32)
+    rpn_box_encodings = tf.zeros(
+        [batch_size, anchors.get_shape().as_list()[0], BOX_CODE_SIZE],
+        dtype=tf.float32)
+    # use different numbers for the objectness category to break ties in
+    # order of boxes returned by NMS
+    rpn_objectness_predictions_with_background = tf.constant([
+        [[-10, 13], [10, -10], [10, -11], [-10, 12]],
+        [[10, -10], [-10, 13], [-10, 12], [10, -11]]], dtype=tf.float32)
+    image_shape = tf.constant([batch_size, 32, 32, 3], dtype=tf.int32)
+
+    num_proposals = tf.constant([6, 6], dtype=tf.int32)
+    proposal_boxes = tf.constant(
+        2 * [[[0, 0, 16, 16],
+              [0, 16, 16, 32],
+              [16, 0, 32, 16],
+              [16, 16, 32, 32],
+              [0, 0, 16, 16],
+              [0, 16, 16, 32]]], dtype=tf.float32)
+    refined_box_encodings = tf.zeros(
+        (batch_size * model.max_num_proposals,
+         1,  # one box shared among all the classes
+         BOX_CODE_SIZE), dtype=tf.float32)
+    class_predictions_with_background = tf.constant(
+        [[-10, 10, -10],  # first image
+         [10, -10, -10],
+         [10, -10, -10],
+         [-10, -10, 10],
+         [-10, 10, -10],
+         [10, -10, -10],
+         [10, -10, -10],  # second image
+         [-10, 10, -10],
+         [-10, 10, -10],
+         [10, -10, -10],
+         [10, -10, -10],
+         [-10, 10, -10]], dtype=tf.float32)
+
+    mask_predictions_logits = 20 * tf.ones((batch_size *
+                                            model.max_num_proposals,
+                                            model.num_classes,
+                                            14, 14), dtype=tf.float32)
+
+    groundtruth_boxes_list = [
+        tf.constant([[0, 0, .5, .5], [.5, .5, 1, 1]], dtype=tf.float32),
+        tf.constant([[0, .5, .5, 1], [.5, 0, 1, .5]], dtype=tf.float32)]
+    groundtruth_classes_list = [
+        tf.constant([[1, 0], [0, 1]], dtype=tf.float32),
+        tf.constant([[1, 0], [1, 0]], dtype=tf.float32)]
+
+    # Set all elements of groundtruth mask to 1.0. In this case all proposal
+    # crops of the groundtruth masks should return a mask that covers the
+    # entire proposal. Thus, if mask_predictions_logits element values are all
+    # greater than 20, the loss should be zero.
+    groundtruth_masks_list = [
+        tf.convert_to_tensor(np.ones((2, 32, 32)), dtype=tf.float32),
+        tf.convert_to_tensor(np.ones((2, 32, 32)), dtype=tf.float32)]
+
+    prediction_dict = {
+        'rpn_box_encodings': rpn_box_encodings,
+        'rpn_objectness_predictions_with_background':
+            rpn_objectness_predictions_with_background,
+        'image_shape': image_shape,
+        'anchors': anchors,
+        'refined_box_encodings': refined_box_encodings,
+        'class_predictions_with_background':
+            class_predictions_with_background,
+        'proposal_boxes': proposal_boxes,
+        'num_proposals': num_proposals,
+        'mask_predictions': mask_predictions_logits
+    }
+    _, true_image_shapes = model.preprocess(tf.zeros(image_shape))
+    model.provide_groundtruth(groundtruth_boxes_list,
+                              groundtruth_classes_list,
+                              groundtruth_masks_list)
+    loss_dict = model.loss(prediction_dict, true_image_shapes)
+
+    with self.test_session() as sess:
+      loss_dict_out = sess.run(loss_dict)
+      self.assertAllClose(loss_dict_out['Loss/RPNLoss/localization_loss'], 0)
+      self.assertAllClose(loss_dict_out['Loss/RPNLoss/objectness_loss'], 0)
+      self.assertAllClose(
+          loss_dict_out['Loss/BoxClassifierLoss/localization_loss'], 0)
+      self.assertAllClose(
+          loss_dict_out['Loss/BoxClassifierLoss/classification_loss'], 0)
+      self.assertAllClose(loss_dict_out['Loss/BoxClassifierLoss/mask_loss'], 0)
+
   def test_restore_map_for_classification_ckpt(self):
     # Define mock tensorflow classification graph and save variables.
     test_graph_classification = tf.Graph()
research/object_detection/metrics/coco_evaluation.py · View file @ 3b158095

@@ -203,26 +203,39 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
   def get_estimator_eval_metric_ops(self, image_id, groundtruth_boxes,
                                     groundtruth_classes, detection_boxes,
-                                    detection_scores, detection_classes):
+                                    detection_scores, detection_classes,
+                                    num_gt_boxes_per_image=None,
+                                    num_det_boxes_per_image=None):
     """Returns a dictionary of eval metric ops to use with `tf.EstimatorSpec`.

     Note that once value_op is called, the detections and groundtruth added via
     update_op are cleared.

+    This function can take in groundtruth and detections for a batch of images,
+    or for a single image. For the latter case, the batch dimension for input
+    tensors need not be present.
+
     Args:
-      image_id: Unique string/integer identifier for the image.
-      groundtruth_boxes: float32 tensor of shape [num_boxes, 4] containing
-        `num_boxes` groundtruth boxes of the format
+      image_id: string/integer tensor of shape [batch] with unique identifiers
+        for the images.
+      groundtruth_boxes: float32 tensor of shape [batch, num_boxes, 4]
+        containing `num_boxes` groundtruth boxes of the format
         [ymin, xmin, ymax, xmax] in absolute image coordinates.
-      groundtruth_classes: int32 tensor of shape [num_boxes] containing
+      groundtruth_classes: int32 tensor of shape [batch, num_boxes] containing
         1-indexed groundtruth classes for the boxes.
-      detection_boxes: float32 tensor of shape [num_boxes, 4] containing
+      detection_boxes: float32 tensor of shape [batch, num_boxes, 4] containing
         `num_boxes` detection boxes of the format [ymin, xmin, ymax, xmax]
         in absolute image coordinates.
-      detection_scores: float32 tensor of shape [num_boxes] containing
+      detection_scores: float32 tensor of shape [batch, num_boxes] containing
        detection scores for the boxes.
-      detection_classes: int32 tensor of shape [num_boxes] containing
+      detection_classes: int32 tensor of shape [batch, num_boxes] containing
        1-indexed detection classes for the boxes.
+      num_gt_boxes_per_image: int32 tensor of shape [batch] containing the
+        number of groundtruth boxes per image. If None, will assume no padding
+        in groundtruth tensors.
+      num_det_boxes_per_image: int32 tensor of shape [batch] containing the
+        number of detection boxes per image. If None, will assume no padding in
+        the detection tensors.

     Returns:
       a dictionary of metric names to tuple of value_op and update_op that can
@@ -231,28 +244,68 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
       guarantee correct behaviour.
     """
-    def update_op(image_id, groundtruth_boxes, groundtruth_classes,
-                  detection_boxes, detection_scores, detection_classes):
-      self.add_single_ground_truth_image_info(
-          image_id,
-          {'groundtruth_boxes': groundtruth_boxes,
-           'groundtruth_classes': groundtruth_classes})
-      self.add_single_detected_image_info(
-          image_id,
-          {'detection_boxes': detection_boxes,
-           'detection_scores': detection_scores,
-           'detection_classes': detection_classes})
+    def update_op(
+        image_id_batched,
+        groundtruth_boxes_batched,
+        groundtruth_classes_batched,
+        num_gt_boxes_per_image,
+        detection_boxes_batched,
+        detection_scores_batched,
+        detection_classes_batched,
+        num_det_boxes_per_image):
+      """Update operation for adding batch of images to Coco evaluator."""
+      for (image_id, gt_box, gt_class, num_gt_box, det_box, det_score,
+           det_class, num_det_box) in zip(
+               image_id_batched, groundtruth_boxes_batched,
+               groundtruth_classes_batched, num_gt_boxes_per_image,
+               detection_boxes_batched, detection_scores_batched,
+               detection_classes_batched, num_det_boxes_per_image):
+        self.add_single_ground_truth_image_info(
+            image_id,
+            {'groundtruth_boxes': gt_box[:num_gt_box],
+             'groundtruth_classes': gt_class[:num_gt_box]})
+        self.add_single_detected_image_info(
+            image_id,
+            {'detection_boxes': det_box[:num_det_box],
+             'detection_scores': det_score[:num_det_box],
+             'detection_classes': det_class[:num_det_box]})
+
+    if not image_id.shape.as_list():
+      # Apply a batch dimension to all tensors.
+      image_id = tf.expand_dims(image_id, 0)
+      groundtruth_boxes = tf.expand_dims(groundtruth_boxes, 0)
+      groundtruth_classes = tf.expand_dims(groundtruth_classes, 0)
+      detection_boxes = tf.expand_dims(detection_boxes, 0)
+      detection_scores = tf.expand_dims(detection_scores, 0)
+      detection_classes = tf.expand_dims(detection_classes, 0)
+
+      if num_gt_boxes_per_image is None:
+        num_gt_boxes_per_image = tf.shape(groundtruth_boxes)[1:2]
+      else:
+        num_gt_boxes_per_image = tf.expand_dims(num_gt_boxes_per_image, 0)
+
+      if num_det_boxes_per_image is None:
+        num_det_boxes_per_image = tf.shape(detection_boxes)[1:2]
+      else:
+        num_det_boxes_per_image = tf.expand_dims(num_det_boxes_per_image, 0)
+    else:
+      if num_gt_boxes_per_image is None:
+        num_gt_boxes_per_image = tf.tile(
+            tf.shape(groundtruth_boxes)[1:2],
+            multiples=tf.shape(groundtruth_boxes)[0:1])
+      if num_det_boxes_per_image is None:
+        num_det_boxes_per_image = tf.tile(
+            tf.shape(detection_boxes)[1:2],
+            multiples=tf.shape(detection_boxes)[0:1])

     update_op = tf.py_func(update_op, [image_id,
                                        groundtruth_boxes,
                                        groundtruth_classes,
+                                       num_gt_boxes_per_image,
                                        detection_boxes,
                                        detection_scores,
-                                       detection_classes], [])
+                                       detection_classes,
+                                       num_det_boxes_per_image], [])
     metric_names = ['DetectionBoxes_Precision/mAP',
                     'DetectionBoxes_Precision/mAP@.50IOU',
                     'DetectionBoxes_Precision/mAP@.75IOU',
@@ -583,5 +636,3 @@ class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator):
       eval_metric_ops[metric_name] = (tf.py_func(
           value_func_factory(metric_name), [], np.float32), update_op)

     return eval_metric_ops
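The padding contract is simply that rows beyond the per-image counts are sliced off inside update_op (gt_box[:num_gt_box] and so on), so callers can pad every image to a common num_boxes and pass the true counts alongside; a standalone sketch:

    import numpy as np

    gt_box = np.array([[100., 100., 200., 200.],
                       [-1., -1., -1., -1.]])   # second row is padding
    num_gt_box = 1
    assert (gt_box[:num_gt_box] == [[100., 100., 200., 200.]]).all()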
research/object_detection/metrics/coco_evaluation_test.py · View file @ 3b158095

@@ -317,6 +317,230 @@ class CocoEvaluationPyFuncTest(tf.test.TestCase):
     self.assertFalse(coco_evaluator._detection_boxes_list)
     self.assertFalse(coco_evaluator._image_ids)

+  def testGetOneMAPWithMatchingGroundtruthAndDetectionsPadded(self):
+    category_list = [{'id': 0, 'name': 'person'},
+                     {'id': 1, 'name': 'cat'},
+                     {'id': 2, 'name': 'dog'}]
+    coco_evaluator = coco_evaluation.CocoDetectionEvaluator(category_list)
+    image_id = tf.placeholder(tf.string, shape=())
+    groundtruth_boxes = tf.placeholder(tf.float32, shape=(None, 4))
+    groundtruth_classes = tf.placeholder(tf.float32, shape=(None))
+    detection_boxes = tf.placeholder(tf.float32, shape=(None, 4))
+    detection_scores = tf.placeholder(tf.float32, shape=(None))
+    detection_classes = tf.placeholder(tf.float32, shape=(None))
+
+    eval_metric_ops = coco_evaluator.get_estimator_eval_metric_ops(
+        image_id, groundtruth_boxes, groundtruth_classes, detection_boxes,
+        detection_scores, detection_classes)
+
+    _, update_op = eval_metric_ops['DetectionBoxes_Precision/mAP']
+
+    with self.test_session() as sess:
+      sess.run(update_op,
+               feed_dict={
+                   image_id: 'image1',
+                   groundtruth_boxes: np.array([[100., 100., 200., 200.],
+                                                [-1, -1, -1, -1]]),
+                   groundtruth_classes: np.array([1, -1]),
+                   detection_boxes: np.array([[100., 100., 200., 200.],
+                                              [0., 0., 0., 0.]]),
+                   detection_scores: np.array([.8, 0.]),
+                   detection_classes: np.array([1, -1])
+               })
+      sess.run(update_op,
+               feed_dict={
+                   image_id: 'image2',
+                   groundtruth_boxes: np.array([[50., 50., 100., 100.],
+                                                [-1, -1, -1, -1]]),
+                   groundtruth_classes: np.array([3, -1]),
+                   detection_boxes: np.array([[50., 50., 100., 100.],
+                                              [0., 0., 0., 0.]]),
+                   detection_scores: np.array([.7, 0.]),
+                   detection_classes: np.array([3, -1])
+               })
+      sess.run(update_op,
+               feed_dict={
+                   image_id: 'image3',
+                   groundtruth_boxes: np.array([[25., 25., 50., 50.],
+                                                [10., 10., 15., 15.]]),
+                   groundtruth_classes: np.array([2, 2]),
+                   detection_boxes: np.array([[25., 25., 50., 50.],
+                                              [10., 10., 15., 15.]]),
+                   detection_scores: np.array([.95, .9]),
+                   detection_classes: np.array([2, 2])
+               })
+      metrics = {}
+      for key, (value_op, _) in eval_metric_ops.iteritems():
+        metrics[key] = value_op
+      metrics = sess.run(metrics)
+      self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP'], 1.0)
+      self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP@.50IOU'], 1.0)
+      self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP@.75IOU'], 1.0)
+      self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (large)'], 1.0)
+      self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (medium)'], -1.0)
+      self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (small)'], 1.0)
+      self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@1'], 0.75)
+      self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@10'], 1.0)
+      self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100'], 1.0)
+      self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (large)'], 1.0)
+      self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (medium)'], -1.0)
+      self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (small)'], 1.0)
+    self.assertFalse(coco_evaluator._groundtruth_list)
+    self.assertFalse(coco_evaluator._detection_boxes_list)
+    self.assertFalse(coco_evaluator._image_ids)
+
+  def testGetOneMAPWithMatchingGroundtruthAndDetectionsBatched(self):
+    category_list = [{'id': 0, 'name': 'person'},
+                     {'id': 1, 'name': 'cat'},
+                     {'id': 2, 'name': 'dog'}]
+    coco_evaluator = coco_evaluation.CocoDetectionEvaluator(category_list)
+    batch_size = 3
+    image_id = tf.placeholder(tf.string, shape=(batch_size))
+    groundtruth_boxes = tf.placeholder(tf.float32, shape=(batch_size, None, 4))
+    groundtruth_classes = tf.placeholder(tf.float32, shape=(batch_size, None))
+    detection_boxes = tf.placeholder(tf.float32, shape=(batch_size, None, 4))
+    detection_scores = tf.placeholder(tf.float32, shape=(batch_size, None))
+    detection_classes = tf.placeholder(tf.float32, shape=(batch_size, None))
+
+    eval_metric_ops = coco_evaluator.get_estimator_eval_metric_ops(
+        image_id, groundtruth_boxes, groundtruth_classes, detection_boxes,
+        detection_scores, detection_classes)
+
+    _, update_op = eval_metric_ops['DetectionBoxes_Precision/mAP']
+
+    with self.test_session() as sess:
+      sess.run(update_op,
+               feed_dict={
+                   image_id: ['image1', 'image2', 'image3'],
+                   groundtruth_boxes: np.array([[[100., 100., 200., 200.]],
+                                                [[50., 50., 100., 100.]],
+                                                [[25., 25., 50., 50.]]]),
+                   groundtruth_classes: np.array([[1], [3], [2]]),
+                   detection_boxes: np.array([[[100., 100., 200., 200.]],
+                                              [[50., 50., 100., 100.]],
+                                              [[25., 25., 50., 50.]]]),
+                   detection_scores: np.array([[.8], [.7], [.9]]),
+                   detection_classes: np.array([[1], [3], [2]])
+               })
+      metrics = {}
+      for key, (value_op, _) in eval_metric_ops.iteritems():
+        metrics[key] = value_op
+      metrics = sess.run(metrics)
+      self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP'], 1.0)
+      self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP@.50IOU'], 1.0)
+      self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP@.75IOU'], 1.0)
+      self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (large)'], 1.0)
+      self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (medium)'], -1.0)
+      self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (small)'], 1.0)
+      self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@1'], 1.0)
+      self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@10'], 1.0)
+      self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100'], 1.0)
+      self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (large)'], 1.0)
+      self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (medium)'], -1.0)
+      self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (small)'], 1.0)
+    self.assertFalse(coco_evaluator._groundtruth_list)
+    self.assertFalse(coco_evaluator._detection_boxes_list)
+    self.assertFalse(coco_evaluator._image_ids)
+
+  def testGetOneMAPWithMatchingGroundtruthAndDetectionsPaddedBatches(self):
+    category_list = [{'id': 0, 'name': 'person'},
+                     {'id': 1, 'name': 'cat'},
+                     {'id': 2, 'name': 'dog'}]
+    coco_evaluator = coco_evaluation.CocoDetectionEvaluator(category_list)
+    batch_size = 3
+    image_id = tf.placeholder(tf.string, shape=(batch_size))
+    groundtruth_boxes = tf.placeholder(tf.float32, shape=(batch_size, None, 4))
+    groundtruth_classes = tf.placeholder(tf.float32, shape=(batch_size, None))
+    num_gt_boxes_per_image = tf.placeholder(tf.int32, shape=(None))
+    detection_boxes = tf.placeholder(tf.float32, shape=(batch_size, None, 4))
+    detection_scores = tf.placeholder(tf.float32, shape=(batch_size, None))
+    detection_classes = tf.placeholder(tf.float32, shape=(batch_size, None))
+    num_det_boxes_per_image = tf.placeholder(tf.int32, shape=(None))
+
+    eval_metric_ops = coco_evaluator.get_estimator_eval_metric_ops(
+        image_id, groundtruth_boxes, groundtruth_classes, detection_boxes,
+        detection_scores, detection_classes, num_gt_boxes_per_image,
+        num_det_boxes_per_image)
+
+    _, update_op = eval_metric_ops['DetectionBoxes_Precision/mAP']
+
+    with self.test_session() as sess:
+      sess.run(update_op,
+               feed_dict={
+                   image_id: ['image1', 'image2', 'image3'],
+                   groundtruth_boxes: np.array([[[100., 100., 200., 200.],
+                                                 [-1, -1, -1, -1]],
+                                                [[50., 50., 100., 100.],
+                                                 [-1, -1, -1, -1]],
+                                                [[25., 25., 50., 50.],
+                                                 [10., 10., 15., 15.]]]),
+                   groundtruth_classes: np.array([[1, -1], [3, -1], [2, 2]]),
+                   num_gt_boxes_per_image: np.array([1, 1, 2]),
+                   detection_boxes: np.array([[[100., 100., 200., 200.],
+                                               [0., 0., 0., 0.]],
+                                              [[50., 50., 100., 100.],
+                                               [0., 0., 0., 0.]],
+                                              [[25., 25., 50., 50.],
+                                               [10., 10., 15., 15.]]]),
+                   detection_scores: np.array([[.8, 0.], [.7, 0.], [.95, .9]]),
+                   detection_classes: np.array([[1, -1], [3, -1], [2, 2]]),
+                   num_det_boxes_per_image: np.array([1, 1, 2]),
+               })
+      metrics = {}
+      for key, (value_op, _) in eval_metric_ops.iteritems():
+        metrics[key] = value_op
+      metrics = sess.run(metrics)
+      self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP'], 1.0)
+      self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP@.50IOU'], 1.0)
+      self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP@.75IOU'], 1.0)
+      self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (large)'], 1.0)
+      self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (medium)'], -1.0)
+      self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (small)'], 1.0)
+      self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@1'], 0.75)
+      self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@10'], 1.0)
+      self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100'], 1.0)
+      self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (large)'], 1.0)
+      self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (medium)'], -1.0)
+      self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (small)'], 1.0)
+    self.assertFalse(coco_evaluator._groundtruth_list)
+    self.assertFalse(coco_evaluator._detection_boxes_list)
+    self.assertFalse(coco_evaluator._image_ids)
+
 class CocoMaskEvaluationTest(tf.test.TestCase):
research/object_detection/model_lib.py · View file @ 3b158095

@@ -325,16 +325,16 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False):
     }

     eval_metric_ops = None
     if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
       if mode == tf.estimator.ModeKeys.EVAL:
         class_agnostic = (fields.DetectionResultFields.detection_classes
                           not in detections)
         groundtruth = _get_groundtruth_data(detection_model, class_agnostic)
         use_original_images = fields.InputDataFields.original_image in features
-        original_images = (
+        eval_images = (
             features[fields.InputDataFields.original_image]
             if use_original_images else
             features[fields.InputDataFields.image])
         eval_dict = eval_util.result_dict_for_single_example(
-            original_images[0:1],
+            eval_images[0:1],
             features[inputs.HASH_KEY][0],
             detections,
             groundtruth,
@@ -355,22 +355,21 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False):
           img_summary = tf.summary.image('Detections_Left_Groundtruth_Right',
                                          detection_and_groundtruth)

-      if mode == tf.estimator.ModeKeys.EVAL:
-        # Eval metrics on a single example.
-        eval_metrics = eval_config.metrics_set
-        if not eval_metrics:
-          eval_metrics = ['coco_detection_metrics']
-        eval_metric_ops = eval_util.get_eval_metric_ops_for_evaluators(
-            eval_metrics, category_index.values(), eval_dict,
-            include_metrics_per_category=False)
-        for loss_key, loss_tensor in iter(losses_dict.items()):
-          eval_metric_ops[loss_key] = tf.metrics.mean(loss_tensor)
-        for var in optimizer_summary_vars:
-          eval_metric_ops[var.op.name] = (var, tf.no_op())
-        if img_summary is not None:
-          eval_metric_ops['Detections_Left_Groundtruth_Right'] = (
-              img_summary, tf.no_op())
-        eval_metric_ops = {str(k): v for k, v in eval_metric_ops.iteritems()}
+      # Eval metrics on a single example.
+      eval_metrics = eval_config.metrics_set
+      if not eval_metrics:
+        eval_metrics = ['coco_detection_metrics']
+      eval_metric_ops = eval_util.get_eval_metric_ops_for_evaluators(
+          eval_metrics, category_index.values(), eval_dict,
+          include_metrics_per_category=False)
+      for loss_key, loss_tensor in iter(losses_dict.items()):
+        eval_metric_ops[loss_key] = tf.metrics.mean(loss_tensor)
+      for var in optimizer_summary_vars:
+        eval_metric_ops[var.op.name] = (var, tf.no_op())
+      if img_summary is not None:
+        eval_metric_ops['Detections_Left_Groundtruth_Right'] = (
+            img_summary, tf.no_op())
+      eval_metric_ops = {str(k): v for k, v in eval_metric_ops.iteritems()}

     if use_tpu:
       return tf.contrib.tpu.TPUEstimatorSpec(
research/object_detection/model_main.py
View file @
3b158095
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
...
...
@@ -36,6 +36,10 @@ flags.DEFINE_string(
     'hparams_overrides', None, 'Hyperparameter overrides, '
     'represented as a string containing comma-separated '
     'hparam_name=value pairs.')
+flags.DEFINE_string(
+    'checkpoint_dir', None, 'Path to directory holding a checkpoint. If '
+    '`checkpoint_dir` is provided, this binary operates in eval-only mode, '
+    'writing resulting metrics to `model_dir`.')

 FLAGS = flags.FLAGS
...
...
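With the new flag, the binary either trains and evaluates continuously or only scores an existing checkpoint, as the next hunk shows. Hypothetical invocations (all paths are placeholders) could look like:

# Train and evaluate continuously (the default mode):
python object_detection/model_main.py \
    --pipeline_config_path=/tmp/pipeline.config \
    --model_dir=/tmp/train_dir

# Eval-only mode: score the latest checkpoint under --checkpoint_dir and
# write the resulting metrics to --model_dir:
python object_detection/model_main.py \
    --pipeline_config_path=/tmp/pipeline.config \
    --model_dir=/tmp/eval_dir \
    --checkpoint_dir=/tmp/train_dir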
@@ -59,17 +63,23 @@ def main(unused_argv):
   train_steps = train_and_eval_dict['train_steps']
   eval_steps = train_and_eval_dict['eval_steps']

-  train_spec, eval_specs = model_lib.create_train_and_eval_specs(
-      train_input_fn,
-      eval_input_fn,
-      eval_on_train_input_fn,
-      predict_input_fn,
-      train_steps,
-      eval_steps,
-      eval_on_train_data=False)
+  if FLAGS.checkpoint_dir:
+    estimator.evaluate(eval_input_fn,
+                       eval_steps,
+                       checkpoint_path=tf.train.latest_checkpoint(
+                           FLAGS.checkpoint_dir))
+  else:
+    train_spec, eval_specs = model_lib.create_train_and_eval_specs(
+        train_input_fn,
+        eval_input_fn,
+        eval_on_train_input_fn,
+        predict_input_fn,
+        train_steps,
+        eval_steps,
+        eval_on_train_data=False)

-  # Currently only a single Eval Spec is allowed.
-  tf.estimator.train_and_evaluate(estimator, train_spec, eval_specs[0])
+    # Currently only a single Eval Spec is allowed.
+    tf.estimator.train_and_evaluate(estimator, train_spec, eval_specs[0])


 if __name__ == '__main__':
...
...
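One detail the eval-only branch relies on: tf.train.latest_checkpoint reads the directory's `checkpoint` state file and returns the newest checkpoint prefix, or None when nothing has been saved yet. A tiny sketch with a hypothetical directory:

import tensorflow as tf

ckpt = tf.train.latest_checkpoint('/tmp/train_dir')
# e.g. '/tmp/train_dir/model.ckpt-25000', or None if no checkpoint exists.
# Passing checkpoint_path=None to estimator.evaluate makes it fall back to
# the latest checkpoint in the estimator's own model_dir.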
research/object_detection/model_tpu_main.py
View file @
3b158095
...
...
@@ -56,7 +56,9 @@ flags.DEFINE_integer('iterations_per_loop', 100,
 # recent checkpoint every 10 minutes by default for train_and_eval
 flags.DEFINE_string('mode', 'train', 'Mode to run: train, eval')
-flags.DEFINE_integer('train_batch_size', 32 * 8, 'Batch size for training.')
+flags.DEFINE_integer('train_batch_size', None, 'Batch size for training. If '
+                     'this is not provided, batch size is read from training '
+                     'config.')
 flags.DEFINE_string('hparams_overrides', None, 'Comma-separated list of '
...
...
@@ -93,6 +95,10 @@ def main(unused_argv):
           iterations_per_loop=FLAGS.iterations_per_loop,
           num_shards=FLAGS.num_shards))

+  kwargs = {}
+  if FLAGS.train_batch_size:
+    kwargs['batch_size'] = FLAGS.train_batch_size
+
   train_and_eval_dict = model_lib.create_estimator_and_inputs(
       run_config=config,
       hparams=model_hparams.create_hparams(FLAGS.hparams_overrides),
...
...
@@ -102,7 +108,7 @@ def main(unused_argv):
       use_tpu_estimator=True,
       use_tpu=FLAGS.use_tpu,
       num_shards=FLAGS.num_shards,
-      batch_size=FLAGS.train_batch_size)
+      **kwargs)
   estimator = train_and_eval_dict['estimator']
   train_input_fn = train_and_eval_dict['train_input_fn']
   eval_input_fn = train_and_eval_dict['eval_input_fn']
...
...
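The kwargs indirection above ensures an unset --train_batch_size never clobbers the batch size parsed from the training config. A minimal self-contained sketch of the idiom (the function and names are hypothetical stand-ins):

def create_estimator_and_inputs(run_config, batch_size=64, **unused_kwargs):
  # `batch_size=64` stands in for the value parsed from the training config.
  return {'batch_size': batch_size}

train_batch_size = None  # stand-in for FLAGS.train_batch_size

kwargs = {}
if train_batch_size:
  kwargs['batch_size'] = train_batch_size

# Unset flag: the callee's config-derived default wins.
assert create_estimator_and_inputs(None, **kwargs)['batch_size'] == 64
# Set flag: the explicit override wins.
kwargs['batch_size'] = 256
assert create_estimator_and_inputs(None, **kwargs)['batch_size'] == 256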
research/object_detection/models/faster_rcnn_mobilenet_v1_feature_extractor.py
View file @
3b158095
...
...
@@ -14,6 +14,8 @@
 # ==============================================================================
 """Mobilenet v1 Faster R-CNN implementation."""
+import numpy as np
+
 import tensorflow as tf

 from object_detection.meta_architectures import faster_rcnn_meta_arch
...
...
@@ -23,22 +25,31 @@ from nets import mobilenet_v1
 slim = tf.contrib.slim

-_MOBILENET_V1_100_CONV_NO_LAST_STRIDE_DEFS = [
-    mobilenet_v1.Conv(kernel=[3, 3], stride=2, depth=32),
-    mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=1, depth=64),
-    mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=2, depth=128),
-    mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=1, depth=128),
-    mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=2, depth=256),
-    mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=1, depth=256),
-    mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=2, depth=512),
-    mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=1, depth=512),
-    mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=1, depth=512),
-    mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=1, depth=512),
-    mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=1, depth=512),
-    mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=1, depth=512),
-    mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=1, depth=1024),
-    mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=1, depth=1024)
-]
+
+def _get_mobilenet_conv_no_last_stride_defs(conv_depth_ratio_in_percentage):
+  if conv_depth_ratio_in_percentage not in [25, 50, 75, 100]:
+    raise ValueError(
+        'Only the following ratio percentages are supported: 25, 50, 75, 100')
+  conv_depth_ratio_in_percentage = float(conv_depth_ratio_in_percentage) / 100.0
+  channels = np.array([32, 64, 128, 128, 256, 256, 512, 512, 512, 512, 512,
+                       512, 1024, 1024], dtype=np.float32)
+  channels = (channels * conv_depth_ratio_in_percentage).astype(np.int32)
+  return [
+      mobilenet_v1.Conv(kernel=[3, 3], stride=2, depth=channels[0]),
+      mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=1, depth=channels[1]),
+      mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=2, depth=channels[2]),
+      mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=1, depth=channels[3]),
+      mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=2, depth=channels[4]),
+      mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=1, depth=channels[5]),
+      mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=2, depth=channels[6]),
+      mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=1, depth=channels[7]),
+      mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=1, depth=channels[8]),
+      mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=1, depth=channels[9]),
+      mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=1, depth=channels[10]),
+      mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=1, depth=channels[11]),
+      mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=1, depth=channels[12]),
+      mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=1, depth=channels[13])
+  ]


 class FasterRCNNMobilenetV1FeatureExtractor(
...
...
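A worked example of the scaling performed by _get_mobilenet_conv_no_last_stride_defs: every channel count is multiplied by the ratio and truncated to int32, so at 25% the stem shrinks from 32 to 8 channels.

import numpy as np

channels = np.array([32, 64, 128, 128, 256, 256, 512, 512, 512, 512, 512,
                     512, 1024, 1024], dtype=np.float32)
print((channels * 0.25).astype(np.int32))
# [  8  16  32  32  64  64 128 128 128 128 128 128 256 256]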
@@ -53,7 +64,8 @@ class FasterRCNNMobilenetV1FeatureExtractor(
                weight_decay=0.0,
                depth_multiplier=1.0,
                min_depth=16,
-               skip_last_stride=False):
+               skip_last_stride=False,
+               conv_depth_ratio_in_percentage=100):
     """Constructor.

     Args:
...
...
@@ -65,6 +77,8 @@ class FasterRCNNMobilenetV1FeatureExtractor(
       depth_multiplier: float depth multiplier for feature extractor.
       min_depth: minimum feature extractor depth.
       skip_last_stride: Skip the last stride if True.
+      conv_depth_ratio_in_percentage: Conv depth ratio in percentage. Only
+        applied if skip_last_stride is True.

     Raises:
       ValueError: If `first_stage_features_stride` is not 8 or 16.
...
...
@@ -74,6 +88,7 @@ class FasterRCNNMobilenetV1FeatureExtractor(
     self._depth_multiplier = depth_multiplier
     self._min_depth = min_depth
     self._skip_last_stride = skip_last_stride
+    self._conv_depth_ratio_in_percentage = conv_depth_ratio_in_percentage
     super(FasterRCNNMobilenetV1FeatureExtractor, self).__init__(
         is_training, first_stage_features_stride, batch_norm_trainable,
         reuse_weights, weight_decay)
...
...
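A minimal instantiation sketch of the extended constructor; the argument values are illustrative, and conv_depth_ratio_in_percentage only takes effect when skip_last_stride is True.

from object_detection.models import faster_rcnn_mobilenet_v1_feature_extractor

feature_extractor = (
    faster_rcnn_mobilenet_v1_feature_extractor
    .FasterRCNNMobilenetV1FeatureExtractor(
        is_training=True,
        first_stage_features_stride=16,
        batch_norm_trainable=False,
        reuse_weights=None,
        weight_decay=0.0,
        depth_multiplier=1.0,
        min_depth=16,
        skip_last_stride=True,
        conv_depth_ratio_in_percentage=50))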
@@ -124,7 +139,9 @@ class FasterRCNNMobilenetV1FeatureExtractor(
         reuse=self._reuse_weights) as scope:
       params = {}
       if self._skip_last_stride:
-        params['conv_defs'] = _MOBILENET_V1_100_CONV_NO_LAST_STRIDE_DEFS
+        params['conv_defs'] = _get_mobilenet_conv_no_last_stride_defs(
+            conv_depth_ratio_in_percentage=self.
+            _conv_depth_ratio_in_percentage)
       _, activations = mobilenet_v1.mobilenet_v1_base(
           preprocessed_inputs,
           final_endpoint='Conv2d_11_pointwise',
...
...
@@ -150,6 +167,11 @@ class FasterRCNNMobilenetV1FeatureExtractor(
"""
net
=
proposal_feature_maps
conv_depth
=
1024
if
self
.
_skip_last_stride
:
conv_depth_ratio
=
float
(
self
.
_conv_depth_ratio_in_percentage
)
/
100.0
conv_depth
=
int
(
float
(
conv_depth
)
*
conv_depth_ratio
)
depth
=
lambda
d
:
max
(
int
(
d
*
1.0
),
16
)
with
tf
.
variable_scope
(
'MobilenetV1'
,
reuse
=
self
.
_reuse_weights
):
with
slim
.
arg_scope
(
...
...
@@ -160,13 +182,13 @@ class FasterRCNNMobilenetV1FeatureExtractor(
           [slim.conv2d, slim.separable_conv2d], padding='SAME'):
         net = slim.separable_conv2d(
             net,
-            depth(1024), [3, 3],
+            depth(conv_depth), [3, 3],
             depth_multiplier=1,
             stride=2,
             scope='Conv2d_12_pointwise')
         return slim.separable_conv2d(
             net,
-            depth(1024), [3, 3],
+            depth(conv_depth), [3, 3],
             depth_multiplier=1,
             stride=1,
             scope='Conv2d_13_pointwise')
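Tying the two second-stage hunks together: with skip_last_stride=True and a 50 percent ratio, Conv2d_12 and Conv2d_13 shrink from 1024 to 512 output channels.

conv_depth = 1024
conv_depth_ratio = float(50) / 100.0  # conv_depth_ratio_in_percentage=50
conv_depth = int(float(conv_depth) * conv_depth_ratio)
print(conv_depth)  # 512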
research/object_detection/protos/box_predictor.proto
View file @
3b158095
...
...
@@ -20,7 +20,7 @@ message ConvolutionalBoxPredictor {
   // Hyperparameters for convolution ops used in the box predictor.
   optional Hyperparams conv_hyperparams = 1;

-  // Minumum feature depth prior to predicting box encodings and class
+  // Minimum feature depth prior to predicting box encodings and class
   // predictions.
   optional int32 min_depth = 2 [default = 0];
...
...
@@ -81,6 +81,12 @@ message WeightSharedConvolutionalBoxPredictor {
   // training where there are large number of negative boxes. See
   // https://arxiv.org/abs/1708.02002 for details.
   optional float class_prediction_bias_init = 10 [default = 0.0];
+
+  // Whether to use dropout for class prediction.
+  optional bool use_dropout = 11 [default = false];
+
+  // Keep probability for dropout
+  optional float dropout_keep_probability = 12 [default = 0.8];
 }

 message MaskRCNNBoxPredictor {
...
...
@@ -119,6 +125,10 @@ message MaskRCNNBoxPredictor {
   // branch.
   optional int32 mask_prediction_num_conv_layers = 11 [default = 2];

   optional bool masks_are_class_agnostic = 12 [default = false];
+
+  // Whether to use one box for all classes rather than a different box for
+  // each class.
+  optional bool share_box_across_classes = 13 [default = false];
 }

 message RfcnBoxPredictor {
...
...
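For reference, a hypothetical pipeline-config fragment (text-format protobuf, in the style of samples/configs) exercising the new dropout fields; the surrounding model config is elided:

box_predictor {
  weight_shared_convolutional_box_predictor {
    # New in this change: dropout on the class-prediction branch.
    use_dropout: true
    dropout_keep_probability: 0.8
  }
}

and, for Mask R-CNN style predictors:

box_predictor {
  mask_rcnn_box_predictor {
    # New in this change: predict one shared box for all classes.
    share_box_across_classes: true
  }
}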
research/object_detection/protos/input_reader.proto
View file @
3b158095
...
...
@@ -60,6 +60,9 @@ message InputReader {
   // Number of parallel decode ops to apply.
   optional uint32 num_parallel_map_calls = 14 [default = 64];

+  // Number of groundtruth keypoints per object.
+  optional uint32 num_keypoints = 16 [default = 0];
+
   // Whether to load groundtruth instance masks.
   optional bool load_instance_masks = 7 [default = false];
...
...
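Likewise, a hypothetical input-reader fragment using the new field; the paths are placeholders, and 17 matches COCO-style human keypoints:

train_input_reader {
  tf_record_input_reader {
    input_path: "/data/train.record"
  }
  label_map_path: "/data/label_map.pbtxt"
  # New in this change: number of groundtruth keypoints per object.
  num_keypoints: 17
}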