ModelZoo / ResNet50_tensorflow / Commits

Commit 27b4acd4, authored Sep 25, 2018 by Aman Gupta

    Merge remote-tracking branch 'upstream/master'

Parents: 5133522f, d4e1f97f
Changes: 240
Showing 20 changed files with 15202 additions and 576 deletions (+15202 −576)
research/object_detection/core/target_assigner.py                      +8     −6
research/object_detection/core/target_assigner_test.py                 +2     −4
research/object_detection/data/fgvc_2854_classes_label_map.pbtxt       +14270 −0
research/object_detection/data_decoders/tf_example_decoder.py          +53    −36
research/object_detection/data_decoders/tf_example_decoder_test.py     +321   −296
research/object_detection/dataset_tools/create_coco_tf_record.py       +2     −2
research/object_detection/dataset_tools/create_coco_tf_record_test.py  +6     −0
research/object_detection/eval_util.py                                 +55    −31
research/object_detection/g3doc/detection_model_zoo.md                 +13    −7
research/object_detection/g3doc/running_locally.md                     +2     −2
research/object_detection/g3doc/running_pets.md                        +1     −1
research/object_detection/inputs.py                                    +80    −13
research/object_detection/inputs_test.py                               +177   −36
research/object_detection/legacy/eval.py                               +21    −26
research/object_detection/legacy/evaluator.py                          +2     −1
research/object_detection/matchers/argmax_matcher.py                   +9     −5
research/object_detection/matchers/argmax_matcher_test.py              +28    −0
research/object_detection/matchers/bipartite_matcher.py                +12    −6
research/object_detection/matchers/bipartite_matcher_test.py           +19    −9
research/object_detection/meta_architectures/faster_rcnn_meta_arch.py  +121   −95
research/object_detection/core/target_assigner.py

@@ -93,8 +93,7 @@ class TargetAssigner(object):
              groundtruth_boxes,
              groundtruth_labels=None,
              unmatched_class_label=None,
-             groundtruth_weights=None,
-             **params):
+             groundtruth_weights=None):
     """Assign classification and regression targets to each anchor.

     For a given set of anchors and groundtruth detections, match anchors

@@ -121,9 +120,11 @@ class TargetAssigner(object):
         If set to None, unmatched_cls_target is set to be [0] for each anchor.
       groundtruth_weights: a float tensor of shape [M] indicating the weight to
         assign to all anchors match to a particular groundtruth box. The weights
-        must be in [0., 1.]. If None, all weights are set to 1.
-      **params: Additional keyword arguments for specific implementations of
-        the Matcher.
+        must be in [0., 1.]. If None, all weights are set to 1. Generally no
+        groundtruth boxes with zero weight match to any anchors as matchers are
+        aware of groundtruth weights. Additionally, `cls_weights` and
+        `reg_weights` are calculated using groundtruth weights as an added
+        safety.

     Returns:
       cls_targets: a float32 tensor with shape [num_anchors, d_1, d_2 ... d_k],

@@ -177,7 +178,8 @@ class TargetAssigner(object):
         [unmatched_shape_assert, labels_and_box_shapes_assert]):
       match_quality_matrix = self._similarity_calc.compare(groundtruth_boxes,
                                                            anchors)
-      match = self._matcher.match(match_quality_matrix, **params)
+      match = self._matcher.match(match_quality_matrix,
+                                  valid_rows=tf.greater(groundtruth_weights, 0))
       reg_targets = self._create_regression_targets(anchors,
                                                     groundtruth_boxes,
                                                     match)
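The substantive change above threads groundtruth weights into the matcher: rows whose weight is zero are flagged invalid via `valid_rows`, so zero-weight (e.g. padded) groundtruth boxes can no longer claim anchors. A minimal sketch of the idea with a toy match-quality matrix (the values and the explicit masking are illustrative; real matchers receive `valid_rows` and apply their own handling):

import tensorflow as tf

# Toy similarity matrix: 2 groundtruth boxes (rows) x 3 anchors (columns).
match_quality_matrix = tf.constant([[0.9, 0.1, 0.3],
                                    [0.8, 0.7, 0.2]])
# The second box has weight 0, e.g. a padded or ignored annotation.
groundtruth_weights = tf.constant([1.0, 0.0])

# Mirrors what assign() now passes to the matcher.
valid_rows = tf.greater(groundtruth_weights, 0)

# Zero out similarities of invalid rows so they can never win a match.
masked = match_quality_matrix * tf.cast(valid_rows[:, tf.newaxis], tf.float32)

with tf.Session() as sess:
  print(sess.run(masked))  # [[0.9 0.1 0.3]
                           #  [0.  0.  0. ]]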
research/object_detection/core/target_assigner_test.py

@@ -495,8 +495,7 @@ class TargetAssignerTest(test_case.TestCase):
           priors,
           boxes,
           groundtruth_labels,
-          unmatched_class_label=unmatched_class_label,
-          num_valid_rows=3)
+          unmatched_class_label=unmatched_class_label)

   def test_raises_error_on_invalid_groundtruth_labels(self):
     similarity_calc = region_similarity_calculator.NegSqDistSimilarity()

@@ -520,8 +519,7 @@ class TargetAssignerTest(test_case.TestCase):
           priors,
           boxes,
           groundtruth_labels,
-          unmatched_class_label=unmatched_class_label,
-          num_valid_rows=3)
+          unmatched_class_label=unmatched_class_label)

 class BatchTargetAssignerTest(test_case.TestCase):
research/object_detection/data/fgvc_2854_classes_label_map.pbtxt  (new file: 0 → 100644)

This source diff could not be displayed because it is too large. You can view the blob instead.
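The new file is a label map in the StringIntLabelMap text format that the decoder below consumes; the entries shown here are illustrative placeholders, not rows from the actual FGVC file (which has 2854 items). The tests later in this commit build the same format inline:

label_map_string = """
item {
  id: 1
  name: 'fgvc_category_one'    # hypothetical class text stored in tf.Examples
  display_name: 'species one'
}
item {
  id: 2
  name: 'fgvc_category_two'
  display_name: 'species two'
}
"""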
research/object_detection/data_decoders/tf_example_decoder.py

@@ -19,9 +19,6 @@ protos for object detection.
 """
 import tensorflow as tf

-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import math_ops
 from object_detection.core import data_decoder
 from object_detection.core import standard_fields as fields
 from object_detection.protos import input_reader_pb2
@@ -30,14 +27,12 @@ from object_detection.utils import label_map_util
 slim_example_decoder = tf.contrib.slim.tfexample_decoder


-# TODO(lzc): keep LookupTensor and BackupHandler in sync with
-# tf.contrib.slim.tfexample_decoder version.
-class LookupTensor(slim_example_decoder.Tensor):
-  """An ItemHandler that returns a parsed Tensor, the result of a lookup."""
+class _ClassTensorHandler(slim_example_decoder.Tensor):
+  """An ItemHandler to fetch class ids from class text."""

   def __init__(self,
                tensor_key,
-               table,
+               label_map_proto_file,
                shape_keys=None,
                shape=None,
                default_value=''):

@@ -47,7 +42,8 @@ class LookupTensor(slim_example_decoder.Tensor):
     Args:
       tensor_key: the name of the `TFExample` feature to read the tensor from.
-      table: A tf.lookup table.
+      label_map_proto_file: File path to a text format LabelMapProto message
+        mapping class text to id.
       shape_keys: Optional name or list of names of the TF-Example feature in
         which the tensor shape is stored. If a list, then each corresponds to
         one dimension of the shape.
@@ -59,16 +55,39 @@ class LookupTensor(slim_example_decoder.Tensor):
     Raises:
       ValueError: if both `shape_keys` and `shape` are specified.
     """
-    self._table = table
-    super(LookupTensor, self).__init__(tensor_key, shape_keys, shape,
-                                       default_value)
+    name_to_id = label_map_util.get_label_map_dict(
+        label_map_proto_file, use_display_name=False)
+    # We use a default_value of -1, but we expect all labels to be contained
+    # in the label map.
+    name_to_id_table = tf.contrib.lookup.HashTable(
+        initializer=tf.contrib.lookup.KeyValueTensorInitializer(
+            keys=tf.constant(list(name_to_id.keys())),
+            values=tf.constant(list(name_to_id.values()), dtype=tf.int64)),
+        default_value=-1)
+    display_name_to_id = label_map_util.get_label_map_dict(
+        label_map_proto_file, use_display_name=True)
+    # We use a default_value of -1, but we expect all labels to be contained
+    # in the label map.
+    display_name_to_id_table = tf.contrib.lookup.HashTable(
+        initializer=tf.contrib.lookup.KeyValueTensorInitializer(
+            keys=tf.constant(list(display_name_to_id.keys())),
+            values=tf.constant(
+                list(display_name_to_id.values()), dtype=tf.int64)),
+        default_value=-1)
+
+    self._name_to_id_table = name_to_id_table
+    self._display_name_to_id_table = display_name_to_id_table
+    super(_ClassTensorHandler, self).__init__(tensor_key, shape_keys, shape,
+                                              default_value)

   def tensors_to_item(self, keys_to_tensors):
-    unmapped_tensor = super(LookupTensor, self).tensors_to_item(keys_to_tensors)
-    return self._table.lookup(unmapped_tensor)
+    unmapped_tensor = super(_ClassTensorHandler,
+                            self).tensors_to_item(keys_to_tensors)
+    return tf.maximum(self._name_to_id_table.lookup(unmapped_tensor),
+                      self._display_name_to_id_table.lookup(unmapped_tensor))


-class BackupHandler(slim_example_decoder.ItemHandler):
+class _BackupHandler(slim_example_decoder.ItemHandler):
   """An ItemHandler that tries two ItemHandlers in order."""

   def __init__(self, handler, backup):
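The rewritten handler resolves class text against both the `name` and `display_name` columns of the label map and takes the elementwise maximum of the two lookups; because a miss returns -1, whichever table knows the string wins, and a string known to neither stays -1. A self-contained sketch of that behavior (toy mappings, not from the label map shipped in this commit):

import tensorflow as tf

name_to_id = {'cat': 3}          # keyed by `name` entries
display_name_to_id = {'dog': 1}  # keyed by `display_name` entries

def make_table(mapping):
  return tf.contrib.lookup.HashTable(
      initializer=tf.contrib.lookup.KeyValueTensorInitializer(
          keys=tf.constant(list(mapping.keys())),
          values=tf.constant(list(mapping.values()), dtype=tf.int64)),
      default_value=-1)

name_table = make_table(name_to_id)
display_table = make_table(display_name_to_id)

class_text = tf.constant(['cat', 'dog', 'bird'])
# -1 from the table that misses loses to the id from the table that hits.
ids = tf.maximum(name_table.lookup(class_text),
                 display_table.lookup(class_text))

with tf.Session() as sess:
  sess.run(tf.tables_initializer())
  print(sess.run(ids))  # [ 3  1 -1]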
@@ -92,12 +111,12 @@ class BackupHandler(slim_example_decoder.ItemHandler):
           'Backup handler is of type %s instead of ItemHandler' % type(backup))
     self._handler = handler
     self._backup = backup
-    super(BackupHandler, self).__init__(handler.keys + backup.keys)
+    super(_BackupHandler, self).__init__(handler.keys + backup.keys)

   def tensors_to_item(self, keys_to_tensors):
     item = self._handler.tensors_to_item(keys_to_tensors)
-    return control_flow_ops.cond(
-        pred=math_ops.equal(math_ops.reduce_prod(array_ops.shape(item)), 0),
+    return tf.cond(
+        pred=tf.equal(tf.reduce_prod(tf.shape(item)), 0),
         true_fn=lambda: self._backup.tensors_to_item(keys_to_tensors),
         false_fn=lambda: item)
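The fallback in _BackupHandler is a graph-level tf.cond: if the primary handler's output is empty (the product of its shape is 0), the backup handler's output is used instead. A toy sketch with plain tensors standing in for ItemHandler outputs (hypothetical values):

import tensorflow as tf

def with_backup(primary, backup):
  # Mirrors _BackupHandler.tensors_to_item: fall back when `primary` is empty.
  return tf.cond(
      pred=tf.equal(tf.reduce_prod(tf.shape(primary)), 0),
      true_fn=lambda: backup,
      false_fn=lambda: primary)

empty = tf.constant([], dtype=tf.int64)
labels = tf.constant([42, 10], dtype=tf.int64)

with tf.Session() as sess:
  print(sess.run(with_backup(empty, labels)))   # [42 10] -- fell back
  print(sess.run(with_backup(labels, empty)))   # [42 10] -- primary kept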
@@ -140,6 +159,9 @@ class TfExampleDecoder(data_decoder.DataDecoder):
         input_reader_pb2.DEFAULT, input_reader_pb2.NUMERICAL, or
         input_reader_pb2.PNG_MASKS.
     """
+    # TODO(rathodv): delete unused `use_display_name` argument once we change
+    # other decoders to handle label maps similarly.
+    del use_display_name
     self.keys_to_features = {
         'image/encoded':
             tf.FixedLenFeature((), tf.string, default_value=''),
@@ -267,27 +289,18 @@ class TfExampleDecoder(data_decoder.DataDecoder):
     else:
       raise ValueError('Did not recognize the `instance_mask_type` option.')
     if label_map_proto_file:
-      label_map = label_map_util.get_label_map_dict(label_map_proto_file,
-                                                    use_display_name)
-      # We use a default_value of -1, but we expect all labels to be contained
-      # in the label map.
-      table = tf.contrib.lookup.HashTable(
-          initializer=tf.contrib.lookup.KeyValueTensorInitializer(
-              keys=tf.constant(list(label_map.keys())),
-              values=tf.constant(list(label_map.values()), dtype=tf.int64)),
-          default_value=-1)
       # If the label_map_proto is provided, try to use it in conjunction with
       # the class text, and fall back to a materialized ID.
-      # TODO(lzc): note that here we are using BackupHandler defined in this
-      # file(which is branching slim_example_decoder.BackupHandler). Need to
-      # switch back to slim_example_decoder.BackupHandler once tf 1.5 becomes
-      # more popular.
-      label_handler = BackupHandler(
-          LookupTensor('image/object/class/text', table, default_value=''),
+      label_handler = _BackupHandler(
+          _ClassTensorHandler(
+              'image/object/class/text', label_map_proto_file,
+              default_value=''),
           slim_example_decoder.Tensor('image/object/class/label'))
-      image_label_handler = BackupHandler(
-          LookupTensor(
-              fields.TfExampleFields.image_class_text, table,
+      image_label_handler = _BackupHandler(
+          _ClassTensorHandler(
+              fields.TfExampleFields.image_class_text,
+              label_map_proto_file,
               default_value=''),
           slim_example_decoder.Tensor(fields.TfExampleFields.image_class_label))
     else:
       label_handler = slim_example_decoder.Tensor('image/object/class/label')
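Putting the pieces together: when a label map is supplied, decoding prefers the class text (via _ClassTensorHandler) and falls back to the materialized integer label. A usage sketch, with a hypothetical label map path, in the spirit of the tests later in this commit:

import tensorflow as tf
from object_detection.data_decoders import tf_example_decoder

# 'label_map.pbtxt' is a hypothetical path to a StringIntLabelMap text file.
example_decoder = tf_example_decoder.TfExampleDecoder(
    label_map_proto_file='label_map.pbtxt')

serialized_example = tf.placeholder(tf.string, shape=[])
tensor_dict = example_decoder.decode(serialized_example)

with tf.Session() as sess:
  sess.run(tf.tables_initializer())  # the class-text HashTables need this
  # Feed a real serialized tf.Example to materialize tensor_dict:
  # sess.run(tensor_dict, feed_dict={serialized_example: example_bytes})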
@@ -309,6 +322,8 @@ class TfExampleDecoder(data_decoder.DataDecoder):
       A dictionary of the following tensors.
       fields.InputDataFields.image - 3D uint8 tensor of shape [None, None, 3]
         containing image.
+      fields.InputDataFields.original_image_spatial_shape - 1D int32 tensor of
+        shape [2] containing shape of the image.
       fields.InputDataFields.source_id - string tensor containing original
         image id.
       fields.InputDataFields.key - string tensor with unique sha256 hash key.
@@ -352,6 +367,8 @@ class TfExampleDecoder(data_decoder.DataDecoder):
       is_crowd = fields.InputDataFields.groundtruth_is_crowd
       tensor_dict[is_crowd] = tf.cast(tensor_dict[is_crowd], dtype=tf.bool)
       tensor_dict[fields.InputDataFields.image].set_shape([None, None, 3])
+      tensor_dict[fields.InputDataFields.original_image_spatial_shape] = tf.shape(
+          tensor_dict[fields.InputDataFields.image])[:2]
       tensor_dict[fields.InputDataFields.num_groundtruth_boxes] = tf.shape(
           tensor_dict[fields.InputDataFields.groundtruth_boxes])[0]
research/object_detection/data_decoders/tf_example_decoder_test.py

@@ -12,24 +12,17 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================

 """Tests for object_detection.data_decoders.tf_example_decoder."""

 import os
 import numpy as np
 import tensorflow as tf

-from tensorflow.core.example import example_pb2
-from tensorflow.core.example import feature_pb2
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import test_util
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import lookup_ops
-from tensorflow.python.ops import parsing_ops
 from object_detection.core import standard_fields as fields
 from object_detection.data_decoders import tf_example_decoder
 from object_detection.protos import input_reader_pb2
+from object_detection.utils import dataset_util

-slim_example_decoder = tf.contrib.slim.tfexample_decoder
@@ -56,25 +49,6 @@ class TfExampleDecoderTest(tf.test.TestCase):
       raise ValueError('Invalid encoding type.')
     return image_decoded

-  def _Int64Feature(self, value):
-    return tf.train.Feature(int64_list=tf.train.Int64List(value=value))
-
-  def _FloatFeature(self, value):
-    return tf.train.Feature(float_list=tf.train.FloatList(value=value))
-
-  def _BytesFeature(self, value):
-    if isinstance(value, list):
-      return tf.train.Feature(bytes_list=tf.train.BytesList(value=value))
-    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
-
-  def _Int64FeatureFromList(self, ndarray):
-    return feature_pb2.Feature(
-        int64_list=feature_pb2.Int64List(value=ndarray.flatten().tolist()))
-
-  def _BytesFeatureFromList(self, ndarray):
-    values = ndarray.flatten().tolist()
-    return feature_pb2.Feature(
-        bytes_list=feature_pb2.BytesList(value=values))

   def testDecodeAdditionalChannels(self):
     image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
     encoded_jpeg = self._EncodeImage(image_tensor)
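Throughout the rest of this file the private _BytesFeature/_Int64Feature/_FloatFeature helpers are replaced by the shared ones in object_detection.utils.dataset_util. Presumably those helpers are thin wrappers over tf.train.Feature, roughly like the following sketch (not the repo's exact source):

import tensorflow as tf

def int64_feature(value):
  return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))

def int64_list_feature(value):
  return tf.train.Feature(int64_list=tf.train.Int64List(value=value))

def bytes_feature(value):
  return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

def bytes_list_feature(value):
  return tf.train.Feature(bytes_list=tf.train.BytesList(value=value))

def float_list_feature(value):
  return tf.train.Feature(float_list=tf.train.FloatList(value=value))

Note the scalar/list split: int64_feature(10) wraps a single value while int64_list_feature([0, 1]) takes a list, which is why call sites below change shape as well as name.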
@@ -88,14 +62,14 @@ class TfExampleDecoderTest(tf.test.TestCase):
         features=tf.train.Features(
             feature={
                 'image/encoded':
-                    self._BytesFeature(encoded_jpeg),
+                    dataset_util.bytes_feature(encoded_jpeg),
                 'image/additional_channels/encoded':
-                    self._BytesFeatureFromList(
-                        np.array([encoded_additional_channel] * 2)),
+                    dataset_util.bytes_list_feature(
+                        [encoded_additional_channel] * 2),
                 'image/format':
-                    self._BytesFeature('jpeg'),
+                    dataset_util.bytes_feature('jpeg'),
                 'image/source_id':
-                    self._BytesFeature('image_id'),
+                    dataset_util.bytes_feature('image_id'),
             })).SerializeToString()

     example_decoder = tf_example_decoder.TfExampleDecoder(
@@ -108,118 +82,44 @@ class TfExampleDecoderTest(tf.test.TestCase):
         np.concatenate([decoded_additional_channel] * 2, axis=2),
         tensor_dict[fields.InputDataFields.image_additional_channels])

-  def testDecodeExampleWithBranchedBackupHandler(self):
-    example1 = example_pb2.Example(
-        features=feature_pb2.Features(
-            feature={
-                'image/object/class/text':
-                    self._BytesFeatureFromList(
-                        np.array(['cat', 'dog', 'guinea pig'])),
-                'image/object/class/label':
-                    self._Int64FeatureFromList(np.array([42, 10, 900]))
-            }))
-    example2 = example_pb2.Example(
-        features=feature_pb2.Features(
-            feature={
-                'image/object/class/text':
-                    self._BytesFeatureFromList(
-                        np.array(['cat', 'dog', 'guinea pig'])),
-            }))
-    example3 = example_pb2.Example(
-        features=feature_pb2.Features(
-            feature={
-                'image/object/class/label':
-                    self._Int64FeatureFromList(np.array([42, 10, 901]))
-            }))
-    # 'dog' -> 0, 'guinea pig' -> 1, 'cat' -> 2
-    table = lookup_ops.index_table_from_tensor(
-        constant_op.constant(['dog', 'guinea pig', 'cat']))
-    keys_to_features = {
-        'image/object/class/text': parsing_ops.VarLenFeature(dtypes.string),
-        'image/object/class/label': parsing_ops.VarLenFeature(dtypes.int64),
-    }
-    backup_handler = tf_example_decoder.BackupHandler(
-        handler=slim_example_decoder.Tensor('image/object/class/label'),
-        backup=tf_example_decoder.LookupTensor('image/object/class/text',
-                                               table))
-    items_to_handlers = {
-        'labels': backup_handler,
-    }
-    decoder = slim_example_decoder.TFExampleDecoder(keys_to_features,
-                                                    items_to_handlers)
-    obtained_class_ids_each_example = []
-    with self.test_session() as sess:
-      sess.run(lookup_ops.tables_initializer())
-      for example in [example1, example2, example3]:
-        serialized_example = array_ops.reshape(
-            example.SerializeToString(), shape=[])
-        obtained_class_ids_each_example.append(
-            decoder.decode(serialized_example)[0].eval())
-
-    self.assertAllClose([42, 10, 900], obtained_class_ids_each_example[0])
-    self.assertAllClose([2, 0, 1], obtained_class_ids_each_example[1])
-    self.assertAllClose([42, 10, 901], obtained_class_ids_each_example[2])
-
-  def testDecodeExampleWithBranchedLookup(self):
-    example = example_pb2.Example(
-        features=feature_pb2.Features(
-            feature={
-                'image/object/class/text':
-                    self._BytesFeatureFromList(
-                        np.array(['cat', 'dog', 'guinea pig'])),
-            }))
-    serialized_example = example.SerializeToString()
-    # 'dog' -> 0, 'guinea pig' -> 1, 'cat' -> 2
-    table = lookup_ops.index_table_from_tensor(
-        constant_op.constant(['dog', 'guinea pig', 'cat']))
-    with self.test_session() as sess:
-      sess.run(lookup_ops.tables_initializer())
-      serialized_example = array_ops.reshape(serialized_example, shape=[])
-      keys_to_features = {
-          'image/object/class/text': parsing_ops.VarLenFeature(dtypes.string),
-      }
-      items_to_handlers = {
-          'labels':
-              tf_example_decoder.LookupTensor('image/object/class/text', table),
-      }
-      decoder = slim_example_decoder.TFExampleDecoder(keys_to_features,
-                                                      items_to_handlers)
-      obtained_class_ids = decoder.decode(serialized_example)[0].eval()
-
-    self.assertAllClose([2, 0, 1], obtained_class_ids)
-
   def testDecodeJpegImage(self):
     image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
     encoded_jpeg = self._EncodeImage(image_tensor)
     decoded_jpeg = self._DecodeImage(encoded_jpeg)
-    example = tf.train.Example(
-        features=tf.train.Features(
-            feature={
-                'image/encoded': self._BytesFeature(encoded_jpeg),
-                'image/format': self._BytesFeature('jpeg'),
-                'image/source_id': self._BytesFeature('image_id'),
-            })).SerializeToString()
+    example = tf.train.Example(
+        features=tf.train.Features(
+            feature={
+                'image/encoded': dataset_util.bytes_feature(encoded_jpeg),
+                'image/format': dataset_util.bytes_feature('jpeg'),
+                'image/source_id': dataset_util.bytes_feature('image_id'),
+            })).SerializeToString()
     example_decoder = tf_example_decoder.TfExampleDecoder()
     tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))

     self.assertAllEqual(
         (tensor_dict[fields.InputDataFields.image].get_shape().as_list()),
         [None, None, 3])
+    self.assertAllEqual(
+        (tensor_dict[fields.InputDataFields.original_image_spatial_shape]
+         .get_shape().as_list()), [2])
     with self.test_session() as sess:
       tensor_dict = sess.run(tensor_dict)
       self.assertAllEqual(decoded_jpeg, tensor_dict[fields.InputDataFields.image])
+      self.assertAllEqual([4, 5], tensor_dict[
+          fields.InputDataFields.original_image_spatial_shape])
       self.assertEqual('image_id', tensor_dict[fields.InputDataFields.source_id])

   def testDecodeImageKeyAndFilename(self):
     image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
     encoded_jpeg = self._EncodeImage(image_tensor)
-    example = tf.train.Example(
-        features=tf.train.Features(
-            feature={
-                'image/encoded': self._BytesFeature(encoded_jpeg),
-                'image/key/sha256': self._BytesFeature('abc'),
-                'image/filename': self._BytesFeature('filename')
-            })).SerializeToString()
+    example = tf.train.Example(
+        features=tf.train.Features(
+            feature={
+                'image/encoded': dataset_util.bytes_feature(encoded_jpeg),
+                'image/key/sha256': dataset_util.bytes_feature('abc'),
+                'image/filename': dataset_util.bytes_feature('filename')
+            })).SerializeToString()
     example_decoder = tf_example_decoder.TfExampleDecoder()
     tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
@@ -234,21 +134,28 @@ class TfExampleDecoderTest(tf.test.TestCase):
     image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
     encoded_png = self._EncodeImage(image_tensor, encoding_type='png')
     decoded_png = self._DecodeImage(encoded_png, encoding_type='png')
-    example = tf.train.Example(
-        features=tf.train.Features(
-            feature={
-                'image/encoded': self._BytesFeature(encoded_png),
-                'image/format': self._BytesFeature('png'),
-                'image/source_id': self._BytesFeature('image_id')
-            })).SerializeToString()
+    example = tf.train.Example(
+        features=tf.train.Features(
+            feature={
+                'image/encoded': dataset_util.bytes_feature(encoded_png),
+                'image/format': dataset_util.bytes_feature('png'),
+                'image/source_id': dataset_util.bytes_feature('image_id')
+            })).SerializeToString()
     example_decoder = tf_example_decoder.TfExampleDecoder()
     tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))

     self.assertAllEqual(
         (tensor_dict[fields.InputDataFields.image].get_shape().as_list()),
         [None, None, 3])
+    self.assertAllEqual(
+        (tensor_dict[fields.InputDataFields.original_image_spatial_shape]
+         .get_shape().as_list()), [2])
     with self.test_session() as sess:
       tensor_dict = sess.run(tensor_dict)
       self.assertAllEqual(decoded_png, tensor_dict[fields.InputDataFields.image])
+      self.assertAllEqual([4, 5], tensor_dict[
+          fields.InputDataFields.original_image_spatial_shape])
       self.assertEqual('image_id', tensor_dict[fields.InputDataFields.source_id])

   def testDecodePngInstanceMasks(self):
@@ -265,9 +172,12 @@ class TfExampleDecoderTest(tf.test.TestCase):
     example = tf.train.Example(
         features=tf.train.Features(
             feature={
-                'image/encoded': self._BytesFeature(encoded_jpeg),
-                'image/format': self._BytesFeature('jpeg'),
-                'image/object/mask': self._BytesFeature(encoded_masks)
+                'image/encoded': dataset_util.bytes_feature(encoded_jpeg),
+                'image/format': dataset_util.bytes_feature('jpeg'),
+                'image/object/mask':
+                    dataset_util.bytes_list_feature(encoded_masks)
             })).SerializeToString()
     example_decoder = tf_example_decoder.TfExampleDecoder(
@@ -288,11 +198,16 @@ class TfExampleDecoderTest(tf.test.TestCase):
     example = tf.train.Example(
         features=tf.train.Features(
             feature={
-                'image/encoded': self._BytesFeature(encoded_jpeg),
-                'image/format': self._BytesFeature('jpeg'),
-                'image/object/mask': self._BytesFeature(encoded_masks),
-                'image/height': self._Int64Feature([10]),
-                'image/width': self._Int64Feature([10]),
+                'image/encoded': dataset_util.bytes_feature(encoded_jpeg),
+                'image/format': dataset_util.bytes_feature('jpeg'),
+                'image/object/mask':
+                    dataset_util.bytes_list_feature(encoded_masks),
+                'image/height': dataset_util.int64_feature(10),
+                'image/width': dataset_util.int64_feature(10),
             })).SerializeToString()
     example_decoder = tf_example_decoder.TfExampleDecoder(
@@ -312,25 +227,33 @@ class TfExampleDecoderTest(tf.test.TestCase):
     bbox_xmins = [1.0, 5.0]
     bbox_ymaxs = [2.0, 6.0]
     bbox_xmaxs = [3.0, 7.0]
-    example = tf.train.Example(
-        features=tf.train.Features(
-            feature={
-                'image/encoded': self._BytesFeature(encoded_jpeg),
-                'image/format': self._BytesFeature('jpeg'),
-                'image/object/bbox/ymin': self._FloatFeature(bbox_ymins),
-                'image/object/bbox/xmin': self._FloatFeature(bbox_xmins),
-                'image/object/bbox/ymax': self._FloatFeature(bbox_ymaxs),
-                'image/object/bbox/xmax': self._FloatFeature(bbox_xmaxs),
-            })).SerializeToString()
+    example = tf.train.Example(
+        features=tf.train.Features(
+            feature={
+                'image/encoded': dataset_util.bytes_feature(encoded_jpeg),
+                'image/format': dataset_util.bytes_feature('jpeg'),
+                'image/object/bbox/ymin':
+                    dataset_util.float_list_feature(bbox_ymins),
+                'image/object/bbox/xmin':
+                    dataset_util.float_list_feature(bbox_xmins),
+                'image/object/bbox/ymax':
+                    dataset_util.float_list_feature(bbox_ymaxs),
+                'image/object/bbox/xmax':
+                    dataset_util.float_list_feature(bbox_xmaxs),
+            })).SerializeToString()
     example_decoder = tf_example_decoder.TfExampleDecoder()
     tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
     self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_boxes]
                          .get_shape().as_list()), [None, 4])
     with self.test_session() as sess:
       tensor_dict = sess.run(tensor_dict)
       expected_boxes = np.vstack([bbox_ymins, bbox_xmins,
                                   bbox_ymaxs, bbox_xmaxs]).transpose()
       self.assertAllEqual(expected_boxes,
                           tensor_dict[fields.InputDataFields.groundtruth_boxes])
       self.assertAllEqual(
@@ -346,30 +269,40 @@ class TfExampleDecoderTest(tf.test.TestCase):
     bbox_xmaxs = [3.0, 7.0]
     keypoint_ys = [0.0, 1.0, 2.0, 3.0, 4.0, 5.0]
     keypoint_xs = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]
-    example = tf.train.Example(
-        features=tf.train.Features(
-            feature={
-                'image/encoded': self._BytesFeature(encoded_jpeg),
-                'image/format': self._BytesFeature('jpeg'),
-                'image/object/bbox/ymin': self._FloatFeature(bbox_ymins),
-                'image/object/bbox/xmin': self._FloatFeature(bbox_xmins),
-                'image/object/bbox/ymax': self._FloatFeature(bbox_ymaxs),
-                'image/object/bbox/xmax': self._FloatFeature(bbox_xmaxs),
-                'image/object/keypoint/y': self._FloatFeature(keypoint_ys),
-                'image/object/keypoint/x': self._FloatFeature(keypoint_xs),
-            })).SerializeToString()
+    example = tf.train.Example(
+        features=tf.train.Features(
+            feature={
+                'image/encoded': dataset_util.bytes_feature(encoded_jpeg),
+                'image/format': dataset_util.bytes_feature('jpeg'),
+                'image/object/bbox/ymin':
+                    dataset_util.float_list_feature(bbox_ymins),
+                'image/object/bbox/xmin':
+                    dataset_util.float_list_feature(bbox_xmins),
+                'image/object/bbox/ymax':
+                    dataset_util.float_list_feature(bbox_ymaxs),
+                'image/object/bbox/xmax':
+                    dataset_util.float_list_feature(bbox_xmaxs),
+                'image/object/keypoint/y':
+                    dataset_util.float_list_feature(keypoint_ys),
+                'image/object/keypoint/x':
+                    dataset_util.float_list_feature(keypoint_xs),
+            })).SerializeToString()
     example_decoder = tf_example_decoder.TfExampleDecoder(num_keypoints=3)
     tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))

     self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_boxes]
                          .get_shape().as_list()), [None, 4])
     self.assertAllEqual(
         (tensor_dict[fields.InputDataFields.groundtruth_keypoints].get_shape()
          .as_list()), [2, 3, 2])
     with self.test_session() as sess:
       tensor_dict = sess.run(tensor_dict)
       expected_boxes = np.vstack([bbox_ymins, bbox_xmins,
                                   bbox_ymaxs, bbox_xmaxs]).transpose()
       self.assertAllEqual(expected_boxes,
                           tensor_dict[fields.InputDataFields.groundtruth_boxes])
       self.assertAllEqual(
@@ -377,9 +310,9 @@ class TfExampleDecoderTest(tf.test.TestCase):
       expected_keypoints = (
           np.vstack([keypoint_ys, keypoint_xs]).transpose().reshape((2, 3, 2)))
       self.assertAllEqual(
           expected_keypoints,
           tensor_dict[fields.InputDataFields.groundtruth_keypoints])

   def testDecodeDefaultGroundtruthWeights(self):
     image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
@@ -388,20 +321,28 @@ class TfExampleDecoderTest(tf.test.TestCase):
     bbox_xmins = [1.0, 5.0]
     bbox_ymaxs = [2.0, 6.0]
     bbox_xmaxs = [3.0, 7.0]
-    example = tf.train.Example(
-        features=tf.train.Features(
-            feature={
-                'image/encoded': self._BytesFeature(encoded_jpeg),
-                'image/format': self._BytesFeature('jpeg'),
-                'image/object/bbox/ymin': self._FloatFeature(bbox_ymins),
-                'image/object/bbox/xmin': self._FloatFeature(bbox_xmins),
-                'image/object/bbox/ymax': self._FloatFeature(bbox_ymaxs),
-                'image/object/bbox/xmax': self._FloatFeature(bbox_xmaxs),
-            })).SerializeToString()
+    example = tf.train.Example(
+        features=tf.train.Features(
+            feature={
+                'image/encoded': dataset_util.bytes_feature(encoded_jpeg),
+                'image/format': dataset_util.bytes_feature('jpeg'),
+                'image/object/bbox/ymin':
+                    dataset_util.float_list_feature(bbox_ymins),
+                'image/object/bbox/xmin':
+                    dataset_util.float_list_feature(bbox_xmins),
+                'image/object/bbox/ymax':
+                    dataset_util.float_list_feature(bbox_ymaxs),
+                'image/object/bbox/xmax':
+                    dataset_util.float_list_feature(bbox_xmaxs),
+            })).SerializeToString()
     example_decoder = tf_example_decoder.TfExampleDecoder()
     tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
     self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_boxes]
                          .get_shape().as_list()), [None, 4])
     with self.test_session() as sess:
       tensor_dict = sess.run(tensor_dict)
@@ -414,18 +355,22 @@ class TfExampleDecoderTest(tf.test.TestCase):
     image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
     encoded_jpeg = self._EncodeImage(image_tensor)
     bbox_classes = [0, 1]
-    example = tf.train.Example(
-        features=tf.train.Features(
-            feature={
-                'image/encoded': self._BytesFeature(encoded_jpeg),
-                'image/format': self._BytesFeature('jpeg'),
-                'image/object/class/label': self._Int64Feature(bbox_classes),
-            })).SerializeToString()
+    example = tf.train.Example(
+        features=tf.train.Features(
+            feature={
+                'image/encoded': dataset_util.bytes_feature(encoded_jpeg),
+                'image/format': dataset_util.bytes_feature('jpeg'),
+                'image/object/class/label':
+                    dataset_util.int64_list_feature(bbox_classes),
+            })).SerializeToString()
     example_decoder = tf_example_decoder.TfExampleDecoder()
     tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
     self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_classes]
                          .get_shape().as_list()), [2])
     with self.test_session() as sess:
       tensor_dict = sess.run(tensor_dict)
@@ -437,11 +382,16 @@ class TfExampleDecoderTest(tf.test.TestCase):
     image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
     encoded_jpeg = self._EncodeImage(image_tensor)
     bbox_classes = [1, 2]
-    example = tf.train.Example(
-        features=tf.train.Features(
-            feature={
-                'image/encoded': self._BytesFeature(encoded_jpeg),
-                'image/format': self._BytesFeature('jpeg'),
-                'image/object/class/label': self._Int64Feature(bbox_classes),
-            })).SerializeToString()
+    example = tf.train.Example(
+        features=tf.train.Features(
+            feature={
+                'image/encoded': dataset_util.bytes_feature(encoded_jpeg),
+                'image/format': dataset_util.bytes_feature('jpeg'),
+                'image/object/class/label':
+                    dataset_util.int64_list_feature(bbox_classes),
+            })).SerializeToString()
     label_map_string = """
       item {
        id:1
@@ -460,9 +410,8 @@ class TfExampleDecoderTest(tf.test.TestCase):
         label_map_proto_file=label_map_path)
     tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
-    self.assertAllEqual(
-        (tensor_dict[fields.InputDataFields.groundtruth_classes].get_shape()
-         .as_list()), [None])
+    self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_classes]
+                         .get_shape().as_list()), [None])
     init = tf.tables_initializer()
     with self.test_session() as sess:
@@ -480,11 +429,11 @@ class TfExampleDecoderTest(tf.test.TestCase):
         features=tf.train.Features(
             feature={
                 'image/encoded':
-                    self._BytesFeature(encoded_jpeg),
+                    dataset_util.bytes_feature(encoded_jpeg),
                 'image/format':
-                    self._BytesFeature('jpeg'),
+                    dataset_util.bytes_feature('jpeg'),
                 'image/object/class/text':
-                    self._BytesFeature(bbox_classes_text),
+                    dataset_util.bytes_list_feature(bbox_classes_text),
             })).SerializeToString()
     label_map_string = """
@@ -514,7 +463,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
       self.assertAllEqual([2, -1],
                           tensor_dict[fields.InputDataFields.groundtruth_classes])

-  def testDecodeObjectLabelWithMapping(self):
+  def testDecodeObjectLabelWithMappingWithDisplayName(self):
     image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
     encoded_jpeg = self._EncodeImage(image_tensor)
     bbox_classes_text = ['cat', 'dog']
@@ -522,11 +471,53 @@ class TfExampleDecoderTest(tf.test.TestCase):
         features=tf.train.Features(
             feature={
                 'image/encoded':
-                    self._BytesFeature(encoded_jpeg),
+                    dataset_util.bytes_feature(encoded_jpeg),
                 'image/format':
-                    self._BytesFeature('jpeg'),
+                    dataset_util.bytes_feature('jpeg'),
                 'image/object/class/text':
-                    self._BytesFeature(bbox_classes_text),
+                    dataset_util.bytes_list_feature(bbox_classes_text),
             })).SerializeToString()

+    label_map_string = """
+      item {
+        id:3
+        display_name:'cat'
+      }
+      item {
+        id:1
+        display_name:'dog'
+      }
+    """
+    label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt')
+    with tf.gfile.Open(label_map_path, 'wb') as f:
+      f.write(label_map_string)
+
+    example_decoder = tf_example_decoder.TfExampleDecoder(
+        label_map_proto_file=label_map_path)
+    tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
+
+    self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_classes]
+                         .get_shape().as_list()), [None])
+
+    with self.test_session() as sess:
+      sess.run(tf.tables_initializer())
+      tensor_dict = sess.run(tensor_dict)
+      self.assertAllEqual([3, 1],
+                          tensor_dict[fields.InputDataFields.groundtruth_classes])
+
+  def testDecodeObjectLabelWithMappingWithName(self):
+    image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
+    encoded_jpeg = self._EncodeImage(image_tensor)
+    bbox_classes_text = ['cat', 'dog']
+    example = tf.train.Example(
+        features=tf.train.Features(
+            feature={
+                'image/encoded': dataset_util.bytes_feature(encoded_jpeg),
+                'image/format': dataset_util.bytes_feature('jpeg'),
+                'image/object/class/text':
+                    dataset_util.bytes_list_feature(bbox_classes_text),
+            })).SerializeToString()
+
     label_map_string = """
@@ -561,17 +552,22 @@ class TfExampleDecoderTest(tf.test.TestCase):
     image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
     encoded_jpeg = self._EncodeImage(image_tensor)
     object_area = [100., 174.]
-    example = tf.train.Example(
-        features=tf.train.Features(
-            feature={
-                'image/encoded': self._BytesFeature(encoded_jpeg),
-                'image/format': self._BytesFeature('jpeg'),
-                'image/object/area': self._FloatFeature(object_area),
-            })).SerializeToString()
+    example = tf.train.Example(
+        features=tf.train.Features(
+            feature={
+                'image/encoded': dataset_util.bytes_feature(encoded_jpeg),
+                'image/format': dataset_util.bytes_feature('jpeg'),
+                'image/object/area':
+                    dataset_util.float_list_feature(object_area),
+            })).SerializeToString()
     example_decoder = tf_example_decoder.TfExampleDecoder()
     tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
     self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_area]
                          .get_shape().as_list()), [2])
     with self.test_session() as sess:
       tensor_dict = sess.run(tensor_dict)
@@ -583,67 +579,81 @@ class TfExampleDecoderTest(tf.test.TestCase):
     image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
     encoded_jpeg = self._EncodeImage(image_tensor)
     object_is_crowd = [0, 1]
-    example = tf.train.Example(
-        features=tf.train.Features(
-            feature={
-                'image/encoded': self._BytesFeature(encoded_jpeg),
-                'image/format': self._BytesFeature('jpeg'),
-                'image/object/is_crowd': self._Int64Feature(object_is_crowd),
-            })).SerializeToString()
+    example = tf.train.Example(
+        features=tf.train.Features(
+            feature={
+                'image/encoded': dataset_util.bytes_feature(encoded_jpeg),
+                'image/format': dataset_util.bytes_feature('jpeg'),
+                'image/object/is_crowd':
+                    dataset_util.int64_list_feature(object_is_crowd),
+            })).SerializeToString()
     example_decoder = tf_example_decoder.TfExampleDecoder()
     tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
     self.assertAllEqual(
         (tensor_dict[fields.InputDataFields.groundtruth_is_crowd].get_shape()
          .as_list()), [2])
     with self.test_session() as sess:
       tensor_dict = sess.run(tensor_dict)
       self.assertAllEqual(
           [bool(item) for item in object_is_crowd],
           tensor_dict[fields.InputDataFields.groundtruth_is_crowd])

+  @test_util.enable_c_shapes
   def testDecodeObjectDifficult(self):
     image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
     encoded_jpeg = self._EncodeImage(image_tensor)
     object_difficult = [0, 1]
-    example = tf.train.Example(
-        features=tf.train.Features(
-            feature={
-                'image/encoded': self._BytesFeature(encoded_jpeg),
-                'image/format': self._BytesFeature('jpeg'),
-                'image/object/difficult': self._Int64Feature(object_difficult),
-            })).SerializeToString()
+    example = tf.train.Example(
+        features=tf.train.Features(
+            feature={
+                'image/encoded': dataset_util.bytes_feature(encoded_jpeg),
+                'image/format': dataset_util.bytes_feature('jpeg'),
+                'image/object/difficult':
+                    dataset_util.int64_list_feature(object_difficult),
+            })).SerializeToString()
     example_decoder = tf_example_decoder.TfExampleDecoder()
     tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
     self.assertAllEqual(
         (tensor_dict[fields.InputDataFields.groundtruth_difficult].get_shape()
          .as_list()), [2])
     with self.test_session() as sess:
       tensor_dict = sess.run(tensor_dict)
       self.assertAllEqual(
           [bool(item) for item in object_difficult],
           tensor_dict[fields.InputDataFields.groundtruth_difficult])

+  @test_util.enable_c_shapes
   def testDecodeObjectGroupOf(self):
     image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
     encoded_jpeg = self._EncodeImage(image_tensor)
     object_group_of = [0, 1]
-    example = tf.train.Example(
-        features=tf.train.Features(
-            feature={
-                'image/encoded': self._BytesFeature(encoded_jpeg),
-                'image/format': self._BytesFeature('jpeg'),
-                'image/object/group_of': self._Int64Feature(object_group_of),
-            })).SerializeToString()
+    example = tf.train.Example(
+        features=tf.train.Features(
+            feature={
+                'image/encoded': dataset_util.bytes_feature(encoded_jpeg),
+                'image/format': dataset_util.bytes_feature('jpeg'),
+                'image/object/group_of':
+                    dataset_util.int64_list_feature(object_group_of),
+            })).SerializeToString()
     example_decoder = tf_example_decoder.TfExampleDecoder()
     tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
     self.assertAllEqual(
         (tensor_dict[fields.InputDataFields.groundtruth_group_of].get_shape()
          .as_list()), [2])
     with self.test_session() as sess:
       tensor_dict = sess.run(tensor_dict)
@@ -655,25 +665,27 @@ class TfExampleDecoderTest(tf.test.TestCase):
     image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
     encoded_jpeg = self._EncodeImage(image_tensor)
     object_weights = [0.75, 1.0]
-    example = tf.train.Example(
-        features=tf.train.Features(
-            feature={
-                'image/encoded': self._BytesFeature(encoded_jpeg),
-                'image/format': self._BytesFeature('jpeg'),
-                'image/object/weight': self._FloatFeature(object_weights),
-            })).SerializeToString()
+    example = tf.train.Example(
+        features=tf.train.Features(
+            feature={
+                'image/encoded': dataset_util.bytes_feature(encoded_jpeg),
+                'image/format': dataset_util.bytes_feature('jpeg'),
+                'image/object/weight':
+                    dataset_util.float_list_feature(object_weights),
+            })).SerializeToString()
     example_decoder = tf_example_decoder.TfExampleDecoder()
     tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
     self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_weights]
                          .get_shape().as_list()), [None])
     with self.test_session() as sess:
       tensor_dict = sess.run(tensor_dict)
       self.assertAllEqual(
           object_weights,
           tensor_dict[fields.InputDataFields.groundtruth_weights])

+  @test_util.enable_c_shapes
   def testDecodeInstanceSegmentation(self):
@@ -682,15 +694,13 @@ class TfExampleDecoderTest(tf.test.TestCase):
     image_width = 3
     # Randomly generate image.
-    image_tensor = np.random.randint(256, size=(image_height,
-                                                image_width,
-                                                3)).astype(np.uint8)
+    image_tensor = np.random.randint(
+        256, size=(image_height, image_width, 3)).astype(np.uint8)
     encoded_jpeg = self._EncodeImage(image_tensor)

     # Randomly generate instance segmentation masks.
-    instance_masks = (np.random.randint(2, size=(num_instances,
-                                                 image_height,
-                                                 image_width)).astype(np.float32))
+    instance_masks = (
+        np.random.randint(2, size=(num_instances, image_height,
+                                   image_width)).astype(np.float32))
     instance_masks_flattened = np.reshape(instance_masks, [-1])
@@ -698,25 +708,32 @@ class TfExampleDecoderTest(tf.test.TestCase):
     object_classes = np.random.randint(
         100, size=(num_instances)).astype(np.int64)
-    example = tf.train.Example(
-        features=tf.train.Features(
-            feature={
-                'image/encoded': self._BytesFeature(encoded_jpeg),
-                'image/format': self._BytesFeature('jpeg'),
-                'image/height': self._Int64Feature([image_height]),
-                'image/width': self._Int64Feature([image_width]),
-                'image/object/mask': self._FloatFeature(instance_masks_flattened),
-                'image/object/class/label': self._Int64Feature(object_classes)
-            })).SerializeToString()
+    example = tf.train.Example(
+        features=tf.train.Features(
+            feature={
+                'image/encoded': dataset_util.bytes_feature(encoded_jpeg),
+                'image/format': dataset_util.bytes_feature('jpeg'),
+                'image/height': dataset_util.int64_feature(image_height),
+                'image/width': dataset_util.int64_feature(image_width),
+                'image/object/mask':
+                    dataset_util.float_list_feature(instance_masks_flattened),
+                'image/object/class/label':
+                    dataset_util.int64_list_feature(object_classes)
+            })).SerializeToString()
     example_decoder = tf_example_decoder.TfExampleDecoder(
         load_instance_masks=True)
     tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
     self.assertAllEqual(
         (tensor_dict[fields.InputDataFields.groundtruth_instance_masks]
          .get_shape().as_list()), [4, 5, 3])
     self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_classes]
                          .get_shape().as_list()), [4])
     with self.test_session() as sess:
       tensor_dict = sess.run(tensor_dict)
@@ -724,24 +741,21 @@ class TfExampleDecoderTest(tf.test.TestCase):
       self.assertAllEqual(
           instance_masks.astype(np.float32),
           tensor_dict[fields.InputDataFields.groundtruth_instance_masks])
       self.assertAllEqual(
           object_classes,
           tensor_dict[fields.InputDataFields.groundtruth_classes])

   def testInstancesNotAvailableByDefault(self):
     num_instances = 4
     image_height = 5
     image_width = 3
     # Randomly generate image.
-    image_tensor = np.random.randint(256, size=(image_height,
-                                                image_width,
-                                                3)).astype(np.uint8)
+    image_tensor = np.random.randint(
+        256, size=(image_height, image_width, 3)).astype(np.uint8)
     encoded_jpeg = self._EncodeImage(image_tensor)
     # Randomly generate instance segmentation masks.
-    instance_masks = (np.random.randint(2, size=(num_instances,
-                                                 image_height,
-                                                 image_width)).astype(np.float32))
+    instance_masks = (
+        np.random.randint(2, size=(num_instances, image_height,
+                                   image_width)).astype(np.float32))
     instance_masks_flattened = np.reshape(instance_masks, [-1])
@@ -749,18 +763,26 @@ class TfExampleDecoderTest(tf.test.TestCase):
     object_classes = np.random.randint(
         100, size=(num_instances)).astype(np.int64)
-    example = tf.train.Example(
-        features=tf.train.Features(
-            feature={
-                'image/encoded': self._BytesFeature(encoded_jpeg),
-                'image/format': self._BytesFeature('jpeg'),
-                'image/height': self._Int64Feature([image_height]),
-                'image/width': self._Int64Feature([image_width]),
-                'image/object/mask': self._FloatFeature(instance_masks_flattened),
-                'image/object/class/label': self._Int64Feature(object_classes)
-            })).SerializeToString()
+    example = tf.train.Example(
+        features=tf.train.Features(
+            feature={
+                'image/encoded': dataset_util.bytes_feature(encoded_jpeg),
+                'image/format': dataset_util.bytes_feature('jpeg'),
+                'image/height': dataset_util.int64_feature(image_height),
+                'image/width': dataset_util.int64_feature(image_width),
+                'image/object/mask':
+                    dataset_util.float_list_feature(instance_masks_flattened),
+                'image/object/class/label':
+                    dataset_util.int64_list_feature(object_classes)
+            })).SerializeToString()
     example_decoder = tf_example_decoder.TfExampleDecoder()
     tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
     self.assertTrue(
         fields.InputDataFields.groundtruth_instance_masks not in tensor_dict)

   def testDecodeImageLabels(self):
     image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
@@ -768,9 +790,9 @@ class TfExampleDecoderTest(tf.test.TestCase):
     example = tf.train.Example(
         features=tf.train.Features(
             feature={
-                'image/encoded': self._BytesFeature(encoded_jpeg),
-                'image/format': self._BytesFeature('jpeg'),
-                'image/class/label': self._Int64Feature([1, 2]),
+                'image/encoded': dataset_util.bytes_feature(encoded_jpeg),
+                'image/format': dataset_util.bytes_feature('jpeg'),
+                'image/class/label': dataset_util.int64_list_feature([1, 2]),
             })).SerializeToString()
     example_decoder = tf_example_decoder.TfExampleDecoder()
     tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
@@ -784,9 +806,12 @@ class TfExampleDecoderTest(tf.test.TestCase):
     example = tf.train.Example(
         features=tf.train.Features(
             feature={
-                'image/encoded': self._BytesFeature(encoded_jpeg),
-                'image/format': self._BytesFeature('jpeg'),
-                'image/class/text': self._BytesFeature(['dog', 'cat']),
+                'image/encoded': dataset_util.bytes_feature(encoded_jpeg),
+                'image/format': dataset_util.bytes_feature('jpeg'),
+                'image/class/text':
+                    dataset_util.bytes_list_feature(['dog', 'cat']),
             })).SerializeToString()
     label_map_string = """
       item {
research/object_detection/dataset_tools/create_coco_tf_record.py

@@ -177,8 +177,8 @@ def create_tf_example(image,
           dataset_util.float_list_feature(ymin),
       'image/object/bbox/ymax':
           dataset_util.float_list_feature(ymax),
-      'image/object/class/label':
-          dataset_util.int64_list_feature(category_ids),
+      'image/object/class/text':
+          dataset_util.bytes_list_feature(category_names),
       'image/object/is_crowd':
           dataset_util.int64_list_feature(is_crowd),
       'image/object/area':
research/object_detection/dataset_tools/create_coco_tf_record_test.py

@@ -106,6 +106,9 @@ class CreateCocoTFRecordTest(tf.test.TestCase):
     self._assertProtoEqual(
         example.features.feature['image/object/bbox/ymax'].float_list.value,
         [0.75])
+    self._assertProtoEqual(
+        example.features.feature['image/object/class/text'].bytes_list.value,
+        ['cat'])

   def test_create_tf_example_with_instance_masks(self):
     image_file_name = 'tmp_image.jpg'

@@ -169,6 +172,9 @@ class CreateCocoTFRecordTest(tf.test.TestCase):
     self._assertProtoEqual(
         example.features.feature['image/object/bbox/ymax'].float_list.value,
         [1])
+    self._assertProtoEqual(
+        example.features.feature['image/object/class/text'].bytes_list.value,
+        ['dog'])
     encoded_mask_pngs = [
         io.BytesIO(encoded_masks)
         for encoded_masks in example.features.feature['image/object/mask']
        .bytes_list.value
research/object_detection/eval_util.py

@@ -14,7 +14,6 @@
 # ==============================================================================
 """Common utility functions for evaluation."""
 import collections
-import logging
 import os
 import time
@@ -53,15 +52,15 @@ def write_metrics(metrics, global_step, summary_dir):
     global_step: Global step at which the metrics are computed.
     summary_dir: Directory to write tensorflow summaries to.
   """
-  logging.info('Writing metrics to tf summary.')
+  tf.logging.info('Writing metrics to tf summary.')
   summary_writer = tf.summary.FileWriterCache.get(summary_dir)
   for key in sorted(metrics):
     summary = tf.Summary(value=[
         tf.Summary.Value(tag=key, simple_value=metrics[key]),
     ])
     summary_writer.add_summary(summary, global_step)
-    logging.info('%s: %f', key, metrics[key])
-  logging.info('Metrics written to tf summary.')
+    tf.logging.info('%s: %f', key, metrics[key])
+  tf.logging.info('Metrics written to tf summary.')


 # TODO(rathodv): Add tests.
@@ -141,7 +140,7 @@ def visualize_detection_results(result_dict,
   if show_groundtruth and input_fields.groundtruth_boxes not in result_dict:
     raise ValueError('If show_groundtruth is enabled, result_dict must contain '
                      'groundtruth_boxes.')
-  logging.info('Creating detection visualizations.')
+  tf.logging.info('Creating detection visualizations.')
   category_index = label_map_util.create_category_index(categories)

   image = np.squeeze(result_dict[input_fields.original_image], axis=0)
@@ -205,7 +204,8 @@ def visualize_detection_results(result_dict,
   summary_writer = tf.summary.FileWriterCache.get(summary_dir)
   summary_writer.add_summary(summary, global_step)

-  logging.info('Detection visualizations written to summary with tag %s.', tag)
+  tf.logging.info('Detection visualizations written to summary with tag %s.',
+                  tag)


 def _run_checkpoint_once(tensor_dict,
@@ -218,7 +218,8 @@ def _run_checkpoint_once(tensor_dict,
                          master='',
                          save_graph=False,
                          save_graph_dir='',
-                         losses_dict=None):
+                         losses_dict=None,
+                         eval_export_path=None):
   """Evaluates metrics defined in evaluators and returns summaries.

   This function loads the latest checkpoint in checkpoint_dirs and evaluates
@@ -258,6 +259,8 @@ def _run_checkpoint_once(tensor_dict,
     save_graph_dir: where to store the Tensorflow graph on disk. If save_graph
       is True this must be non-empty.
     losses_dict: optional dictionary of scalar detection losses.
+    eval_export_path: Path for saving a json file that contains the detection
+      results in json format.

   Returns:
     global_step: the count of global steps.
@@ -292,7 +295,8 @@ def _run_checkpoint_once(tensor_dict,
   try:
     for batch in range(int(num_batches)):
       if (batch + 1) % 100 == 0:
-        logging.info('Running eval ops batch %d/%d', batch + 1, num_batches)
+        tf.logging.info('Running eval ops batch %d/%d', batch + 1,
+                        num_batches)
       if not batch_processor:
         try:
           if not losses_dict:
@@ -301,7 +305,7 @@ def _run_checkpoint_once(tensor_dict,
                                                losses_dict])
           counters['success'] += 1
         except tf.errors.InvalidArgumentError:
-          logging.info('Skipping image')
+          tf.logging.info('Skipping image')
           counters['skipped'] += 1
           result_dict = {}
       else:
@@ -316,18 +320,31 @@ def _run_checkpoint_once(tensor_dict,
      # decoders to return correct image_id.
      # TODO(akuznetsa): result_dict contains batches of images, while
      # add_single_ground_truth_image_info expects a single image. Fix
+      if (isinstance(result_dict, dict) and
+          result_dict[fields.InputDataFields.key]):
+        image_id = result_dict[fields.InputDataFields.key]
+      else:
+        image_id = batch
      evaluator.add_single_ground_truth_image_info(
-          image_id=batch, groundtruth_dict=result_dict)
+          image_id=image_id, groundtruth_dict=result_dict)
      evaluator.add_single_detected_image_info(
-          image_id=batch, detections_dict=result_dict)
-    logging.info('Running eval batches done.')
+          image_id=image_id, detections_dict=result_dict)
+    tf.logging.info('Running eval batches done.')
  except tf.errors.OutOfRangeError:
-    logging.info('Done evaluating -- epoch limit reached')
+    tf.logging.info('Done evaluating -- epoch limit reached')
  finally:
    # When done, ask the threads to stop.
-    logging.info('# success: %d', counters['success'])
-    logging.info('# skipped: %d', counters['skipped'])
+    tf.logging.info('# success: %d', counters['success'])
+    tf.logging.info('# skipped: %d', counters['skipped'])

  all_evaluator_metrics = {}
+  if eval_export_path and eval_export_path is not None:
+    for evaluator in evaluators:
+      if (isinstance(evaluator, coco_evaluation.CocoDetectionEvaluator) or
+          isinstance(evaluator, coco_evaluation.CocoMaskEvaluator)):
+        tf.logging.info('Started dumping to json file.')
+        evaluator.dump_detections_to_json_file(
+            json_output_path=eval_export_path)
+        tf.logging.info('Finished dumping to json file.')
  for evaluator in evaluators:
    metrics = evaluator.evaluate()
    evaluator.clear()
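The json-export branch above only fires for COCO-style evaluators. A minimal sketch of the same check, assuming an evaluator list that has already accumulated groundtruth and detections (the helper name is hypothetical):

```python
from object_detection.metrics import coco_evaluation

def maybe_dump_detections(evaluators, eval_export_path):
  # Mirror of the branch above: only COCO-style evaluators know how to
  # serialize their accumulated detections to json.
  if not eval_export_path:
    return
  for evaluator in evaluators:
    if isinstance(evaluator, (coco_evaluation.CocoDetectionEvaluator,
                              coco_evaluation.CocoMaskEvaluator)):
      evaluator.dump_detections_to_json_file(
          json_output_path=eval_export_path)
```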
...
...
@@ -356,7 +373,8 @@ def repeated_checkpoint_run(tensor_dict,
                            master='',
                            save_graph=False,
                            save_graph_dir='',
-                           losses_dict=None):
+                           losses_dict=None,
+                           eval_export_path=None):
  """Periodically evaluates desired tensors using checkpoint_dirs or restore_fn.

  This function repeatedly loads a checkpoint and evaluates a desired
...
...
@@ -397,6 +415,8 @@ def repeated_checkpoint_run(tensor_dict,
    save_graph_dir: where to save on disk the Tensorflow graph. If store_graph
      is True this must be non-empty.
    losses_dict: optional dictionary of scalar detection losses.
+    eval_export_path: Path for saving a json file that contains the detection
+      results in json format.

  Returns:
    metrics: A dictionary containing metric names and values in the latest
...
...
@@ -417,31 +437,36 @@ def repeated_checkpoint_run(tensor_dict,
  number_of_evaluations = 0
  while True:
    start = time.time()
-    logging.info('Starting evaluation at ' + time.strftime(
-        '%Y-%m-%d-%H:%M:%S', time.gmtime()))
+    tf.logging.info('Starting evaluation at ' + time.strftime(
+        '%Y-%m-%d-%H:%M:%S', time.gmtime()))
    model_path = tf.train.latest_checkpoint(checkpoint_dirs[0])
    if not model_path:
-      logging.info('No model found in %s. Will try again in %d seconds',
-                   checkpoint_dirs[0], eval_interval_secs)
+      tf.logging.info('No model found in %s. Will try again in %d seconds',
+                      checkpoint_dirs[0], eval_interval_secs)
    elif model_path == last_evaluated_model_path:
-      logging.info('Found already evaluated checkpoint. Will try again in %d '
-                   'seconds', eval_interval_secs)
+      tf.logging.info('Found already evaluated checkpoint. Will try again in '
+                      '%d seconds', eval_interval_secs)
    else:
      last_evaluated_model_path = model_path
-      global_step, metrics = _run_checkpoint_once(tensor_dict, evaluators,
-                                                  batch_processor,
-                                                  checkpoint_dirs,
-                                                  variables_to_restore,
-                                                  restore_fn, num_batches,
-                                                  master, save_graph,
-                                                  save_graph_dir,
-                                                  losses_dict=losses_dict)
+      global_step, metrics = _run_checkpoint_once(
+          tensor_dict, evaluators, batch_processor, checkpoint_dirs,
+          variables_to_restore, restore_fn, num_batches, master, save_graph,
+          save_graph_dir, losses_dict=losses_dict,
+          eval_export_path=eval_export_path)
      write_metrics(metrics, global_step, summary_dir)
    number_of_evaluations += 1

    if (max_number_of_evaluations and
        number_of_evaluations >= max_number_of_evaluations):
-      logging.info('Finished evaluation!')
+      tf.logging.info('Finished evaluation!')
      break
    time_to_next_eval = start + eval_interval_secs - time.time()
    if time_to_next_eval > 0:
...
...
@@ -680,4 +705,3 @@ def evaluator_options_from_eval_config(eval_config):
              eval_config.include_metrics_per_category)
  }
  return evaluator_options
research/object_detection/g3doc/detection_model_zoo.md
View file @ 27b4acd4
...
...
@@ -2,13 +2,12 @@
We provide a collection of detection models pre-trained on the
[COCO dataset](http://mscoco.org), the
[Kitti dataset](http://www.cvlibs.net/datasets/kitti/),
-the [Open Images dataset](https://github.com/openimages/dataset) and the
-[AVA v2.1 dataset](https://research.google.com/ava/). These models can be useful for
-out-of-the-box inference if you are interested in categories already in COCO
-(e.g., humans, cars, etc) or in Open Images (e.g.,
-surfboard, jacuzzi, etc). They are also useful for initializing your models when
-training on novel datasets.
+the [Open Images dataset](https://github.com/openimages/dataset), the
+[AVA v2.1 dataset](https://research.google.com/ava/) and the
+[iNaturalist Species Detection Dataset](https://github.com/visipedia/inat_comp/blob/master/2017/README.md#bounding-boxes).
+These models can be useful for out-of-the-box inference if you are interested in
+categories already in those datasets. They are also useful for initializing your
+models when training on novel datasets.

In the table below, we list each such pre-trained model including:
...
...
@@ -113,6 +112,13 @@ Model name
[faster_rcnn_inception_resnet_v2_atrous_oid](http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_resnet_v2_atrous_oid_2018_01_28.tar.gz) | 727 | 37 | Boxes
[faster_rcnn_inception_resnet_v2_atrous_lowproposals_oid](http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_resnet_v2_atrous_lowproposals_oid_2018_01_28.tar.gz) | 347 | | Boxes

+## iNaturalist Species-trained models
+
+Model name | Speed (ms) | Pascal mAP@0.5 | Outputs
+--- | :---: | :-------------: | :-----:
+[faster_rcnn_resnet101_fgvc](http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet101_fgvc_2018_07_19.tar.gz) | 395 | 58 | Boxes
+[faster_rcnn_resnet50_fgvc](http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet50_fgvc_2018_07_19.tar.gz) | 366 | 55 | Boxes
+
## AVA v2.1 trained models
...
...
research/object_detection/g3doc/running_locally.md
View file @ 27b4acd4
...
...
@@ -37,12 +37,12 @@ A local training job can be run with the following command:
PIPELINE_CONFIG_PATH={path to pipeline config file}
MODEL_DIR={path to model directory}
NUM_TRAIN_STEPS=50000
-NUM_EVAL_STEPS=2000
+SAMPLE_1_OF_N_EVAL_EXAMPLES=1
python object_detection/model_main.py \
    --pipeline_config_path=${PIPELINE_CONFIG_PATH} \
    --model_dir=${MODEL_DIR} \
    --num_train_steps=${NUM_TRAIN_STEPS} \
-    --num_eval_steps=${NUM_EVAL_STEPS} \
+    --sample_1_of_n_eval_examples=$SAMPLE_1_OF_N_EVAL_EXAMPLES \
    --alsologtostderr
```
...
...
research/object_detection/g3doc/running_pets.md
View file @ 27b4acd4
...
...
@@ -216,7 +216,7 @@ To start training and evaluation, execute the following command from the
```bash
# From tensorflow/models/research/
gcloud ml-engine jobs submit training `whoami`_object_detection_pets_`date +%m_%d_%Y_%H_%M_%S` \
-    --runtime-version 1.9 \
+    --runtime-version 1.8 \
    --job-dir=gs://${YOUR_GCS_BUCKET}/model_dir \
    --packages dist/object_detection-0.1.tar.gz,slim/dist/slim-0.1.tar.gz,/tmp/pycocotools/pycocotools-2.0.tar.gz \
    --module-name object_detection.model_main \
...
...
research/object_detection/inputs.py
View file @ 27b4acd4
...
...
@@ -52,7 +52,8 @@ def transform_input_data(tensor_dict,
                         num_classes,
                         data_augmentation_fn=None,
                         merge_multiple_boxes=False,
-                        retain_original_image=False):
+                        retain_original_image=False,
+                        use_bfloat16=False):
  """A single function that is responsible for all input data transformations.

  Data transformation functions are applied in the following order.
...
...
@@ -86,6 +87,7 @@ def transform_input_data(tensor_dict,
      and classes for a given image if the boxes are exactly the same.
    retain_original_image: (optional) whether to retain original image in the
      output dictionary.
+    use_bfloat16: (optional) a bool, whether to use bfloat16 in training.

  Returns:
    A dictionary keyed by fields.InputDataFields containing the tensors obtained
...
...
@@ -101,7 +103,8 @@ def transform_input_data(tensor_dict,
  if retain_original_image:
    tensor_dict[fields.InputDataFields.original_image] = tf.cast(
-        tensor_dict[fields.InputDataFields.image], tf.uint8)
+        image_resizer_fn(tensor_dict[fields.InputDataFields.image], None)[0],
+        tf.uint8)

  # Apply data augmentation ops.
  if data_augmentation_fn is not None:
...
...
@@ -111,6 +114,9 @@ def transform_input_data(tensor_dict,
  image = tensor_dict[fields.InputDataFields.image]
  preprocessed_resized_image, true_image_shape = model_preprocess_fn(
      tf.expand_dims(tf.to_float(image), axis=0))
+  if use_bfloat16:
+    preprocessed_resized_image = tf.cast(
+        preprocessed_resized_image, tf.bfloat16)
  tensor_dict[fields.InputDataFields.image] = tf.squeeze(
      preprocessed_resized_image, axis=0)
  tensor_dict[fields.InputDataFields.true_image_shape] = tf.squeeze(
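A standalone sketch of the bfloat16 path added above: preprocessing runs in float32, and only the result is cast down (the normalization here is a stand-in for the model's preprocess fn):

```python
import tensorflow as tf

image = tf.placeholder(tf.uint8, shape=[None, None, 3])
# Preprocess in float32, as model_preprocess_fn does.
preprocessed = (2.0 / 255.0) * tf.to_float(image) - 1.0
# With use_bfloat16=True the tensor handed to the model is bfloat16.
preprocessed = tf.cast(preprocessed, tf.bfloat16)
```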
...
...
@@ -128,13 +134,33 @@ def transform_input_data(tensor_dict,
  tensor_dict[fields.InputDataFields.groundtruth_classes] = tf.one_hot(
      zero_indexed_groundtruth_classes, num_classes)
+  if fields.InputDataFields.groundtruth_confidences in tensor_dict:
+    groundtruth_confidences = tensor_dict[
+        fields.InputDataFields.groundtruth_confidences]
+    tensor_dict[fields.InputDataFields.groundtruth_confidences] = (
+        tf.sparse_to_dense(zero_indexed_groundtruth_classes, [num_classes],
+                           groundtruth_confidences, validate_indices=False))
+  else:
+    groundtruth_confidences = tf.ones_like(
+        zero_indexed_groundtruth_classes, dtype=tf.float32)
+    tensor_dict[fields.InputDataFields.groundtruth_confidences] = (
+        tensor_dict[fields.InputDataFields.groundtruth_classes])

  if merge_multiple_boxes:
-    merged_boxes, merged_classes, _ = util_ops.merge_boxes_with_multiple_labels(
-        tensor_dict[fields.InputDataFields.groundtruth_boxes],
-        zero_indexed_groundtruth_classes, num_classes)
+    merged_boxes, merged_classes, merged_confidences, _ = (
+        util_ops.merge_boxes_with_multiple_labels(
+            tensor_dict[fields.InputDataFields.groundtruth_boxes],
+            zero_indexed_groundtruth_classes,
+            groundtruth_confidences,
+            num_classes))
    merged_classes = tf.cast(merged_classes, tf.float32)
    tensor_dict[fields.InputDataFields.groundtruth_boxes] = merged_boxes
    tensor_dict[fields.InputDataFields.groundtruth_classes] = merged_classes
+    tensor_dict[fields.InputDataFields.groundtruth_confidences] = (
+        merged_confidences)

  return tensor_dict
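A small numeric sketch of the `tf.sparse_to_dense` step above: per-box class indices and confidences become one dense `[num_classes]` vector (the values here are made up):

```python
import tensorflow as tf

num_classes = 3
classes = tf.constant([0, 2], dtype=tf.int32)        # zero-indexed labels
confidences = tf.constant([0.8, 0.4], dtype=tf.float32)
dense = tf.sparse_to_dense(classes, [num_classes], confidences,
                           validate_indices=False)
with tf.Session() as sess:
  print(sess.run(dense))  # [0.8 0.  0.4]
```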
...
...
@@ -174,6 +200,7 @@ def pad_input_data_to_static_shapes(tensor_dict, max_num_boxes, num_classes,
      fields.InputDataFields.image: [
          height, width, 3 + num_additional_channels],
+      fields.InputDataFields.original_image_spatial_shape: [2],
      fields.InputDataFields.image_additional_channels: [
          height, width, num_additional_channels],
...
...
@@ -183,6 +210,8 @@ def pad_input_data_to_static_shapes(tensor_dict, max_num_boxes, num_classes,
      fields.InputDataFields.groundtruth_difficult: [max_num_boxes],
      fields.InputDataFields.groundtruth_boxes: [max_num_boxes, 4],
      fields.InputDataFields.groundtruth_classes: [max_num_boxes, num_classes],
+      fields.InputDataFields.groundtruth_confidences: [
+          max_num_boxes, num_classes],
      fields.InputDataFields.groundtruth_instance_masks: [
          max_num_boxes, height, width],
...
...
@@ -198,11 +227,12 @@ def pad_input_data_to_static_shapes(tensor_dict, max_num_boxes, num_classes,
          max_num_boxes, num_classes + 1 if num_classes is not None else None],
      fields.InputDataFields.groundtruth_image_classes: [num_classes],
+      fields.InputDataFields.groundtruth_image_confidences: [num_classes],
  }

  if fields.InputDataFields.original_image in tensor_dict:
    padding_shapes[fields.InputDataFields.original_image] = [
-        None, None, 3 + num_additional_channels
+        height, width, 3 + num_additional_channels
    ]
  if fields.InputDataFields.groundtruth_keypoints in tensor_dict:
    tensor_shape = (
...
...
@@ -252,9 +282,12 @@ def augment_input_data(tensor_dict, data_augmentation_options):
                           in tensor_dict)
  include_keypoints = (fields.InputDataFields.groundtruth_keypoints
                       in tensor_dict)
+  include_label_scores = (fields.InputDataFields.groundtruth_confidences
+                          in tensor_dict)
  tensor_dict = preprocessor.preprocess(
      tensor_dict, data_augmentation_options,
      func_arg_map=preprocessor.get_default_func_arg_map(
+          include_label_scores=include_label_scores,
          include_instance_masks=include_instance_masks,
          include_keypoints=include_keypoints))
  tensor_dict[fields.InputDataFields.image] = tf.squeeze(
...
...
@@ -275,6 +308,7 @@ def _get_labels_dict(input_dict):
    labels_dict[key] = input_dict[key]

  optional_label_keys = [
+      fields.InputDataFields.groundtruth_confidences,
      fields.InputDataFields.groundtruth_keypoints,
      fields.InputDataFields.groundtruth_instance_masks,
      fields.InputDataFields.groundtruth_area,
...
...
@@ -291,16 +325,50 @@ def _get_labels_dict(input_dict):
  return labels_dict


+def _replace_empty_string_with_random_number(string_tensor):
+  """Returns string unchanged if non-empty, and random string tensor otherwise.
+
+  The random string is an integer between 0 and 2**63 - 1, cast as a string.
+
+  Args:
+    string_tensor: A tf.tensor of dtype string.
+
+  Returns:
+    out_string: A tf.tensor of dtype string. If string_tensor contains the empty
+      string, out_string will contain a random integer cast to a string.
+      Otherwise string_tensor is returned unchanged.
+  """
+
+  empty_string = tf.constant('', dtype=tf.string, name='EmptyString')
+
+  random_source_id = tf.as_string(
+      tf.random_uniform(shape=[], maxval=2**63 - 1, dtype=tf.int64))
+
+  out_string = tf.cond(
+      tf.equal(string_tensor, empty_string),
+      true_fn=lambda: random_source_id,
+      false_fn=lambda: string_tensor)
+
+  return out_string
+
+
def _get_features_dict(input_dict):
  """Extracts features dict from input dict."""
-  hash_from_source_id = tf.string_to_hash_bucket_fast(
-      input_dict[fields.InputDataFields.source_id], HASH_BINS)
+
+  source_id = _replace_empty_string_with_random_number(
+      input_dict[fields.InputDataFields.source_id])
+
+  hash_from_source_id = tf.string_to_hash_bucket_fast(source_id, HASH_BINS)
  features = {
      fields.InputDataFields.image:
          input_dict[fields.InputDataFields.image],
      HASH_KEY: tf.cast(hash_from_source_id, tf.int32),
      fields.InputDataFields.true_image_shape:
-          input_dict[fields.InputDataFields.true_image_shape]
+          input_dict[fields.InputDataFields.true_image_shape],
+      fields.InputDataFields.original_image_spatial_shape:
+          input_dict[fields.InputDataFields.original_image_spatial_shape]
  }
  if fields.InputDataFields.original_image in input_dict:
    features[fields.InputDataFields.original_image] = input_dict[
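A sketch of what the new source_id fallback buys `_get_features_dict`: an empty id is replaced before hashing, so the hash bucket is always well defined (the bucket count below is a stand-in for `HASH_BINS`):

```python
import tensorflow as tf

source_id = tf.constant('')  # e.g. a record with no source_id set
random_id = tf.as_string(
    tf.random_uniform(shape=[], maxval=2**63 - 1, dtype=tf.int64))
safe_id = tf.cond(tf.equal(source_id, ''),
                  true_fn=lambda: random_id,
                  false_fn=lambda: source_id)
bucket = tf.string_to_hash_bucket_fast(safe_id, 100000)  # stand-in HASH_BINS
```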
...
...
@@ -392,7 +460,8 @@ def create_train_input_fn(train_config, train_input_config,
        num_classes=config_util.get_number_of_classes(model_config),
        data_augmentation_fn=data_augmentation_fn,
        merge_multiple_boxes=train_config.merge_multiple_label_boxes,
-        retain_original_image=train_config.retain_original_images)
+        retain_original_image=train_config.retain_original_images,
+        use_bfloat16=train_config.use_bfloat16)

    tensor_dict = pad_input_data_to_static_shapes(
        tensor_dict=transform_data_fn(tensor_dict),
...
...
@@ -414,8 +483,6 @@ def create_train_input_fn(train_config, train_input_config,
def create_eval_input_fn(eval_config, eval_input_config, model_config):
  """Creates an eval `input` function for `Estimator`.

-  # TODO(ronnyvotel,rathodv): Allow batch sizes of more than 1 for eval.
-
  Args:
    eval_config: An eval_pb2.EvalConfig.
    eval_input_config: An input_reader_pb2.InputReader.
...
...
@@ -497,7 +564,7 @@ def create_eval_input_fn(eval_config, eval_input_config, model_config):
      return (_get_features_dict(tensor_dict), _get_labels_dict(tensor_dict))

    dataset = INPUT_BUILDER_UTIL_MAP['dataset_build'](
        eval_input_config,
-        batch_size=1,  # Currently only support batch size of 1 for eval.
+        batch_size=params['batch_size'] if params else eval_config.batch_size,
        transform_input_data_fn=transform_and_pad_input_data_fn)
    return dataset
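With the hard-coded `batch_size=1` gone, eval batch size is driven by the config (or by `params` under TPUEstimator). A sketch, with a hypothetical pipeline path:

```python
from object_detection import inputs
from object_detection.utils import config_util

configs = config_util.get_configs_from_pipeline_file('pipeline.config')
configs['eval_config'].batch_size = 8  # now respected by the eval input fn
eval_input_fn = inputs.create_eval_input_fn(
    configs['eval_config'], configs['eval_input_configs'][0],
    configs['model'])
```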
...
...
research/object_detection/inputs_test.py
View file @ 27b4acd4
...
...
@@ -20,6 +20,7 @@ from __future__ import print_function
import functools
import os
+from absl.testing import parameterized
import numpy as np
import tensorflow as tf
...
...
@@ -28,6 +29,7 @@ from object_detection import inputs
from object_detection.core import preprocessor
from object_detection.core import standard_fields as fields
from object_detection.utils import config_util
+from object_detection.utils import test_case

FLAGS = tf.flags.FLAGS
...
...
@@ -41,11 +43,13 @@ def _get_configs_for_model(model_name):
  data_path = os.path.join(tf.resource_loader.get_data_files_path(),
                           'test_data/pets_examples.record')
  configs = config_util.get_configs_from_pipeline_file(fname)
+  override_dict = {
+      'train_input_path': data_path,
+      'eval_input_path': data_path,
+      'label_map_path': label_map_path
+  }
  return config_util.merge_external_params_with_configs(
-      configs,
-      train_input_path=data_path,
-      eval_input_path=data_path,
-      label_map_path=label_map_path)
+      configs, kwargs_dict=override_dict)


def _make_initializable_iterator(dataset):
...
...
@@ -62,7 +66,7 @@ def _make_initializable_iterator(dataset):
  return iterator


-class InputsTest(tf.test.TestCase):
+class InputsTest(test_case.TestCase, parameterized.TestCase):

  def test_faster_rcnn_resnet50_train_input(self):
    """Tests the training input function for FasterRcnnResnet50."""
...
...
@@ -89,52 +93,71 @@ class InputsTest(tf.test.TestCase):
        labels[fields.InputDataFields.groundtruth_classes].shape.as_list())
    self.assertEqual(tf.float32,
                     labels[fields.InputDataFields.groundtruth_classes].dtype)
+    self.assertAllEqual(
+        [1, 100, model_config.faster_rcnn.num_classes],
+        labels[fields.InputDataFields.groundtruth_confidences].shape.as_list())
+    self.assertEqual(
+        tf.float32,
+        labels[fields.InputDataFields.groundtruth_confidences].dtype)
    self.assertAllEqual(
        [1, 100],
        labels[fields.InputDataFields.groundtruth_weights].shape.as_list())
    self.assertEqual(tf.float32,
                     labels[fields.InputDataFields.groundtruth_weights].dtype)

-  def test_faster_rcnn_resnet50_eval_input(self):
+  @parameterized.parameters(
+      {'eval_batch_size': 1},
+      {'eval_batch_size': 8}
+  )
+  def test_faster_rcnn_resnet50_eval_input(self, eval_batch_size=1):
    """Tests the eval input function for FasterRcnnResnet50."""
    configs = _get_configs_for_model('faster_rcnn_resnet50_pets')
    model_config = configs['model']
    model_config.faster_rcnn.num_classes = 37
+    eval_config = configs['eval_config']
+    eval_config.batch_size = eval_batch_size
    eval_input_fn = inputs.create_eval_input_fn(
-        configs['eval_config'], configs['eval_input_config'], model_config)
+        eval_config, configs['eval_input_configs'][0], model_config)
    features, labels = _make_initializable_iterator(eval_input_fn()).get_next()
-    self.assertAllEqual([1, None, None, 3],
+    self.assertAllEqual([eval_batch_size, None, None, 3],
                        features[fields.InputDataFields.image].shape.as_list())
    self.assertEqual(tf.float32, features[fields.InputDataFields.image].dtype)
    self.assertAllEqual(
-        [1, None, None, 3],
+        [eval_batch_size, None, None, 3],
        features[fields.InputDataFields.original_image].shape.as_list())
    self.assertEqual(tf.uint8,
                     features[fields.InputDataFields.original_image].dtype)
-    self.assertAllEqual([1], features[inputs.HASH_KEY].shape.as_list())
+    self.assertAllEqual([eval_batch_size],
+                        features[inputs.HASH_KEY].shape.as_list())
    self.assertEqual(tf.int32, features[inputs.HASH_KEY].dtype)
    self.assertAllEqual(
-        [1, 100, 4],
+        [eval_batch_size, 100, 4],
        labels[fields.InputDataFields.groundtruth_boxes].shape.as_list())
    self.assertEqual(tf.float32,
                     labels[fields.InputDataFields.groundtruth_boxes].dtype)
    self.assertAllEqual(
-        [1, 100, model_config.faster_rcnn.num_classes],
+        [eval_batch_size, 100, model_config.faster_rcnn.num_classes],
        labels[fields.InputDataFields.groundtruth_classes].shape.as_list())
    self.assertEqual(tf.float32,
                     labels[fields.InputDataFields.groundtruth_classes].dtype)
    self.assertAllEqual(
-        [1, 100],
+        [eval_batch_size, 100, model_config.faster_rcnn.num_classes],
        labels[fields.InputDataFields.groundtruth_confidences].shape.as_list())
    self.assertEqual(
        tf.float32,
        labels[fields.InputDataFields.groundtruth_confidences].dtype)
+    self.assertAllEqual(
+        [eval_batch_size, 100],
+        labels[fields.InputDataFields.groundtruth_area].shape.as_list())
    self.assertEqual(tf.float32,
                     labels[fields.InputDataFields.groundtruth_area].dtype)
    self.assertAllEqual(
-        [1, 100],
+        [eval_batch_size, 100],
        labels[fields.InputDataFields.groundtruth_is_crowd].shape.as_list())
    self.assertEqual(
        tf.bool, labels[fields.InputDataFields.groundtruth_is_crowd].dtype)
    self.assertAllEqual(
-        [1, 100],
+        [eval_batch_size, 100],
        labels[fields.InputDataFields.groundtruth_difficult].shape.as_list())
    self.assertEqual(
        tf.int32, labels[fields.InputDataFields.groundtruth_difficult].dtype)
...
...
@@ -170,52 +193,73 @@ class InputsTest(tf.test.TestCase):
        labels[fields.InputDataFields.groundtruth_classes].shape.as_list())
    self.assertEqual(tf.float32,
                     labels[fields.InputDataFields.groundtruth_classes].dtype)
+    self.assertAllEqual(
+        [batch_size, 100, model_config.ssd.num_classes],
+        labels[fields.InputDataFields.groundtruth_confidences].shape.as_list())
+    self.assertEqual(
+        tf.float32,
+        labels[fields.InputDataFields.groundtruth_confidences].dtype)
    self.assertAllEqual(
        [batch_size, 100],
        labels[fields.InputDataFields.groundtruth_weights].shape.as_list())
    self.assertEqual(tf.float32,
                     labels[fields.InputDataFields.groundtruth_weights].dtype)

-  def test_ssd_inceptionV2_eval_input(self):
+  @parameterized.parameters(
+      {'eval_batch_size': 1},
+      {'eval_batch_size': 8}
+  )
+  def test_ssd_inceptionV2_eval_input(self, eval_batch_size=1):
    """Tests the eval input function for SSDInceptionV2."""
    configs = _get_configs_for_model('ssd_inception_v2_pets')
    model_config = configs['model']
    model_config.ssd.num_classes = 37
+    eval_config = configs['eval_config']
+    eval_config.batch_size = eval_batch_size
    eval_input_fn = inputs.create_eval_input_fn(
-        configs['eval_config'], configs['eval_input_config'], model_config)
+        eval_config, configs['eval_input_configs'][0], model_config)
    features, labels = _make_initializable_iterator(eval_input_fn()).get_next()
-    self.assertAllEqual([1, 300, 300, 3],
+    self.assertAllEqual([eval_batch_size, 300, 300, 3],
                        features[fields.InputDataFields.image].shape.as_list())
    self.assertEqual(tf.float32, features[fields.InputDataFields.image].dtype)
    self.assertAllEqual(
-        [1, None, None, 3],
+        [eval_batch_size, 300, 300, 3],
        features[fields.InputDataFields.original_image].shape.as_list())
    self.assertEqual(tf.uint8,
                     features[fields.InputDataFields.original_image].dtype)
-    self.assertAllEqual([1], features[inputs.HASH_KEY].shape.as_list())
+    self.assertAllEqual([eval_batch_size],
+                        features[inputs.HASH_KEY].shape.as_list())
    self.assertEqual(tf.int32, features[inputs.HASH_KEY].dtype)
    self.assertAllEqual(
-        [1, 100, 4],
+        [eval_batch_size, 100, 4],
        labels[fields.InputDataFields.groundtruth_boxes].shape.as_list())
    self.assertEqual(tf.float32,
                     labels[fields.InputDataFields.groundtruth_boxes].dtype)
    self.assertAllEqual(
-        [1, 100, model_config.ssd.num_classes],
+        [eval_batch_size, 100, model_config.ssd.num_classes],
        labels[fields.InputDataFields.groundtruth_classes].shape.as_list())
    self.assertEqual(tf.float32,
                     labels[fields.InputDataFields.groundtruth_classes].dtype)
    self.assertAllEqual(
-        [1, 100],
+        [eval_batch_size, 100, model_config.ssd.num_classes],
        labels[fields.InputDataFields.groundtruth_confidences].shape.as_list())
    self.assertEqual(
        tf.float32,
        labels[fields.InputDataFields.groundtruth_confidences].dtype)
+    self.assertAllEqual(
+        [eval_batch_size, 100],
+        labels[fields.InputDataFields.groundtruth_area].shape.as_list())
    self.assertEqual(tf.float32,
                     labels[fields.InputDataFields.groundtruth_area].dtype)
    self.assertAllEqual(
-        [1, 100],
+        [eval_batch_size, 100],
        labels[fields.InputDataFields.groundtruth_is_crowd].shape.as_list())
    self.assertEqual(
        tf.bool, labels[fields.InputDataFields.groundtruth_is_crowd].dtype)
    self.assertAllEqual(
-        [1, 100],
+        [eval_batch_size, 100],
        labels[fields.InputDataFields.groundtruth_difficult].shape.as_list())
    self.assertEqual(
        tf.int32, labels[fields.InputDataFields.groundtruth_difficult].dtype)
...
...
@@ -225,7 +269,7 @@ class InputsTest(tf.test.TestCase):
    configs = _get_configs_for_model('ssd_inception_v2_pets')
    predict_input_fn = inputs.create_predict_input_fn(
        model_config=configs['model'],
-        predict_input_config=configs['eval_input_config'])
+        predict_input_config=configs['eval_input_configs'][0])
    serving_input_receiver = predict_input_fn()

    image = serving_input_receiver.features[fields.InputDataFields.image]
...
...
@@ -238,10 +282,10 @@ class InputsTest(tf.test.TestCase):
  def test_predict_input_with_additional_channels(self):
    """Tests the predict input function with additional channels."""
    configs = _get_configs_for_model('ssd_inception_v2_pets')
-    configs['eval_input_config'].num_additional_channels = 2
+    configs['eval_input_configs'][0].num_additional_channels = 2
    predict_input_fn = inputs.create_predict_input_fn(
        model_config=configs['model'],
-        predict_input_config=configs['eval_input_config'])
+        predict_input_config=configs['eval_input_configs'][0])
    serving_input_receiver = predict_input_fn()

    image = serving_input_receiver.features[fields.InputDataFields.image]
...
...
@@ -291,7 +335,7 @@ class InputsTest(tf.test.TestCase):
    configs['model'].ssd.num_classes = 37
    eval_input_fn = inputs.create_eval_input_fn(
        eval_config=configs['train_config'],  # Expecting `EvalConfig`.
-        eval_input_config=configs['eval_input_config'],
+        eval_input_config=configs['eval_input_configs'][0],
        model_config=configs['model'])
    with self.assertRaises(TypeError):
      eval_input_fn()
...
...
@@ -313,13 +357,45 @@ class InputsTest(tf.test.TestCase):
    configs['model'].ssd.num_classes = 37
    eval_input_fn = inputs.create_eval_input_fn(
        eval_config=configs['eval_config'],
-        eval_input_config=configs['eval_input_config'],
+        eval_input_config=configs['eval_input_configs'][0],
        model_config=configs['eval_config'])  # Expecting `DetectionModel`.
    with self.assertRaises(TypeError):
      eval_input_fn()

+  def test_output_equal_in_replace_empty_string_with_random_number(self):
+    string_placeholder = tf.placeholder(tf.string, shape=[])
+    replaced_string = inputs._replace_empty_string_with_random_number(
+        string_placeholder)
+
+    test_string = 'hello world'
+    feed_dict = {string_placeholder: test_string}
+
+    with self.test_session() as sess:
+      out_string = sess.run(replaced_string, feed_dict=feed_dict)
+
+    self.assertEqual(test_string, out_string)
+
+  def test_output_is_integer_in_replace_empty_string_with_random_number(self):
+    string_placeholder = tf.placeholder(tf.string, shape=[])
+    replaced_string = inputs._replace_empty_string_with_random_number(
+        string_placeholder)
+
+    empty_string = ''
+    feed_dict = {string_placeholder: empty_string}
+
+    tf.set_random_seed(0)
+
+    with self.test_session() as sess:
+      out_string = sess.run(replaced_string, feed_dict=feed_dict)
+
+    # Test whether out_string is a string which represents an integer.
+    int(out_string)  # throws an error if out_string is not castable to int.
+
+    self.assertEqual(out_string, '2798129067578209328')
+

-class DataAugmentationFnTest(tf.test.TestCase):
+class DataAugmentationFnTest(test_case.TestCase):

  def test_apply_image_and_box_augmentation(self):
    data_augmentation_options = [
...
...
@@ -352,6 +428,50 @@ class DataAugmentationFnTest(tf.test.TestCase):
        [[10, 10, 20, 20]]
    )

+  def test_apply_image_and_box_augmentation_with_scores(self):
+    data_augmentation_options = [
+        (preprocessor.resize_image, {
+            'new_height': 20,
+            'new_width': 20,
+            'method': tf.image.ResizeMethod.NEAREST_NEIGHBOR
+        }),
+        (preprocessor.scale_boxes_to_pixel_coordinates, {}),
+    ]
+    data_augmentation_fn = functools.partial(
+        inputs.augment_input_data,
+        data_augmentation_options=data_augmentation_options)
+    tensor_dict = {
+        fields.InputDataFields.image:
+            tf.constant(np.random.rand(10, 10, 3).astype(np.float32)),
+        fields.InputDataFields.groundtruth_boxes:
+            tf.constant(np.array([[.5, .5, 1., 1.]], np.float32)),
+        fields.InputDataFields.groundtruth_classes:
+            tf.constant(np.array([1.0], np.float32)),
+        fields.InputDataFields.groundtruth_confidences:
+            tf.constant(np.array([0.8], np.float32)),
+    }
+    augmented_tensor_dict = data_augmentation_fn(tensor_dict=tensor_dict)
+    with self.test_session() as sess:
+      augmented_tensor_dict_out = sess.run(augmented_tensor_dict)
+
+    self.assertAllEqual(
+        augmented_tensor_dict_out[fields.InputDataFields.image].shape,
+        [20, 20, 3]
+    )
+    self.assertAllClose(
+        augmented_tensor_dict_out[fields.InputDataFields.groundtruth_boxes],
+        [[10, 10, 20, 20]]
+    )
+    self.assertAllClose(
+        augmented_tensor_dict_out[fields.InputDataFields.groundtruth_classes],
+        [1.0]
+    )
+    self.assertAllClose(
+        augmented_tensor_dict_out[
+            fields.InputDataFields.groundtruth_confidences],
+        [0.8]
+    )
+
  def test_include_masks_in_data_augmentation(self):
    data_augmentation_options = [
        (preprocessor.resize_image, {
...
...
@@ -425,7 +545,7 @@ def _fake_image_resizer_fn(image, mask):
  return (image, mask, tf.shape(image))


-class DataTransformationFnTest(tf.test.TestCase):
+class DataTransformationFnTest(test_case.TestCase):

  def test_combine_additional_channels_if_present(self):
    image = np.random.rand(4, 4, 3).astype(np.float32)
...
...
@@ -476,6 +596,9 @@ class DataTransformationFnTest(tf.test.TestCase):
    self.assertAllClose(
        transformed_inputs[fields.InputDataFields.groundtruth_classes],
        [[0, 0, 1], [1, 0, 0]])
+    self.assertAllClose(
+        transformed_inputs[fields.InputDataFields.groundtruth_confidences],
+        [[0, 0, 1], [1, 0, 0]])

  def test_returns_correct_merged_boxes(self):
    tensor_dict = {
...
...
@@ -504,6 +627,9 @@ class DataTransformationFnTest(tf.test.TestCase):
    self.assertAllClose(
        transformed_inputs[fields.InputDataFields.groundtruth_classes],
        [[1, 0, 1]])
+    self.assertAllClose(
+        transformed_inputs[fields.InputDataFields.groundtruth_confidences],
+        [[1, 0, 1]])

  def test_returns_resized_masks(self):
    tensor_dict = {
...
...
@@ -512,8 +638,11 @@ class DataTransformationFnTest(tf.test.TestCase):
        fields.InputDataFields.groundtruth_instance_masks:
            tf.constant(np.random.rand(2, 4, 4).astype(np.float32)),
        fields.InputDataFields.groundtruth_classes:
-            tf.constant(np.array([3, 1], np.int32))
+            tf.constant(np.array([3, 1], np.int32)),
+        fields.InputDataFields.original_image_spatial_shape:
+            tf.constant(np.array([4, 4], np.int32))
    }

    def fake_image_resizer_fn(image, masks=None):
      resized_image = tf.image.resize_images(image, [8, 8])
      results = [resized_image]
...
...
@@ -538,7 +667,9 @@ class DataTransformationFnTest(tf.test.TestCase):
    self.assertAllEqual(transformed_inputs[
        fields.InputDataFields.original_image].dtype, tf.uint8)
-    self.assertAllEqual(transformed_inputs[
-        fields.InputDataFields.original_image].shape, [4, 4, 3])
+    self.assertAllEqual(transformed_inputs[
+        fields.InputDataFields.original_image_spatial_shape], [4, 4])
+    self.assertAllEqual(transformed_inputs[
+        fields.InputDataFields.original_image].shape, [8, 8, 3])
    self.assertAllEqual(transformed_inputs[
        fields.InputDataFields.groundtruth_instance_masks].shape, [2, 8, 8])
...
...
@@ -550,6 +681,7 @@ class DataTransformationFnTest(tf.test.TestCase):
        fields.InputDataFields.groundtruth_classes:
            tf.constant(np.array([3, 1], np.int32))
    }

    def fake_model_preprocessor_fn(image):
      return (image / 255., tf.expand_dims(tf.shape(image)[1:], axis=0))
...
...
@@ -577,6 +709,7 @@ class DataTransformationFnTest(tf.test.TestCase):
        fields.InputDataFields.groundtruth_classes:
            tf.constant(np.array([3, 1], np.int32))
    }

    def add_one_data_augmentation_fn(tensor_dict):
      return {key: value + 1 for key, value in tensor_dict.items()}
...
...
@@ -605,8 +738,10 @@ class DataTransformationFnTest(tf.test.TestCase):
        fields.InputDataFields.groundtruth_classes:
            tf.constant(np.array([3, 1], np.int32))
    }

    def mul_two_model_preprocessor_fn(image):
      return (image * 2, tf.expand_dims(tf.shape(image)[1:], axis=0))

    def add_five_to_image_data_augmentation_fn(tensor_dict):
      tensor_dict[fields.InputDataFields.image] += 5
      return tensor_dict
...
...
@@ -626,7 +761,7 @@ class DataTransformationFnTest(tf.test.TestCase):
        (np_image + 5) * 2)


-class PadInputDataToStaticShapesFnTest(tf.test.TestCase):
+class PadInputDataToStaticShapesFnTest(test_case.TestCase):

  def test_pad_images_boxes_and_classes(self):
    input_tensor_dict = {
...
...
@@ -636,7 +771,10 @@ class PadInputDataToStaticShapesFnTest(tf.test.TestCase):
            tf.placeholder(tf.float32, [None, 4]),
        fields.InputDataFields.groundtruth_classes:
            tf.placeholder(tf.int32, [None, 3]),
-        fields.InputDataFields.true_image_shape:
-            tf.placeholder(tf.int32, [3]),
+        fields.InputDataFields.true_image_shape:
+            tf.placeholder(tf.int32, [3]),
+        fields.InputDataFields.original_image_spatial_shape:
+            tf.placeholder(tf.int32, [2])
    }
    padded_tensor_dict = inputs.pad_input_data_to_static_shapes(
        tensor_dict=input_tensor_dict,
...
...
@@ -650,6 +788,9 @@ class PadInputDataToStaticShapesFnTest(tf.test.TestCase):
    self.assertAllEqual(
        padded_tensor_dict[fields.InputDataFields.true_image_shape]
        .shape.as_list(), [3])
+    self.assertAllEqual(
+        padded_tensor_dict[fields.InputDataFields.original_image_spatial_shape]
+        .shape.as_list(), [2])
    self.assertAllEqual(
        padded_tensor_dict[fields.InputDataFields.groundtruth_boxes]
        .shape.as_list(), [3, 4])
...
...
research/object_detection/legacy/eval.py
View file @ 27b4acd4
...
...
@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
-
r"""Evaluation executable for detection models.

This executable is used to evaluate DetectionModels. There are two ways of
...
...
@@ -54,29 +53,30 @@ from object_detection.legacy import evaluator
from object_detection.utils import config_util
from object_detection.utils import label_map_util

+tf.logging.set_verbosity(tf.logging.INFO)
+
flags = tf.app.flags
flags.DEFINE_boolean('eval_training_data', False,
                     'If training data should be evaluated for this job.')
-flags.DEFINE_string('checkpoint_dir', '',
-                    'Directory containing checkpoints to evaluate, typically '
-                    'set to `train_dir` used in the training job.')
-flags.DEFINE_string('eval_dir', '', 'Directory to write eval summaries to.')
-flags.DEFINE_string('pipeline_config_path', '',
-                    'Path to a pipeline_pb2.TrainEvalPipelineConfig config '
-                    'file. If provided, other configs are ignored')
+flags.DEFINE_string(
+    'checkpoint_dir', '',
+    'Directory containing checkpoints to evaluate, typically '
+    'set to `train_dir` used in the training job.')
+flags.DEFINE_string('eval_dir', '', 'Directory to write eval summaries to.')
+flags.DEFINE_string(
+    'pipeline_config_path', '',
+    'Path to a pipeline_pb2.TrainEvalPipelineConfig config '
+    'file. If provided, other configs are ignored')
flags.DEFINE_string('eval_config_path', '',
                    'Path to an eval_pb2.EvalConfig config file.')
flags.DEFINE_string('input_config_path', '',
                    'Path to an input_reader_pb2.InputReader config file.')
flags.DEFINE_string('model_config_path', '',
                    'Path to a model_pb2.DetectionModel config file.')
-flags.DEFINE_boolean('run_once', False, 'Option to only run a single pass of '
-                     'evaluation. Overrides the `max_evals` parameter in the '
-                     'provided config.')
+flags.DEFINE_boolean(
+    'run_once', False, 'Option to only run a single pass of '
+    'evaluation. Overrides the `max_evals` parameter in the '
+    'provided config.')

FLAGS = flags.FLAGS
...
...
@@ -88,9 +88,10 @@ def main(unused_argv):
  if FLAGS.pipeline_config_path:
    configs = config_util.get_configs_from_pipeline_file(
        FLAGS.pipeline_config_path)
-    tf.gfile.Copy(FLAGS.pipeline_config_path,
-                  os.path.join(FLAGS.eval_dir, 'pipeline.config'),
-                  overwrite=True)
+    tf.gfile.Copy(
+        FLAGS.pipeline_config_path,
+        os.path.join(FLAGS.eval_dir, 'pipeline.config'),
+        overwrite=True)
  else:
    configs = config_util.get_configs_from_multiple_files(
        model_config_path=FLAGS.model_config_path,
...
...
@@ -99,9 +100,7 @@ def main(unused_argv):
    for name, config in [('model.config', FLAGS.model_config_path),
                         ('eval.config', FLAGS.eval_config_path),
                         ('input.config', FLAGS.input_config_path)]:
-      tf.gfile.Copy(config,
-                    os.path.join(FLAGS.eval_dir, name),
-                    overwrite=True)
+      tf.gfile.Copy(config, os.path.join(FLAGS.eval_dir, name),
+                    overwrite=True)

  model_config = configs['model']
  eval_config = configs['eval_config']
...
...
@@ -110,9 +109,7 @@ def main(unused_argv):
    input_config = configs['train_input_config']

  model_fn = functools.partial(
-      model_builder.build,
-      model_config=model_config,
-      is_training=False)
+      model_builder.build, model_config=model_config, is_training=False)

  def get_next(config):
    return dataset_builder.make_initializable_iterator(
...
...
@@ -120,10 +117,8 @@ def main(unused_argv):
  create_input_dict_fn = functools.partial(get_next, input_config)

-  label_map = label_map_util.load_labelmap(input_config.label_map_path)
-  max_num_classes = max([item.id for item in label_map.item])
-  categories = label_map_util.convert_label_map_to_categories(
-      label_map, max_num_classes)
+  categories = label_map_util.create_categories_from_labelmap(
+      input_config.label_map_path)

  if FLAGS.run_once:
    eval_config.max_evals = 1
...
...
research/object_detection/legacy/evaluator.py
View file @ 27b4acd4
...
...
@@ -273,6 +273,7 @@ def evaluate(create_input_dict_fn, create_model_fn, eval_config, categories,
      master=eval_config.eval_master,
      save_graph=eval_config.save_graph,
      save_graph_dir=(eval_dir if eval_config.save_graph else ''),
-      losses_dict=losses_dict)
+      losses_dict=losses_dict,
+      eval_export_path=eval_config.export_path)

  return metrics
research/object_detection/matchers/argmax_matcher.py
View file @ 27b4acd4
...
...
@@ -99,17 +99,19 @@ class ArgMaxMatcher(matcher.Matcher):
      if self._unmatched_threshold == self._matched_threshold:
        raise ValueError('When negatives are in between matched and '
                         'unmatched thresholds, these cannot be of equal '
-                        'value. matched: %s, unmatched: %s',
-                        self._matched_threshold, self._unmatched_threshold)
+                        'value. matched: {}, unmatched: {}'.format(
+                            self._matched_threshold,
+                            self._unmatched_threshold))

    self._force_match_for_each_row = force_match_for_each_row
    self._negatives_lower_than_unmatched = negatives_lower_than_unmatched

-  def _match(self, similarity_matrix):
+  def _match(self, similarity_matrix, valid_rows):
    """Tries to match each column of the similarity matrix to a row.

    Args:
      similarity_matrix: tensor of shape [N, M] representing any similarity
        metric.
+      valid_rows: a boolean tensor of shape [N] indicating valid rows.

    Returns:
      Match object with corresponding matches for each of M columns.
...
...
@@ -167,8 +169,10 @@ class ArgMaxMatcher(matcher.Matcher):
            similarity_matrix)
        force_match_column_ids = tf.argmax(similarity_matrix, 1,
                                           output_type=tf.int32)
-        force_match_column_indicators = tf.one_hot(
-            force_match_column_ids, depth=similarity_matrix_shape[1])
+        force_match_column_indicators = (
+            tf.one_hot(force_match_column_ids,
+                       depth=similarity_matrix_shape[1]) *
+            tf.cast(tf.expand_dims(valid_rows, axis=-1), dtype=tf.float32))
        force_match_row_ids = tf.argmax(force_match_column_indicators, 0,
                                        output_type=tf.int32)
        force_match_column_mask = tf.cast(
...
research/object_detection/matchers/argmax_matcher_test.py
View file @ 27b4acd4
...
...
@@ -182,6 +182,34 @@ class ArgMaxMatcherTest(test_case.TestCase):
    self.assertAllEqual(np.nonzero(res_unmatched_cols)[0],
                        expected_unmatched_cols)

+  def test_return_correct_matches_using_force_match_padded_groundtruth(self):
+    def graph_fn(similarity, valid_rows):
+      matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=3.,
+                                             unmatched_threshold=2.,
+                                             force_match_for_each_row=True)
+      match = matcher.match(similarity, valid_rows)
+      matched_cols = match.matched_column_indicator()
+      unmatched_cols = match.unmatched_column_indicator()
+      match_results = match.match_results
+      return (matched_cols, unmatched_cols, match_results)
+
+    similarity = np.array([[1, 1, 1, 3, 1],
+                           [-1, 0, -2, -2, -1],
+                           [0, 0, 0, 0, 0],
+                           [3, 0, -1, 2, 0],
+                           [0, 0, 0, 0, 0]], dtype=np.float32)
+    valid_rows = np.array([True, True, False, True, False])
+    expected_matched_cols = np.array([0, 1, 3])
+    expected_matched_rows = np.array([3, 1, 0])
+    expected_unmatched_cols = np.array([2, 4])  # col 2 has too high max val
+
+    (res_matched_cols, res_unmatched_cols,
+     match_results) = self.execute(graph_fn, [similarity, valid_rows])
+    self.assertAllEqual(match_results[res_matched_cols], expected_matched_rows)
+    self.assertAllEqual(np.nonzero(res_matched_cols)[0], expected_matched_cols)
+    self.assertAllEqual(np.nonzero(res_unmatched_cols)[0],
+                        expected_unmatched_cols)
+
  def test_valid_arguments_corner_case(self):
    argmax_matcher.ArgMaxMatcher(matched_threshold=1,
                                 unmatched_threshold=1)
...
...
research/object_detection/matchers/bipartite_matcher.py
View file @ 27b4acd4
...
...
@@ -35,7 +35,7 @@ class GreedyBipartiteMatcher(matcher.Matcher):
    super(GreedyBipartiteMatcher, self).__init__(
        use_matmul_gather=use_matmul_gather)

-  def _match(self, similarity_matrix, num_valid_rows=-1):
+  def _match(self, similarity_matrix, valid_rows):
    """Bipartite matches a collection of rows and columns. A greedy bi-partite.

    TODO(rathodv): Add num_valid_columns options to match only that many columns
...
...
@@ -44,21 +44,27 @@ class GreedyBipartiteMatcher(matcher.Matcher):
    Args:
      similarity_matrix: Float tensor of shape [N, M] with pairwise similarity
        where higher values mean more similar.
-      num_valid_rows: A scalar or a 1-D tensor with one element describing the
-        number of valid rows of similarity_matrix to consider for the bipartite
-        matching. If set to be negative, then all rows from similarity_matrix
-        are used.
+      valid_rows: A boolean tensor of shape [N] indicating the rows that are
+        valid.

    Returns:
      match_results: int32 tensor of shape [M] with match_results[i]=-1
        meaning that column i is not matched and otherwise that it is matched to
        row match_results[i].
    """
+    valid_row_sim_matrix = tf.gather(similarity_matrix,
+                                     tf.squeeze(tf.where(valid_rows), axis=-1))
+    invalid_row_sim_matrix = tf.gather(
+        similarity_matrix,
+        tf.squeeze(tf.where(tf.logical_not(valid_rows)), axis=-1))
+    similarity_matrix = tf.concat(
+        [valid_row_sim_matrix, invalid_row_sim_matrix], axis=0)
    # Convert similarity matrix to distance matrix as tf.image.bipartite tries
    # to find minimum distance matches.
    distance_matrix = -1 * similarity_matrix
+    num_valid_rows = tf.reduce_sum(tf.to_float(valid_rows))
    _, match_results = image_ops.bipartite_match(
-        distance_matrix, num_valid_rows)
+        distance_matrix, num_valid_rows=num_valid_rows)
    match_results = tf.reshape(match_results, [-1])
    match_results = tf.cast(match_results, tf.int32)
    return match_results
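The reordering above exists because `bipartite_match` treats the first `num_valid_rows` rows as the valid set. A sketch of the shuffle on a 2x3 matrix with only the bottom row valid:

```python
import tensorflow as tf

similarity = tf.constant([[0.15, 0.2, 0.3],
                          [0.50, 0.1, 0.8]])
valid_rows = tf.constant([False, True])
valid = tf.gather(similarity, tf.squeeze(tf.where(valid_rows), axis=-1))
invalid = tf.gather(
    similarity, tf.squeeze(tf.where(tf.logical_not(valid_rows)), axis=-1))
reordered = tf.concat([valid, invalid], axis=0)   # valid row moved to top
num_valid_rows = tf.reduce_sum(tf.to_float(valid_rows))  # 1.0
```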
research/object_detection/matchers/bipartite_matcher_test.py
View file @ 27b4acd4
...
...
@@ -24,44 +24,54 @@ class GreedyBipartiteMatcherTest(tf.test.TestCase):
  def test_get_expected_matches_when_all_rows_are_valid(self):
    similarity_matrix = tf.constant([[0.50, 0.1, 0.8], [0.15, 0.2, 0.3]])
-    num_valid_rows = 2
+    valid_rows = tf.ones([2], dtype=tf.bool)
    expected_match_results = [-1, 1, 0]

    matcher = bipartite_matcher.GreedyBipartiteMatcher()
-    match = matcher.match(similarity_matrix, num_valid_rows=num_valid_rows)
+    match = matcher.match(similarity_matrix, valid_rows=valid_rows)
    with self.test_session() as sess:
      match_results_out = sess.run(match._match_results)
      self.assertAllEqual(match_results_out, expected_match_results)

-  def test_get_expected_matches_with_valid_rows_set_to_minus_one(self):
+  def test_get_expected_matches_with_all_rows_be_default(self):
    similarity_matrix = tf.constant([[0.50, 0.1, 0.8], [0.15, 0.2, 0.3]])
-    num_valid_rows = -1
    expected_match_results = [-1, 1, 0]

    matcher = bipartite_matcher.GreedyBipartiteMatcher()
-    match = matcher.match(similarity_matrix, num_valid_rows=num_valid_rows)
+    match = matcher.match(similarity_matrix)
    with self.test_session() as sess:
      match_results_out = sess.run(match._match_results)
      self.assertAllEqual(match_results_out, expected_match_results)

  def test_get_no_matches_with_zero_valid_rows(self):
    similarity_matrix = tf.constant([[0.50, 0.1, 0.8], [0.15, 0.2, 0.3]])
-    num_valid_rows = 0
+    valid_rows = tf.zeros([2], dtype=tf.bool)
    expected_match_results = [-1, -1, -1]

    matcher = bipartite_matcher.GreedyBipartiteMatcher()
-    match = matcher.match(similarity_matrix, num_valid_rows=num_valid_rows)
+    match = matcher.match(similarity_matrix, valid_rows)
    with self.test_session() as sess:
      match_results_out = sess.run(match._match_results)
      self.assertAllEqual(match_results_out, expected_match_results)

  def test_get_expected_matches_with_only_one_valid_row(self):
    similarity_matrix = tf.constant([[0.50, 0.1, 0.8], [0.15, 0.2, 0.3]])
-    num_valid_rows = 1
+    valid_rows = tf.constant([True, False], dtype=tf.bool)
    expected_match_results = [-1, -1, 0]

    matcher = bipartite_matcher.GreedyBipartiteMatcher()
-    match = matcher.match(similarity_matrix, num_valid_rows=num_valid_rows)
+    match = matcher.match(similarity_matrix, valid_rows)
    with self.test_session() as sess:
      match_results_out = sess.run(match._match_results)
      self.assertAllEqual(match_results_out, expected_match_results)

+  def test_get_expected_matches_with_only_one_valid_row_at_bottom(self):
+    similarity_matrix = tf.constant([[0.15, 0.2, 0.3], [0.50, 0.1, 0.8]])
+    valid_rows = tf.constant([False, True], dtype=tf.bool)
+    expected_match_results = [-1, -1, 0]
+
+    matcher = bipartite_matcher.GreedyBipartiteMatcher()
+    match = matcher.match(similarity_matrix, valid_rows)
+    with self.test_session() as sess:
+      match_results_out = sess.run(match._match_results)
+      self.assertAllEqual(match_results_out, expected_match_results)
...
...
research/object_detection/meta_architectures/faster_rcnn_meta_arch.py
View file @ 27b4acd4
...
...
@@ -103,7 +103,6 @@ from object_detection.core import box_list_ops
from object_detection.core import box_predictor
from object_detection.core import losses
from object_detection.core import model
-from object_detection.core import post_processing
from object_detection.core import standard_fields as fields
from object_detection.core import target_assigner
from object_detection.utils import ops
...
...
@@ -234,11 +233,11 @@ class FasterRCNNMetaArch(model.DetectionModel):
               first_stage_box_predictor_depth,
               first_stage_minibatch_size,
               first_stage_sampler,
-               first_stage_nms_score_threshold,
-               first_stage_nms_iou_threshold,
+               first_stage_non_max_suppression_fn,
               first_stage_max_proposals,
               first_stage_localization_loss_weight,
               first_stage_objectness_loss_weight,
+               crop_and_resize_fn,
               initial_crop_size,
               maxpool_kernel_size,
               maxpool_stride,
...
...
@@ -255,8 +254,9 @@ class FasterRCNNMetaArch(model.DetectionModel):
               hard_example_miner=None,
               parallel_iterations=16,
               add_summaries=True,
-               use_matmul_crop_and_resize=False,
-               clip_anchors_to_image=False):
+               clip_anchors_to_image=False,
+               use_static_shapes=False,
+               resize_masks=True):
    """FasterRCNNMetaArch Constructor.

    Args:
...
...
@@ -309,18 +309,22 @@ class FasterRCNNMetaArch(model.DetectionModel):
        to the loss function for any given image within the image batch and is
        only called "batch_size" due to terminology from the Faster R-CNN paper.
      first_stage_sampler: Sampler to use for first stage loss (RPN loss).
-      first_stage_nms_score_threshold: Score threshold for non max suppression
-        for the Region Proposal Network (RPN). This value is expected to be in
-        [0, 1] as it is applied directly after a softmax transformation. The
-        recommended value for Faster R-CNN is 0.
-      first_stage_nms_iou_threshold: The Intersection Over Union (IOU) threshold
-        for performing Non-Max Suppression (NMS) on the boxes predicted by the
-        Region Proposal Network (RPN).
+      first_stage_non_max_suppression_fn: batch_multiclass_non_max_suppression
+        callable that takes `boxes`, `scores` and optional `clip_window` (with
+        all other inputs already set) and returns a dictionary containing
+        tensors with keys: `detection_boxes`, `detection_scores`,
+        `detection_classes`, `num_detections`. This is used to perform non max
+        suppression on the boxes predicted by the Region Proposal Network
+        (RPN).
+        See `post_processing.batch_multiclass_non_max_suppression` for the type
+        and shape of these tensors.
      first_stage_max_proposals: Maximum number of boxes to retain after
        performing Non-Max Suppression (NMS) on the boxes predicted by the
        Region Proposal Network (RPN).
      first_stage_localization_loss_weight: A float
      first_stage_objectness_loss_weight: A float
+      crop_and_resize_fn: A differentiable resampler to use for cropping RPN
+        proposal features.
      initial_crop_size: A single integer indicating the output size
        (width and height are set to be the same) of the initial bilinear
        interpolation based cropping during ROI pooling.
...
...
@@ -367,12 +371,13 @@ class FasterRCNNMetaArch(model.DetectionModel):
        in parallel for calls to tf.map_fn.
      add_summaries: boolean (default: True) controlling whether summary ops
        should be added to tensorflow graph.
-      use_matmul_crop_and_resize: Force the use of matrix multiplication based
-        crop and resize instead of standard tf.image.crop_and_resize while
-        computing second stage input feature maps.
      clip_anchors_to_image: Normally, anchors generated for a given image size
        are pruned during training if they lie outside the image window. This
        option clips the anchors to be within the image instead of pruning.
+      use_static_shapes: If True, uses implementation of ops with static shape
+        guarantees.
+      resize_masks: Indicates whether the masks present in the groundtruth
+        should be resized in the model with `image_resizer_fn`

    Raises:
      ValueError: If `second_stage_batch_size` > `first_stage_max_proposals` at
...
...
@@ -384,9 +389,6 @@ class FasterRCNNMetaArch(model.DetectionModel):
    # in the future.
    super(FasterRCNNMetaArch, self).__init__(num_classes=num_classes)

-    if is_training and second_stage_batch_size > first_stage_max_proposals:
-      raise ValueError('second_stage_batch_size should be no greater than '
-                       'first_stage_max_proposals.')
    if not isinstance(first_stage_anchor_generator,
                      grid_anchor_generator.GridAnchorGenerator):
      raise ValueError('first_stage_anchor_generator must be of type '
...
...
@@ -394,6 +396,7 @@ class FasterRCNNMetaArch(model.DetectionModel):
    self._is_training = is_training
    self._image_resizer_fn = image_resizer_fn
+    self._resize_masks = resize_masks
    self._feature_extractor = feature_extractor
    self._number_of_stages = number_of_stages
...
...
@@ -425,9 +428,9 @@ class FasterRCNNMetaArch(model.DetectionModel):
            min_depth=0,
            max_depth=0))

-    self._first_stage_nms_score_threshold = first_stage_nms_score_threshold
-    self._first_stage_nms_iou_threshold = first_stage_nms_iou_threshold
+    self._first_stage_nms_fn = first_stage_non_max_suppression_fn
    self._first_stage_max_proposals = first_stage_max_proposals
+    self._use_static_shapes = use_static_shapes

    self._first_stage_localization_loss = (
        losses.WeightedSmoothL1LocalizationLoss())
...
...
@@ -437,6 +440,7 @@ class FasterRCNNMetaArch(model.DetectionModel):
    self._first_stage_obj_loss_weight = first_stage_objectness_loss_weight

    # Per-region cropping parameters
+    self._crop_and_resize_fn = crop_and_resize_fn
    self._initial_crop_size = initial_crop_size
    self._maxpool_kernel_size = maxpool_kernel_size
    self._maxpool_stride = maxpool_stride
...
...
@@ -458,7 +462,6 @@ class FasterRCNNMetaArch(model.DetectionModel):
    self._second_stage_cls_loss_weight = second_stage_classification_loss_weight
    self._second_stage_mask_loss_weight = (
        second_stage_mask_prediction_loss_weight)
-    self._use_matmul_crop_and_resize = use_matmul_crop_and_resize
    self._hard_example_miner = hard_example_miner
    self._parallel_iterations = parallel_iterations
...
...
@@ -673,9 +676,13 @@ class FasterRCNNMetaArch(model.DetectionModel):
    }

    if self._number_of_stages >= 2:
+      # If mixed-precision training on TPU is enabled, rpn_box_encodings and
+      # rpn_objectness_predictions_with_background are bfloat16 tensors.
+      # Considered prediction results, they need to be casted to float32
+      # tensors for correct postprocess_rpn computation in predict_second_stage.
      prediction_dict.update(self._predict_second_stage(
-          rpn_box_encodings,
-          rpn_objectness_predictions_with_background,
+          tf.to_float(rpn_box_encodings),
+          tf.to_float(rpn_objectness_predictions_with_background),
          rpn_features_to_crop,
          self._anchors.get(), image_shape, true_image_shapes))
...
...
@@ -719,7 +726,7 @@ class FasterRCNNMetaArch(model.DetectionModel):
        [batch_size, num_valid_anchors, 2] containing class
        predictions (logits) for each of the anchors.  Note that this
        tensor *includes* background class predictions (at class index 0).
-      rpn_features_to_crop: A 4-D float32 tensor with shape
+      rpn_features_to_crop: A 4-D float32 or bfloat16 tensor with shape
        [batch_size, height, width, depth] representing image features to crop
        using the proposal boxes predicted by the RPN.
      anchors: 2-D float tensor of shape
...
...
@@ -758,17 +765,22 @@ class FasterRCNNMetaArch(model.DetectionModel):
          boxes proposed by the RPN, thus enabling one to extract features and
          get box classification and prediction for externally selected areas
          of the image.
-        6) box_classifier_features: a 4-D float32 tensor representing the
-          features for each proposal.
+        6) box_classifier_features: a 4-D float32 or bfloat16 tensor
+          representing the features for each proposal.
    """
    image_shape_2d = self._image_batch_shape_2d(image_shape)
    proposal_boxes_normalized, _, num_proposals = self._postprocess_rpn(
        rpn_box_encodings, rpn_objectness_predictions_with_background,
        anchors, image_shape_2d, true_image_shapes)

+    # If mixed-precision training on TPU is enabled, the dtype of
+    # rpn_features_to_crop is bfloat16, otherwise it is float32. tf.cast is
+    # used to match the dtype of proposal_boxes_normalized to that of
+    # rpn_features_to_crop for further computation.
    flattened_proposal_feature_maps = (
        self._compute_second_stage_input_feature_maps(
-            rpn_features_to_crop, proposal_boxes_normalized))
+            rpn_features_to_crop,
+            tf.cast(proposal_boxes_normalized, rpn_features_to_crop.dtype)))

    box_classifier_features = (
        self._feature_extractor.extract_box_classifier_features(
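A sketch of the dtype alignment added above, with made-up shapes: proposal boxes stay float32 after NMS, so they are cast to whatever dtype the RPN feature map carries before cropping:

```python
import tensorflow as tf

features = tf.zeros([1, 14, 14, 256], dtype=tf.bfloat16)  # RPN feature map
boxes = tf.random_uniform([1, 8, 4], dtype=tf.float32)    # NMS output
boxes = tf.cast(boxes, features.dtype)  # safe to crop features with boxes now
```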
...
...
@@ -956,8 +968,11 @@ class FasterRCNNMetaArch(model.DetectionModel):
       image_shape: A 1-D tensor representing the input image shape.
     """
     image_shape = tf.shape(preprocessed_inputs)
-    rpn_features_to_crop, _ = self._feature_extractor.extract_proposal_features(
-        preprocessed_inputs,
-        scope=self.first_stage_feature_extractor_scope)
+    rpn_features_to_crop, self.endpoints = (
+        self._feature_extractor.extract_proposal_features(
+            preprocessed_inputs,
+            scope=self.first_stage_feature_extractor_scope))
     feature_map_shape = tf.shape(rpn_features_to_crop)
     anchors = box_list_ops.concatenate(
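The second change here stores the feature extractor's end-points dict on the model (`self.endpoints`), exposing intermediate activations. A hedged stand-in showing the `(features, end_points)` convention the slim-style extractors follow; the extractor body and the `conv1` key are illustrative only:

    import tensorflow as tf

    slim = tf.contrib.slim

    def extract_proposal_features(preprocessed_inputs, scope):
      """Toy extractor returning (features, end_points) like the real ones."""
      with tf.variable_scope(scope):
        net = slim.conv2d(preprocessed_inputs, 32, [3, 3], scope='conv1')
        end_points = {'conv1': net}
        return net, end_points

    images = tf.zeros([1, 64, 64, 3])
    features, endpoints = extract_proposal_features(
        images, 'FirstStageFeatureExtractor')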
@@ -965,12 +980,15 @@ class FasterRCNNMetaArch(model.DetectionModel):
             feature_map_shape[2])]))
     with slim.arg_scope(self._first_stage_box_predictor_arg_scope_fn()):
       kernel_size = self._first_stage_box_predictor_kernel_size
+      reuse = tf.get_variable_scope().reuse
       rpn_box_predictor_features = slim.conv2d(
           rpn_features_to_crop,
           self._first_stage_box_predictor_depth,
           kernel_size=[kernel_size, kernel_size],
           rate=self._first_stage_atrous_rate,
-          activation_fn=tf.nn.relu6)
+          activation_fn=tf.nn.relu6,
+          scope='Conv',
+          reuse=reuse)
     return (rpn_box_predictor_features, rpn_features_to_crop, anchors,
             image_shape)
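The new `reuse`/`scope` arguments matter when this RPN head is built more than once in the same graph: without them, a second call would try to re-create the conv variables and fail. A minimal sketch of the sharing behavior (names are illustrative):

    import tensorflow as tf

    slim = tf.contrib.slim

    def rpn_head(features):
      # Propagate the enclosing scope's reuse flag, as the diff does.
      reuse = tf.get_variable_scope().reuse
      return slim.conv2d(features, 16, [3, 3], activation_fn=tf.nn.relu6,
                         scope='Conv', reuse=reuse)

    feats = tf.zeros([1, 32, 32, 8])
    out1 = rpn_head(feats)  # creates the 'Conv' variables
    with tf.variable_scope(tf.get_variable_scope(), reuse=True):
      out2 = rpn_head(feats)  # reuses the same 'Conv' variables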
@@ -1223,14 +1241,9 @@ class FasterRCNNMetaArch(model.DetectionModel):
         rpn_objectness_predictions_with_background_batch)[:, :, 1]
     clip_window = self._compute_clip_window(image_shapes)
     (proposal_boxes, proposal_scores, _, _, _,
-     num_proposals) = post_processing.batch_multiclass_non_max_suppression(
+     num_proposals) = self._first_stage_nms_fn(
         tf.expand_dims(proposal_boxes, axis=2),
-        tf.expand_dims(rpn_objectness_softmax_without_background, axis=2),
-        self._first_stage_nms_score_threshold,
-        self._first_stage_nms_iou_threshold,
-        self._first_stage_max_proposals,
-        self._first_stage_max_proposals,
+        tf.expand_dims(rpn_objectness_softmax_without_background, axis=2),
         clip_window=clip_window)
     if self._is_training:
       proposal_boxes = tf.stop_gradient(proposal_boxes)
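Replacing the direct `post_processing.batch_multiclass_non_max_suppression` call with an injected `self._first_stage_nms_fn` moves the score/IOU thresholds and size limits out of the call site and into the builder. A hedged sketch of how such a callable can be pre-bound; the threshold values are made up, and this assumes the object_detection package is importable:

    import functools

    from object_detection.core import post_processing

    # Bind the NMS hyperparameters once; call sites then pass only tensors.
    first_stage_nms_fn = functools.partial(
        post_processing.batch_multiclass_non_max_suppression,
        score_thresh=0.0,
        iou_thresh=0.7,
        max_size_per_class=300,
        max_total_size=300)

Call sites then invoke `first_stage_nms_fn(boxes, scores, clip_window=clip_window)`, exactly as in the hunk above.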
@@ -1377,16 +1390,19 @@ class FasterRCNNMetaArch(model.DetectionModel):
     groundtruth_masks_list = self._groundtruth_lists.get(
         fields.BoxListFields.masks)
-    if groundtruth_masks_list is not None:
+    # TODO(rathodv): Remove mask resizing once the legacy pipeline is deleted.
+    if groundtruth_masks_list is not None and self._resize_masks:
       resized_masks_list = []
       for mask in groundtruth_masks_list:
         _, resized_mask, _ = self._image_resizer_fn(
+            # Reuse the given `image_resizer_fn` to resize groundtruth masks.
+            # `mask` tensor for an image is of the shape [num_masks,
+            # image_height, image_width]. Below we create a dummy image of
+            # the shape [image_height, image_width, 1] to use with
+            # `image_resizer_fn`.
-            image=tf.zeros(tf.stack([tf.shape(mask)[1], tf.shape(mask)[2],
-                                     1])),
+            image=tf.zeros(tf.stack([tf.shape(mask)[1],
+                                     tf.shape(mask)[2], 1])),
             masks=mask)
         resized_masks_list.append(resized_mask)
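The dummy-image trick described in the new comment can be exercised standalone; `resize_fn` below is a hypothetical stand-in for the model's `image_resizer_fn`, which resizes an image and its masks together:

    import tensorflow as tf

    def resize_fn(image, masks=None):
      """Hypothetical image_resizer_fn: fixed 320x320 resize of image + masks."""
      resized_image = tf.image.resize_images(image, [320, 320])
      resized_masks = None
      if masks is not None:
        # masks: [num_masks, height, width] -> add/remove a channel axis so
        # the image resizer can be reused.
        resized_masks = tf.squeeze(
            tf.image.resize_images(tf.expand_dims(masks, 3), [320, 320]), 3)
      return resized_image, resized_masks, tf.shape(resized_image)

    mask = tf.ones([3, 100, 80])  # 3 groundtruth masks of size 100x80
    dummy_image = tf.zeros(tf.stack([tf.shape(mask)[1], tf.shape(mask)[2], 1]))
    _, resized_mask, _ = resize_fn(image=dummy_image, masks=mask)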
@@ -1443,11 +1459,16 @@ class FasterRCNNMetaArch(model.DetectionModel):
         tf.range(proposal_boxlist.num_boxes()) < num_valid_proposals,
         cls_weights > 0)
-    sampled_indices = self._second_stage_sampler.subsample(
+    selected_positions = self._second_stage_sampler.subsample(
         valid_indicator,
         self._second_stage_batch_size,
         positive_indicator)
-    return box_list_ops.boolean_mask(proposal_boxlist, sampled_indices)
+    return box_list_ops.boolean_mask(
+        proposal_boxlist,
+        selected_positions,
+        use_static_shapes=self._use_static_shapes,
+        indicator_sum=(self._second_stage_batch_size
+                       if self._use_static_shapes else None))

   def _compute_second_stage_input_feature_maps(self, features_to_crop,
                                                proposal_boxes_normalized):
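`use_static_shapes` and `indicator_sum` exist because data-dependent ops like `tf.boolean_mask` produce tensors of unknown static shape, which static-shape (e.g. TPU) compilation rejects; if the caller guarantees the indicator sums to a constant k, a fixed-size gather works instead. A toy sketch of that idea, not the library's implementation:

    import tensorflow as tf

    boxes = tf.constant([[0.0, 0.0, 1.0, 1.0],
                         [0.0, 0.0, 0.5, 0.5],
                         [0.5, 0.5, 1.0, 1.0]])
    indicator = tf.constant([True, False, True])
    k = 2  # guaranteed number of True entries (the sampler's batch size)

    # top_k over the 0/1 indicator yields exactly k indices, statically shaped.
    _, idx = tf.nn.top_k(tf.to_int32(indicator), k=k)
    selected = tf.gather(boxes, idx)  # static shape [2, 4]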
@@ -1467,35 +1488,10 @@ class FasterRCNNMetaArch(model.DetectionModel):
     Returns:
       A float32 tensor with shape [K, new_height, new_width, depth].
     """
-    def get_box_inds(proposals):
-      proposals_shape = proposals.get_shape().as_list()
-      if any(dim is None for dim in proposals_shape):
-        proposals_shape = tf.shape(proposals)
-      ones_mat = tf.ones(proposals_shape[:2], dtype=tf.int32)
-      multiplier = tf.expand_dims(
-          tf.range(start=0, limit=proposals_shape[0]), 1)
-      return tf.reshape(ones_mat * multiplier, [-1])
-
-    if self._use_matmul_crop_and_resize:
-      def _single_image_crop_and_resize(inputs):
-        single_image_features_to_crop, proposal_boxes_normalized = inputs
-        return ops.matmul_crop_and_resize(
-            tf.expand_dims(single_image_features_to_crop, 0),
-            proposal_boxes_normalized,
-            [self._initial_crop_size, self._initial_crop_size])
-
-      cropped_regions = self._flatten_first_two_dimensions(
-          shape_utils.static_or_dynamic_map_fn(
-              _single_image_crop_and_resize,
-              elems=[features_to_crop, proposal_boxes_normalized],
-              dtype=tf.float32,
-              parallel_iterations=self._parallel_iterations))
-    else:
-      cropped_regions = tf.image.crop_and_resize(
-          features_to_crop,
-          self._flatten_first_two_dimensions(proposal_boxes_normalized),
-          get_box_inds(proposal_boxes_normalized),
-          (self._initial_crop_size, self._initial_crop_size))
+    cropped_regions = self._flatten_first_two_dimensions(
+        self._crop_and_resize_fn(
+            features_to_crop, proposal_boxes_normalized,
+            [self._initial_crop_size, self._initial_crop_size]))
     return slim.max_pool2d(
         cropped_regions,
         [self._maxpool_kernel_size, self._maxpool_kernel_size],
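With `get_box_inds` and the matmul branch gone, the choice of implementation now lives behind `self._crop_and_resize_fn`, injected in the constructor. A hedged sketch of a native-TF candidate for that hook, reconstructing box indices the way the deleted helper did; the function name is ours, not the library's:

    import tensorflow as tf

    def native_crop_and_resize(image, boxes, crop_size):
      """image: [batch, H, W, C]; boxes: [batch, num_boxes, 4], normalized."""
      def get_box_inds(proposals):
        proposals_shape = tf.shape(proposals)
        ones_mat = tf.ones(proposals_shape[:2], dtype=tf.int32)
        multiplier = tf.expand_dims(tf.range(proposals_shape[0]), 1)
        return tf.reshape(ones_mat * multiplier, [-1])

      cropped = tf.image.crop_and_resize(
          image, tf.reshape(boxes, [-1, 4]), get_box_inds(boxes), crop_size)
      # Restore the [batch, num_boxes, ...] leading dims the caller flattens.
      final_shape = tf.concat([tf.shape(boxes)[:2], tf.shape(cropped)[1:]], 0)
      return tf.reshape(cropped, final_shape)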
@@ -1738,11 +1734,17 @@ class FasterRCNNMetaArch(model.DetectionModel):
       sampled_reg_indices = tf.multiply(batch_sampled_indices,
                                         batch_reg_weights)
+      losses_mask = None
+      if self.groundtruth_has_field(fields.InputDataFields.is_annotated):
+        losses_mask = tf.stack(self.groundtruth_lists(
+            fields.InputDataFields.is_annotated))
       localization_losses = self._first_stage_localization_loss(
-          rpn_box_encodings, batch_reg_targets, weights=sampled_reg_indices)
+          rpn_box_encodings, batch_reg_targets, weights=sampled_reg_indices,
+          losses_mask=losses_mask)
       objectness_losses = self._first_stage_objectness_loss(
           rpn_objectness_predictions_with_background,
-          batch_one_hot_targets, weights=batch_sampled_indices)
+          batch_one_hot_targets, weights=batch_sampled_indices,
+          losses_mask=losses_mask)
       localization_loss = tf.reduce_mean(
           tf.reduce_sum(localization_losses, axis=1) / normalizer)
       objectness_loss = tf.reduce_mean(
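The new `losses_mask` threads a per-image `is_annotated` flag into the loss terms. Its effect is just masking: images without annotations contribute zero loss. A toy illustration (shapes and values are illustrative):

    import tensorflow as tf

    per_image_loss = tf.constant([1.5, 2.0, 0.5])
    is_annotated = tf.constant([True, False, True])  # the losses_mask

    masked_loss = tf.where(is_annotated, per_image_loss,
                           tf.zeros_like(per_image_loss))

    with tf.Session() as sess:
      print(sess.run(masked_loss))  # [1.5 0.  0.5]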
@@ -1866,32 +1868,32 @@ class FasterRCNNMetaArch(model.DetectionModel):
       # for just one class to avoid over-counting for regression loss and
       # (optionally) mask loss.
       else:
-        # We only predict refined location encodings for the non background
-        # classes, but we now pad it to make it compatible with the class
-        # predictions
-        refined_box_encodings_with_background = tf.pad(
-            refined_box_encodings, [[0, 0], [1, 0], [0, 0]])
-        refined_box_encodings_masked_by_class_targets = tf.boolean_mask(
-            refined_box_encodings_with_background,
-            tf.greater(one_hot_flat_cls_targets_with_background, 0))
-        reshaped_refined_box_encodings = tf.reshape(
-            refined_box_encodings_masked_by_class_targets,
-            [batch_size, self.max_num_proposals, self._box_coder.code_size])
+        reshaped_refined_box_encodings = (
+            self._get_refined_encodings_for_postitive_class(
+                refined_box_encodings,
+                one_hot_flat_cls_targets_with_background, batch_size))

+      losses_mask = None
+      if self.groundtruth_has_field(fields.InputDataFields.is_annotated):
+        losses_mask = tf.stack(self.groundtruth_lists(
+            fields.InputDataFields.is_annotated))
       second_stage_loc_losses = self._second_stage_localization_loss(
           reshaped_refined_box_encodings,
-          batch_reg_targets, weights=batch_reg_weights) / normalizer
+          batch_reg_targets, weights=batch_reg_weights,
+          losses_mask=losses_mask) / normalizer
       second_stage_cls_losses = ops.reduce_sum_trailing_dimensions(
           self._second_stage_classification_loss(
               class_predictions_with_background,
               batch_cls_targets_with_background,
-              weights=batch_cls_weights),
+              weights=batch_cls_weights,
+              losses_mask=losses_mask),
           ndims=2) / normalizer
       second_stage_loc_loss = tf.reduce_sum(
-          tf.boolean_mask(second_stage_loc_losses, paddings_indicator))
+          second_stage_loc_losses * tf.to_float(paddings_indicator))
       second_stage_cls_loss = tf.reduce_sum(
-          tf.boolean_mask(second_stage_cls_losses, paddings_indicator))
+          second_stage_cls_losses * tf.to_float(paddings_indicator))
       if self._hard_example_miner:
         (second_stage_loc_loss, second_stage_cls_loss
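Swapping `tf.boolean_mask` for a multiply by the float indicator changes the shape behavior, not the sum: `boolean_mask` returns a dynamically shaped tensor, while multiplication keeps the static shape that `use_static_shapes`/TPU compilation needs. A quick check:

    import tensorflow as tf

    losses = tf.constant([[0.5, 1.0, 0.25]])
    paddings_indicator = tf.constant([[True, True, False]])

    dynamic = tf.reduce_sum(tf.boolean_mask(losses, paddings_indicator))
    static = tf.reduce_sum(losses * tf.to_float(paddings_indicator))

    with tf.Session() as sess:
      print(sess.run([dynamic, static]))  # [1.5, 1.5]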
@@ -1954,10 +1956,9 @@ class FasterRCNNMetaArch(model.DetectionModel):
           box_list.BoxList(tf.reshape(proposal_boxes, [-1, 4])),
           image_shape[1], image_shape[2]).get()
-      flat_cropped_gt_mask = tf.image.crop_and_resize(
+      flat_cropped_gt_mask = self._crop_and_resize_fn(
           tf.expand_dims(flat_gt_masks, -1),
-          flat_normalized_proposals,
-          tf.range(flat_normalized_proposals.shape[0].value),
+          tf.expand_dims(flat_normalized_proposals, axis=1),
           [mask_height, mask_width])
       batch_cropped_gt_mask = tf.reshape(
@@ -1968,14 +1969,16 @@ class FasterRCNNMetaArch(model.DetectionModel):
           self._second_stage_mask_loss(
               reshaped_prediction_masks,
-              batch_cropped_gt_mask, weights=batch_mask_target_weights),
+              batch_cropped_gt_mask, weights=batch_mask_target_weights,
+              losses_mask=losses_mask),
           ndims=2) / (
               mask_height * mask_width * tf.maximum(
                   tf.reduce_sum(batch_mask_target_weights, axis=1,
                                 keep_dims=True),
                   tf.ones((batch_size, 1))))
       second_stage_mask_loss = tf.reduce_sum(
-          tf.boolean_mask(second_stage_mask_losses, paddings_indicator))
+          tf.where(paddings_indicator, second_stage_mask_losses,
+                   tf.zeros_like(second_stage_mask_losses)))
     if second_stage_mask_loss is not None:
       mask_loss = tf.multiply(self._second_stage_mask_loss_weight,
@@ -1983,6 +1986,29 @@ class FasterRCNNMetaArch(model.DetectionModel):
       loss_dict[mask_loss.op.name] = mask_loss
     return loss_dict

+  def _get_refined_encodings_for_postitive_class(
+      self, refined_box_encodings, flat_cls_targets_with_background,
+      batch_size):
+    # We only predict refined location encodings for the non background
+    # classes, but we now pad it to make it compatible with the class
+    # predictions
+    refined_box_encodings_with_background = tf.pad(
+        refined_box_encodings, [[0, 0], [1, 0], [0, 0]])
+    refined_box_encodings_masked_by_class_targets = (
+        box_list_ops.boolean_mask(
+            box_list.BoxList(
+                tf.reshape(refined_box_encodings_with_background,
+                           [-1, self._box_coder.code_size])),
+            tf.reshape(tf.greater(flat_cls_targets_with_background, 0), [-1]),
+            use_static_shapes=self._use_static_shapes,
+            indicator_sum=batch_size * self.max_num_proposals
+            if self._use_static_shapes else None).get())
+    return tf.reshape(
+        refined_box_encodings_masked_by_class_targets,
+        [batch_size, self.max_num_proposals, self._box_coder.code_size])

   def _padded_batched_proposals_indicator(self, num_proposals,
                                           max_num_proposals):
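A toy run of the selection the new helper performs: pad a background slot onto the class-specific encodings, then keep each proposal's target-class encoding. Sizes are tiny and illustrative, and the flat `tf.boolean_mask` stands in for the `box_list_ops` path used above:

    import tensorflow as tf

    num_classes, code_size = 2, 4
    # 2 proposals, one [code_size] encoding per (non-background) class.
    refined = tf.reshape(
        tf.range(2 * num_classes * code_size, dtype=tf.float32),
        [2, num_classes, code_size])
    # One-hot class targets including the background column (index 0).
    one_hot_targets = tf.constant([[0, 1, 0],
                                   [0, 0, 1]], dtype=tf.int32)

    padded = tf.pad(refined, [[0, 0], [1, 0], [0, 0]])  # prepend background slot
    selected = tf.boolean_mask(padded, tf.greater(one_hot_targets, 0))

    with tf.Session() as sess:
      print(sess.run(selected))  # rows: padded[0, 1] and padded[1, 2]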