ModelZoo / ResNet50_tensorflow / Commits

Commit 27b4acd4
Authored Sep 25, 2018 by Aman Gupta
Parents: 5133522f, d4e1f97f

    Merge remote-tracking branch 'upstream/master'

Changes: 240. Showing 20 changed files with 15202 additions and 576 deletions (+15202 -576).
research/object_detection/core/target_assigner.py (+8 -6)
research/object_detection/core/target_assigner_test.py (+2 -4)
research/object_detection/data/fgvc_2854_classes_label_map.pbtxt (+14270 -0)
research/object_detection/data_decoders/tf_example_decoder.py (+53 -36)
research/object_detection/data_decoders/tf_example_decoder_test.py (+321 -296)
research/object_detection/dataset_tools/create_coco_tf_record.py (+2 -2)
research/object_detection/dataset_tools/create_coco_tf_record_test.py (+6 -0)
research/object_detection/eval_util.py (+55 -31)
research/object_detection/g3doc/detection_model_zoo.md (+13 -7)
research/object_detection/g3doc/running_locally.md (+2 -2)
research/object_detection/g3doc/running_pets.md (+1 -1)
research/object_detection/inputs.py (+80 -13)
research/object_detection/inputs_test.py (+177 -36)
research/object_detection/legacy/eval.py (+21 -26)
research/object_detection/legacy/evaluator.py (+2 -1)
research/object_detection/matchers/argmax_matcher.py (+9 -5)
research/object_detection/matchers/argmax_matcher_test.py (+28 -0)
research/object_detection/matchers/bipartite_matcher.py (+12 -6)
research/object_detection/matchers/bipartite_matcher_test.py (+19 -9)
research/object_detection/meta_architectures/faster_rcnn_meta_arch.py (+121 -95)
research/object_detection/core/target_assigner.py

...
@@ -93,8 +93,7 @@ class TargetAssigner(object):
              groundtruth_boxes,
              groundtruth_labels=None,
              unmatched_class_label=None,
-             groundtruth_weights=None,
-             **params):
+             groundtruth_weights=None):
   """Assign classification and regression targets to each anchor.

   For a given set of anchors and groundtruth detections, match anchors
...
@@ -121,9 +120,11 @@ class TargetAssigner(object):
         If set to None, unmatched_cls_target is set to be [0] for each anchor.
       groundtruth_weights: a float tensor of shape [M] indicating the weight to
         assign to all anchors match to a particular groundtruth box. The weights
-        must be in [0., 1.]. If None, all weights are set to 1.
-      **params: Additional keyword arguments for specific implementations of
-        the Matcher.
+        must be in [0., 1.]. If None, all weights are set to 1. Generally no
+        groundtruth boxes with zero weight match to any anchors as matchers are
+        aware of groundtruth weights. Additionally, `cls_weights` and
+        `reg_weights` are calculated using groundtruth weights as an added
+        safety.

     Returns:
       cls_targets: a float32 tensor with shape [num_anchors, d_1, d_2 ... d_k],
...
@@ -177,7 +178,8 @@ class TargetAssigner(object):
         [unmatched_shape_assert, labels_and_box_shapes_assert]):
       match_quality_matrix = self._similarity_calc.compare(groundtruth_boxes,
                                                            anchors)
-      match = self._matcher.match(match_quality_matrix, **params)
+      match = self._matcher.match(match_quality_matrix,
+                                  valid_rows=tf.greater(groundtruth_weights, 0))
       reg_targets = self._create_regression_targets(anchors,
                                                     groundtruth_boxes,
                                                     match)
...
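The `valid_rows` argument above is how zero-weight groundtruth boxes (for example, padding rows) are kept from ever winning a match. A minimal standalone sketch of that masking idea, not the library's actual Matcher API:

import tensorflow as tf

# Standalone sketch (assumed shapes, not the library's Matcher API): rows of
# the similarity matrix whose groundtruth weight is zero are pushed far below
# every valid score, so a zero-weight box can never be the best match.
def masked_argmax_match(match_quality_matrix, groundtruth_weights):
  # match_quality_matrix: [num_gt, num_anchors]; groundtruth_weights: [num_gt]
  valid_rows = tf.greater(groundtruth_weights, 0)
  penalty = (1.0 - tf.cast(valid_rows, tf.float32)) * 1e9
  masked = match_quality_matrix - tf.expand_dims(penalty, 1)  # broadcast over anchors
  return tf.argmax(masked, axis=0)  # best groundtruth row per anchor

sim = tf.constant([[0.9, 0.1],
                   [0.8, 0.7]])
weights = tf.constant([0.0, 1.0])  # first box is padding
with tf.Session() as sess:
  print(sess.run(masked_argmax_match(sim, weights)))  # [1 1]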
research/object_detection/core/target_assigner_test.py

...
@@ -495,8 +495,7 @@ class TargetAssignerTest(test_case.TestCase):
           priors,
           boxes,
           groundtruth_labels,
-          unmatched_class_label=unmatched_class_label,
-          num_valid_rows=3)
+          unmatched_class_label=unmatched_class_label)

   def test_raises_error_on_invalid_groundtruth_labels(self):
     similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
...
@@ -520,8 +519,7 @@ class TargetAssignerTest(test_case.TestCase):
           priors,
           boxes,
           groundtruth_labels,
-          unmatched_class_label=unmatched_class_label,
-          num_valid_rows=3)
+          unmatched_class_label=unmatched_class_label)

 class BatchTargetAssignerTest(test_case.TestCase):
...
research/object_detection/data/fgvc_2854_classes_label_map.pbtxt (new file, mode 100644)

This source diff could not be displayed because it is too large. You can view the blob instead.
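The new label map itself is not rendered, but entries in these files use the same text format as the inline label maps in this commit's tests; a purely hypothetical entry for illustration:

# Hypothetical entry showing the text format of such label map files; the
# actual 2854 FGVC class entries are not visible on this page.
label_map_string = """
  item {
    id: 1
    name: 'Acanthis flammea'    # hypothetical class name
  }
"""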
research/object_detection/data_decoders/tf_example_decoder.py

...
@@ -19,9 +16,6 @@ protos for object detection.
 """
 import tensorflow as tf

-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import math_ops
 from object_detection.core import data_decoder
 from object_detection.core import standard_fields as fields
 from object_detection.protos import input_reader_pb2
...
@@ -30,14 +27,12 @@ from object_detection.utils import label_map_util
 slim_example_decoder = tf.contrib.slim.tfexample_decoder

-# TODO(lzc): keep LookupTensor and BackupHandler in sync with
-# tf.contrib.slim.tfexample_decoder version.
-class LookupTensor(slim_example_decoder.Tensor):
-  """An ItemHandler that returns a parsed Tensor, the result of a lookup."""
+class _ClassTensorHandler(slim_example_decoder.Tensor):
+  """An ItemHandler to fetch class ids from class text."""

   def __init__(self,
                tensor_key,
-               table,
+               label_map_proto_file,
                shape_keys=None,
                shape=None,
                default_value=''):
...
@@ -47,7 +42,8 @@ class LookupTensor(slim_example_decoder.Tensor):
     Args:
       tensor_key: the name of the `TFExample` feature to read the tensor from.
-      table: A tf.lookup table.
+      label_map_proto_file: File path to a text format LabelMapProto message
+        mapping class text to id.
       shape_keys: Optional name or list of names of the TF-Example feature in
         which the tensor shape is stored. If a list, then each corresponds to
         one dimension of the shape.
...
@@ -59,16 +55,39 @@ class LookupTensor(slim_example_decoder.Tensor):
     Raises:
       ValueError: if both `shape_keys` and `shape` are specified.
     """
-    self._table = table
-    super(LookupTensor, self).__init__(tensor_key, shape_keys, shape,
-                                       default_value)
+    name_to_id = label_map_util.get_label_map_dict(
+        label_map_proto_file, use_display_name=False)
+    # We use a default_value of -1, but we expect all labels to be contained
+    # in the label map.
+    name_to_id_table = tf.contrib.lookup.HashTable(
+        initializer=tf.contrib.lookup.KeyValueTensorInitializer(
+            keys=tf.constant(list(name_to_id.keys())),
+            values=tf.constant(list(name_to_id.values()), dtype=tf.int64)),
+        default_value=-1)
+    display_name_to_id = label_map_util.get_label_map_dict(
+        label_map_proto_file, use_display_name=True)
+    # We use a default_value of -1, but we expect all labels to be contained
+    # in the label map.
+    display_name_to_id_table = tf.contrib.lookup.HashTable(
+        initializer=tf.contrib.lookup.KeyValueTensorInitializer(
+            keys=tf.constant(list(display_name_to_id.keys())),
+            values=tf.constant(
+                list(display_name_to_id.values()), dtype=tf.int64)),
+        default_value=-1)
+
+    self._name_to_id_table = name_to_id_table
+    self._display_name_to_id_table = display_name_to_id_table
+    super(_ClassTensorHandler, self).__init__(tensor_key, shape_keys, shape,
+                                              default_value)

   def tensors_to_item(self, keys_to_tensors):
-    unmapped_tensor = super(LookupTensor, self).tensors_to_item(keys_to_tensors)
-    return self._table.lookup(unmapped_tensor)
+    unmapped_tensor = super(_ClassTensorHandler,
+                            self).tensors_to_item(keys_to_tensors)
+    return tf.maximum(self._name_to_id_table.lookup(unmapped_tensor),
+                      self._display_name_to_id_table.lookup(unmapped_tensor))

-class BackupHandler(slim_example_decoder.ItemHandler):
+class _BackupHandler(slim_example_decoder.ItemHandler):
   """An ItemHandler that tries two ItemHandlers in order."""

   def __init__(self, handler, backup):
...
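Both hash tables above return -1 on a miss, so `tf.maximum` picks whichever keying (name or display_name) actually matched. A standalone sketch with toy data:

import tensorflow as tf

# Standalone sketch of the lookup trick above (toy data): one table is keyed
# by `name`, the other by `display_name`, and each returns -1 on a miss, so
# tf.maximum selects whichever table actually hit.
name_table = tf.contrib.lookup.HashTable(
    tf.contrib.lookup.KeyValueTensorInitializer(
        keys=tf.constant(['/m/cat']),
        values=tf.constant([3], dtype=tf.int64)),
    default_value=-1)
display_table = tf.contrib.lookup.HashTable(
    tf.contrib.lookup.KeyValueTensorInitializer(
        keys=tf.constant(['cat']),
        values=tf.constant([3], dtype=tf.int64)),
    default_value=-1)

class_text = tf.constant(['cat', '/m/cat', 'unknown'])
ids = tf.maximum(name_table.lookup(class_text),
                 display_table.lookup(class_text))

with tf.Session() as sess:
  sess.run(tf.tables_initializer())
  print(sess.run(ids))  # [3 3 -1]: -1 only when both tables miss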
@@ -92,12 +111,12 @@ class BackupHandler(slim_example_decoder.ItemHandler):
                        'Backup handler is of type %s instead of ItemHandler' %
                        type(backup))
     self._handler = handler
     self._backup = backup
-    super(BackupHandler, self).__init__(handler.keys + backup.keys)
+    super(_BackupHandler, self).__init__(handler.keys + backup.keys)

   def tensors_to_item(self, keys_to_tensors):
     item = self._handler.tensors_to_item(keys_to_tensors)
-    return control_flow_ops.cond(
-        pred=math_ops.equal(math_ops.reduce_prod(array_ops.shape(item)), 0),
+    return tf.cond(
+        pred=tf.equal(tf.reduce_prod(tf.shape(item)), 0),
         true_fn=lambda: self._backup.tensors_to_item(keys_to_tensors),
         false_fn=lambda: item)
...
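The emptiness test in `tensors_to_item` treats an item as missing when the product of its shape is zero, i.e. it has no elements. A tiny standalone sketch of that fallback:

import tensorflow as tf

# Tiny sketch of the emptiness test used above (toy data): when the primary
# value has no elements, the backup value is used instead.
primary = tf.constant([], dtype=tf.int64)        # empty: falls through
backup = tf.constant([42, 10], dtype=tf.int64)   # backup value

result = tf.cond(
    pred=tf.equal(tf.reduce_prod(tf.shape(primary)), 0),
    true_fn=lambda: backup,
    false_fn=lambda: primary)

with tf.Session() as sess:
  print(sess.run(result))  # [42 10]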
@@ -140,6 +159,9 @@ class TfExampleDecoder(data_decoder.DataDecoder):
         input_reader_pb2.DEFAULT, input_reader_pb2.NUMERICAL, or
         input_reader_pb2.PNG_MASKS.
     """
+    # TODO(rathodv): delete unused `use_display_name` argument once we change
+    # other decoders to handle label maps similarly.
+    del use_display_name
     self.keys_to_features = {
         'image/encoded':
             tf.FixedLenFeature((), tf.string, default_value=''),
...
@@ -267,27 +289,18 @@ class TfExampleDecoder(data_decoder.DataDecoder):
     else:
       raise ValueError('Did not recognize the `instance_mask_type` option.')
     if label_map_proto_file:
-      label_map = label_map_util.get_label_map_dict(label_map_proto_file,
-                                                    use_display_name)
-      # We use a default_value of -1, but we expect all labels to be contained
-      # in the label map.
-      table = tf.contrib.lookup.HashTable(
-          initializer=tf.contrib.lookup.KeyValueTensorInitializer(
-              keys=tf.constant(list(label_map.keys())),
-              values=tf.constant(list(label_map.values()), dtype=tf.int64)),
-          default_value=-1)
       # If the label_map_proto is provided, try to use it in conjunction with
       # the class text, and fall back to a materialized ID.
-      # TODO(lzc): note that here we are using BackupHandler defined in this
-      # file(which is branching slim_example_decoder.BackupHandler). Need to
-      # switch back to slim_example_decoder.BackupHandler once tf 1.5 becomes
-      # more popular.
-      label_handler = BackupHandler(
-          LookupTensor('image/object/class/text', table, default_value=''),
+      label_handler = _BackupHandler(
+          _ClassTensorHandler(
+              'image/object/class/text', label_map_proto_file,
+              default_value=''),
           slim_example_decoder.Tensor('image/object/class/label'))
-      image_label_handler = BackupHandler(
-          LookupTensor(
-              fields.TfExampleFields.image_class_text,
-              table,
-              default_value=''),
+      image_label_handler = _BackupHandler(
+          _ClassTensorHandler(
+              fields.TfExampleFields.image_class_text,
+              label_map_proto_file,
+              default_value=''),
           slim_example_decoder.Tensor(fields.TfExampleFields.image_class_label))
     else:
       label_handler = slim_example_decoder.Tensor('image/object/class/label')
...
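A usage sketch of the rewired decoder, mirroring the tests later in this commit (the label map path and image bytes are placeholders, so the final `sess.run` needs real data):

import tensorflow as tf
from object_detection.data_decoders import tf_example_decoder
from object_detection.utils import dataset_util

# Sketch: with label_map_proto_file set, class text is resolved through the
# label map, with fallback to the materialized 'image/object/class/label'.
# 'label_map.pbtxt' is assumed to exist; the image bytes here are fake, so a
# real encoded JPEG is needed for sess.run to succeed.
example = tf.train.Example(
    features=tf.train.Features(
        feature={
            'image/encoded': dataset_util.bytes_feature(b'\xff\xd8...'),
            'image/format': dataset_util.bytes_feature('jpeg'),
            'image/object/class/text':
                dataset_util.bytes_list_feature(['cat', 'dog']),
        })).SerializeToString()

decoder = tf_example_decoder.TfExampleDecoder(
    label_map_proto_file='label_map.pbtxt')
tensor_dict = decoder.decode(tf.convert_to_tensor(example))

with tf.Session() as sess:
  sess.run(tf.tables_initializer())  # the lookup tables must be initialized
  output = sess.run(tensor_dict)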
@@ -309,6 +322,8 @@ class TfExampleDecoder(data_decoder.DataDecoder):
       A dictionary of the following tensors.
       fields.InputDataFields.image - 3D uint8 tensor of shape [None, None, 3]
         containing image.
+      fields.InputDataFields.original_image_spatial_shape - 1D int32 tensor of
+        shape [2] containing shape of the image.
       fields.InputDataFields.source_id - string tensor containing original
         image id.
       fields.InputDataFields.key - string tensor with unique sha256 hash key.
...
@@ -352,6 +367,8 @@ class TfExampleDecoder(data_decoder.DataDecoder):
     is_crowd = fields.InputDataFields.groundtruth_is_crowd
     tensor_dict[is_crowd] = tf.cast(tensor_dict[is_crowd], dtype=tf.bool)
     tensor_dict[fields.InputDataFields.image].set_shape([None, None, 3])
+    tensor_dict[fields.InputDataFields.original_image_spatial_shape] = tf.shape(
+        tensor_dict[fields.InputDataFields.image])[:2]
     tensor_dict[fields.InputDataFields.num_groundtruth_boxes] = tf.shape(
         tensor_dict[fields.InputDataFields.groundtruth_boxes])[0]
...
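A sketch of what the new `original_image_spatial_shape` field carries: the dynamic `[height, width]` of the decoded image, captured before any later resizing (toy input):

import numpy as np
import tensorflow as tf

# Sketch: tf.shape(...)[: 2] yields the dynamic [height, width] of an image.
image = tf.placeholder(tf.uint8, shape=[None, None, 3])
spatial_shape = tf.shape(image)[:2]

with tf.Session() as sess:
  print(sess.run(spatial_shape,
                 feed_dict={image: np.zeros((4, 5, 3), np.uint8)}))  # [4 5]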
research/object_detection/data_decoders/tf_example_decoder_test.py

...
@@ -12,24 +12,17 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================

 """Tests for object_detection.data_decoders.tf_example_decoder."""

 import os
 import numpy as np
 import tensorflow as tf

-from tensorflow.core.example import example_pb2
-from tensorflow.core.example import feature_pb2
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import test_util
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import lookup_ops
-from tensorflow.python.ops import parsing_ops
 from object_detection.core import standard_fields as fields
 from object_detection.data_decoders import tf_example_decoder
 from object_detection.protos import input_reader_pb2
+from object_detection.utils import dataset_util

 slim_example_decoder = tf.contrib.slim.tfexample_decoder
...
@@ -56,25 +49,6 @@ class TfExampleDecoderTest(tf.test.TestCase):
       raise ValueError('Invalid encoding type.')
     return image_decoded

-  def _Int64Feature(self, value):
-    return tf.train.Feature(int64_list=tf.train.Int64List(value=value))
-
-  def _FloatFeature(self, value):
-    return tf.train.Feature(float_list=tf.train.FloatList(value=value))
-
-  def _BytesFeature(self, value):
-    if isinstance(value, list):
-      return tf.train.Feature(bytes_list=tf.train.BytesList(value=value))
-    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
-
-  def _Int64FeatureFromList(self, ndarray):
-    return feature_pb2.Feature(
-        int64_list=feature_pb2.Int64List(value=ndarray.flatten().tolist()))
-
-  def _BytesFeatureFromList(self, ndarray):
-    values = ndarray.flatten().tolist()
-    return feature_pb2.Feature(
-        bytes_list=feature_pb2.BytesList(value=values))
-
   def testDecodeAdditionalChannels(self):
     image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
     encoded_jpeg = self._EncodeImage(image_tensor)
...
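The removed private helpers are replaced throughout these tests by the shared ones in `object_detection/utils/dataset_util`, which look roughly like this sketch:

import tensorflow as tf

# Rough sketch of the shared feature helpers in
# object_detection/utils/dataset_util that replace the private test helpers
# removed above.
def int64_feature(value):
  return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))

def int64_list_feature(value):
  return tf.train.Feature(int64_list=tf.train.Int64List(value=value))

def bytes_feature(value):
  return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

def bytes_list_feature(value):
  return tf.train.Feature(bytes_list=tf.train.BytesList(value=value))

def float_list_feature(value):
  return tf.train.Feature(float_list=tf.train.FloatList(value=value))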
@@ -88,14 +62,14 @@ class TfExampleDecoderTest(tf.test.TestCase):
         features=tf.train.Features(
             feature={
                 'image/encoded':
-                    self._BytesFeature(encoded_jpeg),
+                    dataset_util.bytes_feature(encoded_jpeg),
                 'image/additional_channels/encoded':
-                    self._BytesFeatureFromList(
-                        np.array([encoded_additional_channel] * 2)),
+                    dataset_util.bytes_list_feature(
+                        [encoded_additional_channel] * 2),
                 'image/format':
-                    self._BytesFeature('jpeg'),
+                    dataset_util.bytes_feature('jpeg'),
                 'image/source_id':
-                    self._BytesFeature('image_id'),
+                    dataset_util.bytes_feature('image_id'),
             })).SerializeToString()
     example_decoder = tf_example_decoder.TfExampleDecoder(
...
@@ -108,118 +82,44 @@ class TfExampleDecoderTest(tf.test.TestCase):
         np.concatenate([decoded_additional_channel] * 2, axis=2),
         tensor_dict[fields.InputDataFields.image_additional_channels])

-  def testDecodeExampleWithBranchedBackupHandler(self):
-    example1 = example_pb2.Example(
-        features=feature_pb2.Features(
-            feature={
-                'image/object/class/text':
-                    self._BytesFeatureFromList(
-                        np.array(['cat', 'dog', 'guinea pig'])),
-                'image/object/class/label':
-                    self._Int64FeatureFromList(np.array([42, 10, 900]))
-            }))
-    example2 = example_pb2.Example(
-        features=feature_pb2.Features(
-            feature={
-                'image/object/class/text':
-                    self._BytesFeatureFromList(
-                        np.array(['cat', 'dog', 'guinea pig'])),
-            }))
-    example3 = example_pb2.Example(
-        features=feature_pb2.Features(
-            feature={
-                'image/object/class/label':
-                    self._Int64FeatureFromList(np.array([42, 10, 901]))
-            }))
-    # 'dog' -> 0, 'guinea pig' -> 1, 'cat' -> 2
-    table = lookup_ops.index_table_from_tensor(
-        constant_op.constant(['dog', 'guinea pig', 'cat']))
-    keys_to_features = {
-        'image/object/class/text': parsing_ops.VarLenFeature(dtypes.string),
-        'image/object/class/label': parsing_ops.VarLenFeature(dtypes.int64),
-    }
-    backup_handler = tf_example_decoder.BackupHandler(
-        handler=slim_example_decoder.Tensor('image/object/class/label'),
-        backup=tf_example_decoder.LookupTensor('image/object/class/text',
-                                               table))
-    items_to_handlers = {
-        'labels': backup_handler,
-    }
-    decoder = slim_example_decoder.TFExampleDecoder(keys_to_features,
-                                                    items_to_handlers)
-    obtained_class_ids_each_example = []
-    with self.test_session() as sess:
-      sess.run(lookup_ops.tables_initializer())
-      for example in [example1, example2, example3]:
-        serialized_example = array_ops.reshape(
-            example.SerializeToString(), shape=[])
-        obtained_class_ids_each_example.append(
-            decoder.decode(serialized_example)[0].eval())
-
-    self.assertAllClose([42, 10, 900], obtained_class_ids_each_example[0])
-    self.assertAllClose([2, 0, 1], obtained_class_ids_each_example[1])
-    self.assertAllClose([42, 10, 901], obtained_class_ids_each_example[2])
-
-  def testDecodeExampleWithBranchedLookup(self):
-    example = example_pb2.Example(
-        features=feature_pb2.Features(
-            feature={
-                'image/object/class/text':
-                    self._BytesFeatureFromList(
-                        np.array(['cat', 'dog', 'guinea pig'])),
-            }))
-    serialized_example = example.SerializeToString()
-    # 'dog' -> 0, 'guinea pig' -> 1, 'cat' -> 2
-    table = lookup_ops.index_table_from_tensor(
-        constant_op.constant(['dog', 'guinea pig', 'cat']))
-    with self.test_session() as sess:
-      sess.run(lookup_ops.tables_initializer())
-      serialized_example = array_ops.reshape(serialized_example, shape=[])
-      keys_to_features = {
-          'image/object/class/text': parsing_ops.VarLenFeature(dtypes.string),
-      }
-      items_to_handlers = {
-          'labels': tf_example_decoder.LookupTensor('image/object/class/text',
-                                                    table),
-      }
-      decoder = slim_example_decoder.TFExampleDecoder(keys_to_features,
-                                                      items_to_handlers)
-      obtained_class_ids = decoder.decode(serialized_example)[0].eval()
-
-    self.assertAllClose([2, 0, 1], obtained_class_ids)
-
   def testDecodeJpegImage(self):
     image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
     encoded_jpeg = self._EncodeImage(image_tensor)
     decoded_jpeg = self._DecodeImage(encoded_jpeg)
     example = tf.train.Example(
-        features=tf.train.Features(feature={
-            'image/encoded': self._BytesFeature(encoded_jpeg),
-            'image/format': self._BytesFeature('jpeg'),
-            'image/source_id': self._BytesFeature('image_id'),
-        })).SerializeToString()
+        features=tf.train.Features(
+            feature={
+                'image/encoded': dataset_util.bytes_feature(encoded_jpeg),
+                'image/format': dataset_util.bytes_feature('jpeg'),
+                'image/source_id': dataset_util.bytes_feature('image_id'),
+            })).SerializeToString()
     example_decoder = tf_example_decoder.TfExampleDecoder()
     tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))

     self.assertAllEqual((tensor_dict[fields.InputDataFields.image].
                          get_shape().as_list()), [None, None, 3])
+    self.assertAllEqual((tensor_dict[fields.InputDataFields.
+                                     original_image_spatial_shape].
+                         get_shape().as_list()), [2])
     with self.test_session() as sess:
       tensor_dict = sess.run(tensor_dict)
       self.assertAllEqual(decoded_jpeg,
                           tensor_dict[fields.InputDataFields.image])
+      self.assertAllEqual([4, 5], tensor_dict[fields.InputDataFields.
+                                              original_image_spatial_shape])
       self.assertEqual('image_id',
                        tensor_dict[fields.InputDataFields.source_id])

   def testDecodeImageKeyAndFilename(self):
     image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
     encoded_jpeg = self._EncodeImage(image_tensor)
     example = tf.train.Example(
-        features=tf.train.Features(feature={
-            'image/encoded': self._BytesFeature(encoded_jpeg),
-            'image/key/sha256': self._BytesFeature('abc'),
-            'image/filename': self._BytesFeature('filename')
-        })).SerializeToString()
+        features=tf.train.Features(
+            feature={
+                'image/encoded': dataset_util.bytes_feature(encoded_jpeg),
+                'image/key/sha256': dataset_util.bytes_feature('abc'),
+                'image/filename': dataset_util.bytes_feature('filename')
+            })).SerializeToString()
     example_decoder = tf_example_decoder.TfExampleDecoder()
     tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
...
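The `_EncodeImage`/`_DecodeImage` helpers these tests call are defined outside the shown hunks; a hedged sketch of what such helpers typically do with TF's image codecs:

import numpy as np
import tensorflow as tf

# Hedged sketch (the actual test helpers are not shown on this page):
# round-trip a numpy image through TF's jpeg/png codecs.
def encode_image(image_array, encoding_type='jpeg'):
  image = tf.constant(image_array)
  if encoding_type == 'jpeg':
    encoded = tf.image.encode_jpeg(image)
  elif encoding_type == 'png':
    encoded = tf.image.encode_png(image)
  else:
    raise ValueError('Invalid encoding type.')
  with tf.Session() as sess:
    return sess.run(encoded)

def decode_image(encoded, encoding_type='jpeg'):
  encoded_t = tf.constant(encoded)
  if encoding_type == 'jpeg':
    decoded = tf.image.decode_jpeg(encoded_t)
  elif encoding_type == 'png':
    decoded = tf.image.decode_png(encoded_t)
  else:
    raise ValueError('Invalid encoding type.')
  with tf.Session() as sess:
    return sess.run(decoded)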
@@ -234,21 +134,28 @@ class TfExampleDecoderTest(tf.test.TestCase):
     image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
     encoded_png = self._EncodeImage(image_tensor, encoding_type='png')
     decoded_png = self._DecodeImage(encoded_png, encoding_type='png')
     example = tf.train.Example(
-        features=tf.train.Features(feature={
-            'image/encoded': self._BytesFeature(encoded_png),
-            'image/format': self._BytesFeature('png'),
-            'image/source_id': self._BytesFeature('image_id')
-        })).SerializeToString()
+        features=tf.train.Features(
+            feature={
+                'image/encoded': dataset_util.bytes_feature(encoded_png),
+                'image/format': dataset_util.bytes_feature('png'),
+                'image/source_id': dataset_util.bytes_feature('image_id')
+            })).SerializeToString()
     example_decoder = tf_example_decoder.TfExampleDecoder()
     tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))

     self.assertAllEqual((tensor_dict[fields.InputDataFields.image].
                          get_shape().as_list()), [None, None, 3])
+    self.assertAllEqual((tensor_dict[fields.InputDataFields.
+                                     original_image_spatial_shape].
+                         get_shape().as_list()), [2])
     with self.test_session() as sess:
       tensor_dict = sess.run(tensor_dict)
       self.assertAllEqual(decoded_png,
                           tensor_dict[fields.InputDataFields.image])
+      self.assertAllEqual([4, 5], tensor_dict[fields.InputDataFields.
+                                              original_image_spatial_shape])
       self.assertEqual('image_id',
                        tensor_dict[fields.InputDataFields.source_id])

   def testDecodePngInstanceMasks(self):
...
@@ -265,9 +172,12 @@ class TfExampleDecoderTest(tf.test.TestCase):
     example = tf.train.Example(
         features=tf.train.Features(
             feature={
-                'image/encoded': self._BytesFeature(encoded_jpeg),
-                'image/format': self._BytesFeature('jpeg'),
-                'image/object/mask': self._BytesFeature(encoded_masks)
+                'image/encoded':
+                    dataset_util.bytes_feature(encoded_jpeg),
+                'image/format':
+                    dataset_util.bytes_feature('jpeg'),
+                'image/object/mask':
+                    dataset_util.bytes_list_feature(encoded_masks)
             })).SerializeToString()
     example_decoder = tf_example_decoder.TfExampleDecoder(
...
@@ -288,11 +198,16 @@ class TfExampleDecoderTest(tf.test.TestCase):
     example = tf.train.Example(
         features=tf.train.Features(
             feature={
-                'image/encoded': self._BytesFeature(encoded_jpeg),
-                'image/format': self._BytesFeature('jpeg'),
-                'image/object/mask': self._BytesFeature(encoded_masks),
-                'image/height': self._Int64Feature([10]),
-                'image/width': self._Int64Feature([10]),
+                'image/encoded':
+                    dataset_util.bytes_feature(encoded_jpeg),
+                'image/format':
+                    dataset_util.bytes_feature('jpeg'),
+                'image/object/mask':
+                    dataset_util.bytes_list_feature(encoded_masks),
+                'image/height':
+                    dataset_util.int64_feature(10),
+                'image/width':
+                    dataset_util.int64_feature(10),
             })).SerializeToString()
     example_decoder = tf_example_decoder.TfExampleDecoder(
...
@@ -312,25 +227,33 @@ class TfExampleDecoderTest(tf.test.TestCase):
     bbox_xmins = [1.0, 5.0]
     bbox_ymaxs = [2.0, 6.0]
     bbox_xmaxs = [3.0, 7.0]
     example = tf.train.Example(
-        features=tf.train.Features(feature={
-            'image/encoded': self._BytesFeature(encoded_jpeg),
-            'image/format': self._BytesFeature('jpeg'),
-            'image/object/bbox/ymin': self._FloatFeature(bbox_ymins),
-            'image/object/bbox/xmin': self._FloatFeature(bbox_xmins),
-            'image/object/bbox/ymax': self._FloatFeature(bbox_ymaxs),
-            'image/object/bbox/xmax': self._FloatFeature(bbox_xmaxs),
-        })).SerializeToString()
+        features=tf.train.Features(
+            feature={
+                'image/encoded':
+                    dataset_util.bytes_feature(encoded_jpeg),
+                'image/format':
+                    dataset_util.bytes_feature('jpeg'),
+                'image/object/bbox/ymin':
+                    dataset_util.float_list_feature(bbox_ymins),
+                'image/object/bbox/xmin':
+                    dataset_util.float_list_feature(bbox_xmins),
+                'image/object/bbox/ymax':
+                    dataset_util.float_list_feature(bbox_ymaxs),
+                'image/object/bbox/xmax':
+                    dataset_util.float_list_feature(bbox_xmaxs),
+            })).SerializeToString()
     example_decoder = tf_example_decoder.TfExampleDecoder()
     tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))

     self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_boxes]
                          .get_shape().as_list()), [None, 4])
     with self.test_session() as sess:
       tensor_dict = sess.run(tensor_dict)

     expected_boxes = np.vstack([bbox_ymins, bbox_xmins,
                                 bbox_ymaxs, bbox_xmaxs]).transpose()
     self.assertAllEqual(expected_boxes,
                         tensor_dict[fields.InputDataFields.groundtruth_boxes])
     self.assertAllEqual(
...
@@ -346,30 +269,40 @@ class TfExampleDecoderTest(tf.test.TestCase):
     bbox_xmaxs = [3.0, 7.0]
     keypoint_ys = [0.0, 1.0, 2.0, 3.0, 4.0, 5.0]
     keypoint_xs = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]
     example = tf.train.Example(
-        features=tf.train.Features(feature={
-            'image/encoded': self._BytesFeature(encoded_jpeg),
-            'image/format': self._BytesFeature('jpeg'),
-            'image/object/bbox/ymin': self._FloatFeature(bbox_ymins),
-            'image/object/bbox/xmin': self._FloatFeature(bbox_xmins),
-            'image/object/bbox/ymax': self._FloatFeature(bbox_ymaxs),
-            'image/object/bbox/xmax': self._FloatFeature(bbox_xmaxs),
-            'image/object/keypoint/y': self._FloatFeature(keypoint_ys),
-            'image/object/keypoint/x': self._FloatFeature(keypoint_xs),
-        })).SerializeToString()
+        features=tf.train.Features(
+            feature={
+                'image/encoded':
+                    dataset_util.bytes_feature(encoded_jpeg),
+                'image/format':
+                    dataset_util.bytes_feature('jpeg'),
+                'image/object/bbox/ymin':
+                    dataset_util.float_list_feature(bbox_ymins),
+                'image/object/bbox/xmin':
+                    dataset_util.float_list_feature(bbox_xmins),
+                'image/object/bbox/ymax':
+                    dataset_util.float_list_feature(bbox_ymaxs),
+                'image/object/bbox/xmax':
+                    dataset_util.float_list_feature(bbox_xmaxs),
+                'image/object/keypoint/y':
+                    dataset_util.float_list_feature(keypoint_ys),
+                'image/object/keypoint/x':
+                    dataset_util.float_list_feature(keypoint_xs),
+            })).SerializeToString()
     example_decoder = tf_example_decoder.TfExampleDecoder(num_keypoints=3)
     tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))

     self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_boxes]
                          .get_shape().as_list()), [None, 4])
     self.assertAllEqual(
         (tensor_dict[fields.InputDataFields.groundtruth_keypoints].get_shape()
          .as_list()), [2, 3, 2])
     with self.test_session() as sess:
       tensor_dict = sess.run(tensor_dict)

     expected_boxes = np.vstack([bbox_ymins, bbox_xmins,
                                 bbox_ymaxs, bbox_xmaxs]).transpose()
     self.assertAllEqual(expected_boxes,
                         tensor_dict[fields.InputDataFields.groundtruth_boxes])
     self.assertAllEqual(
...
@@ -377,9 +310,9 @@ class TfExampleDecoderTest(tf.test.TestCase):
     expected_keypoints = (
         np.vstack([keypoint_ys, keypoint_xs]).transpose().reshape((2, 3, 2)))
-    self.assertAllEqual(expected_keypoints,
-                        tensor_dict[
-                            fields.InputDataFields.groundtruth_keypoints])
+    self.assertAllEqual(
+        expected_keypoints,
+        tensor_dict[fields.InputDataFields.groundtruth_keypoints])

   def testDecodeDefaultGroundtruthWeights(self):
     image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
...
@@ -388,20 +321,28 @@ class TfExampleDecoderTest(tf.test.TestCase):
     bbox_xmins = [1.0, 5.0]
     bbox_ymaxs = [2.0, 6.0]
     bbox_xmaxs = [3.0, 7.0]
     example = tf.train.Example(
-        features=tf.train.Features(feature={
-            'image/encoded': self._BytesFeature(encoded_jpeg),
-            'image/format': self._BytesFeature('jpeg'),
-            'image/object/bbox/ymin': self._FloatFeature(bbox_ymins),
-            'image/object/bbox/xmin': self._FloatFeature(bbox_xmins),
-            'image/object/bbox/ymax': self._FloatFeature(bbox_ymaxs),
-            'image/object/bbox/xmax': self._FloatFeature(bbox_xmaxs),
-        })).SerializeToString()
+        features=tf.train.Features(
+            feature={
+                'image/encoded':
+                    dataset_util.bytes_feature(encoded_jpeg),
+                'image/format':
+                    dataset_util.bytes_feature('jpeg'),
+                'image/object/bbox/ymin':
+                    dataset_util.float_list_feature(bbox_ymins),
+                'image/object/bbox/xmin':
+                    dataset_util.float_list_feature(bbox_xmins),
+                'image/object/bbox/ymax':
+                    dataset_util.float_list_feature(bbox_ymaxs),
+                'image/object/bbox/xmax':
+                    dataset_util.float_list_feature(bbox_xmaxs),
+            })).SerializeToString()
     example_decoder = tf_example_decoder.TfExampleDecoder()
     tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))

     self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_boxes]
                          .get_shape().as_list()), [None, 4])
     with self.test_session() as sess:
       tensor_dict = sess.run(tensor_dict)
...
@@ -414,18 +355,22 @@ class TfExampleDecoderTest(tf.test.TestCase):
     image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
     encoded_jpeg = self._EncodeImage(image_tensor)
     bbox_classes = [0, 1]
     example = tf.train.Example(
-        features=tf.train.Features(feature={
-            'image/encoded': self._BytesFeature(encoded_jpeg),
-            'image/format': self._BytesFeature('jpeg'),
-            'image/object/class/label': self._Int64Feature(bbox_classes),
-        })).SerializeToString()
+        features=tf.train.Features(
+            feature={
+                'image/encoded':
+                    dataset_util.bytes_feature(encoded_jpeg),
+                'image/format':
+                    dataset_util.bytes_feature('jpeg'),
+                'image/object/class/label':
+                    dataset_util.int64_list_feature(bbox_classes),
+            })).SerializeToString()
     example_decoder = tf_example_decoder.TfExampleDecoder()
     tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))

-    self.assertAllEqual((tensor_dict[
-        fields.InputDataFields.groundtruth_classes].get_shape().as_list()),
-                        [2])
+    self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_classes]
+                         .get_shape().as_list()), [2])
     with self.test_session() as sess:
       tensor_dict = sess.run(tensor_dict)
...
@@ -437,11 +382,16 @@ class TfExampleDecoderTest(tf.test.TestCase):
     image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
     encoded_jpeg = self._EncodeImage(image_tensor)
     bbox_classes = [1, 2]
     example = tf.train.Example(
-        features=tf.train.Features(feature={
-            'image/encoded': self._BytesFeature(encoded_jpeg),
-            'image/format': self._BytesFeature('jpeg'),
-            'image/object/class/label': self._Int64Feature(bbox_classes),
-        })).SerializeToString()
+        features=tf.train.Features(
+            feature={
+                'image/encoded':
+                    dataset_util.bytes_feature(encoded_jpeg),
+                'image/format':
+                    dataset_util.bytes_feature('jpeg'),
+                'image/object/class/label':
+                    dataset_util.int64_list_feature(bbox_classes),
+            })).SerializeToString()
     label_map_string = """
       item {
         id:1
...
@@ -460,9 +410,8 @@ class TfExampleDecoderTest(tf.test.TestCase):
         label_map_proto_file=label_map_path)
     tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))

-    self.assertAllEqual((tensor_dict[
-        fields.InputDataFields.groundtruth_classes].get_shape().as_list()),
-                        [None])
+    self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_classes]
+                         .get_shape().as_list()), [None])
     init = tf.tables_initializer()
     with self.test_session() as sess:
...
@@ -480,11 +429,11 @@ class TfExampleDecoderTest(tf.test.TestCase):
         features=tf.train.Features(
             feature={
                 'image/encoded':
-                    self._BytesFeature(encoded_jpeg),
+                    dataset_util.bytes_feature(encoded_jpeg),
                 'image/format':
-                    self._BytesFeature('jpeg'),
+                    dataset_util.bytes_feature('jpeg'),
                 'image/object/class/text':
-                    self._BytesFeature(bbox_classes_text),
+                    dataset_util.bytes_list_feature(bbox_classes_text),
             })).SerializeToString()
     label_map_string = """
...
@@ -514,7 +463,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
       self.assertAllEqual([2, -1],
                           tensor_dict[fields.InputDataFields.groundtruth_classes])

-  def testDecodeObjectLabelWithMapping(self):
+  def testDecodeObjectLabelWithMappingWithDisplayName(self):
     image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
     encoded_jpeg = self._EncodeImage(image_tensor)
     bbox_classes_text = ['cat', 'dog']
...
@@ -522,11 +471,53 @@ class TfExampleDecoderTest(tf.test.TestCase):
         features=tf.train.Features(
             feature={
                 'image/encoded':
-                    self._BytesFeature(encoded_jpeg),
+                    dataset_util.bytes_feature(encoded_jpeg),
                 'image/format':
-                    self._BytesFeature('jpeg'),
+                    dataset_util.bytes_feature('jpeg'),
                 'image/object/class/text':
-                    self._BytesFeature(bbox_classes_text),
+                    dataset_util.bytes_list_feature(bbox_classes_text),
             })).SerializeToString()
+    label_map_string = """
+      item {
+        id:3
+        display_name:'cat'
+      }
+      item {
+        id:1
+        display_name:'dog'
+      }
+    """
+    label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt')
+    with tf.gfile.Open(label_map_path, 'wb') as f:
+      f.write(label_map_string)
+
+    example_decoder = tf_example_decoder.TfExampleDecoder(
+        label_map_proto_file=label_map_path)
+    tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
+
+    self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_classes]
+                         .get_shape().as_list()), [None])
+
+    with self.test_session() as sess:
+      sess.run(tf.tables_initializer())
+      tensor_dict = sess.run(tensor_dict)
+      self.assertAllEqual([3, 1],
+                          tensor_dict[fields.InputDataFields.groundtruth_classes])
+
+  def testDecodeObjectLabelWithMappingWithName(self):
+    image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
+    encoded_jpeg = self._EncodeImage(image_tensor)
+    bbox_classes_text = ['cat', 'dog']
+    example = tf.train.Example(
+        features=tf.train.Features(
+            feature={
+                'image/encoded':
+                    dataset_util.bytes_feature(encoded_jpeg),
+                'image/format':
+                    dataset_util.bytes_feature('jpeg'),
+                'image/object/class/text':
+                    dataset_util.bytes_list_feature(bbox_classes_text),
+            })).SerializeToString()
     label_map_string = """
...
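A sketch of the name vs. display_name distinction that the two mapping tests above exercise (file contents illustrative):

import os
import tempfile
import tensorflow as tf
from object_detection.utils import label_map_util

# Sketch: a label map keyed only by display_name, read back as a dict.
label_map_string = """
  item {
    id: 3
    display_name: 'cat'
  }
  item {
    id: 1
    display_name: 'dog'
  }
"""
label_map_path = os.path.join(tempfile.mkdtemp(), 'label_map.pbtxt')
with tf.gfile.Open(label_map_path, 'wb') as f:
  f.write(label_map_string)

# Keyed by display_name: {'cat': 3, 'dog': 1}
print(label_map_util.get_label_map_dict(label_map_path, use_display_name=True))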
@@ -561,17 +552,22 @@ class TfExampleDecoderTest(tf.test.TestCase):
     image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
     encoded_jpeg = self._EncodeImage(image_tensor)
     object_area = [100., 174.]
     example = tf.train.Example(
-        features=tf.train.Features(feature={
-            'image/encoded': self._BytesFeature(encoded_jpeg),
-            'image/format': self._BytesFeature('jpeg'),
-            'image/object/area': self._FloatFeature(object_area),
-        })).SerializeToString()
+        features=tf.train.Features(
+            feature={
+                'image/encoded':
+                    dataset_util.bytes_feature(encoded_jpeg),
+                'image/format':
+                    dataset_util.bytes_feature('jpeg'),
+                'image/object/area':
+                    dataset_util.float_list_feature(object_area),
+            })).SerializeToString()
     example_decoder = tf_example_decoder.TfExampleDecoder()
     tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))

     self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_area]
                          .get_shape().as_list()), [2])
     with self.test_session() as sess:
       tensor_dict = sess.run(tensor_dict)
...
@@ -583,67 +579,81 @@ class TfExampleDecoderTest(tf.test.TestCase):
     image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
     encoded_jpeg = self._EncodeImage(image_tensor)
     object_is_crowd = [0, 1]
     example = tf.train.Example(
-        features=tf.train.Features(feature={
-            'image/encoded': self._BytesFeature(encoded_jpeg),
-            'image/format': self._BytesFeature('jpeg'),
-            'image/object/is_crowd': self._Int64Feature(object_is_crowd),
-        })).SerializeToString()
+        features=tf.train.Features(
+            feature={
+                'image/encoded':
+                    dataset_util.bytes_feature(encoded_jpeg),
+                'image/format':
+                    dataset_util.bytes_feature('jpeg'),
+                'image/object/is_crowd':
+                    dataset_util.int64_list_feature(object_is_crowd),
+            })).SerializeToString()
     example_decoder = tf_example_decoder.TfExampleDecoder()
     tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))

-    self.assertAllEqual((tensor_dict[
-        fields.InputDataFields.groundtruth_is_crowd].get_shape()
-                         .as_list()), [2])
+    self.assertAllEqual(
+        (tensor_dict[fields.InputDataFields.groundtruth_is_crowd].get_shape()
+         .as_list()), [2])
     with self.test_session() as sess:
       tensor_dict = sess.run(tensor_dict)
-      self.assertAllEqual([bool(item) for item in object_is_crowd],
-                          tensor_dict[
-                              fields.InputDataFields.groundtruth_is_crowd])
+      self.assertAllEqual(
+          [bool(item) for item in object_is_crowd],
+          tensor_dict[fields.InputDataFields.groundtruth_is_crowd])

   @test_util.enable_c_shapes
   def testDecodeObjectDifficult(self):
     image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
     encoded_jpeg = self._EncodeImage(image_tensor)
     object_difficult = [0, 1]
     example = tf.train.Example(
-        features=tf.train.Features(feature={
-            'image/encoded': self._BytesFeature(encoded_jpeg),
-            'image/format': self._BytesFeature('jpeg'),
-            'image/object/difficult': self._Int64Feature(object_difficult),
-        })).SerializeToString()
+        features=tf.train.Features(
+            feature={
+                'image/encoded':
+                    dataset_util.bytes_feature(encoded_jpeg),
+                'image/format':
+                    dataset_util.bytes_feature('jpeg'),
+                'image/object/difficult':
+                    dataset_util.int64_list_feature(object_difficult),
+            })).SerializeToString()
     example_decoder = tf_example_decoder.TfExampleDecoder()
     tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))

-    self.assertAllEqual((tensor_dict[
-        fields.InputDataFields.groundtruth_difficult].get_shape()
-                         .as_list()), [2])
+    self.assertAllEqual(
+        (tensor_dict[fields.InputDataFields.groundtruth_difficult].get_shape()
+         .as_list()), [2])
     with self.test_session() as sess:
       tensor_dict = sess.run(tensor_dict)
-      self.assertAllEqual([bool(item) for item in object_difficult],
-                          tensor_dict[
-                              fields.InputDataFields.groundtruth_difficult])
+      self.assertAllEqual(
+          [bool(item) for item in object_difficult],
+          tensor_dict[fields.InputDataFields.groundtruth_difficult])

   @test_util.enable_c_shapes
   def testDecodeObjectGroupOf(self):
     image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
     encoded_jpeg = self._EncodeImage(image_tensor)
     object_group_of = [0, 1]
     example = tf.train.Example(
-        features=tf.train.Features(feature={
-            'image/encoded': self._BytesFeature(encoded_jpeg),
-            'image/format': self._BytesFeature('jpeg'),
-            'image/object/group_of': self._Int64Feature(object_group_of),
-        })).SerializeToString()
+        features=tf.train.Features(
+            feature={
+                'image/encoded':
+                    dataset_util.bytes_feature(encoded_jpeg),
+                'image/format':
+                    dataset_util.bytes_feature('jpeg'),
+                'image/object/group_of':
+                    dataset_util.int64_list_feature(object_group_of),
+            })).SerializeToString()
     example_decoder = tf_example_decoder.TfExampleDecoder()
     tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))

-    self.assertAllEqual((tensor_dict[
-        fields.InputDataFields.groundtruth_group_of].get_shape()
-                         .as_list()), [2])
+    self.assertAllEqual(
+        (tensor_dict[fields.InputDataFields.groundtruth_group_of].get_shape()
+         .as_list()), [2])
     with self.test_session() as sess:
       tensor_dict = sess.run(tensor_dict)
...
@@ -655,25 +665,27 @@ class TfExampleDecoderTest(tf.test.TestCase):
     image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
     encoded_jpeg = self._EncodeImage(image_tensor)
     object_weights = [0.75, 1.0]
     example = tf.train.Example(
-        features=tf.train.Features(feature={
-            'image/encoded': self._BytesFeature(encoded_jpeg),
-            'image/format': self._BytesFeature('jpeg'),
-            'image/object/weight': self._FloatFeature(object_weights),
-        })).SerializeToString()
+        features=tf.train.Features(
+            feature={
+                'image/encoded':
+                    dataset_util.bytes_feature(encoded_jpeg),
+                'image/format':
+                    dataset_util.bytes_feature('jpeg'),
+                'image/object/weight':
+                    dataset_util.float_list_feature(object_weights),
+            })).SerializeToString()
     example_decoder = tf_example_decoder.TfExampleDecoder()
     tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))

-    self.assertAllEqual((tensor_dict[
-        fields.InputDataFields.groundtruth_weights].get_shape().as_list()),
-                        [None])
+    self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_weights]
+                         .get_shape().as_list()), [None])
     with self.test_session() as sess:
       tensor_dict = sess.run(tensor_dict)
       self.assertAllEqual(object_weights,
                           tensor_dict[fields.InputDataFields.groundtruth_weights])

   @test_util.enable_c_shapes
   def testDecodeInstanceSegmentation(self):
...
@@ -682,15 +694,13 @@ class TfExampleDecoderTest(tf.test.TestCase):
...
@@ -682,15 +694,13 @@ class TfExampleDecoderTest(tf.test.TestCase):
     image_width = 3
     # Randomly generate image.
     image_tensor = np.random.randint(256, size=(image_height, image_width,
                                                 3)).astype(np.uint8)
     encoded_jpeg = self._EncodeImage(image_tensor)
     # Randomly generate instance segmentation masks.
     instance_masks = (
         np.random.randint(2, size=(num_instances, image_height,
                                    image_width)).astype(np.float32))
     instance_masks_flattened = np.reshape(instance_masks, [-1])
...
@@ -698,25 +708,32 @@ class TfExampleDecoderTest(tf.test.TestCase):
     object_classes = np.random.randint(
         100, size=(num_instances)).astype(np.int64)
-    example = tf.train.Example(features=tf.train.Features(feature={
-        'image/encoded': self._BytesFeature(encoded_jpeg),
-        'image/format': self._BytesFeature('jpeg'),
-        'image/height': self._Int64Feature([image_height]),
-        'image/width': self._Int64Feature([image_width]),
-        'image/object/mask': self._FloatFeature(instance_masks_flattened),
-        'image/object/class/label': self._Int64Feature(
-            object_classes)})).SerializeToString()
+    example = tf.train.Example(
+        features=tf.train.Features(
+            feature={
+                'image/encoded':
+                    dataset_util.bytes_feature(encoded_jpeg),
+                'image/format':
+                    dataset_util.bytes_feature('jpeg'),
+                'image/height':
+                    dataset_util.int64_feature(image_height),
+                'image/width':
+                    dataset_util.int64_feature(image_width),
+                'image/object/mask':
+                    dataset_util.float_list_feature(instance_masks_flattened),
+                'image/object/class/label':
+                    dataset_util.int64_list_feature(object_classes)
+            })).SerializeToString()
     example_decoder = tf_example_decoder.TfExampleDecoder(
         load_instance_masks=True)
     tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))

-    self.assertAllEqual((
-        tensor_dict[fields.InputDataFields.groundtruth_instance_masks].
-        get_shape().as_list()), [4, 5, 3])
+    self.assertAllEqual(
+        (tensor_dict[fields.InputDataFields.groundtruth_instance_masks]
+         .get_shape().as_list()), [4, 5, 3])
-    self.assertAllEqual((
-        tensor_dict[fields.InputDataFields.groundtruth_classes]
-        .get_shape().as_list()), [4])
+    self.assertAllEqual((
+        tensor_dict[fields.InputDataFields.groundtruth_classes].
+        get_shape().as_list()), [4])
     with self.test_session() as sess:
       tensor_dict = sess.run(tensor_dict)
...
@@ -724,24 +741,21 @@ class TfExampleDecoderTest(tf.test.TestCase):
     self.assertAllEqual(
         instance_masks.astype(np.float32),
         tensor_dict[fields.InputDataFields.groundtruth_instance_masks])
     self.assertAllEqual(
         object_classes,
         tensor_dict[fields.InputDataFields.groundtruth_classes])

   def testInstancesNotAvailableByDefault(self):
     num_instances = 4
     image_height = 5
     image_width = 3
     # Randomly generate image.
     image_tensor = np.random.randint(256, size=(image_height, image_width,
                                                 3)).astype(np.uint8)
     encoded_jpeg = self._EncodeImage(image_tensor)
     # Randomly generate instance segmentation masks.
     instance_masks = (
         np.random.randint(2, size=(num_instances, image_height,
                                    image_width)).astype(np.float32))
     instance_masks_flattened = np.reshape(instance_masks, [-1])
...
@@ -749,18 +763,26 @@ class TfExampleDecoderTest(tf.test.TestCase):
     object_classes = np.random.randint(
         100, size=(num_instances)).astype(np.int64)
-    example = tf.train.Example(features=tf.train.Features(feature={
-        'image/encoded': self._BytesFeature(encoded_jpeg),
-        'image/format': self._BytesFeature('jpeg'),
-        'image/height': self._Int64Feature([image_height]),
-        'image/width': self._Int64Feature([image_width]),
-        'image/object/mask': self._FloatFeature(instance_masks_flattened),
-        'image/object/class/label': self._Int64Feature(
-            object_classes)})).SerializeToString()
+    example = tf.train.Example(
+        features=tf.train.Features(
+            feature={
+                'image/encoded':
+                    dataset_util.bytes_feature(encoded_jpeg),
+                'image/format':
+                    dataset_util.bytes_feature('jpeg'),
+                'image/height':
+                    dataset_util.int64_feature(image_height),
+                'image/width':
+                    dataset_util.int64_feature(image_width),
+                'image/object/mask':
+                    dataset_util.float_list_feature(instance_masks_flattened),
+                'image/object/class/label':
+                    dataset_util.int64_list_feature(object_classes)
+            })).SerializeToString()
     example_decoder = tf_example_decoder.TfExampleDecoder()
     tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))

-    self.assertTrue(fields.InputDataFields.groundtruth_instance_masks
-                    not in tensor_dict)
+    self.assertTrue(
+        fields.InputDataFields.groundtruth_instance_masks not in tensor_dict)

   def testDecodeImageLabels(self):
     image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
...
@@ -768,9 +790,9 @@ class TfExampleDecoderTest(tf.test.TestCase):
     example = tf.train.Example(
         features=tf.train.Features(
             feature={
-                'image/encoded': self._BytesFeature(encoded_jpeg),
-                'image/format': self._BytesFeature('jpeg'),
-                'image/class/label': self._Int64Feature([1, 2]),
+                'image/encoded': dataset_util.bytes_feature(encoded_jpeg),
+                'image/format': dataset_util.bytes_feature('jpeg'),
+                'image/class/label': dataset_util.int64_list_feature([1, 2]),
             })).SerializeToString()
     example_decoder = tf_example_decoder.TfExampleDecoder()
     tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
...
@@ -784,9 +806,12 @@ class TfExampleDecoderTest(tf.test.TestCase):
     example = tf.train.Example(
         features=tf.train.Features(
             feature={
-                'image/encoded': self._BytesFeature(encoded_jpeg),
-                'image/format': self._BytesFeature('jpeg'),
-                'image/class/text': self._BytesFeature(['dog', 'cat']),
+                'image/encoded':
+                    dataset_util.bytes_feature(encoded_jpeg),
+                'image/format':
+                    dataset_util.bytes_feature('jpeg'),
+                'image/class/text':
+                    dataset_util.bytes_list_feature(['dog', 'cat']),
             })).SerializeToString()
     label_map_string = """
       item {
...
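The pattern these tests exercise — build a tf.train.Example with the dataset_util helpers, serialize it, and decode it back with TfExampleDecoder — works the same way outside the test harness. A minimal round-trip sketch, assuming TF 1.x with the object_detection package on the path; PIL here stands in for the tests' _EncodeImage helper:

```python
import io

import numpy as np
import tensorflow as tf
from PIL import Image

from object_detection.core import standard_fields as fields
from object_detection.data_decoders import tf_example_decoder
from object_detection.utils import dataset_util

# Encode a random 4x5 image as JPEG bytes.
image = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
buf = io.BytesIO()
Image.fromarray(image).save(buf, format='JPEG')
encoded_jpeg = buf.getvalue()

# Serialize an Example using the same helpers as the updated tests.
example = tf.train.Example(
    features=tf.train.Features(
        feature={
            'image/encoded': dataset_util.bytes_feature(encoded_jpeg),
            'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
            'image/object/weight': dataset_util.float_list_feature([0.75, 1.0]),
        })).SerializeToString()

decoder = tf_example_decoder.TfExampleDecoder()
tensor_dict = decoder.decode(tf.convert_to_tensor(example))
with tf.Session() as sess:
  out = sess.run(tensor_dict)
print(out[fields.InputDataFields.groundtruth_weights])  # -> [0.75 1.0]
```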
research/object_detection/dataset_tools/create_coco_tf_record.py View file @ 27b4acd4
...
@@ -177,8 +177,8 @@ def create_tf_example(image,
           dataset_util.float_list_feature(ymin),
       'image/object/bbox/ymax':
           dataset_util.float_list_feature(ymax),
-      'image/object/class/label':
-          dataset_util.int64_list_feature(category_ids),
+      'image/object/class/text':
+          dataset_util.bytes_list_feature(category_names),
       'image/object/is_crowd':
           dataset_util.int64_list_feature(is_crowd),
       'image/object/area':
...
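For orientation, the dataset_util helpers this commit standardizes on are thin wrappers over the tf.train feature protos. A sketch of their shape (the canonical versions live in object_detection/utils/dataset_util.py):

```python
import tensorflow as tf

def int64_feature(value):
  # Wraps a single int in an Int64List feature.
  return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))

def int64_list_feature(value):
  return tf.train.Feature(int64_list=tf.train.Int64List(value=value))

def bytes_feature(value):
  # Wraps a single bytestring in a BytesList feature.
  return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

def bytes_list_feature(value):
  return tf.train.Feature(bytes_list=tf.train.BytesList(value=value))

def float_list_feature(value):
  return tf.train.Feature(float_list=tf.train.FloatList(value=value))
```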
research/object_detection/dataset_tools/create_coco_tf_record_test.py View file @ 27b4acd4
...
@@ -106,6 +106,9 @@ class CreateCocoTFRecordTest(tf.test.TestCase):
     self._assertProtoEqual(
         example.features.feature['image/object/bbox/ymax'].float_list.value,
         [0.75])
+    self._assertProtoEqual(
+        example.features.feature['image/object/class/text'].bytes_list.value,
+        ['cat'])

   def test_create_tf_example_with_instance_masks(self):
     image_file_name = 'tmp_image.jpg'
...
@@ -169,6 +172,9 @@ class CreateCocoTFRecordTest(tf.test.TestCase):
     self._assertProtoEqual(
         example.features.feature['image/object/bbox/ymax'].float_list.value,
         [1])
+    self._assertProtoEqual(
+        example.features.feature['image/object/class/text'].bytes_list.value,
+        ['dog'])
     encoded_mask_pngs = [
         io.BytesIO(encoded_masks) for encoded_masks in example.features.feature[
             'image/object/mask'].bytes_list.value
...
research/object_detection/eval_util.py View file @ 27b4acd4
...
@@ -14,7 +14,6 @@
 # ==============================================================================
 """Common utility functions for evaluation."""
 import collections
-import logging
 import os
 import time
...
@@ -53,15 +52,15 @@ def write_metrics(metrics, global_step, summary_dir):
     global_step: Global step at which the metrics are computed.
     summary_dir: Directory to write tensorflow summaries to.
   """
-  logging.info('Writing metrics to tf summary.')
+  tf.logging.info('Writing metrics to tf summary.')
   summary_writer = tf.summary.FileWriterCache.get(summary_dir)
   for key in sorted(metrics):
     summary = tf.Summary(value=[
         tf.Summary.Value(tag=key, simple_value=metrics[key]),
     ])
     summary_writer.add_summary(summary, global_step)
-    logging.info('%s: %f', key, metrics[key])
-  logging.info('Metrics written to tf summary.')
+    tf.logging.info('%s: %f', key, metrics[key])
+  tf.logging.info('Metrics written to tf summary.')

 # TODO(rathodv): Add tests.
...
@@ -141,7 +140,7 @@ def visualize_detection_results(result_dict,
   if show_groundtruth and input_fields.groundtruth_boxes not in result_dict:
     raise ValueError('If show_groundtruth is enabled, result_dict must contain '
                      'groundtruth_boxes.')
-  logging.info('Creating detection visualizations.')
+  tf.logging.info('Creating detection visualizations.')
   category_index = label_map_util.create_category_index(categories)
   image = np.squeeze(result_dict[input_fields.original_image], axis=0)
...
@@ -205,7 +204,8 @@ def visualize_detection_results(result_dict,
     summary_writer = tf.summary.FileWriterCache.get(summary_dir)
     summary_writer.add_summary(summary, global_step)
-  logging.info('Detection visualizations written to summary with tag %s.', tag)
+  tf.logging.info('Detection visualizations written to summary with tag %s.',
+                  tag)

 def _run_checkpoint_once(tensor_dict,
...
@@ -218,7 +218,8 @@ def _run_checkpoint_once(tensor_dict,
                          master='',
                          save_graph=False,
                          save_graph_dir='',
-                         losses_dict=None):
+                         losses_dict=None,
+                         eval_export_path=None):
   """Evaluates metrics defined in evaluators and returns summaries.

   This function loads the latest checkpoint in checkpoint_dirs and evaluates
...
@@ -258,6 +259,8 @@ def _run_checkpoint_once(tensor_dict,
     save_graph_dir: where to store the Tensorflow graph on disk. If save_graph
       is True this must be non-empty.
     losses_dict: optional dictionary of scalar detection losses.
+    eval_export_path: Path for saving a json file that contains the detection
+      results in json format.

   Returns:
     global_step: the count of global steps.
...
@@ -292,7 +295,8 @@ def _run_checkpoint_once(tensor_dict,
     try:
       for batch in range(int(num_batches)):
         if (batch + 1) % 100 == 0:
-          logging.info('Running eval ops batch %d/%d', batch + 1, num_batches)
+          tf.logging.info('Running eval ops batch %d/%d', batch + 1,
+                          num_batches)
         if not batch_processor:
           try:
             if not losses_dict:
...
@@ -301,7 +305,7 @@ def _run_checkpoint_once(tensor_dict,
                 losses_dict])
             counters['success'] += 1
           except tf.errors.InvalidArgumentError:
-            logging.info('Skipping image')
+            tf.logging.info('Skipping image')
             counters['skipped'] += 1
             result_dict = {}
         else:
...
@@ -316,18 +320,31 @@ def _run_checkpoint_once(tensor_dict,
           # decoders to return correct image_id.
           # TODO(akuznetsa): result_dict contains batches of images, while
           # add_single_ground_truth_image_info expects a single image. Fix
+          if (isinstance(result_dict, dict) and
+              result_dict[fields.InputDataFields.key]):
+            image_id = result_dict[fields.InputDataFields.key]
+          else:
+            image_id = batch
           evaluator.add_single_ground_truth_image_info(
-              image_id=batch, groundtruth_dict=result_dict)
+              image_id=image_id, groundtruth_dict=result_dict)
           evaluator.add_single_detected_image_info(
-              image_id=batch, detections_dict=result_dict)
-      logging.info('Running eval batches done.')
+              image_id=image_id, detections_dict=result_dict)
+      tf.logging.info('Running eval batches done.')
     except tf.errors.OutOfRangeError:
-      logging.info('Done evaluating -- epoch limit reached')
+      tf.logging.info('Done evaluating -- epoch limit reached')
     finally:
       # When done, ask the threads to stop.
-      logging.info('# success: %d', counters['success'])
-      logging.info('# skipped: %d', counters['skipped'])
+      tf.logging.info('# success: %d', counters['success'])
+      tf.logging.info('# skipped: %d', counters['skipped'])
   all_evaluator_metrics = {}
+  if eval_export_path and eval_export_path is not None:
+    for evaluator in evaluators:
+      if (isinstance(evaluator, coco_evaluation.CocoDetectionEvaluator) or
+          isinstance(evaluator, coco_evaluation.CocoMaskEvaluator)):
+        tf.logging.info('Started dumping to json file.')
+        evaluator.dump_detections_to_json_file(
+            json_output_path=eval_export_path)
+        tf.logging.info('Finished dumping to json file.')
   for evaluator in evaluators:
     metrics = evaluator.evaluate()
     evaluator.clear()
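The new eval_export_path branch only fires for the COCO evaluators, whose dump_detections_to_json_file writes the accumulated detections in COCO result format. A hedged standalone sketch of that export path — the image id, boxes, scores, and output path below are made-up values:

```python
import numpy as np
from object_detection.metrics import coco_evaluation

categories = [{'id': 1, 'name': 'cat'}, {'id': 2, 'name': 'dog'}]
evaluator = coco_evaluation.CocoDetectionEvaluator(categories)

# Feed one (hypothetical) image's groundtruth and detections.
evaluator.add_single_ground_truth_image_info(
    image_id='image1',
    groundtruth_dict={
        'groundtruth_boxes': np.array([[0.1, 0.1, 0.5, 0.5]], dtype=np.float32),
        'groundtruth_classes': np.array([1], dtype=np.int32),
    })
evaluator.add_single_detected_image_info(
    image_id='image1',
    detections_dict={
        'detection_boxes': np.array([[0.1, 0.1, 0.5, 0.5]], dtype=np.float32),
        'detection_scores': np.array([0.9], dtype=np.float32),
        'detection_classes': np.array([1], dtype=np.int32),
    })

# The same call the new eval_export_path plumbing makes internally.
evaluator.dump_detections_to_json_file(json_output_path='/tmp/detections.json')
```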
...
@@ -356,7 +373,8 @@ def repeated_checkpoint_run(tensor_dict,
                             master='',
                             save_graph=False,
                             save_graph_dir='',
-                            losses_dict=None):
+                            losses_dict=None,
+                            eval_export_path=None):
   """Periodically evaluates desired tensors using checkpoint_dirs or restore_fn.

   This function repeatedly loads a checkpoint and evaluates a desired
...
@@ -397,6 +415,8 @@ def repeated_checkpoint_run(tensor_dict,
     save_graph_dir: where to save on disk the Tensorflow graph. If store_graph
       is True this must be non-empty.
     losses_dict: optional dictionary of scalar detection losses.
+    eval_export_path: Path for saving a json file that contains the detection
+      results in json format.

   Returns:
     metrics: A dictionary containing metric names and values in the latest
...
@@ -417,31 +437,36 @@ def repeated_checkpoint_run(tensor_dict,
   number_of_evaluations = 0
   while True:
     start = time.time()
-    logging.info('Starting evaluation at ' + time.strftime(
-        '%Y-%m-%d-%H:%M:%S', time.gmtime()))
+    tf.logging.info('Starting evaluation at ' + time.strftime(
+        '%Y-%m-%d-%H:%M:%S', time.gmtime()))
     model_path = tf.train.latest_checkpoint(checkpoint_dirs[0])
     if not model_path:
-      logging.info('No model found in %s. Will try again in %d seconds',
-                   checkpoint_dirs[0], eval_interval_secs)
+      tf.logging.info('No model found in %s. Will try again in %d seconds',
+                      checkpoint_dirs[0], eval_interval_secs)
     elif model_path == last_evaluated_model_path:
-      logging.info('Found already evaluated checkpoint. Will try again in %d '
-                   'seconds', eval_interval_secs)
+      tf.logging.info('Found already evaluated checkpoint. Will try again in '
                      '%d seconds', eval_interval_secs)
     else:
       last_evaluated_model_path = model_path
-      global_step, metrics = _run_checkpoint_once(tensor_dict, evaluators,
-                                                  batch_processor,
-                                                  checkpoint_dirs,
-                                                  variables_to_restore,
-                                                  restore_fn, num_batches,
-                                                  master, save_graph,
-                                                  save_graph_dir,
-                                                  losses_dict=losses_dict)
+      global_step, metrics = _run_checkpoint_once(
+          tensor_dict,
+          evaluators,
+          batch_processor,
+          checkpoint_dirs,
+          variables_to_restore,
+          restore_fn,
+          num_batches,
+          master,
+          save_graph,
+          save_graph_dir,
+          losses_dict=losses_dict,
+          eval_export_path=eval_export_path)
     write_metrics(metrics, global_step, summary_dir)
     number_of_evaluations += 1

     if (max_number_of_evaluations and
         number_of_evaluations >= max_number_of_evaluations):
-      logging.info('Finished evaluation!')
+      tf.logging.info('Finished evaluation!')
       break
     time_to_next_eval = start + eval_interval_secs - time.time()
     if time_to_next_eval > 0:
...
@@ -680,4 +705,3 @@ def evaluator_options_from_eval_config(eval_config):
           eval_config.include_metrics_per_category)
   }
   return evaluator_options
research/object_detection/g3doc/detection_model_zoo.md View file @ 27b4acd4
...
@@ -2,13 +2,12 @@
 We provide a collection of detection models pre-trained on the [COCO
 dataset](http://mscoco.org), the [Kitti dataset](http://www.cvlibs.net/datasets/kitti/),
-the [Open Images dataset](https://github.com/openimages/dataset) and the
-[AVA v2.1 dataset](https://research.google.com/ava/). These models can
-be useful for
-out-of-the-box inference if you are interested in categories already in COCO
-(e.g., humans, cars, etc) or in Open Images (e.g.,
-surfboard, jacuzzi, etc). They are also useful for initializing your models when
-training on novel datasets.
+the [Open Images dataset](https://github.com/openimages/dataset), the
+[AVA v2.1 dataset](https://research.google.com/ava/) and the
+[iNaturalist Species Detection Dataset](https://github.com/visipedia/inat_comp/blob/master/2017/README.md#bounding-boxes).
+These models can be useful for out-of-the-box inference if you are interested in
+categories already in those datasets. They are also useful for initializing your
+models when training on novel datasets.

 In the table below, we list each such pre-trained model including:
...
@@ -113,6 +112,13 @@ Model name
 [faster_rcnn_inception_resnet_v2_atrous_oid](http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_resnet_v2_atrous_oid_2018_01_28.tar.gz) | 727 | 37 | Boxes
 [faster_rcnn_inception_resnet_v2_atrous_lowproposals_oid](http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_resnet_v2_atrous_lowproposals_oid_2018_01_28.tar.gz) | 347 | | Boxes

+## iNaturalist Species-trained models
+
+Model name | Speed (ms) | Pascal mAP@0.5 | Outputs
+----------------------------------------------------------------------------------------------------------------------------------------------------------------- | :---: | :-------------: | :-----:
+[faster_rcnn_resnet101_fgvc](http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet101_fgvc_2018_07_19.tar.gz) | 395 | 58 | Boxes
+[faster_rcnn_resnet50_fgvc](http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet50_fgvc_2018_07_19.tar.gz) | 366 | 55 | Boxes
+
 ## AVA v2.1 trained models
...
research/object_detection/g3doc/running_locally.md View file @ 27b4acd4
...
@@ -37,12 +37,12 @@ A local training job can be run with the following command:
 PIPELINE_CONFIG_PATH={path to pipeline config file}
 MODEL_DIR={path to model directory}
 NUM_TRAIN_STEPS=50000
-NUM_EVAL_STEPS=2000
+SAMPLE_1_OF_N_EVAL_EXAMPLES=1
 python object_detection/model_main.py \
     --pipeline_config_path=${PIPELINE_CONFIG_PATH} \
     --model_dir=${MODEL_DIR} \
     --num_train_steps=${NUM_TRAIN_STEPS} \
-    --num_eval_steps=${NUM_EVAL_STEPS} \
+    --sample_1_of_n_eval_examples=$SAMPLE_1_OF_N_EVAL_EXAMPLES \
     --alsologtostderr
 ```
...
research/object_detection/g3doc/running_pets.md View file @ 27b4acd4
...
@@ -216,7 +216,7 @@ To start training and evaluation, execute the following command from the
 ```bash
 # From tensorflow/models/research/
 gcloud ml-engine jobs submit training `whoami`_object_detection_pets_`date +%m_%d_%Y_%H_%M_%S` \
-    --runtime-version 1.9 \
+    --runtime-version 1.8 \
     --job-dir=gs://${YOUR_GCS_BUCKET}/model_dir \
     --packages dist/object_detection-0.1.tar.gz,slim/dist/slim-0.1.tar.gz,/tmp/pycocotools/pycocotools-2.0.tar.gz \
     --module-name object_detection.model_main \
...
research/object_detection/inputs.py View file @ 27b4acd4
...
@@ -52,7 +52,8 @@ def transform_input_data(tensor_dict,
                          num_classes,
                          data_augmentation_fn=None,
                          merge_multiple_boxes=False,
-                         retain_original_image=False):
+                         retain_original_image=False,
+                         use_bfloat16=False):
   """A single function that is responsible for all input data transformations.

   Data transformation functions are applied in the following order.
...
@@ -86,6 +87,7 @@ def transform_input_data(tensor_dict,
       and classes for a given image if the boxes are exactly the same.
     retain_original_image: (optional) whether to retain original image in the
       output dictionary.
+    use_bfloat16: (optional) a bool, whether to use bfloat16 in training.

   Returns:
     A dictionary keyed by fields.InputDataFields containing the tensors obtained
...
@@ -101,7 +103,8 @@ def transform_input_data(tensor_dict,
   if retain_original_image:
     tensor_dict[fields.InputDataFields.original_image] = tf.cast(
-        tensor_dict[fields.InputDataFields.image], tf.uint8)
+        image_resizer_fn(tensor_dict[fields.InputDataFields.image], None)[0],
+        tf.uint8)

   # Apply data augmentation ops.
   if data_augmentation_fn is not None:
...
@@ -111,6 +114,9 @@ def transform_input_data(tensor_dict,
   image = tensor_dict[fields.InputDataFields.image]
   preprocessed_resized_image, true_image_shape = model_preprocess_fn(
       tf.expand_dims(tf.to_float(image), axis=0))
+  if use_bfloat16:
+    preprocessed_resized_image = tf.cast(
+        preprocessed_resized_image, tf.bfloat16)
   tensor_dict[fields.InputDataFields.image] = tf.squeeze(
       preprocessed_resized_image, axis=0)
   tensor_dict[fields.InputDataFields.true_image_shape] = tf.squeeze(
@@ -128,13 +134,33 @@ def transform_input_data(tensor_dict,
...
@@ -128,13 +134,33 @@ def transform_input_data(tensor_dict,
tensor_dict
[
fields
.
InputDataFields
.
groundtruth_classes
]
=
tf
.
one_hot
(
tensor_dict
[
fields
.
InputDataFields
.
groundtruth_classes
]
=
tf
.
one_hot
(
zero_indexed_groundtruth_classes
,
num_classes
)
zero_indexed_groundtruth_classes
,
num_classes
)
if
fields
.
InputDataFields
.
groundtruth_confidences
in
tensor_dict
:
groundtruth_confidences
=
tensor_dict
[
fields
.
InputDataFields
.
groundtruth_confidences
]
tensor_dict
[
fields
.
InputDataFields
.
groundtruth_confidences
]
=
(
tf
.
sparse_to_dense
(
zero_indexed_groundtruth_classes
,
[
num_classes
],
groundtruth_confidences
,
validate_indices
=
False
))
else
:
groundtruth_confidences
=
tf
.
ones_like
(
zero_indexed_groundtruth_classes
,
dtype
=
tf
.
float32
)
tensor_dict
[
fields
.
InputDataFields
.
groundtruth_confidences
]
=
(
tensor_dict
[
fields
.
InputDataFields
.
groundtruth_classes
])
if
merge_multiple_boxes
:
if
merge_multiple_boxes
:
merged_boxes
,
merged_classes
,
_
=
util_ops
.
merge_boxes_with_multiple_labels
(
merged_boxes
,
merged_classes
,
merged_confidences
,
_
=
(
tensor_dict
[
fields
.
InputDataFields
.
groundtruth_boxes
],
util_ops
.
merge_boxes_with_multiple_labels
(
zero_indexed_groundtruth_classes
,
num_classes
)
tensor_dict
[
fields
.
InputDataFields
.
groundtruth_boxes
],
zero_indexed_groundtruth_classes
,
groundtruth_confidences
,
num_classes
))
merged_classes
=
tf
.
cast
(
merged_classes
,
tf
.
float32
)
merged_classes
=
tf
.
cast
(
merged_classes
,
tf
.
float32
)
tensor_dict
[
fields
.
InputDataFields
.
groundtruth_boxes
]
=
merged_boxes
tensor_dict
[
fields
.
InputDataFields
.
groundtruth_boxes
]
=
merged_boxes
tensor_dict
[
fields
.
InputDataFields
.
groundtruth_classes
]
=
merged_classes
tensor_dict
[
fields
.
InputDataFields
.
groundtruth_classes
]
=
merged_classes
tensor_dict
[
fields
.
InputDataFields
.
groundtruth_confidences
]
=
(
merged_confidences
)
return
tensor_dict
return
tensor_dict
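The tf.sparse_to_dense call above scatters the per-box confidences into a dense vector of length num_classes, indexed by the zero-indexed class ids, so the confidences end up aligned with the one-hot class encoding. A small standalone illustration with made-up values:

```python
import tensorflow as tf

# Three boxes labeled with zero-indexed classes 2, 0, 5 and per-box
# confidences; scatter them into a dense vector of length num_classes = 6.
zero_indexed_classes = tf.constant([2, 0, 5], dtype=tf.int64)
confidences = tf.constant([0.9, 0.4, 0.7], dtype=tf.float32)
dense = tf.sparse_to_dense(zero_indexed_classes, [6], confidences,
                           validate_indices=False)

with tf.Session() as sess:
  print(sess.run(dense))  # [0.4 0.  0.9 0.  0.  0.7]
```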
...
@@ -174,6 +200,7 @@ def pad_input_data_to_static_shapes(tensor_dict, max_num_boxes, num_classes,
       fields.InputDataFields.image: [
           height, width, 3 + num_additional_channels
       ],
+      fields.InputDataFields.original_image_spatial_shape: [2],
       fields.InputDataFields.image_additional_channels: [
           height, width, num_additional_channels
       ],
...
@@ -183,6 +210,8 @@ def pad_input_data_to_static_shapes(tensor_dict, max_num_boxes, num_classes,
       fields.InputDataFields.groundtruth_difficult: [max_num_boxes],
       fields.InputDataFields.groundtruth_boxes: [max_num_boxes, 4],
       fields.InputDataFields.groundtruth_classes: [max_num_boxes, num_classes],
+      fields.InputDataFields.groundtruth_confidences: [
+          max_num_boxes, num_classes],
       fields.InputDataFields.groundtruth_instance_masks: [
           max_num_boxes, height, width
       ],
...
@@ -198,11 +227,12 @@ def pad_input_data_to_static_shapes(tensor_dict, max_num_boxes, num_classes,
           max_num_boxes, num_classes + 1 if num_classes is not None else None
       ],
       fields.InputDataFields.groundtruth_image_classes: [num_classes],
+      fields.InputDataFields.groundtruth_image_confidences: [num_classes],
   }

   if fields.InputDataFields.original_image in tensor_dict:
     padding_shapes[fields.InputDataFields.original_image] = [
-        None, None, 3 + num_additional_channels
+        height, width, 3 + num_additional_channels
     ]
   if fields.InputDataFields.groundtruth_keypoints in tensor_dict:
     tensor_shape = (
...
@@ -252,9 +282,12 @@ def augment_input_data(tensor_dict, data_augmentation_options):
                             in tensor_dict)
   include_keypoints = (fields.InputDataFields.groundtruth_keypoints
                        in tensor_dict)
+  include_label_scores = (fields.InputDataFields.groundtruth_confidences
+                          in tensor_dict)
   tensor_dict = preprocessor.preprocess(
       tensor_dict, data_augmentation_options,
       func_arg_map=preprocessor.get_default_func_arg_map(
+          include_label_scores=include_label_scores,
           include_instance_masks=include_instance_masks,
           include_keypoints=include_keypoints))
   tensor_dict[fields.InputDataFields.image] = tf.squeeze(
...
@@ -275,6 +308,7 @@ def _get_labels_dict(input_dict):
     labels_dict[key] = input_dict[key]

   optional_label_keys = [
+      fields.InputDataFields.groundtruth_confidences,
       fields.InputDataFields.groundtruth_keypoints,
       fields.InputDataFields.groundtruth_instance_masks,
       fields.InputDataFields.groundtruth_area,
...
@@ -291,16 +325,50 @@ def _get_labels_dict(input_dict):
labels_dict
return
labels_dict
def
_replace_empty_string_with_random_number
(
string_tensor
):
"""Returns string unchanged if non-empty, and random string tensor otherwise.
The random string is an integer 0 and 2**63 - 1, casted as string.
Args:
string_tensor: A tf.tensor of dtype string.
Returns:
out_string: A tf.tensor of dtype string. If string_tensor contains the empty
string, out_string will contain a random integer casted to a string.
Otherwise string_tensor is returned unchanged.
"""
empty_string
=
tf
.
constant
(
''
,
dtype
=
tf
.
string
,
name
=
'EmptyString'
)
random_source_id
=
tf
.
as_string
(
tf
.
random_uniform
(
shape
=
[],
maxval
=
2
**
63
-
1
,
dtype
=
tf
.
int64
))
out_string
=
tf
.
cond
(
tf
.
equal
(
string_tensor
,
empty_string
),
true_fn
=
lambda
:
random_source_id
,
false_fn
=
lambda
:
string_tensor
)
return
out_string
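The new helper is a graph-mode conditional: tf.cond substitutes a random integer id only when source_id is the empty string, so labeled examples keep a stable hash across runs while unlabeled examples no longer all collide in one bucket. A quick sketch of its behavior (the function is module-private, so this import path is only for illustration):

```python
import tensorflow as tf
from object_detection import inputs

with_id = inputs._replace_empty_string_with_random_number(
    tf.constant('image_0042'))
without_id = inputs._replace_empty_string_with_random_number(
    tf.constant(''))

with tf.Session() as sess:
  print(sess.run(with_id))     # b'image_0042' (unchanged)
  print(sess.run(without_id))  # e.g. b'6443038217930929017' (random per run)
```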
 def _get_features_dict(input_dict):
   """Extracts features dict from input dict."""
-  hash_from_source_id = tf.string_to_hash_bucket_fast(
-      input_dict[fields.InputDataFields.source_id], HASH_BINS)
+  source_id = _replace_empty_string_with_random_number(
+      input_dict[fields.InputDataFields.source_id])
+  hash_from_source_id = tf.string_to_hash_bucket_fast(source_id, HASH_BINS)
   features = {
       fields.InputDataFields.image:
           input_dict[fields.InputDataFields.image],
       HASH_KEY: tf.cast(hash_from_source_id, tf.int32),
       fields.InputDataFields.true_image_shape:
-          input_dict[fields.InputDataFields.true_image_shape]
+          input_dict[fields.InputDataFields.true_image_shape],
+      fields.InputDataFields.original_image_spatial_shape:
+          input_dict[fields.InputDataFields.original_image_spatial_shape]
   }
   if fields.InputDataFields.original_image in input_dict:
     features[fields.InputDataFields.original_image] = input_dict[
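Downstream, the (possibly randomized) source id is folded into a bounded integer feature via fast string hashing. A standalone sketch of that step; HASH_BINS here is a stand-in value, not the constant defined in inputs.py:

```python
import tensorflow as tf

HASH_BINS = 1 << 31  # stand-in; inputs.py defines its own constant

source_id = tf.constant('image_0042')
bucket = tf.string_to_hash_bucket_fast(source_id, HASH_BINS)

with tf.Session() as sess:
  # Deterministic value in [0, HASH_BINS); the same id always hashes the same.
  print(sess.run(bucket))
```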
...
@@ -392,7 +460,8 @@ def create_train_input_fn(train_config, train_input_config,
         num_classes=config_util.get_number_of_classes(model_config),
         data_augmentation_fn=data_augmentation_fn,
         merge_multiple_boxes=train_config.merge_multiple_label_boxes,
-        retain_original_image=train_config.retain_original_images)
+        retain_original_image=train_config.retain_original_images,
+        use_bfloat16=train_config.use_bfloat16)
     tensor_dict = pad_input_data_to_static_shapes(
         tensor_dict=transform_data_fn(tensor_dict),
@@ -414,8 +483,6 @@ def create_train_input_fn(train_config, train_input_config,
...
@@ -414,8 +483,6 @@ def create_train_input_fn(train_config, train_input_config,
def
create_eval_input_fn
(
eval_config
,
eval_input_config
,
model_config
):
def
create_eval_input_fn
(
eval_config
,
eval_input_config
,
model_config
):
"""Creates an eval `input` function for `Estimator`.
"""Creates an eval `input` function for `Estimator`.
# TODO(ronnyvotel,rathodv): Allow batch sizes of more than 1 for eval.
Args:
Args:
eval_config: An eval_pb2.EvalConfig.
eval_config: An eval_pb2.EvalConfig.
eval_input_config: An input_reader_pb2.InputReader.
eval_input_config: An input_reader_pb2.InputReader.
...
@@ -497,7 +564,7 @@ def create_eval_input_fn(eval_config, eval_input_config, model_config):
...
@@ -497,7 +564,7 @@ def create_eval_input_fn(eval_config, eval_input_config, model_config):
     return (_get_features_dict(tensor_dict), _get_labels_dict(tensor_dict))
   dataset = INPUT_BUILDER_UTIL_MAP['dataset_build'](
       eval_input_config,
-      batch_size=1,  # Currently only support batch size of 1 for eval.
+      batch_size=params['batch_size'] if params else eval_config.batch_size,
       transform_input_data_fn=transform_and_pad_input_data_fn)
   return dataset
...
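The batch_size expression follows the Estimator input_fn convention: TPUEstimator invokes input_fn with a params dict carrying the per-replica batch size, while a plain Estimator may pass no params, in which case the configured eval batch size applies. Schematically — a runnable sketch with stand-in values, not the library code:

```python
DEFAULT_EVAL_BATCH_SIZE = 8  # plays the role of eval_config.batch_size

def eval_input_fn(params=None):
  # TPUEstimator calls input_fn(params={'batch_size': ...}); a plain
  # Estimator may omit params, so fall back to the configured value.
  batch_size = params['batch_size'] if params else DEFAULT_EVAL_BATCH_SIZE
  return batch_size

print(eval_input_fn())                   # 8
print(eval_input_fn({'batch_size': 2}))  # 2
```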
research/object_detection/inputs_test.py View file @ 27b4acd4
...
@@ -20,6 +20,7 @@ from __future__ import print_function
 import functools
 import os
+from absl.testing import parameterized
 import numpy as np
 import tensorflow as tf
...
@@ -28,6 +29,7 @@ from object_detection import inputs
 from object_detection.core import preprocessor
 from object_detection.core import standard_fields as fields
 from object_detection.utils import config_util
+from object_detection.utils import test_case

 FLAGS = tf.flags.FLAGS
...
@@ -41,11 +43,13 @@ def _get_configs_for_model(model_name):
   data_path = os.path.join(tf.resource_loader.get_data_files_path(),
                            'test_data/pets_examples.record')
   configs = config_util.get_configs_from_pipeline_file(fname)
+  override_dict = {
+      'train_input_path': data_path,
+      'eval_input_path': data_path,
+      'label_map_path': label_map_path
+  }
   return config_util.merge_external_params_with_configs(
-      configs,
-      train_input_path=data_path,
-      eval_input_path=data_path,
-      label_map_path=label_map_path)
+      configs, kwargs_dict=override_dict)

 def _make_initializable_iterator(dataset):
...
@@ -62,7 +66,7 @@ def _make_initializable_iterator(dataset):
   return iterator

-class InputsTest(tf.test.TestCase):
+class InputsTest(test_case.TestCase, parameterized.TestCase):

   def test_faster_rcnn_resnet50_train_input(self):
     """Tests the training input function for FasterRcnnResnet50."""
@@ -89,52 +93,71 @@ class InputsTest(tf.test.TestCase):
...
@@ -89,52 +93,71 @@ class InputsTest(tf.test.TestCase):
labels
[
fields
.
InputDataFields
.
groundtruth_classes
].
shape
.
as_list
())
labels
[
fields
.
InputDataFields
.
groundtruth_classes
].
shape
.
as_list
())
self
.
assertEqual
(
tf
.
float32
,
self
.
assertEqual
(
tf
.
float32
,
labels
[
fields
.
InputDataFields
.
groundtruth_classes
].
dtype
)
labels
[
fields
.
InputDataFields
.
groundtruth_classes
].
dtype
)
self
.
assertAllEqual
(
[
1
,
100
,
model_config
.
faster_rcnn
.
num_classes
],
labels
[
fields
.
InputDataFields
.
groundtruth_confidences
].
shape
.
as_list
())
self
.
assertEqual
(
tf
.
float32
,
labels
[
fields
.
InputDataFields
.
groundtruth_confidences
].
dtype
)
self
.
assertAllEqual
(
self
.
assertAllEqual
(
[
1
,
100
],
[
1
,
100
],
labels
[
fields
.
InputDataFields
.
groundtruth_weights
].
shape
.
as_list
())
labels
[
fields
.
InputDataFields
.
groundtruth_weights
].
shape
.
as_list
())
self
.
assertEqual
(
tf
.
float32
,
self
.
assertEqual
(
tf
.
float32
,
labels
[
fields
.
InputDataFields
.
groundtruth_weights
].
dtype
)
labels
[
fields
.
InputDataFields
.
groundtruth_weights
].
dtype
)
def
test_faster_rcnn_resnet50_eval_input
(
self
):
@
parameterized
.
parameters
(
{
'eval_batch_size'
:
1
},
{
'eval_batch_size'
:
8
}
)
def
test_faster_rcnn_resnet50_eval_input
(
self
,
eval_batch_size
=
1
):
"""Tests the eval input function for FasterRcnnResnet50."""
"""Tests the eval input function for FasterRcnnResnet50."""
configs
=
_get_configs_for_model
(
'faster_rcnn_resnet50_pets'
)
configs
=
_get_configs_for_model
(
'faster_rcnn_resnet50_pets'
)
model_config
=
configs
[
'model'
]
model_config
=
configs
[
'model'
]
model_config
.
faster_rcnn
.
num_classes
=
37
model_config
.
faster_rcnn
.
num_classes
=
37
eval_config
=
configs
[
'eval_config'
]
eval_config
.
batch_size
=
eval_batch_size
eval_input_fn
=
inputs
.
create_eval_input_fn
(
eval_input_fn
=
inputs
.
create_eval_input_fn
(
configs
[
'
eval_config
'
]
,
configs
[
'eval_input_config
'
],
model_config
)
eval_config
,
configs
[
'eval_input_config
s'
][
0
],
model_config
)
features
,
labels
=
_make_initializable_iterator
(
eval_input_fn
()).
get_next
()
features
,
labels
=
_make_initializable_iterator
(
eval_input_fn
()).
get_next
()
self
.
assertAllEqual
([
1
,
None
,
None
,
3
],
self
.
assertAllEqual
([
eval_batch_size
,
None
,
None
,
3
],
features
[
fields
.
InputDataFields
.
image
].
shape
.
as_list
())
features
[
fields
.
InputDataFields
.
image
].
shape
.
as_list
())
self
.
assertEqual
(
tf
.
float32
,
features
[
fields
.
InputDataFields
.
image
].
dtype
)
self
.
assertEqual
(
tf
.
float32
,
features
[
fields
.
InputDataFields
.
image
].
dtype
)
self
.
assertAllEqual
(
self
.
assertAllEqual
(
[
1
,
None
,
None
,
3
],
[
eval_batch_size
,
None
,
None
,
3
],
features
[
fields
.
InputDataFields
.
original_image
].
shape
.
as_list
())
features
[
fields
.
InputDataFields
.
original_image
].
shape
.
as_list
())
self
.
assertEqual
(
tf
.
uint8
,
self
.
assertEqual
(
tf
.
uint8
,
features
[
fields
.
InputDataFields
.
original_image
].
dtype
)
features
[
fields
.
InputDataFields
.
original_image
].
dtype
)
self
.
assertAllEqual
([
1
],
features
[
inputs
.
HASH_KEY
].
shape
.
as_list
())
self
.
assertAllEqual
([
eval_batch_size
],
features
[
inputs
.
HASH_KEY
].
shape
.
as_list
())
self
.
assertEqual
(
tf
.
int32
,
features
[
inputs
.
HASH_KEY
].
dtype
)
self
.
assertEqual
(
tf
.
int32
,
features
[
inputs
.
HASH_KEY
].
dtype
)
self
.
assertAllEqual
(
self
.
assertAllEqual
(
[
1
,
100
,
4
],
[
eval_batch_size
,
100
,
4
],
labels
[
fields
.
InputDataFields
.
groundtruth_boxes
].
shape
.
as_list
())
labels
[
fields
.
InputDataFields
.
groundtruth_boxes
].
shape
.
as_list
())
self
.
assertEqual
(
tf
.
float32
,
self
.
assertEqual
(
tf
.
float32
,
labels
[
fields
.
InputDataFields
.
groundtruth_boxes
].
dtype
)
labels
[
fields
.
InputDataFields
.
groundtruth_boxes
].
dtype
)
self
.
assertAllEqual
(
self
.
assertAllEqual
(
[
1
,
100
,
model_config
.
faster_rcnn
.
num_classes
],
[
eval_batch_size
,
100
,
model_config
.
faster_rcnn
.
num_classes
],
labels
[
fields
.
InputDataFields
.
groundtruth_classes
].
shape
.
as_list
())
labels
[
fields
.
InputDataFields
.
groundtruth_classes
].
shape
.
as_list
())
self
.
assertEqual
(
tf
.
float32
,
self
.
assertEqual
(
tf
.
float32
,
labels
[
fields
.
InputDataFields
.
groundtruth_classes
].
dtype
)
labels
[
fields
.
InputDataFields
.
groundtruth_classes
].
dtype
)
self
.
assertAllEqual
(
self
.
assertAllEqual
(
[
1
,
100
],
[
eval_batch_size
,
100
,
model_config
.
faster_rcnn
.
num_classes
],
labels
[
fields
.
InputDataFields
.
groundtruth_confidences
].
shape
.
as_list
())
self
.
assertEqual
(
tf
.
float32
,
labels
[
fields
.
InputDataFields
.
groundtruth_confidences
].
dtype
)
self
.
assertAllEqual
(
[
eval_batch_size
,
100
],
labels
[
fields
.
InputDataFields
.
groundtruth_area
].
shape
.
as_list
())
labels
[
fields
.
InputDataFields
.
groundtruth_area
].
shape
.
as_list
())
self
.
assertEqual
(
tf
.
float32
,
self
.
assertEqual
(
tf
.
float32
,
labels
[
fields
.
InputDataFields
.
groundtruth_area
].
dtype
)
labels
[
fields
.
InputDataFields
.
groundtruth_area
].
dtype
)
self
.
assertAllEqual
(
self
.
assertAllEqual
(
[
1
,
100
],
[
eval_batch_size
,
100
],
labels
[
fields
.
InputDataFields
.
groundtruth_is_crowd
].
shape
.
as_list
())
labels
[
fields
.
InputDataFields
.
groundtruth_is_crowd
].
shape
.
as_list
())
self
.
assertEqual
(
self
.
assertEqual
(
tf
.
bool
,
labels
[
fields
.
InputDataFields
.
groundtruth_is_crowd
].
dtype
)
tf
.
bool
,
labels
[
fields
.
InputDataFields
.
groundtruth_is_crowd
].
dtype
)
self
.
assertAllEqual
(
self
.
assertAllEqual
(
[
1
,
100
],
[
eval_batch_size
,
100
],
labels
[
fields
.
InputDataFields
.
groundtruth_difficult
].
shape
.
as_list
())
labels
[
fields
.
InputDataFields
.
groundtruth_difficult
].
shape
.
as_list
())
self
.
assertEqual
(
self
.
assertEqual
(
tf
.
int32
,
labels
[
fields
.
InputDataFields
.
groundtruth_difficult
].
dtype
)
tf
.
int32
,
labels
[
fields
.
InputDataFields
.
groundtruth_difficult
].
dtype
)
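absl's parameterized.TestCase expands each decorated test method into one test case per parameter dict, which is how the same eval-input assertions now run for both batch sizes without duplication. A self-contained sketch of the mechanism:

```python
from absl.testing import absltest
from absl.testing import parameterized

class BatchSizeTest(parameterized.TestCase):

  @parameterized.parameters({'eval_batch_size': 1}, {'eval_batch_size': 8})
  def test_batch_shape(self, eval_batch_size=1):
    # Runs twice: once with eval_batch_size=1, once with eval_batch_size=8.
    batch = [[0.0] * 4] * eval_batch_size
    self.assertLen(batch, eval_batch_size)

if __name__ == '__main__':
  absltest.main()
```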
...
@@ -170,52 +193,73 @@ class InputsTest(tf.test.TestCase):
...
@@ -170,52 +193,73 @@ class InputsTest(tf.test.TestCase):
labels
[
fields
.
InputDataFields
.
groundtruth_classes
].
shape
.
as_list
())
labels
[
fields
.
InputDataFields
.
groundtruth_classes
].
shape
.
as_list
())
self
.
assertEqual
(
tf
.
float32
,
self
.
assertEqual
(
tf
.
float32
,
labels
[
fields
.
InputDataFields
.
groundtruth_classes
].
dtype
)
labels
[
fields
.
InputDataFields
.
groundtruth_classes
].
dtype
)
self
.
assertAllEqual
(
[
batch_size
,
100
,
model_config
.
ssd
.
num_classes
],
labels
[
fields
.
InputDataFields
.
groundtruth_confidences
].
shape
.
as_list
())
self
.
assertEqual
(
tf
.
float32
,
labels
[
fields
.
InputDataFields
.
groundtruth_confidences
].
dtype
)
self
.
assertAllEqual
(
self
.
assertAllEqual
(
[
batch_size
,
100
],
[
batch_size
,
100
],
labels
[
fields
.
InputDataFields
.
groundtruth_weights
].
shape
.
as_list
())
labels
[
fields
.
InputDataFields
.
groundtruth_weights
].
shape
.
as_list
())
self
.
     self.assertEqual(tf.float32,
                      labels[fields.InputDataFields.groundtruth_weights].dtype)

-  def test_ssd_inceptionV2_eval_input(self):
+  @parameterized.parameters(
+      {'eval_batch_size': 1},
+      {'eval_batch_size': 8}
+  )
+  def test_ssd_inceptionV2_eval_input(self, eval_batch_size=1):
     """Tests the eval input function for SSDInceptionV2."""
     configs = _get_configs_for_model('ssd_inception_v2_pets')
     model_config = configs['model']
     model_config.ssd.num_classes = 37
+    eval_config = configs['eval_config']
+    eval_config.batch_size = eval_batch_size
     eval_input_fn = inputs.create_eval_input_fn(
-        configs['eval_config'], configs['eval_input_config'], model_config)
+        eval_config, configs['eval_input_configs'][0], model_config)
     features, labels = _make_initializable_iterator(eval_input_fn()).get_next()
-    self.assertAllEqual([1, 300, 300, 3],
+    self.assertAllEqual([eval_batch_size, 300, 300, 3],
                         features[fields.InputDataFields.image].shape.as_list())
     self.assertEqual(tf.float32, features[fields.InputDataFields.image].dtype)
     self.assertAllEqual(
-        [1, None, None, 3],
+        [eval_batch_size, 300, 300, 3],
         features[fields.InputDataFields.original_image].shape.as_list())
     self.assertEqual(tf.uint8,
                      features[fields.InputDataFields.original_image].dtype)
-    self.assertAllEqual([1], features[inputs.HASH_KEY].shape.as_list())
+    self.assertAllEqual([eval_batch_size],
+                        features[inputs.HASH_KEY].shape.as_list())
     self.assertEqual(tf.int32, features[inputs.HASH_KEY].dtype)
     self.assertAllEqual(
-        [1, 100, 4],
+        [eval_batch_size, 100, 4],
         labels[fields.InputDataFields.groundtruth_boxes].shape.as_list())
     self.assertEqual(tf.float32,
                      labels[fields.InputDataFields.groundtruth_boxes].dtype)
     self.assertAllEqual(
-        [1, 100, model_config.ssd.num_classes],
+        [eval_batch_size, 100, model_config.ssd.num_classes],
         labels[fields.InputDataFields.groundtruth_classes].shape.as_list())
     self.assertEqual(tf.float32,
                      labels[fields.InputDataFields.groundtruth_classes].dtype)
     self.assertAllEqual(
-        [1, 100],
+        [eval_batch_size, 100, model_config.ssd.num_classes],
+        labels[fields.InputDataFields.groundtruth_confidences].shape.as_list())
+    self.assertEqual(
+        tf.float32,
+        labels[fields.InputDataFields.groundtruth_confidences].dtype)
+    self.assertAllEqual(
+        [eval_batch_size, 100],
         labels[fields.InputDataFields.groundtruth_area].shape.as_list())
     self.assertEqual(tf.float32,
                      labels[fields.InputDataFields.groundtruth_area].dtype)
     self.assertAllEqual(
-        [1, 100],
+        [eval_batch_size, 100],
         labels[fields.InputDataFields.groundtruth_is_crowd].shape.as_list())
     self.assertEqual(
         tf.bool, labels[fields.InputDataFields.groundtruth_is_crowd].dtype)
     self.assertAllEqual(
-        [1, 100],
+        [eval_batch_size, 100],
         labels[fields.InputDataFields.groundtruth_difficult].shape.as_list())
     self.assertEqual(
         tf.int32, labels[fields.InputDataFields.groundtruth_difficult].dtype)
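The new decorator comes from `absl.testing.parameterized`, which runs the same test body once per parameter dictionary. A minimal, self-contained sketch of the same pattern (class and test names here are illustrative, not from the diff):

from absl.testing import parameterized
import tensorflow as tf


class BatchShapeTest(tf.test.TestCase, parameterized.TestCase):

  @parameterized.parameters({'batch_size': 1}, {'batch_size': 8})
  def test_batch_dimension(self, batch_size=1):
    # Build a toy batched tensor and check its leading dimension, mirroring
    # how the parameterized eval-input test above checks feature shapes.
    images = tf.zeros([batch_size, 300, 300, 3], dtype=tf.float32)
    self.assertAllEqual([batch_size, 300, 300, 3], images.shape.as_list())


if __name__ == '__main__':
  tf.test.main()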
...
@@ -225,7 +269,7 @@ class InputsTest(tf.test.TestCase):
     configs = _get_configs_for_model('ssd_inception_v2_pets')
     predict_input_fn = inputs.create_predict_input_fn(
         model_config=configs['model'],
-        predict_input_config=configs['eval_input_config'])
+        predict_input_config=configs['eval_input_configs'][0])
     serving_input_receiver = predict_input_fn()
     image = serving_input_receiver.features[fields.InputDataFields.image]
...
@@ -238,10 +282,10 @@ class InputsTest(tf.test.TestCase):
   def test_predict_input_with_additional_channels(self):
     """Tests the predict input function with additional channels."""
     configs = _get_configs_for_model('ssd_inception_v2_pets')
-    configs['eval_input_config'].num_additional_channels = 2
+    configs['eval_input_configs'][0].num_additional_channels = 2
     predict_input_fn = inputs.create_predict_input_fn(
         model_config=configs['model'],
-        predict_input_config=configs['eval_input_config'])
+        predict_input_config=configs['eval_input_configs'][0])
     serving_input_receiver = predict_input_fn()
     image = serving_input_receiver.features[fields.InputDataFields.image]
...
@@ -291,7 +335,7 @@ class InputsTest(tf.test.TestCase):
     configs['model'].ssd.num_classes = 37
     eval_input_fn = inputs.create_eval_input_fn(
         eval_config=configs['train_config'],  # Expecting `EvalConfig`.
-        eval_input_config=configs['eval_input_config'],
+        eval_input_config=configs['eval_input_configs'][0],
         model_config=configs['model'])
     with self.assertRaises(TypeError):
       eval_input_fn()
...
@@ -313,13 +357,45 @@ class InputsTest(tf.test.TestCase):
     configs['model'].ssd.num_classes = 37
     eval_input_fn = inputs.create_eval_input_fn(
         eval_config=configs['eval_config'],
-        eval_input_config=configs['eval_input_config'],
+        eval_input_config=configs['eval_input_configs'][0],
         model_config=configs['eval_config'])  # Expecting `DetectionModel`.
     with self.assertRaises(TypeError):
       eval_input_fn()

+  def test_output_equal_in_replace_empty_string_with_random_number(self):
+    string_placeholder = tf.placeholder(tf.string, shape=[])
+    replaced_string = inputs._replace_empty_string_with_random_number(
+        string_placeholder)
+    test_string = 'hello world'
+    feed_dict = {string_placeholder: test_string}
+    with self.test_session() as sess:
+      out_string = sess.run(replaced_string, feed_dict=feed_dict)
+    self.assertEqual(test_string, out_string)
+
+  def test_output_is_integer_in_replace_empty_string_with_random_number(self):
+    string_placeholder = tf.placeholder(tf.string, shape=[])
+    replaced_string = inputs._replace_empty_string_with_random_number(
+        string_placeholder)
+    empty_string = ''
+    feed_dict = {string_placeholder: empty_string}
+    tf.set_random_seed(0)
+    with self.test_session() as sess:
+      out_string = sess.run(replaced_string, feed_dict=feed_dict)
+    # Test whether out_string is a string which represents an integer.
+    int(out_string)  # throws an error if out_string is not castable to int.
+    self.assertEqual(out_string, '2798129067578209328')
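The two new tests pin down the contract of `inputs._replace_empty_string_with_random_number`: a non-empty string passes through unchanged, while an empty string becomes the decimal rendering of a (seeded) random integer. A plausible TF 1.x sketch of a helper with that behavior — the upstream implementation may differ in details:

import tensorflow as tf

def replace_empty_string_with_random_number(string_tensor):
  """Returns string_tensor, or a random int64 rendered as a string if empty.

  A sketch matching the behavior the tests assert; illustrative only.
  """
  random_string = tf.as_string(
      tf.random_uniform([], minval=0, maxval=tf.int64.max, dtype=tf.int64))
  return tf.cond(tf.equal(string_tensor, ''),
                 true_fn=lambda: random_string,
                 false_fn=lambda: string_tensor)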
-class DataAugmentationFnTest(tf.test.TestCase):
+class DataAugmentationFnTest(test_case.TestCase):

   def test_apply_image_and_box_augmentation(self):
     data_augmentation_options = [
...
@@ -352,6 +428,50 @@ class DataAugmentationFnTest(tf.test.TestCase):
         [[10, 10, 20, 20]]
     )

+  def test_apply_image_and_box_augmentation_with_scores(self):
+    data_augmentation_options = [
+        (preprocessor.resize_image, {
+            'new_height': 20,
+            'new_width': 20,
+            'method': tf.image.ResizeMethod.NEAREST_NEIGHBOR
+        }),
+        (preprocessor.scale_boxes_to_pixel_coordinates, {}),
+    ]
+    data_augmentation_fn = functools.partial(
+        inputs.augment_input_data,
+        data_augmentation_options=data_augmentation_options)
+    tensor_dict = {
+        fields.InputDataFields.image:
+            tf.constant(np.random.rand(10, 10, 3).astype(np.float32)),
+        fields.InputDataFields.groundtruth_boxes:
+            tf.constant(np.array([[.5, .5, 1., 1.]], np.float32)),
+        fields.InputDataFields.groundtruth_classes:
+            tf.constant(np.array([1.0], np.float32)),
+        fields.InputDataFields.groundtruth_confidences:
+            tf.constant(np.array([0.8], np.float32)),
+    }
+    augmented_tensor_dict = data_augmentation_fn(tensor_dict=tensor_dict)
+    with self.test_session() as sess:
+      augmented_tensor_dict_out = sess.run(augmented_tensor_dict)
+    self.assertAllEqual(
+        augmented_tensor_dict_out[fields.InputDataFields.image].shape,
+        [20, 20, 3])
+    self.assertAllClose(
+        augmented_tensor_dict_out[fields.InputDataFields.groundtruth_boxes],
+        [[10, 10, 20, 20]])
+    self.assertAllClose(
+        augmented_tensor_dict_out[fields.InputDataFields.groundtruth_classes],
+        [1.0])
+    self.assertAllClose(
+        augmented_tensor_dict_out[
+            fields.InputDataFields.groundtruth_confidences],
+        [0.8])
   def test_include_masks_in_data_augmentation(self):
     data_augmentation_options = [
         (preprocessor.resize_image, {
...
@@ -425,7 +545,7 @@ def _fake_image_resizer_fn(image, mask):
   return (image, mask, tf.shape(image))

-class DataTransformationFnTest(tf.test.TestCase):
+class DataTransformationFnTest(test_case.TestCase):

   def test_combine_additional_channels_if_present(self):
     image = np.random.rand(4, 4, 3).astype(np.float32)
...
@@ -476,6 +596,9 @@ class DataTransformationFnTest(tf.test.TestCase):
     self.assertAllClose(
         transformed_inputs[fields.InputDataFields.groundtruth_classes],
         [[0, 0, 1], [1, 0, 0]])
+    self.assertAllClose(
+        transformed_inputs[fields.InputDataFields.groundtruth_confidences],
+        [[0, 0, 1], [1, 0, 0]])

   def test_returns_correct_merged_boxes(self):
     tensor_dict = {
...
@@ -504,6 +627,9 @@ class DataTransformationFnTest(tf.test.TestCase):
     self.assertAllClose(
         transformed_inputs[fields.InputDataFields.groundtruth_classes],
         [[1, 0, 1]])
+    self.assertAllClose(
+        transformed_inputs[fields.InputDataFields.groundtruth_confidences],
+        [[1, 0, 1]])

   def test_returns_resized_masks(self):
     tensor_dict = {
...
@@ -512,8 +638,11 @@ class DataTransformationFnTest(tf.test.TestCase):
         fields.InputDataFields.groundtruth_instance_masks:
             tf.constant(np.random.rand(2, 4, 4).astype(np.float32)),
         fields.InputDataFields.groundtruth_classes:
-            tf.constant(np.array([3, 1], np.int32))
+            tf.constant(np.array([3, 1], np.int32)),
+        fields.InputDataFields.original_image_spatial_shape:
+            tf.constant(np.array([4, 4], np.int32))
     }

     def fake_image_resizer_fn(image, masks=None):
       resized_image = tf.image.resize_images(image, [8, 8])
       results = [resized_image]
...
@@ -538,7 +667,9 @@ class DataTransformationFnTest(tf.test.TestCase):
     self.assertAllEqual(transformed_inputs[
         fields.InputDataFields.original_image].dtype, tf.uint8)
     self.assertAllEqual(transformed_inputs[
-        fields.InputDataFields.original_image].shape, [4, 4, 3])
+        fields.InputDataFields.original_image_spatial_shape], [4, 4])
+    self.assertAllEqual(transformed_inputs[
+        fields.InputDataFields.original_image].shape, [8, 8, 3])
     self.assertAllEqual(transformed_inputs[
         fields.InputDataFields.groundtruth_instance_masks].shape, [2, 8, 8])
...
@@ -550,6 +681,7 @@ class DataTransformationFnTest(tf.test.TestCase):
         fields.InputDataFields.groundtruth_classes:
             tf.constant(np.array([3, 1], np.int32))
     }

     def fake_model_preprocessor_fn(image):
       return (image / 255., tf.expand_dims(tf.shape(image)[1:], axis=0))
...
@@ -577,6 +709,7 @@ class DataTransformationFnTest(tf.test.TestCase):
         fields.InputDataFields.groundtruth_classes:
             tf.constant(np.array([3, 1], np.int32))
     }

     def add_one_data_augmentation_fn(tensor_dict):
       return {key: value + 1 for key, value in tensor_dict.items()}
...
@@ -605,8 +738,10 @@ class DataTransformationFnTest(tf.test.TestCase):
         fields.InputDataFields.groundtruth_classes:
             tf.constant(np.array([3, 1], np.int32))
     }

     def mul_two_model_preprocessor_fn(image):
       return (image * 2, tf.expand_dims(tf.shape(image)[1:], axis=0))

     def add_five_to_image_data_augmentation_fn(tensor_dict):
       tensor_dict[fields.InputDataFields.image] += 5
       return tensor_dict
...
@@ -626,7 +761,7 @@ class DataTransformationFnTest(tf.test.TestCase):
                         (np_image + 5) * 2)

-class PadInputDataToStaticShapesFnTest(tf.test.TestCase):
+class PadInputDataToStaticShapesFnTest(test_case.TestCase):

   def test_pad_images_boxes_and_classes(self):
     input_tensor_dict = {
...
@@ -636,7 +771,10 @@ class PadInputDataToStaticShapesFnTest(tf.test.TestCase):
             tf.placeholder(tf.float32, [None, 4]),
         fields.InputDataFields.groundtruth_classes:
             tf.placeholder(tf.int32, [None, 3]),
         fields.InputDataFields.true_image_shape:
             tf.placeholder(tf.int32, [3]),
+        fields.InputDataFields.original_image_spatial_shape:
+            tf.placeholder(tf.int32, [2])
     }
     padded_tensor_dict = inputs.pad_input_data_to_static_shapes(
         tensor_dict=input_tensor_dict,
...
@@ -650,6 +788,9 @@ class PadInputDataToStaticShapesFnTest(tf.test.TestCase):
     self.assertAllEqual(
         padded_tensor_dict[fields.InputDataFields.true_image_shape]
         .shape.as_list(), [3])
+    self.assertAllEqual(
+        padded_tensor_dict[fields.InputDataFields.original_image_spatial_shape]
+        .shape.as_list(), [2])
     self.assertAllEqual(
         padded_tensor_dict[fields.InputDataFields.groundtruth_boxes]
         .shape.as_list(), [3, 4])
...
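For context, `inputs.pad_input_data_to_static_shapes` is what gives every tensor in the dict a fixed shape, so the test above can assert static shapes like [3, 4], [3], and now [2]. A hedged usage sketch — the keyword names beyond `tensor_dict` follow the function's signature elsewhere in inputs.py and are assumptions here; the values are illustrative:

import tensorflow as tf
from object_detection import inputs
from object_detection.core import standard_fields as fields

tensor_dict = {
    fields.InputDataFields.image:
        tf.placeholder(tf.float32, [None, None, 3]),
    fields.InputDataFields.groundtruth_boxes:
        tf.placeholder(tf.float32, [None, 4]),
    fields.InputDataFields.groundtruth_classes:
        tf.placeholder(tf.int32, [None, 3]),
}
# Pads boxes/classes out to max_num_boxes rows and fixes the spatial size,
# so TPU-friendly downstream code sees fully static shapes.
padded = inputs.pad_input_data_to_static_shapes(
    tensor_dict=tensor_dict,
    max_num_boxes=3,
    num_classes=3,
    spatial_image_shape=[5, 6])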
research/object_detection/legacy/eval.py
View file @ 27b4acd4
...
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-
 r"""Evaluation executable for detection models.

 This executable is used to evaluate DetectionModels. There are two ways of
...
@@ -54,29 +53,30 @@ from object_detection.legacy import evaluator
 from object_detection.utils import config_util
 from object_detection.utils import label_map_util

 tf.logging.set_verbosity(tf.logging.INFO)

 flags = tf.app.flags
 flags.DEFINE_boolean('eval_training_data', False,
                      'If training data should be evaluated for this job.')
-flags.DEFINE_string('checkpoint_dir', '',
-                    'Directory containing checkpoints to evaluate, typically '
-                    'set to `train_dir` used in the training job.')
-flags.DEFINE_string('eval_dir', '',
-                    'Directory to write eval summaries to.')
-flags.DEFINE_string('pipeline_config_path', '',
-                    'Path to a pipeline_pb2.TrainEvalPipelineConfig config '
-                    'file. If provided, other configs are ignored')
+flags.DEFINE_string(
+    'checkpoint_dir', '',
+    'Directory containing checkpoints to evaluate, typically '
+    'set to `train_dir` used in the training job.')
+flags.DEFINE_string('eval_dir', '', 'Directory to write eval summaries to.')
+flags.DEFINE_string(
+    'pipeline_config_path', '',
+    'Path to a pipeline_pb2.TrainEvalPipelineConfig config '
+    'file. If provided, other configs are ignored')
 flags.DEFINE_string('eval_config_path', '',
                     'Path to an eval_pb2.EvalConfig config file.')
 flags.DEFINE_string('input_config_path', '',
                     'Path to an input_reader_pb2.InputReader config file.')
 flags.DEFINE_string('model_config_path', '',
                     'Path to a model_pb2.DetectionModel config file.')
-flags.DEFINE_boolean('run_once', False, 'Option to only run a single pass of '
-                     'evaluation. Overrides the `max_evals` parameter in the '
-                     'provided config.')
+flags.DEFINE_boolean(
+    'run_once', False, 'Option to only run a single pass of '
+    'evaluation. Overrides the `max_evals` parameter in the '
+    'provided config.')
 FLAGS = flags.FLAGS
...
@@ -88,9 +88,10 @@ def main(unused_argv):
   if FLAGS.pipeline_config_path:
     configs = config_util.get_configs_from_pipeline_file(
         FLAGS.pipeline_config_path)
-    tf.gfile.Copy(FLAGS.pipeline_config_path,
-                  os.path.join(FLAGS.eval_dir, 'pipeline.config'),
-                  overwrite=True)
+    tf.gfile.Copy(
+        FLAGS.pipeline_config_path,
+        os.path.join(FLAGS.eval_dir, 'pipeline.config'),
+        overwrite=True)
   else:
     configs = config_util.get_configs_from_multiple_files(
         model_config_path=FLAGS.model_config_path,
...
@@ -99,9 +100,7 @@ def main(unused_argv):
   for name, config in [('model.config', FLAGS.model_config_path),
                        ('eval.config', FLAGS.eval_config_path),
                        ('input.config', FLAGS.input_config_path)]:
-    tf.gfile.Copy(config,
-                  os.path.join(FLAGS.eval_dir, name),
-                  overwrite=True)
+    tf.gfile.Copy(config, os.path.join(FLAGS.eval_dir, name), overwrite=True)

   model_config = configs['model']
   eval_config = configs['eval_config']
...
@@ -110,9 +109,7 @@ def main(unused_argv):
     input_config = configs['train_input_config']

   model_fn = functools.partial(
       model_builder.build,
       model_config=model_config,
       is_training=False)

   def get_next(config):
     return dataset_builder.make_initializable_iterator(
...
@@ -120,10 +117,8 @@ def main(unused_argv):
   create_input_dict_fn = functools.partial(get_next, input_config)

-  label_map = label_map_util.load_labelmap(input_config.label_map_path)
-  max_num_classes = max([item.id for item in label_map.item])
-  categories = label_map_util.convert_label_map_to_categories(
-      label_map, max_num_classes)
+  categories = label_map_util.create_categories_from_labelmap(
+      input_config.label_map_path)

   if FLAGS.run_once:
     eval_config.max_evals = 1
...
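The label map change in this file collapses three calls into one: `label_map_util.create_categories_from_labelmap` loads the label map from a path and converts it to a categories list in a single step. A sketch of both call styles (the label map path is illustrative):

from object_detection.utils import label_map_util

label_map_path = 'data/pet_label_map.pbtxt'  # illustrative path

# Old, three-step style removed by this diff:
label_map = label_map_util.load_labelmap(label_map_path)
max_num_classes = max([item.id for item in label_map.item])
categories = label_map_util.convert_label_map_to_categories(
    label_map, max_num_classes)

# New, single-call style:
categories = label_map_util.create_categories_from_labelmap(label_map_path)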
research/object_detection/legacy/evaluator.py
View file @ 27b4acd4
...
@@ -273,6 +273,7 @@ def evaluate(create_input_dict_fn, create_model_fn, eval_config, categories,
       master=eval_config.eval_master,
       save_graph=eval_config.save_graph,
       save_graph_dir=(eval_dir if eval_config.save_graph else ''),
-      losses_dict=losses_dict)
+      losses_dict=losses_dict,
+      eval_export_path=eval_config.export_path)
   return metrics
research/object_detection/matchers/argmax_matcher.py
View file @ 27b4acd4
...
@@ -99,17 +99,19 @@ class ArgMaxMatcher(matcher.Matcher):
       if self._unmatched_threshold == self._matched_threshold:
         raise ValueError('When negatives are in between matched and '
                          'unmatched thresholds, these cannot be of equal '
-                         'value. matched: %s, unmatched: %s',
-                         self._matched_threshold, self._unmatched_threshold)
+                         'value. matched: {}, unmatched: {}'.format(
+                             self._matched_threshold,
+                             self._unmatched_threshold))
     self._force_match_for_each_row = force_match_for_each_row
     self._negatives_lower_than_unmatched = negatives_lower_than_unmatched

-  def _match(self, similarity_matrix):
+  def _match(self, similarity_matrix, valid_rows):
     """Tries to match each column of the similarity matrix to a row.

     Args:
       similarity_matrix: tensor of shape [N, M] representing any similarity
         metric.
+      valid_rows: a boolean tensor of shape [N] indicating valid rows.

     Returns:
       Match object with corresponding matches for each of M columns.
...
@@ -167,8 +169,10 @@ class ArgMaxMatcher(matcher.Matcher):
             similarity_matrix)
         force_match_column_ids = tf.argmax(similarity_matrix, 1,
                                            output_type=tf.int32)
-        force_match_column_indicators = tf.one_hot(
-            force_match_column_ids, depth=similarity_matrix_shape[1])
+        force_match_column_indicators = (
+            tf.one_hot(force_match_column_ids,
+                       depth=similarity_matrix_shape[1]) *
+            tf.cast(tf.expand_dims(valid_rows, axis=-1), dtype=tf.float32))
         force_match_row_ids = tf.argmax(force_match_column_indicators, 0,
                                         output_type=tf.int32)
         force_match_column_mask = tf.cast(
...
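The force-match fix works by zeroing out the one-hot column indicators of padded (invalid) rows before the row argmax, so a padded groundtruth row can never force-claim a column. A small TF 1.x sketch of just that masking step, with toy values:

import tensorflow as tf

similarity = tf.constant([[1., 1., 3.],
                          [2., 0., 0.],
                          [0., 0., 0.]])           # row 2 is padding
valid_rows = tf.constant([True, True, False])

force_match_column_ids = tf.argmax(similarity, 1, output_type=tf.int32)
# one_hot marks, per row, the best column; multiplying by valid_rows
# (broadcast over columns) erases the votes of padded rows.
force_match_column_indicators = (
    tf.one_hot(force_match_column_ids, depth=3) *
    tf.cast(tf.expand_dims(valid_rows, axis=-1), dtype=tf.float32))

with tf.Session() as sess:
  print(sess.run(force_match_column_indicators))
  # [[0. 0. 1.]
  #  [1. 0. 0.]
  #  [0. 0. 0.]]   <- the padded row no longer forces a match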
research/object_detection/matchers/argmax_matcher_test.py
View file @ 27b4acd4
...
@@ -182,6 +182,34 @@ class ArgMaxMatcherTest(test_case.TestCase):
     self.assertAllEqual(np.nonzero(res_unmatched_cols)[0],
                         expected_unmatched_cols)

+  def test_return_correct_matches_using_force_match_padded_groundtruth(self):
+
+    def graph_fn(similarity, valid_rows):
+      matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=3.,
+                                             unmatched_threshold=2.,
+                                             force_match_for_each_row=True)
+      match = matcher.match(similarity, valid_rows)
+      matched_cols = match.matched_column_indicator()
+      unmatched_cols = match.unmatched_column_indicator()
+      match_results = match.match_results
+      return (matched_cols, unmatched_cols, match_results)
+
+    similarity = np.array([[1, 1, 1, 3, 1],
+                           [-1, 0, -2, -2, -1],
+                           [0, 0, 0, 0, 0],
+                           [3, 0, -1, 2, 0],
+                           [0, 0, 0, 0, 0]], dtype=np.float32)
+    valid_rows = np.array([True, True, False, True, False])
+    expected_matched_cols = np.array([0, 1, 3])
+    expected_matched_rows = np.array([3, 1, 0])
+    expected_unmatched_cols = np.array([2, 4])  # col 2 has too high max val
+
+    (res_matched_cols, res_unmatched_cols,
+     match_results) = self.execute(graph_fn, [similarity, valid_rows])
+    self.assertAllEqual(match_results[res_matched_cols],
+                        expected_matched_rows)
+    self.assertAllEqual(np.nonzero(res_matched_cols)[0],
+                        expected_matched_cols)
+    self.assertAllEqual(np.nonzero(res_unmatched_cols)[0],
+                        expected_unmatched_cols)
+
   def test_valid_arguments_corner_case(self):
     argmax_matcher.ArgMaxMatcher(matched_threshold=1,
                                  unmatched_threshold=1)
...
research/object_detection/matchers/bipartite_matcher.py
View file @ 27b4acd4
...
@@ -35,7 +35,7 @@ class GreedyBipartiteMatcher(matcher.Matcher):
     super(GreedyBipartiteMatcher, self).__init__(
         use_matmul_gather=use_matmul_gather)

-  def _match(self, similarity_matrix, num_valid_rows=-1):
+  def _match(self, similarity_matrix, valid_rows):
     """Bipartite matches a collection rows and columns. A greedy bi-partite.

     TODO(rathodv): Add num_valid_columns options to match only that many columns
...
@@ -44,21 +44,27 @@ class GreedyBipartiteMatcher(matcher.Matcher):
     Args:
       similarity_matrix: Float tensor of shape [N, M] with pairwise similarity
         where higher values mean more similar.
-      num_valid_rows: A scalar or a 1-D tensor with one element describing the
-        number of valid rows of similarity_matrix to consider for the bipartite
-        matching. If set to be negative, then all rows from similarity_matrix
-        are used.
+      valid_rows: A boolean tensor of shape [N] indicating the rows that are
+        valid.

     Returns:
       match_results: int32 tensor of shape [M] with match_results[i]=-1
         meaning that column i is not matched and otherwise that it is matched
         to row match_results[i].
     """
+    valid_row_sim_matrix = tf.gather(
+        similarity_matrix, tf.squeeze(tf.where(valid_rows), axis=-1))
+    invalid_row_sim_matrix = tf.gather(
+        similarity_matrix,
+        tf.squeeze(tf.where(tf.logical_not(valid_rows)), axis=-1))
+    similarity_matrix = tf.concat(
+        [valid_row_sim_matrix, invalid_row_sim_matrix], axis=0)
     # Convert similarity matrix to distance matrix as tf.image.bipartite tries
     # to find minimum distance matches.
     distance_matrix = -1 * similarity_matrix
+    num_valid_rows = tf.reduce_sum(tf.to_float(valid_rows))
     _, match_results = image_ops.bipartite_match(
-        distance_matrix, num_valid_rows)
+        distance_matrix, num_valid_rows=num_valid_rows)
     match_results = tf.reshape(match_results, [-1])
     match_results = tf.cast(match_results, tf.int32)
     return match_results
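As the diff shows, `image_ops.bipartite_match` only accepts a *count* of valid rows (it considers the top `num_valid_rows` rows), so the new `_match` first gathers the valid rows to the top of the matrix and the invalid ones below, then derives the count from the boolean mask. A toy TF 1.x sketch of that reordering step, with illustrative values:

import tensorflow as tf

similarity = tf.constant([[0.1, 0.2, 0.3],    # row 0: padding
                          [0.5, 0.1, 0.8]])   # row 1: real groundtruth
valid_rows = tf.constant([False, True])

# Move valid rows to the top, invalid rows to the bottom.
valid_part = tf.gather(similarity,
                       tf.squeeze(tf.where(valid_rows), axis=-1))
invalid_part = tf.gather(
    similarity,
    tf.squeeze(tf.where(tf.logical_not(valid_rows)), axis=-1))
reordered = tf.concat([valid_part, invalid_part], axis=0)
num_valid_rows = tf.reduce_sum(tf.to_float(valid_rows))

with tf.Session() as sess:
  print(sess.run(reordered))        # [[0.5 0.1 0.8] [0.1 0.2 0.3]]
  print(sess.run(num_valid_rows))   # 1.0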
research/object_detection/matchers/bipartite_matcher_test.py
View file @ 27b4acd4
...
@@ -24,44 +24,54 @@ class GreedyBipartiteMatcherTest(tf.test.TestCase):
   def test_get_expected_matches_when_all_rows_are_valid(self):
     similarity_matrix = tf.constant([[0.50, 0.1, 0.8], [0.15, 0.2, 0.3]])
-    num_valid_rows = 2
+    valid_rows = tf.ones([2], dtype=tf.bool)
     expected_match_results = [-1, 1, 0]

     matcher = bipartite_matcher.GreedyBipartiteMatcher()
-    match = matcher.match(similarity_matrix, num_valid_rows=num_valid_rows)
+    match = matcher.match(similarity_matrix, valid_rows=valid_rows)
     with self.test_session() as sess:
       match_results_out = sess.run(match._match_results)
       self.assertAllEqual(match_results_out, expected_match_results)

-  def test_get_expected_matches_with_valid_rows_set_to_minus_one(self):
+  def test_get_expected_matches_with_all_rows_be_default(self):
     similarity_matrix = tf.constant([[0.50, 0.1, 0.8], [0.15, 0.2, 0.3]])
-    num_valid_rows = -1
     expected_match_results = [-1, 1, 0]

     matcher = bipartite_matcher.GreedyBipartiteMatcher()
-    match = matcher.match(similarity_matrix, num_valid_rows=num_valid_rows)
+    match = matcher.match(similarity_matrix)
     with self.test_session() as sess:
       match_results_out = sess.run(match._match_results)
       self.assertAllEqual(match_results_out, expected_match_results)

   def test_get_no_matches_with_zero_valid_rows(self):
     similarity_matrix = tf.constant([[0.50, 0.1, 0.8], [0.15, 0.2, 0.3]])
-    num_valid_rows = 0
+    valid_rows = tf.zeros([2], dtype=tf.bool)
     expected_match_results = [-1, -1, -1]

     matcher = bipartite_matcher.GreedyBipartiteMatcher()
-    match = matcher.match(similarity_matrix, num_valid_rows=num_valid_rows)
+    match = matcher.match(similarity_matrix, valid_rows)
     with self.test_session() as sess:
       match_results_out = sess.run(match._match_results)
       self.assertAllEqual(match_results_out, expected_match_results)

   def test_get_expected_matches_with_only_one_valid_row(self):
     similarity_matrix = tf.constant([[0.50, 0.1, 0.8], [0.15, 0.2, 0.3]])
-    num_valid_rows = 1
+    valid_rows = tf.constant([True, False], dtype=tf.bool)
     expected_match_results = [-1, -1, 0]

     matcher = bipartite_matcher.GreedyBipartiteMatcher()
-    match = matcher.match(similarity_matrix, num_valid_rows=num_valid_rows)
+    match = matcher.match(similarity_matrix, valid_rows)
     with self.test_session() as sess:
       match_results_out = sess.run(match._match_results)
       self.assertAllEqual(match_results_out, expected_match_results)

+  def test_get_expected_matches_with_only_one_valid_row_at_bottom(self):
+    similarity_matrix = tf.constant([[0.15, 0.2, 0.3], [0.50, 0.1, 0.8]])
+    valid_rows = tf.constant([False, True], dtype=tf.bool)
+    expected_match_results = [-1, -1, 0]
+
+    matcher = bipartite_matcher.GreedyBipartiteMatcher()
+    match = matcher.match(similarity_matrix, valid_rows)
+    with self.test_session() as sess:
+      match_results_out = sess.run(match._match_results)
+      self.assertAllEqual(match_results_out, expected_match_results)
...
research/object_detection/meta_architectures/faster_rcnn_meta_arch.py
View file @ 27b4acd4
...
@@ -103,7 +103,6 @@ from object_detection.core import box_list_ops
 from object_detection.core import box_predictor
 from object_detection.core import losses
 from object_detection.core import model
-from object_detection.core import post_processing
 from object_detection.core import standard_fields as fields
 from object_detection.core import target_assigner
 from object_detection.utils import ops
...
@@ -234,11 +233,11 @@ class FasterRCNNMetaArch(model.DetectionModel):
                first_stage_box_predictor_depth,
                first_stage_minibatch_size,
                first_stage_sampler,
-               first_stage_nms_score_threshold,
-               first_stage_nms_iou_threshold,
+               first_stage_non_max_suppression_fn,
                first_stage_max_proposals,
                first_stage_localization_loss_weight,
                first_stage_objectness_loss_weight,
+               crop_and_resize_fn,
                initial_crop_size,
                maxpool_kernel_size,
                maxpool_stride,
...
@@ -255,8 +254,9 @@ class FasterRCNNMetaArch(model.DetectionModel):
                hard_example_miner=None,
                parallel_iterations=16,
                add_summaries=True,
-               use_matmul_crop_and_resize=False,
-               clip_anchors_to_image=False):
+               clip_anchors_to_image=False,
+               use_static_shapes=False,
+               resize_masks=True):
     """FasterRCNNMetaArch Constructor.

     Args:
...
@@ -309,18 +309,22 @@ class FasterRCNNMetaArch(model.DetectionModel):
         to the loss function for any given image within the image batch and is
         only called "batch_size" due to terminology from the Faster R-CNN paper.
       first_stage_sampler: Sampler to use for first stage loss (RPN loss).
-      first_stage_nms_score_threshold: Score threshold for non max suppression
-        for the Region Proposal Network (RPN). This value is expected to be in
-        [0, 1] as it is applied directly after a softmax transformation. The
-        recommended value for Faster R-CNN is 0.
-      first_stage_nms_iou_threshold: The Intersection Over Union (IOU) threshold
-        for performing Non-Max Suppression (NMS) on the boxes predicted by the
-        Region Proposal Network (RPN).
+      first_stage_non_max_suppression_fn: batch_multiclass_non_max_suppression
+        callable that takes `boxes`, `scores` and optional `clip_window` (with
+        all other inputs already set) and returns a dictionary containing
+        tensors with keys: `detection_boxes`, `detection_scores`,
+        `detection_classes`, `num_detections`. This is used to perform non max
+        suppression on the boxes predicted by the Region Proposal Network
+        (RPN).
+        See `post_processing.batch_multiclass_non_max_suppression` for the type
+        and shape of these tensors.
       first_stage_max_proposals: Maximum number of boxes to retain after
         performing Non-Max Suppression (NMS) on the boxes predicted by the
         Region Proposal Network (RPN).
       first_stage_localization_loss_weight: A float
       first_stage_objectness_loss_weight: A float
+      crop_and_resize_fn: A differentiable resampler to use for cropping RPN
+        proposal features.
       initial_crop_size: A single integer indicating the output size
         (width and height are set to be the same) of the initial bilinear
         interpolation based cropping during ROI pooling.
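With this change the RPN's NMS hyperparameters no longer live in the meta-architecture; callers hand in a ready-made callable. In the pipeline builders this kind of callable is typically produced with `functools.partial` over `post_processing.batch_multiclass_non_max_suppression`; a hedged sketch of that pattern (threshold values are illustrative, not from this diff):

import functools
from object_detection.core import post_processing

# Everything except `boxes`, `scores` and `clip_window` is bound here,
# matching the callable contract described in the docstring above.
first_stage_non_max_suppression_fn = functools.partial(
    post_processing.batch_multiclass_non_max_suppression,
    score_thresh=0.0,      # the RPN applies NMS right after a softmax
    iou_thresh=0.7,
    max_size_per_class=300,
    max_total_size=300)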
...
@@ -367,12 +371,13 @@ class FasterRCNNMetaArch(model.DetectionModel):
         in parallel for calls to tf.map_fn.
       add_summaries: boolean (default: True) controlling whether summary ops
         should be added to tensorflow graph.
-      use_matmul_crop_and_resize: Force the use of matrix multiplication based
-        crop and resize instead of standard tf.image.crop_and_resize while
-        computing second stage input feature maps.
       clip_anchors_to_image: Normally, anchors generated for a given image size
         are pruned during training if they lie outside the image window. This
         option clips the anchors to be within the image instead of pruning.
+      use_static_shapes: If True, uses implementation of ops with static shape
+        guarantees.
+      resize_masks: Indicates whether the masks present in the groundtruth
+        should be resized in the model with `image_resizer_fn`.

     Raises:
       ValueError: If `second_stage_batch_size` > `first_stage_max_proposals` at
...
@@ -384,9 +389,6 @@ class FasterRCNNMetaArch(model.DetectionModel):
     # in the future.
     super(FasterRCNNMetaArch, self).__init__(num_classes=num_classes)

-    if is_training and second_stage_batch_size > first_stage_max_proposals:
-      raise ValueError('second_stage_batch_size should be no greater than '
-                       'first_stage_max_proposals.')
     if not isinstance(first_stage_anchor_generator,
                       grid_anchor_generator.GridAnchorGenerator):
       raise ValueError('first_stage_anchor_generator must be of type '
...
@@ -394,6 +396,7 @@ class FasterRCNNMetaArch(model.DetectionModel):
     self._is_training = is_training
     self._image_resizer_fn = image_resizer_fn
+    self._resize_masks = resize_masks
     self._feature_extractor = feature_extractor
     self._number_of_stages = number_of_stages
...
@@ -425,9 +428,9 @@ class FasterRCNNMetaArch(model.DetectionModel):
             min_depth=0,
             max_depth=0))

-    self._first_stage_nms_score_threshold = first_stage_nms_score_threshold
-    self._first_stage_nms_iou_threshold = first_stage_nms_iou_threshold
+    self._first_stage_nms_fn = first_stage_non_max_suppression_fn
     self._first_stage_max_proposals = first_stage_max_proposals
+    self._use_static_shapes = use_static_shapes

     self._first_stage_localization_loss = (
         losses.WeightedSmoothL1LocalizationLoss())
...
@@ -437,6 +440,7 @@ class FasterRCNNMetaArch(model.DetectionModel):
     self._first_stage_obj_loss_weight = first_stage_objectness_loss_weight

     # Per-region cropping parameters
+    self._crop_and_resize_fn = crop_and_resize_fn
     self._initial_crop_size = initial_crop_size
     self._maxpool_kernel_size = maxpool_kernel_size
     self._maxpool_stride = maxpool_stride
...
@@ -458,7 +462,6 @@ class FasterRCNNMetaArch(model.DetectionModel):
     self._second_stage_cls_loss_weight = second_stage_classification_loss_weight
     self._second_stage_mask_loss_weight = (
         second_stage_mask_prediction_loss_weight)
-    self._use_matmul_crop_and_resize = use_matmul_crop_and_resize
     self._hard_example_miner = hard_example_miner
     self._parallel_iterations = parallel_iterations
...
...
@@ -673,9 +676,13 @@ class FasterRCNNMetaArch(model.DetectionModel):
     }

     if self._number_of_stages >= 2:
+      # If mixed-precision training on TPU is enabled, rpn_box_encodings and
+      # rpn_objectness_predictions_with_background are bfloat16 tensors.
+      # Considered prediction results, they need to be casted to float32
+      # tensors for correct postprocess_rpn computation in
+      # predict_second_stage.
       prediction_dict.update(self._predict_second_stage(
-          rpn_box_encodings,
-          rpn_objectness_predictions_with_background,
+          tf.to_float(rpn_box_encodings),
+          tf.to_float(rpn_objectness_predictions_with_background),
           rpn_features_to_crop,
           self._anchors.get(), image_shape, true_image_shapes))
...
@@ -719,7 +726,7 @@ class FasterRCNNMetaArch(model.DetectionModel):
         [batch_size, num_valid_anchors, 2] containing class
         predictions (logits) for each of the anchors. Note that this
         tensor *includes* background class predictions (at class index 0).
-      rpn_features_to_crop: A 4-D float32 tensor with shape
+      rpn_features_to_crop: A 4-D float32 or bfloat16 tensor with shape
         [batch_size, height, width, depth] representing image features to crop
         using the proposal boxes predicted by the RPN.
       anchors: 2-D float tensor of shape
...
@@ -758,17 +765,22 @@ class FasterRCNNMetaArch(model.DetectionModel):
         boxes proposed by the RPN, thus enabling one to extract features and
         get box classification and prediction for externally selected areas
         of the image.
-      6) box_classifier_features: a 4-D float32 tensor representing the
-         features for each proposal.
+      6) box_classifier_features: a 4-D float32 or bfloat16 tensor
+         representing the features for each proposal.
     """
     image_shape_2d = self._image_batch_shape_2d(image_shape)
     proposal_boxes_normalized, _, num_proposals = self._postprocess_rpn(
         rpn_box_encodings, rpn_objectness_predictions_with_background,
         anchors, image_shape_2d, true_image_shapes)
+    # If mixed-precision training on TPU is enabled, the dtype of
+    # rpn_features_to_crop is bfloat16, otherwise it is float32. tf.cast is
+    # used to match the dtype of proposal_boxes_normalized to that of
+    # rpn_features_to_crop for further computation.
     flattened_proposal_feature_maps = (
         self._compute_second_stage_input_feature_maps(
-            rpn_features_to_crop, proposal_boxes_normalized))
+            rpn_features_to_crop,
+            tf.cast(proposal_boxes_normalized, rpn_features_to_crop.dtype)))
     box_classifier_features = (
         self._feature_extractor.extract_box_classifier_features(
...
@@ -956,8 +968,11 @@ class FasterRCNNMetaArch(model.DetectionModel):
       image_shape: A 1-D tensor representing the input image shape.
     """
     image_shape = tf.shape(preprocessed_inputs)
-    rpn_features_to_crop, _ = self._feature_extractor.extract_proposal_features(
-        preprocessed_inputs,
-        scope=self.first_stage_feature_extractor_scope)
+    rpn_features_to_crop, self.endpoints = (
+        self._feature_extractor.extract_proposal_features(
+            preprocessed_inputs,
+            scope=self.first_stage_feature_extractor_scope))

     feature_map_shape = tf.shape(rpn_features_to_crop)
     anchors = box_list_ops.concatenate(
...
@@ -965,12 +980,15 @@ class FasterRCNNMetaArch(model.DetectionModel):
                          feature_map_shape[2])]))
     with slim.arg_scope(self._first_stage_box_predictor_arg_scope_fn()):
       kernel_size = self._first_stage_box_predictor_kernel_size
+      reuse = tf.get_variable_scope().reuse
       rpn_box_predictor_features = slim.conv2d(
           rpn_features_to_crop,
           self._first_stage_box_predictor_depth,
           kernel_size=[kernel_size, kernel_size],
           rate=self._first_stage_atrous_rate,
-          activation_fn=tf.nn.relu6)
+          activation_fn=tf.nn.relu6,
+          scope='Conv',
+          reuse=reuse)
     return (rpn_box_predictor_features, rpn_features_to_crop,
             anchors, image_shape)
...
@@ -1223,14 +1241,9 @@ class FasterRCNNMetaArch(model.DetectionModel):
         rpn_objectness_predictions_with_background_batch)[:, :, 1]
     clip_window = self._compute_clip_window(image_shapes)
     (proposal_boxes, proposal_scores, _, _, _,
-     num_proposals) = post_processing.batch_multiclass_non_max_suppression(
+     num_proposals) = self._first_stage_nms_fn(
         tf.expand_dims(proposal_boxes, axis=2),
         tf.expand_dims(rpn_objectness_softmax_without_background, axis=2),
-        self._first_stage_nms_score_threshold,
-        self._first_stage_nms_iou_threshold,
-        self._first_stage_max_proposals,
-        self._first_stage_max_proposals,
         clip_window=clip_window)
     if self._is_training:
       proposal_boxes = tf.stop_gradient(proposal_boxes)
...
...
@@ -1377,16 +1390,19 @@ class FasterRCNNMetaArch(model.DetectionModel):
       groundtruth_masks_list = self._groundtruth_lists.get(
           fields.BoxListFields.masks)
-      if groundtruth_masks_list is not None:
+      # TODO(rathodv): Remove mask resizing once the legacy pipeline is
+      # deleted.
+      if groundtruth_masks_list is not None and self._resize_masks:
         resized_masks_list = []
         for mask in groundtruth_masks_list:
           _, resized_mask, _ = self._image_resizer_fn(
               # Reuse the given `image_resizer_fn` to resize groundtruth
               # masks. `mask` tensor for an image is of the shape [num_masks,
               # image_height, image_width]. Below we create a dummy image of
               # the shape [image_height, image_width, 1] to use with
               # `image_resizer_fn`.
               image=tf.zeros(tf.stack([tf.shape(mask)[1],
                                        tf.shape(mask)[2], 1])),
               masks=mask)
           resized_masks_list.append(resized_mask)
...
@@ -1443,11 +1459,16 @@ class FasterRCNNMetaArch(model.DetectionModel):
         tf.range(proposal_boxlist.num_boxes()) < num_valid_proposals,
         cls_weights > 0
     )
-    sampled_indices = self._second_stage_sampler.subsample(
+    selected_positions = self._second_stage_sampler.subsample(
         valid_indicator,
         self._second_stage_batch_size,
         positive_indicator)
-    return box_list_ops.boolean_mask(proposal_boxlist, sampled_indices)
+    return box_list_ops.boolean_mask(
+        proposal_boxlist,
+        selected_positions,
+        use_static_shapes=self._use_static_shapes,
+        indicator_sum=(self._second_stage_batch_size
+                       if self._use_static_shapes else None))
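Threading `use_static_shapes` and `indicator_sum` into `boolean_mask` lets the sampler return a fixed-size set of proposals, which TPU compilation requires. A small numpy sketch of the underlying idea, independent of the library code (the helper name is ours, for illustration only):

import numpy as np

def fixed_size_mask_select(values, indicator, indicator_sum):
  """Selects rows where indicator is True, padding or truncating to a
  statically known count. Mirrors the idea behind
  boolean_mask(..., use_static_shapes=True, indicator_sum=k)."""
  selected = values[indicator]
  out = np.zeros((indicator_sum,) + values.shape[1:], values.dtype)
  k = min(indicator_sum, selected.shape[0])
  out[:k] = selected[:k]
  return out

proposals = np.arange(10, dtype=np.float32).reshape(5, 2)
keep = np.array([True, False, True, False, False])
print(fixed_size_mask_select(proposals, keep, indicator_sum=3))
# [[0. 1.] [4. 5.] [0. 0.]]  <- always shape (3, 2), regardless of keep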
   def _compute_second_stage_input_feature_maps(self, features_to_crop,
                                                proposal_boxes_normalized):
...
@@ -1467,35 +1488,10 @@ class FasterRCNNMetaArch(model.DetectionModel):
     Returns:
       A float32 tensor with shape [K, new_height, new_width, depth].
     """
-    def get_box_inds(proposals):
-      proposals_shape = proposals.get_shape().as_list()
-      if any(dim is None for dim in proposals_shape):
-        proposals_shape = tf.shape(proposals)
-      ones_mat = tf.ones(proposals_shape[:2], dtype=tf.int32)
-      multiplier = tf.expand_dims(
-          tf.range(start=0, limit=proposals_shape[0]), 1)
-      return tf.reshape(ones_mat * multiplier, [-1])
-
-    if self._use_matmul_crop_and_resize:
-      def _single_image_crop_and_resize(inputs):
-        single_image_features_to_crop, proposal_boxes_normalized = inputs
-        return ops.matmul_crop_and_resize(
-            tf.expand_dims(single_image_features_to_crop, 0),
-            proposal_boxes_normalized,
-            [self._initial_crop_size, self._initial_crop_size])
-
-      cropped_regions = self._flatten_first_two_dimensions(
-          shape_utils.static_or_dynamic_map_fn(
-              _single_image_crop_and_resize,
-              elems=[features_to_crop, proposal_boxes_normalized],
-              dtype=tf.float32,
-              parallel_iterations=self._parallel_iterations))
-    else:
-      cropped_regions = tf.image.crop_and_resize(
-          features_to_crop,
-          self._flatten_first_two_dimensions(proposal_boxes_normalized),
-          get_box_inds(proposal_boxes_normalized),
-          (self._initial_crop_size, self._initial_crop_size))
+    cropped_regions = self._flatten_first_two_dimensions(
+        self._crop_and_resize_fn(
+            features_to_crop, proposal_boxes_normalized,
+            [self._initial_crop_size, self._initial_crop_size]))
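After this refactor the meta-arch just calls whatever `crop_and_resize_fn` it was constructed with; choosing between `tf.image.crop_and_resize` and the matmul-based variant moves to the caller, and the removed `get_box_inds` logic lives on inside the native implementation. A hedged sketch of a native callable with the expected `(image, boxes, crop_size)` contract — the function name is ours, and the library's own helper may differ:

import tensorflow as tf

def native_crop_and_resize_fn(image, boxes, crop_size):
  """Crops `boxes` ([batch, num_boxes, 4], normalized) out of `image`
  ([batch, H, W, C]) to [batch, num_boxes, crop_h, crop_w, C].
  A sketch of the callable contract, for illustration only.
  """
  boxes_shape = tf.shape(boxes)
  batch, num_boxes = boxes_shape[0], boxes_shape[1]
  # Box index i * num_boxes + j says which batch image box (i, j) crops from.
  box_inds = tf.reshape(
      tf.tile(tf.expand_dims(tf.range(batch), 1), [1, num_boxes]), [-1])
  cropped = tf.image.crop_and_resize(
      image, tf.reshape(boxes, [-1, 4]), box_inds, crop_size)
  # Restore the [batch, num_boxes, ...] leading dimensions.
  final_shape = tf.concat([boxes_shape[:2], tf.shape(cropped)[1:]], axis=0)
  return tf.reshape(cropped, final_shape)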
     return slim.max_pool2d(
         cropped_regions,
         [self._maxpool_kernel_size, self._maxpool_kernel_size],
...
@@ -1738,11 +1734,17 @@ class FasterRCNNMetaArch(model.DetectionModel):
     sampled_reg_indices = tf.multiply(batch_sampled_indices,
                                       batch_reg_weights)

+    losses_mask = None
+    if self.groundtruth_has_field(fields.InputDataFields.is_annotated):
+      losses_mask = tf.stack(self.groundtruth_lists(
+          fields.InputDataFields.is_annotated))
     localization_losses = self._first_stage_localization_loss(
-        rpn_box_encodings, batch_reg_targets, weights=sampled_reg_indices)
+        rpn_box_encodings, batch_reg_targets, weights=sampled_reg_indices,
+        losses_mask=losses_mask)
     objectness_losses = self._first_stage_objectness_loss(
         rpn_objectness_predictions_with_background,
-        batch_one_hot_targets, weights=batch_sampled_indices)
+        batch_one_hot_targets,
+        weights=batch_sampled_indices,
+        losses_mask=losses_mask)
localization_loss
=
tf
.
reduce_mean
(
localization_loss
=
tf
.
reduce_mean
(
tf
.
reduce_sum
(
localization_losses
,
axis
=
1
)
/
normalizer
)
tf
.
reduce_sum
(
localization_losses
,
axis
=
1
)
/
normalizer
)
objectness_loss
=
tf
.
reduce_mean
(
objectness_loss
=
tf
.
reduce_mean
(
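The new `losses_mask` is a per-image boolean vector built from the `is_annotated` groundtruth field, so images without annotations stop contributing to the RPN losses. A minimal sketch of what such a mask typically does inside a loss, assuming losses shaped `[batch, num_anchors]` (the helper name is hypothetical):

```python
import tensorflow as tf

def mask_out_unannotated(per_anchor_losses, losses_mask):
  # per_anchor_losses: [batch, num_anchors] float; losses_mask: [batch]
  # bool, True where the image actually carries annotations. Broadcasting
  # the mask over the anchor axis zeroes whole images out of the loss.
  if losses_mask is None:
    return per_anchor_losses
  mask = tf.expand_dims(tf.to_float(losses_mask), axis=1)
  return per_anchor_losses * mask
```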
...
@@ -1866,32 +1868,32 @@ class FasterRCNNMetaArch(model.DetectionModel):
       # for just one class to avoid over-counting for regression loss and
       # (optionally) mask loss.
       else:
-        # We only predict refined location encodings for the non background
-        # classes, but we now pad it to make it compatible with the class
-        # predictions
-        refined_box_encodings_with_background = tf.pad(
-            refined_box_encodings, [[0, 0], [1, 0], [0, 0]])
-        refined_box_encodings_masked_by_class_targets = tf.boolean_mask(
-            refined_box_encodings_with_background,
-            tf.greater(one_hot_flat_cls_targets_with_background, 0))
-        reshaped_refined_box_encodings = tf.reshape(
-            refined_box_encodings_masked_by_class_targets,
-            [batch_size, self.max_num_proposals, self._box_coder.code_size])
+        reshaped_refined_box_encodings = (
+            self._get_refined_encodings_for_postitive_class(
+                refined_box_encodings,
+                one_hot_flat_cls_targets_with_background, batch_size))
+      losses_mask = None
+      if self.groundtruth_has_field(fields.InputDataFields.is_annotated):
+        losses_mask = tf.stack(self.groundtruth_lists(
+            fields.InputDataFields.is_annotated))
       second_stage_loc_losses = self._second_stage_localization_loss(
           reshaped_refined_box_encodings,
-          batch_reg_targets, weights=batch_reg_weights) / normalizer
+          batch_reg_targets, weights=batch_reg_weights,
+          losses_mask=losses_mask) / normalizer
       second_stage_cls_losses = ops.reduce_sum_trailing_dimensions(
           self._second_stage_classification_loss(
               class_predictions_with_background,
               batch_cls_targets_with_background,
-              weights=batch_cls_weights),
+              weights=batch_cls_weights,
+              losses_mask=losses_mask),
           ndims=2) / normalizer
       second_stage_loc_loss = tf.reduce_sum(
-          tf.boolean_mask(second_stage_loc_losses, paddings_indicator))
+          second_stage_loc_losses * tf.to_float(paddings_indicator))
       second_stage_cls_loss = tf.reduce_sum(
-          tf.boolean_mask(second_stage_cls_losses, paddings_indicator))
+          second_stage_cls_losses * tf.to_float(paddings_indicator))
       if self._hard_example_miner:
         (second_stage_loc_loss, second_stage_cls_loss
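Note the pattern change when summing the padded proposal losses: `tf.boolean_mask` flattens to a 1-D tensor whose length is only known at run time, while multiplying by the indicator (or selecting with `tf.where`, as the mask-loss hunk further below does) sums the same values but keeps every intermediate shape static. A toy comparison under that assumption:

```python
import tensorflow as tf

losses = tf.constant([[0.5, 0.2], [0.1, 0.4]])
indicator = tf.constant([[True, False], [True, True]])

# All three reductions evaluate to 1.0, but only the first loses the
# static [2, 2] shape on its intermediate tensor.
dynamic_sum = tf.reduce_sum(tf.boolean_mask(losses, indicator))
multiply_sum = tf.reduce_sum(losses * tf.to_float(indicator))
where_sum = tf.reduce_sum(
    tf.where(indicator, losses, tf.zeros_like(losses)))
```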
...
@@ -1954,10 +1956,9 @@ class FasterRCNNMetaArch(model.DetectionModel):
             box_list.BoxList(tf.reshape(proposal_boxes, [-1, 4])),
             image_shape[1], image_shape[2]).get()
-        flat_cropped_gt_mask = tf.image.crop_and_resize(
+        flat_cropped_gt_mask = self._crop_and_resize_fn(
             tf.expand_dims(flat_gt_masks, -1),
-            flat_normalized_proposals,
-            tf.range(flat_normalized_proposals.shape[0].value),
+            tf.expand_dims(flat_normalized_proposals, axis=1),
             [mask_height, mask_width])
         batch_cropped_gt_mask = tf.reshape(
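Here each flattened groundtruth mask is cropped to its own proposal box, so the new call passes `tf.expand_dims(flat_normalized_proposals, axis=1)`: one box per mask image, which removes the need for the explicit `tf.range(...)` box-index vector. Expressed directly against `tf.image.crop_and_resize` (an illustrative equivalent, not the injected function):

```python
import tensorflow as tf

def crop_masks_to_proposals(flat_gt_masks, flat_proposals,
                            mask_height, mask_width):
  # flat_gt_masks: [N, H, W]; flat_proposals: [N, 4] normalized boxes.
  # Each mask acts as its own image with exactly one box, so the
  # box->image index map is simply 0..N-1.
  num = tf.shape(flat_proposals)[0]
  return tf.image.crop_and_resize(
      tf.expand_dims(flat_gt_masks, -1), flat_proposals,
      tf.range(num), [mask_height, mask_width])
```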
...
@@ -1968,14 +1969,16 @@ class FasterRCNNMetaArch(model.DetectionModel):
               self._second_stage_mask_loss(
                   reshaped_prediction_masks,
                   batch_cropped_gt_mask,
-                  weights=batch_mask_target_weights),
+                  weights=batch_mask_target_weights,
+                  losses_mask=losses_mask),
               ndims=2) / (
                   mask_height * mask_width * tf.maximum(
                       tf.reduce_sum(
                           batch_mask_target_weights, axis=1, keep_dims=True
                       ), tf.ones((batch_size, 1))))
         second_stage_mask_loss = tf.reduce_sum(
-            tf.boolean_mask(second_stage_mask_losses, paddings_indicator))
+            tf.where(paddings_indicator, second_stage_mask_losses,
+                     tf.zeros_like(second_stage_mask_losses)))
       if second_stage_mask_loss is not None:
         mask_loss = tf.multiply(self._second_stage_mask_loss_weight,
...
@@ -1983,6 +1986,29 @@ class FasterRCNNMetaArch(model.DetectionModel):
         loss_dict[mask_loss.op.name] = mask_loss
     return loss_dict
 
+  def _get_refined_encodings_for_postitive_class(
+      self, refined_box_encodings, flat_cls_targets_with_background,
+      batch_size):
+    # We only predict refined location encodings for the non background
+    # classes, but we now pad it to make it compatible with the class
+    # predictions
+    refined_box_encodings_with_background = tf.pad(
+        refined_box_encodings, [[0, 0], [1, 0], [0, 0]])
+    refined_box_encodings_masked_by_class_targets = (
+        box_list_ops.boolean_mask(
+            box_list.BoxList(
+                tf.reshape(refined_box_encodings_with_background,
+                           [-1, self._box_coder.code_size])),
+            tf.reshape(tf.greater(flat_cls_targets_with_background, 0),
+                       [-1]),
+            use_static_shapes=self._use_static_shapes,
+            indicator_sum=batch_size * self.max_num_proposals
+            if self._use_static_shapes else None).get())
+    return tf.reshape(
+        refined_box_encodings_masked_by_class_targets,
+        [batch_size, self.max_num_proposals, self._box_coder.code_size])
+
   def _padded_batched_proposals_indicator(self,
                                           num_proposals,
                                           max_num_proposals):
...
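The new `_get_refined_encodings_for_postitive_class` helper centralizes the padding trick used in the loss hunk above: per-proposal box refinements exist only for the K foreground classes, so a zero "background" slot is padded in front to align them with the (K + 1)-way class targets before selecting each proposal's target-class encoding. A toy illustration with concrete shapes (the names here are hypothetical):

```python
import tensorflow as tf

num_proposals, num_classes, code_size = 3, 2, 4
refined = tf.random_normal([num_proposals, num_classes, code_size])

# Pad a zero background slot in front of the class axis:
# [3, 2, 4] -> [3, 3, 4], matching (num_classes + 1)-way targets.
refined_with_background = tf.pad(refined, [[0, 0], [1, 0], [0, 0]])

# One-hot class targets (0 = background) pick one encoding per proposal,
# giving a [num_proposals, code_size] result.
targets = tf.one_hot([0, 2, 1], depth=num_classes + 1)
selected = tf.boolean_mask(refined_with_background, tf.greater(targets, 0))
```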