ModelZoo / ResNet50_tensorflow · Commits

Commit 31ca3b97, authored Jul 23, 2020 by Kaushik Shivakumar

    resolve merge conflicts

Parents: 3e9d886d, 7fcd7cba

Showing 20 changed files with 2390 additions and 480 deletions (+2390, −480)
research/object_detection/core/densepose_ops_test.py  +178 −0
research/object_detection/core/model.py  +83 −18
research/object_detection/core/model_test.py  +3 −0
research/object_detection/core/preprocessor.py  +301 −21
research/object_detection/core/preprocessor_test.py  +198 −56
research/object_detection/core/standard_fields.py  +18 −0
research/object_detection/core/target_assigner.py  +254 −16
research/object_detection/core/target_assigner_test.py  +268 −0
research/object_detection/data_decoders/tf_example_decoder.py  +131 −1
research/object_detection/data_decoders/tf_example_decoder_test.py  +91 −6
research/object_detection/dataset_tools/context_rcnn/add_context_to_examples.py  +231 −139
research/object_detection/dataset_tools/context_rcnn/add_context_to_examples_tf1_test.py  +20 −8
research/object_detection/dataset_tools/context_rcnn/create_cococameratraps_tfexample_main.py  +75 −55
research/object_detection/dataset_tools/context_rcnn/create_cococameratraps_tfexample_tf1_test.py  +24 −15
research/object_detection/dataset_tools/context_rcnn/generate_detection_data.py  +84 −51
research/object_detection/dataset_tools/context_rcnn/generate_detection_data_tf1_test.py  +14 −5
research/object_detection/dataset_tools/context_rcnn/generate_embedding_data.py  +96 −55
research/object_detection/dataset_tools/context_rcnn/generate_embedding_data_tf1_test.py  +20 −10
research/object_detection/dataset_tools/create_coco_tf_record.py  +165 −14
research/object_detection/dataset_tools/create_coco_tf_record_test.py  +136 −10
research/object_detection/core/densepose_ops_test.py (new file, mode 100644)
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Tests for object_detection.core.densepose_ops."""
import numpy as np
import tensorflow.compat.v1 as tf

from object_detection.core import densepose_ops
from object_detection.utils import test_case


class DensePoseOpsTest(test_case.TestCase):
  """Tests for common DensePose operations."""

  def test_scale(self):
    def graph_fn():
      dp_surface_coords = tf.constant([
          [[0.0, 0.0, 0.1, 0.2], [100.0, 200.0, 0.3, 0.4]],
          [[50.0, 120.0, 0.5, 0.6], [100.0, 140.0, 0.7, 0.8]]
      ])
      y_scale = tf.constant(1.0 / 100)
      x_scale = tf.constant(1.0 / 200)

      output = densepose_ops.scale(dp_surface_coords, y_scale, x_scale)
      return output
    output = self.execute(graph_fn, [])

    expected_dp_surface_coords = np.array([
        [[0., 0., 0.1, 0.2], [1.0, 1.0, 0.3, 0.4]],
        [[0.5, 0.6, 0.5, 0.6], [1.0, 0.7, 0.7, 0.8]]
    ])
    self.assertAllClose(output, expected_dp_surface_coords)

  def test_clip_to_window(self):
    def graph_fn():
      dp_surface_coords = tf.constant([
          [[0.25, 0.5, 0.1, 0.2], [0.75, 0.75, 0.3, 0.4]],
          [[0.5, 0.0, 0.5, 0.6], [1.0, 1.0, 0.7, 0.8]]
      ])
      window = tf.constant([0.25, 0.25, 0.75, 0.75])

      output = densepose_ops.clip_to_window(dp_surface_coords, window)
      return output
    output = self.execute(graph_fn, [])

    expected_dp_surface_coords = np.array([
        [[0.25, 0.5, 0.1, 0.2], [0.75, 0.75, 0.3, 0.4]],
        [[0.5, 0.25, 0.5, 0.6], [0.75, 0.75, 0.7, 0.8]]
    ])
    self.assertAllClose(output, expected_dp_surface_coords)

  def test_prune_outside_window(self):
    def graph_fn():
      dp_num_points = tf.constant([2, 0, 1])
      dp_part_ids = tf.constant([[1, 1], [0, 0], [16, 0]])
      dp_surface_coords = tf.constant([
          [[0.9, 0.5, 0.1, 0.2], [0.75, 0.75, 0.3, 0.4]],
          [[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0]],
          [[0.8, 0.5, 0.6, 0.6], [0.5, 0.5, 0.7, 0.7]]
      ])
      window = tf.constant([0.25, 0.25, 0.75, 0.75])

      new_dp_num_points, new_dp_part_ids, new_dp_surface_coords = (
          densepose_ops.prune_outside_window(dp_num_points, dp_part_ids,
                                             dp_surface_coords, window))
      return new_dp_num_points, new_dp_part_ids, new_dp_surface_coords
    new_dp_num_points, new_dp_part_ids, new_dp_surface_coords = (
        self.execute_cpu(graph_fn, []))

    expected_dp_num_points = np.array([1, 0, 0])
    expected_dp_part_ids = np.array([[1], [0], [0]])
    expected_dp_surface_coords = np.array([
        [[0.75, 0.75, 0.3, 0.4]],
        [[0.0, 0.0, 0.0, 0.0]],
        [[0.0, 0.0, 0.0, 0.0]]
    ])
    self.assertAllEqual(new_dp_num_points, expected_dp_num_points)
    self.assertAllEqual(new_dp_part_ids, expected_dp_part_ids)
    self.assertAllClose(new_dp_surface_coords, expected_dp_surface_coords)

  def test_change_coordinate_frame(self):
    def graph_fn():
      dp_surface_coords = tf.constant([
          [[0.25, 0.5, 0.1, 0.2], [0.75, 0.75, 0.3, 0.4]],
          [[0.5, 0.0, 0.5, 0.6], [1.0, 1.0, 0.7, 0.8]]
      ])
      window = tf.constant([0.25, 0.25, 0.75, 0.75])

      output = densepose_ops.change_coordinate_frame(dp_surface_coords, window)
      return output
    output = self.execute(graph_fn, [])

    expected_dp_surface_coords = np.array([
        [[0, 0.5, 0.1, 0.2], [1.0, 1.0, 0.3, 0.4]],
        [[0.5, -0.5, 0.5, 0.6], [1.5, 1.5, 0.7, 0.8]]
    ])
    self.assertAllClose(output, expected_dp_surface_coords)

  def test_to_normalized_coordinates(self):
    def graph_fn():
      dp_surface_coords = tf.constant([
          [[10., 30., 0.1, 0.2], [30., 45., 0.3, 0.4]],
          [[20., 0., 0.5, 0.6], [40., 60., 0.7, 0.8]]
      ])
      output = densepose_ops.to_normalized_coordinates(
          dp_surface_coords, 40, 60)
      return output
    output = self.execute(graph_fn, [])

    expected_dp_surface_coords = np.array([
        [[0.25, 0.5, 0.1, 0.2], [0.75, 0.75, 0.3, 0.4]],
        [[0.5, 0.0, 0.5, 0.6], [1.0, 1.0, 0.7, 0.8]]
    ])
    self.assertAllClose(output, expected_dp_surface_coords)

  def test_to_absolute_coordinates(self):
    def graph_fn():
      dp_surface_coords = tf.constant([
          [[0.25, 0.5, 0.1, 0.2], [0.75, 0.75, 0.3, 0.4]],
          [[0.5, 0.0, 0.5, 0.6], [1.0, 1.0, 0.7, 0.8]]
      ])
      output = densepose_ops.to_absolute_coordinates(
          dp_surface_coords, 40, 60)
      return output
    output = self.execute(graph_fn, [])

    expected_dp_surface_coords = np.array([
        [[10., 30., 0.1, 0.2], [30., 45., 0.3, 0.4]],
        [[20., 0., 0.5, 0.6], [40., 60., 0.7, 0.8]]
    ])
    self.assertAllClose(output, expected_dp_surface_coords)

  def test_horizontal_flip(self):
    part_ids_np = np.array([[1, 4], [0, 8]], dtype=np.int32)
    surf_coords_np = np.array([
        [[0.1, 0.7, 0.2, 0.4], [0.3, 0.8, 0.2, 0.4]],
        [[0.0, 0.5, 0.8, 0.7], [0.6, 1.0, 0.7, 0.9]],
    ], dtype=np.float32)
    def graph_fn():
      part_ids = tf.constant(part_ids_np, dtype=tf.int32)
      surf_coords = tf.constant(surf_coords_np, dtype=tf.float32)
      flipped_part_ids, flipped_surf_coords = densepose_ops.flip_horizontal(
          part_ids, surf_coords)
      flipped_twice_part_ids, flipped_twice_surf_coords = (
          densepose_ops.flip_horizontal(flipped_part_ids, flipped_surf_coords))
      return (flipped_part_ids, flipped_surf_coords,
              flipped_twice_part_ids, flipped_twice_surf_coords)
    (flipped_part_ids, flipped_surf_coords, flipped_twice_part_ids,
     flipped_twice_surf_coords) = self.execute(graph_fn, [])

    expected_flipped_part_ids = [[1, 5],  # 1->1, 4->5
                                 [0, 9]]  # 0->0, 8->9
    expected_flipped_surf_coords_yx = np.array([
        [[0.1, 1.0 - 0.7], [0.3, 1.0 - 0.8]],
        [[0.0, 1.0 - 0.5], [0.6, 1.0 - 1.0]],
    ], dtype=np.float32)
    self.assertAllEqual(expected_flipped_part_ids, flipped_part_ids)
    self.assertAllClose(expected_flipped_surf_coords_yx,
                        flipped_surf_coords[:, :, 0:2])
    self.assertAllEqual(part_ids_np, flipped_twice_part_ids)
    self.assertAllClose(surf_coords_np, flipped_twice_surf_coords,
                        rtol=1e-2, atol=1e-2)


if __name__ == '__main__':
  tf.test.main()
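The new test file pins down the DensePose coordinate conventions: each sampled point is a (y, x, v, u) row, and the ops transform only the image coordinates (y, x) while passing the surface coordinates (v, u) through. A minimal NumPy sketch of two of those transforms, written from the test expectations above rather than from the library source:

import numpy as np

def scale_sketch(dp_surface_coords, y_scale, x_scale):
  # Scale only the (y, x) image coordinates; (v, u) pass through.
  out = np.array(dp_surface_coords, dtype=np.float64)
  out[..., 0] *= y_scale
  out[..., 1] *= x_scale
  return out

def change_coordinate_frame_sketch(dp_surface_coords, window):
  # Re-express (y, x) relative to a [ymin, xmin, ymax, xmax] window.
  ymin, xmin, ymax, xmax = window
  out = np.array(dp_surface_coords, dtype=np.float64)
  out[..., 0] = (out[..., 0] - ymin) / (ymax - ymin)
  out[..., 1] = (out[..., 1] - xmin) / (xmax - xmin)
  return out

print(scale_sketch([[[100.0, 200.0, 0.3, 0.4]]], 1.0 / 100, 1.0 / 200))
# [[[1.0, 1.0, 0.3, 0.4]]], matching test_scale above.
print(change_coordinate_frame_sketch([[[0.5, 0.0, 0.5, 0.6]]],
                                     [0.25, 0.25, 0.75, 0.75]))
# [[[0.5, -0.5, 0.5, 0.6]]], matching test_change_coordinate_frame above.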
research/object_detection/core/model.py
@@ -102,7 +102,7 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
     Args:
       field: a string key, options are
         fields.BoxListFields.{boxes,classes,masks,keypoints,
-        keypoint_visibilities} or
+        keypoint_visibilities, densepose_*} or
         fields.InputDataFields.is_annotated.

     Returns:
@@ -123,7 +123,7 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
     Args:
       field: a string key, options are
         fields.BoxListFields.{boxes,classes,masks,keypoints,
-        keypoint_visibilities} or
+        keypoint_visibilities, densepose_*} or
         fields.InputDataFields.is_annotated.

     Returns:
@@ -251,9 +251,14 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
         detection_classes: [batch, max_detections]
           (If a model is producing class-agnostic detections, this field may be
           missing)
-        instance_masks: [batch, max_detections, image_height, image_width]
+        detection_masks: [batch, max_detections, mask_height, mask_width]
           (optional)
-        keypoints: [batch, max_detections, num_keypoints, 2] (optional)
+        detection_keypoints: [batch, max_detections, num_keypoints, 2]
+          (optional)
+        detection_keypoint_scores: [batch, max_detections, num_keypoints]
+          (optional)
+        detection_surface_coords: [batch, max_detections, mask_height,
+          mask_width, 2] (optional)
         num_detections: [batch]

       In addition to the above fields this stage also outputs the following
@@ -288,19 +293,23 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
     """
     pass

-  def provide_groundtruth(self,
-                          groundtruth_boxes_list,
-                          groundtruth_classes_list,
-                          groundtruth_masks_list=None,
-                          groundtruth_keypoints_list=None,
-                          groundtruth_keypoint_visibilities_list=None,
-                          groundtruth_weights_list=None,
-                          groundtruth_confidences_list=None,
-                          groundtruth_is_crowd_list=None,
-                          groundtruth_group_of_list=None,
-                          groundtruth_area_list=None,
-                          is_annotated_list=None,
-                          groundtruth_labeled_classes=None):
+  def provide_groundtruth(self,
+                          groundtruth_boxes_list,
+                          groundtruth_classes_list,
+                          groundtruth_masks_list=None,
+                          groundtruth_keypoints_list=None,
+                          groundtruth_keypoint_visibilities_list=None,
+                          groundtruth_dp_num_points_list=None,
+                          groundtruth_dp_part_ids_list=None,
+                          groundtruth_dp_surface_coords_list=None,
+                          groundtruth_weights_list=None,
+                          groundtruth_confidences_list=None,
+                          groundtruth_is_crowd_list=None,
+                          groundtruth_group_of_list=None,
+                          groundtruth_area_list=None,
+                          is_annotated_list=None,
+                          groundtruth_labeled_classes=None):
     """Provide groundtruth tensors.

     Args:
@@ -324,6 +333,15 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
         `groundtruth_keypoint_visibilities_list`).
       groundtruth_keypoint_visibilities_list: a list of 3-D tf.bool tensors
         of shape [num_boxes, num_keypoints] containing keypoint visibilities.
+      groundtruth_dp_num_points_list: a list of 1-D tf.int32 tensors of shape
+        [num_boxes] containing the number of DensePose sampled points.
+      groundtruth_dp_part_ids_list: a list of 2-D tf.int32 tensors of shape
+        [num_boxes, max_sampled_points] containing the DensePose part ids
+        (0-indexed) for each sampled point. Note that there may be padding.
+      groundtruth_dp_surface_coords_list: a list of 3-D tf.float32 tensors of
+        shape [num_boxes, max_sampled_points, 4] containing the DensePose
+        surface coordinates for each sampled point. Note that there may be
+        padding.
       groundtruth_weights_list: A list of 1-D tf.float32 tensors of shape
         [num_boxes] containing weights for groundtruth boxes.
       groundtruth_confidences_list: A list of 2-D tf.float32 tensors of shape
@@ -361,6 +379,18 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
       self._groundtruth_lists[
          fields.BoxListFields.keypoint_visibilities] = (
              groundtruth_keypoint_visibilities_list)
+    if groundtruth_dp_num_points_list:
+      self._groundtruth_lists[
+          fields.BoxListFields.densepose_num_points] = (
+              groundtruth_dp_num_points_list)
+    if groundtruth_dp_part_ids_list:
+      self._groundtruth_lists[
+          fields.BoxListFields.densepose_part_ids] = (
+              groundtruth_dp_part_ids_list)
+    if groundtruth_dp_surface_coords_list:
+      self._groundtruth_lists[
+          fields.BoxListFields.densepose_surface_coords] = (
+              groundtruth_dp_surface_coords_list)
     if groundtruth_is_crowd_list:
       self._groundtruth_lists[
          fields.BoxListFields.is_crowd] = groundtruth_is_crowd_list
@@ -391,7 +421,9 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
     pass

   @abc.abstractmethod
-  def restore_map(self, fine_tune_checkpoint_type='detection'):
+  def restore_map(self,
+                  fine_tune_checkpoint_type='detection',
+                  load_all_detection_checkpoint_vars=False):
     """Returns a map of variables to load from a foreign checkpoint.

     Returns a map of variable names to load from a checkpoint to variables in
@@ -407,6 +439,9 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
         checkpoint (with compatible variable names) or to restore from a
         classification checkpoint for initialization prior to training.
         Valid values: `detection`, `classification`. Default 'detection'.
+      load_all_detection_checkpoint_vars: whether to load all variables (when
+        `fine_tune_checkpoint_type` is `detection`). If False, only variables
+        within the feature extractor scope are included. Default False.

     Returns:
       A dict mapping variable names (to load from a checkpoint) to variables in
@@ -414,6 +449,36 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
     """
     pass

+  @abc.abstractmethod
+  def restore_from_objects(self, fine_tune_checkpoint_type='detection'):
+    """Returns a map of variables to load from a foreign checkpoint.
+
+    Returns a dictionary of Tensorflow 2 Trackable objects (e.g. tf.Module
+    or Checkpoint). This enables the model to initialize based on weights from
+    another task. For example, the feature extractor variables from a
+    classification model can be used to bootstrap training of an object
+    detector. When loading from an object detection model, the checkpoint model
+    should have the same parameters as this detection model with exception of
+    the num_classes parameter.
+
+    Note that this function is intended to be used to restore Keras-based
+    models when running Tensorflow 2, whereas restore_map (above) is intended
+    to be used to restore Slim-based models when running Tensorflow 1.x.
+
+    TODO(jonathanhuang,rathodv): Check tf_version and raise unimplemented
+    error for both restore_map and restore_from_objects depending on version.
+
+    Args:
+      fine_tune_checkpoint_type: whether to restore from a full detection
+        checkpoint (with compatible variable names) or to restore from a
+        classification checkpoint for initialization prior to training.
+        Valid values: `detection`, `classification`. Default 'detection'.
+
+    Returns:
+      A dict mapping keys to Trackable objects (tf.Module or Checkpoint).
+    """
+    pass
+
   @abc.abstractmethod
   def updates(self):
     """Returns a list of update operators for this model.
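For orientation, a hedged sketch of how a caller might use the extended signature. Here `my_model` stands in for any concrete DetectionModel subclass (a hypothetical name), the tensor values are made up, and the shapes follow the docstring above:

import tensorflow.compat.v1 as tf

# One image with two boxes; max_sampled_points == 2, so the second point of
# the second box is padding (dp_num_points says only 1 point is valid there).
dp_num_points_list = [tf.constant([2, 1], dtype=tf.int32)]
dp_part_ids_list = [tf.constant([[1, 5], [0, 0]], dtype=tf.int32)]
dp_surface_coords_list = [tf.constant(
    [[[0.1, 0.2, 0.3, 0.4], [0.5, 0.6, 0.7, 0.8]],
     [[0.2, 0.3, 0.4, 0.5], [0.0, 0.0, 0.0, 0.0]]], dtype=tf.float32)]

my_model.provide_groundtruth(
    groundtruth_boxes_list=[tf.constant([[0., 0., 1., 1.], [.1, .1, .9, .9]])],
    groundtruth_classes_list=[tf.one_hot([0, 1], depth=2)],
    groundtruth_dp_num_points_list=dp_num_points_list,
    groundtruth_dp_part_ids_list=dp_part_ids_list,
    groundtruth_dp_surface_coords_list=dp_surface_coords_list)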
research/object_detection/core/model_test.py
@@ -57,6 +57,9 @@ class FakeModel(model.DetectionModel):
   def restore_map(self):
     return {}

+  def restore_from_objects(self, fine_tune_checkpoint_type):
+    pass
+
   def regularization_losses(self):
     return []
research/object_detection/core/preprocessor.py
@@ -79,6 +79,7 @@ import tensorflow.compat.v1 as tf
 from tensorflow.python.ops import control_flow_ops
 from object_detection.core import box_list
 from object_detection.core import box_list_ops
+from object_detection.core import densepose_ops
 from object_detection.core import keypoint_ops
 from object_detection.core import preprocessor_cache
 from object_detection.core import standard_fields as fields
@@ -568,6 +569,8 @@ def random_horizontal_flip(image,
                            masks=None,
                            keypoints=None,
                            keypoint_visibilities=None,
+                           densepose_part_ids=None,
+                           densepose_surface_coords=None,
                            keypoint_flip_permutation=None,
                            probability=0.5,
                            seed=None,
@@ -589,6 +592,16 @@ def random_horizontal_flip(image,
       normalized coordinates.
     keypoint_visibilities: (optional) rank 2 bool tensor with shape
       [num_instances, num_keypoints].
+    densepose_part_ids: (optional) rank 2 int32 tensor with shape
+      [num_instances, num_points] holding the part id for each
+      sampled point. These part_ids are 0-indexed, where the
+      first non-background part has index 0.
+    densepose_surface_coords: (optional) rank 3 float32 tensor with shape
+      [num_instances, num_points, 4]. The DensePose
+      coordinates are of the form (y, x, v, u) where
+      (y, x) are the normalized image coordinates for a
+      sampled point, and (v, u) is the surface
+      coordinate for the part.
     keypoint_flip_permutation: rank 1 int32 tensor containing the keypoint flip
       permutation.
     probability: the probability of performing this augmentation.
@@ -601,9 +614,9 @@ def random_horizontal_flip(image,
   Returns:
     image: image which is the same shape as input image.

-    If boxes, masks, keypoints, keypoint_visibilities, and
-    keypoint_flip_permutation are not None, the function also returns the
-    following tensors.
+    If boxes, masks, keypoints, keypoint_visibilities,
+    keypoint_flip_permutation, densepose_part_ids, or densepose_surface_coords
+    are not None, the function also returns the following tensors.

     boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
            Boxes are in normalized form meaning their coordinates vary
@@ -614,9 +627,15 @@ def random_horizontal_flip(image,
                [num_instances, num_keypoints, 2]
     keypoint_visibilities: rank 2 bool tensor with shape
                            [num_instances, num_keypoints].
+    densepose_part_ids: rank 2 int32 tensor with shape
+                        [num_instances, num_points].
+    densepose_surface_coords: rank 3 float32 tensor with shape
+                              [num_instances, num_points, 4].

   Raises:
     ValueError: if keypoints are provided but keypoint_flip_permutation is not.
+    ValueError: if only one of densepose_part_ids and densepose_surface_coords
+      is provided (the two must be provided together).
   """

   def _flip_image(image):
@@ -628,6 +647,11 @@ def random_horizontal_flip(image,
     raise ValueError(
         'keypoints are provided but keypoints_flip_permutation is not provided')

+  if ((densepose_part_ids is not None and densepose_surface_coords is None) or
+      (densepose_part_ids is None and densepose_surface_coords is not None)):
+    raise ValueError(
+        'Must provide both `densepose_part_ids` and `densepose_surface_coords`')
+
   with tf.name_scope('RandomHorizontalFlip', values=[image, boxes]):
     result = []
     # random variable defining whether to do flip or not
@@ -666,7 +690,6 @@ def random_horizontal_flip(image,
     # flip keypoint visibilities
     if (keypoint_visibilities is not None and
         keypoint_flip_permutation is not None):
-      permutation = keypoint_flip_permutation
       kpt_flip_perm = keypoint_flip_permutation
       keypoint_visibilities = tf.cond(
           do_a_flip_random,
@@ -674,6 +697,17 @@ def random_horizontal_flip(image,
           lambda: keypoint_visibilities)
       result.append(keypoint_visibilities)

+    # flip DensePose parts and coordinates
+    if densepose_part_ids is not None:
+      flip_densepose_fn = functools.partial(
+          densepose_ops.flip_horizontal, densepose_part_ids,
+          densepose_surface_coords)
+      densepose_tensors = tf.cond(
+          do_a_flip_random,
+          flip_densepose_fn,
+          lambda: (densepose_part_ids, densepose_surface_coords))
+      result.extend(densepose_tensors)
+
     return tuple(result)
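random_horizontal_flip defers the actual transformation to densepose_ops.flip_horizontal. A hedged NumPy sketch of the behavior implied by test_horizontal_flip in the new densepose_ops_test.py above: part ids pass through a left/right permutation (the test checks 4->5 and 8->9, with 0 and 1 fixed) and the normalized x coordinate is mirrored. The permutation table below is hypothetical beyond those checked indices, and the real op evidently also remaps the (v, u) surface coordinates through a per-part symmetry, which is why the double-flip assertion in that test tolerates 1e-2 error:

import numpy as np

# Hypothetical left/right part permutation; only entries 0, 1, 4, 5, 8, 9
# are constrained by the unit test.
PART_PERMUTATION = np.array([0, 1, 2, 3, 5, 4, 6, 7, 9, 8])

def flip_horizontal_sketch(part_ids, surface_coords):
  flipped_ids = PART_PERMUTATION[part_ids]                # swap left/right parts
  flipped_coords = surface_coords.copy()
  flipped_coords[..., 1] = 1.0 - flipped_coords[..., 1]   # mirror normalized x
  return flipped_ids, flipped_coords

ids = np.array([[1, 4], [0, 8]])
coords = np.array([[[0.1, 0.7, 0.2, 0.4], [0.3, 0.8, 0.2, 0.4]],
                   [[0.0, 0.5, 0.8, 0.7], [0.6, 1.0, 0.7, 0.9]]])
new_ids, _ = flip_horizontal_sketch(ids, coords)
# new_ids == [[1, 5], [0, 9]], as the test expects.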
@@ -1285,6 +1319,9 @@ def _strict_random_crop_image(image,
                               masks=None,
                               keypoints=None,
                               keypoint_visibilities=None,
+                              densepose_num_points=None,
+                              densepose_part_ids=None,
+                              densepose_surface_coords=None,
                               min_object_covered=1.0,
                               aspect_ratio_range=(0.75, 1.33),
                               area_range=(0.1, 1.0),
@@ -1322,6 +1359,19 @@ def _strict_random_crop_image(image,
       normalized coordinates.
     keypoint_visibilities: (optional) rank 2 bool tensor with shape
       [num_instances, num_keypoints].
+    densepose_num_points: (optional) rank 1 int32 tensor with shape
+      [num_instances] with the number of sampled points per instance.
+    densepose_part_ids: (optional) rank 2 int32 tensor with shape
+      [num_instances, num_points] holding the part id for each sampled point.
+      These part_ids are 0-indexed, where the first non-background part has
+      index 0.
+    densepose_surface_coords: (optional) rank 3 float32 tensor with shape
+      [num_instances, num_points, 4]. The DensePose coordinates are of the form
+      (y, x, v, u) where (y, x) are the normalized image coordinates for a
+      sampled point, and (v, u) is the surface coordinate for the part.
     min_object_covered: the cropped image must cover at least this fraction of
       at least one of the input bounding boxes.
     aspect_ratio_range: allowed range for aspect ratio of cropped image.
@@ -1341,8 +1391,9 @@ def _strict_random_crop_image(image,
            Boxes are in normalized form.
     labels: new labels.

-    If label_weights, multiclass_scores, masks, keypoints, or
-    keypoint_visibilities is not None, the function also returns:
+    If label_weights, multiclass_scores, masks, keypoints,
+    keypoint_visibilities, densepose_num_points, densepose_part_ids, or
+    densepose_surface_coords is not None, the function also returns:
     label_weights: rank 1 float32 tensor with shape [num_instances].
     multiclass_scores: rank 2 float32 tensor with shape
                        [num_instances, num_classes]
@@ -1351,9 +1402,24 @@ def _strict_random_crop_image(image,
     keypoints: rank 3 float32 tensor with shape
                [num_instances, num_keypoints, 2]
     keypoint_visibilities: rank 2 bool tensor with shape
                            [num_instances, num_keypoints]
+    densepose_num_points: rank 1 int32 tensor with shape [num_instances].
+    densepose_part_ids: rank 2 int32 tensor with shape
+                        [num_instances, num_points].
+    densepose_surface_coords: rank 3 float32 tensor with shape
+                              [num_instances, num_points, 4].
+
+  Raises:
+    ValueError: If some but not all of the DensePose tensors are provided.
   """
   with tf.name_scope('RandomCropImage', values=[image, boxes]):
+    densepose_tensors = [densepose_num_points, densepose_part_ids,
+                         densepose_surface_coords]
+    if (any(t is not None for t in densepose_tensors) and
+        not all(t is not None for t in densepose_tensors)):
+      raise ValueError('If cropping DensePose labels, must provide '
+                       '`densepose_num_points`, `densepose_part_ids`, and '
+                       '`densepose_surface_coords`')
     image_shape = tf.shape(image)

     # boxes are [N, 4]. Lets first make them [N, 1, 4].
@@ -1464,6 +1530,23 @@ def _strict_random_crop_image(image,
           new_keypoints, kpt_vis_of_boxes_completely_inside_window)
       result.append(new_kpt_visibilities)

+    if densepose_num_points is not None:
+      filtered_dp_tensors = []
+      for dp_tensor in densepose_tensors:
+        dp_tensor_inside_window = tf.gather(dp_tensor, inside_window_ids)
+        dp_tensor_completely_inside_window = tf.gather(
+            dp_tensor_inside_window, keep_ids)
+        filtered_dp_tensors.append(dp_tensor_completely_inside_window)
+      new_dp_num_points = filtered_dp_tensors[0]
+      new_dp_point_ids = filtered_dp_tensors[1]
+      new_dp_surf_coords = densepose_ops.change_coordinate_frame(
+          filtered_dp_tensors[2], im_box_rank1)
+      if clip_boxes:
+        new_dp_num_points, new_dp_point_ids, new_dp_surf_coords = (
+            densepose_ops.prune_outside_window(
+                new_dp_num_points, new_dp_point_ids, new_dp_surf_coords,
+                window=[0.0, 0.0, 1.0, 1.0]))
+      result.extend([new_dp_num_points, new_dp_point_ids, new_dp_surf_coords])
+
     return tuple(result)
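The DensePose tensors ride along with the same two-stage index filtering used for the boxes: a first tf.gather keeps instances whose boxes overlap the crop window at all (inside_window_ids), and a second keeps only those completely inside it (keep_ids). A standalone toy example of that double gather; the index values here are made up:

import tensorflow.compat.v1 as tf

dp_part_ids = tf.constant([[1, 1], [0, 0], [16, 0]])
inside_window_ids = tf.constant([0, 2])  # instances overlapping the crop
keep_ids = tf.constant([0])              # of those, entirely inside the crop

filtered = tf.gather(tf.gather(dp_part_ids, inside_window_ids), keep_ids)
# filtered == [[1, 1]]: only instance 0's DensePose labels survive.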
@@ -1476,6 +1559,9 @@ def random_crop_image(image,
                       masks=None,
                       keypoints=None,
                       keypoint_visibilities=None,
+                      densepose_num_points=None,
+                      densepose_part_ids=None,
+                      densepose_surface_coords=None,
                       min_object_covered=1.0,
                       aspect_ratio_range=(0.75, 1.33),
                       area_range=(0.1, 1.0),
@@ -1523,6 +1609,19 @@ def random_crop_image(image,
       normalized coordinates.
     keypoint_visibilities: (optional) rank 2 bool tensor with shape
       [num_instances, num_keypoints].
+    densepose_num_points: (optional) rank 1 int32 tensor with shape
+      [num_instances] with the number of sampled points per instance.
+    densepose_part_ids: (optional) rank 2 int32 tensor with shape
+      [num_instances, num_points] holding the part id for each sampled point.
+      These part_ids are 0-indexed, where the first non-background part has
+      index 0.
+    densepose_surface_coords: (optional) rank 3 float32 tensor with shape
+      [num_instances, num_points, 4]. The DensePose coordinates are of the form
+      (y, x, v, u) where (y, x) are the normalized image coordinates for a
+      sampled point, and (v, u) is the surface coordinate for the part.
     min_object_covered: the cropped image must cover at least this fraction of
       at least one of the input bounding boxes.
     aspect_ratio_range: allowed range for aspect ratio of cropped image.
@@ -1547,8 +1646,9 @@ def random_crop_image(image,
            form.
     labels: new labels.

-    If label_weights, multiclass_scores, masks, keypoints, keypoint_visibilities
-    is not None, the function also returns:
+    If label_weights, multiclass_scores, masks, keypoints,
+    keypoint_visibilities, densepose_num_points, densepose_part_ids, or
+    densepose_surface_coords is not None, the function also returns:
     label_weights: rank 1 float32 tensor with shape [num_instances].
     multiclass_scores: rank 2 float32 tensor with shape
                        [num_instances, num_classes]
@@ -1557,7 +1657,12 @@ def random_crop_image(image,
     keypoints: rank 3 float32 tensor with shape
                [num_instances, num_keypoints, 2]
     keypoint_visibilities: rank 2 bool tensor with shape
                            [num_instances, num_keypoints]
+    densepose_num_points: rank 1 int32 tensor with shape [num_instances].
+    densepose_part_ids: rank 2 int32 tensor with shape
+                        [num_instances, num_points].
+    densepose_surface_coords: rank 3 float32 tensor with shape
+                              [num_instances, num_points, 4].
   """

   def strict_random_crop_image_fn():
@@ -1571,6 +1676,9 @@ def random_crop_image(image,
         masks=masks,
         keypoints=keypoints,
         keypoint_visibilities=keypoint_visibilities,
+        densepose_num_points=densepose_num_points,
+        densepose_part_ids=densepose_part_ids,
+        densepose_surface_coords=densepose_surface_coords,
         min_object_covered=min_object_covered,
        aspect_ratio_range=aspect_ratio_range,
        area_range=area_range,
@@ -1602,6 +1710,9 @@ def random_crop_image(image,
       outputs.append(keypoints)
     if keypoint_visibilities is not None:
       outputs.append(keypoint_visibilities)
+    if densepose_num_points is not None:
+      outputs.extend([densepose_num_points, densepose_part_ids,
+                      densepose_surface_coords])

     result = tf.cond(do_a_crop_random, strict_random_crop_image_fn,
                      lambda: tuple(outputs))
@@ -1612,6 +1723,7 @@ def random_pad_image(image,
                      boxes,
                      masks=None,
                      keypoints=None,
+                     densepose_surface_coords=None,
                      min_image_size=None,
                      max_image_size=None,
                      pad_color=None,
@@ -1639,6 +1751,11 @@ def random_pad_image(image,
     keypoints: (optional) rank 3 float32 tensor with shape
                [N, num_keypoints, 2]. The keypoints are in y-x normalized
                coordinates.
+    densepose_surface_coords: (optional) rank 3 float32 tensor with shape
+                              [N, num_points, 4]. The DensePose coordinates are
+                              of the form (y, x, v, u) where (y, x) are the
+                              normalized image coordinates for a sampled point,
+                              and (v, u) is the surface coordinate for the part.
     min_image_size: a tensor of size [min_height, min_width], type tf.int32.
                     If passed as None, will be set to image size
                     [height, width].
@@ -1663,6 +1780,9 @@ def random_pad_image(image,
     masks: rank 3 float32 tensor with shape [N, new_height, new_width]
     if keypoints is not None, the function also returns:
     keypoints: rank 3 float32 tensor with shape [N, num_keypoints, 2]
+    if densepose_surface_coords is not None, the function also returns:
+    densepose_surface_coords: rank 3 float32 tensor with shape
+                              [num_instances, num_points, 4]
   """
   if pad_color is None:
     pad_color = tf.reduce_mean(image, axis=[0, 1])
@@ -1754,6 +1874,11 @@ def random_pad_image(image,
     new_keypoints = keypoint_ops.change_coordinate_frame(keypoints, new_window)
     result.append(new_keypoints)

+  if densepose_surface_coords is not None:
+    new_densepose_surface_coords = densepose_ops.change_coordinate_frame(
+        densepose_surface_coords, new_window)
+    result.append(new_densepose_surface_coords)
+
   return tuple(result)
@@ -1761,6 +1886,7 @@ def random_absolute_pad_image(image,
                               boxes,
                               masks=None,
                               keypoints=None,
+                              densepose_surface_coords=None,
                               max_height_padding=None,
                               max_width_padding=None,
                               pad_color=None,
@@ -1785,6 +1911,11 @@ def random_absolute_pad_image(image,
     keypoints: (optional) rank 3 float32 tensor with shape
                [N, num_keypoints, 2]. The keypoints are in y-x normalized
                coordinates.
+    densepose_surface_coords: (optional) rank 3 float32 tensor with shape
+                              [N, num_points, 4]. The DensePose coordinates are
+                              of the form (y, x, v, u) where (y, x) are the
+                              normalized image coordinates for a sampled point,
+                              and (v, u) is the surface coordinate for the part.
     max_height_padding: a scalar tf.int32 tensor denoting the maximum amount of
                         height padding. The padding will be chosen uniformly at
                         random from [0, max_height_padding).
@@ -1817,6 +1948,7 @@ def random_absolute_pad_image(image,
                           boxes,
                           masks=masks,
                           keypoints=keypoints,
+                          densepose_surface_coords=densepose_surface_coords,
                           min_image_size=min_image_size,
                           max_image_size=max_image_size,
                           pad_color=pad_color,
@@ -3852,7 +3984,7 @@ def random_square_crop_by_scale(image, boxes, labels, label_weights,
   Args:
     image: rank 3 float32 tensor containing 1 image ->
-           [height, width,channels].
+           [height, width, channels].
     boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
            Boxes are in normalized form meaning their coordinates vary
            between [0, 1]. Each row is in the form of [ymin, xmin, ymax, xmax].
@@ -3996,12 +4128,138 @@ def random_square_crop_by_scale(image, boxes, labels, label_weights,
   return return_values


+def random_scale_crop_and_pad_to_square(
+    image,
+    boxes,
+    labels,
+    label_weights,
+    masks=None,
+    keypoints=None,
+    scale_min=0.1,
+    scale_max=2.0,
+    output_size=512,
+    resize_method=tf.image.ResizeMethod.BILINEAR,
+    seed=None):
+  """Randomly scale, crop, and then pad an image to fixed square dimensions.
+
+  Randomly scale, crop, and then pad an image to the desired square output
+  dimensions. Specifically, this method first samples a random_scale factor
+  from a uniform distribution between scale_min and scale_max, and then resizes
+  the image such that its maximum dimension is (output_size * random_scale).
+  Secondly, a square output_size crop is extracted from the resized image
+  (note, this will only occur when random_scale > 1.0). Lastly, the cropped
+  region is padded to the desired square output_size, by filling with zeros.
+  The augmentation is borrowed from [1].
+
+  [1]: https://arxiv.org/abs/1911.09070
+
+  Args:
+    image: rank 3 float32 tensor containing 1 image ->
+      [height, width, channels].
+    boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4]. Boxes
+      are in normalized form meaning their coordinates vary between [0, 1].
+      Each row is in the form of [ymin, xmin, ymax, xmax]. Boxes on the crop
+      boundary are clipped to the boundary and boxes falling outside the crop
+      are ignored.
+    labels: rank 1 int32 tensor containing the object classes.
+    label_weights: float32 tensor of shape [num_instances] representing the
+      weight for each box.
+    masks: (optional) rank 3 float32 tensor with shape [num_instances, height,
+      width] containing instance masks. The masks are of the same height, width
+      as the input `image`.
+    keypoints: (optional) rank 3 float32 tensor with shape [num_instances,
+      num_keypoints, 2]. The keypoints are in y-x normalized coordinates.
+    scale_min: float, the minimum value for the random scale factor.
+    scale_max: float, the maximum value for the random scale factor.
+    output_size: int, the desired (square) output image size.
+    resize_method: tf.image.ResizeMethod, resize method to use when scaling the
+      input images.
+    seed: random seed.
+
+  Returns:
+    image: image which is the same rank as input image.
+    boxes: boxes which is the same rank as input boxes.
+      Boxes are in normalized form.
+    labels: new labels.
+    label_weights: rank 1 float32 tensor with shape [num_instances].
+    masks: rank 3 float32 tensor with shape [num_instances, height, width]
+      containing instance masks.
+  """
+  img_shape = tf.shape(image)
+  input_height, input_width = img_shape[0], img_shape[1]
+  random_scale = tf.random_uniform([], scale_min, scale_max, seed=seed)
+
+  # Compute the scaled height and width from the random scale.
+  max_input_dim = tf.cast(tf.maximum(input_height, input_width), tf.float32)
+  input_ar_y = tf.cast(input_height, tf.float32) / max_input_dim
+  input_ar_x = tf.cast(input_width, tf.float32) / max_input_dim
+  scaled_height = tf.cast(random_scale * output_size * input_ar_y, tf.int32)
+  scaled_width = tf.cast(random_scale * output_size * input_ar_x, tf.int32)
+
+  # Compute the offsets:
+  offset_y = tf.cast(scaled_height - output_size, tf.float32)
+  offset_x = tf.cast(scaled_width - output_size, tf.float32)
+  offset_y = tf.maximum(0.0, offset_y) * tf.random_uniform([], 0, 1, seed=seed)
+  offset_x = tf.maximum(0.0, offset_x) * tf.random_uniform([], 0, 1, seed=seed)
+  offset_y = tf.cast(offset_y, tf.int32)
+  offset_x = tf.cast(offset_x, tf.int32)
+
+  # Scale, crop, and pad the input image.
+  scaled_image = tf.image.resize_images(
+      image, [scaled_height, scaled_width], method=resize_method)
+  scaled_image = scaled_image[offset_y:offset_y + output_size,
+                              offset_x:offset_x + output_size, :]
+  output_image = tf.image.pad_to_bounding_box(scaled_image, 0, 0,
+                                              output_size, output_size)
+
+  # Update the boxes.
+  new_window = tf.cast(
+      tf.stack([offset_y, offset_x,
+                offset_y + output_size, offset_x + output_size]),
+      dtype=tf.float32)
+  new_window /= tf.cast(
+      tf.stack([scaled_height, scaled_width, scaled_height, scaled_width]),
+      dtype=tf.float32)
+  boxlist = box_list.BoxList(boxes)
+  boxlist = box_list_ops.change_coordinate_frame(boxlist, new_window)
+  boxlist, indices = box_list_ops.prune_completely_outside_window(
+      boxlist, [0.0, 0.0, 1.0, 1.0])
+  boxlist = box_list_ops.clip_to_window(
+      boxlist, [0.0, 0.0, 1.0, 1.0], filter_nonoverlapping=False)
+
+  return_values = [output_image, boxlist.get(),
+                   tf.gather(labels, indices),
+                   tf.gather(label_weights, indices)]
+
+  if masks is not None:
+    new_masks = tf.expand_dims(masks, -1)
+    new_masks = tf.image.resize_images(
+        new_masks, [scaled_height, scaled_width], method=resize_method)
+    new_masks = new_masks[:, offset_y:offset_y + output_size,
+                          offset_x:offset_x + output_size, :]
+    new_masks = tf.image.pad_to_bounding_box(
+        new_masks, 0, 0, output_size, output_size)
+    new_masks = tf.squeeze(new_masks, [-1])
+    return_values.append(tf.gather(new_masks, indices))
+
+  if keypoints is not None:
+    keypoints = tf.gather(keypoints, indices)
+    keypoints = keypoint_ops.change_coordinate_frame(keypoints, new_window)
+    keypoints = keypoint_ops.prune_outside_window(
+        keypoints, [0.0, 0.0, 1.0, 1.0])
+    return_values.append(keypoints)
+
+  return return_values
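To make the scaled-dimension arithmetic concrete, a short hedged walk-through in plain Python, using the 512x256 input and output_size=512 that the new unit test in preprocessor_test.py (further below) exercises:

# random_scale is sampled in [scale_min, scale_max]; take 2.0 as an example.
input_height, input_width, output_size = 512, 256, 512
random_scale = 2.0

max_input_dim = float(max(input_height, input_width))        # 512.0
input_ar_y = input_height / max_input_dim                    # 1.0
input_ar_x = input_width / max_input_dim                     # 0.5
scaled_height = int(random_scale * output_size * input_ar_y)  # 1024
scaled_width = int(random_scale * output_size * input_ar_x)   # 512

# A crop offset is then drawn in [0, scaled_dim - output_size] per axis
# (here up to 512 in y and 0 in x), and a normalized box of height 0.1 ends
# up with height 0.1 * scaled_height / output_size in the output frame,
# which is exactly the effective_scale check the test performs.
print(scaled_height, scaled_width)  # 1024 512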
 def get_default_func_arg_map(include_label_weights=True,
                              include_label_confidences=False,
                              include_multiclass_scores=False,
                              include_instance_masks=False,
                              include_keypoints=False,
-                             include_keypoint_visibilities=False):
+                             include_keypoint_visibilities=False,
+                             include_dense_pose=False):
   """Returns the default mapping from a preprocessor function to its args.

   Args:
@@ -4017,6 +4275,8 @@ def get_default_func_arg_map(include_label_weights=True,
       keypoints, too.
     include_keypoint_visibilities: If True, preprocessing functions will modify
       the keypoint visibilities, too.
+    include_dense_pose: If True, preprocessing functions will modify the
+      DensePose labels, too.

   Returns:
     A map from preprocessing functions to the arguments they receive.
@@ -4049,6 +4309,17 @@ def get_default_func_arg_map(include_label_weights=True,
     groundtruth_keypoint_visibilities = (
         fields.InputDataFields.groundtruth_keypoint_visibilities)
+  groundtruth_dp_num_points = None
+  groundtruth_dp_part_ids = None
+  groundtruth_dp_surface_coords = None
+  if include_dense_pose:
+    groundtruth_dp_num_points = (
+        fields.InputDataFields.groundtruth_dp_num_points)
+    groundtruth_dp_part_ids = (
+        fields.InputDataFields.groundtruth_dp_part_ids)
+    groundtruth_dp_surface_coords = (
+        fields.InputDataFields.groundtruth_dp_surface_coords)
   prep_func_arg_map = {
       normalize_image: (fields.InputDataFields.image,),
       random_horizontal_flip: (
@@ -4057,6 +4328,8 @@ def get_default_func_arg_map(include_label_weights=True,
           groundtruth_instance_masks,
           groundtruth_keypoints,
           groundtruth_keypoint_visibilities,
+          groundtruth_dp_part_ids,
+          groundtruth_dp_surface_coords,
       ),
       random_vertical_flip: (
           fields.InputDataFields.image,
@@ -4082,21 +4355,22 @@ def get_default_func_arg_map(include_label_weights=True,
       random_adjust_saturation: (fields.InputDataFields.image,),
       random_distort_color: (fields.InputDataFields.image,),
       random_jitter_boxes: (fields.InputDataFields.groundtruth_boxes,),
-      random_crop_image: (fields.InputDataFields.image,
-                          fields.InputDataFields.groundtruth_boxes,
-                          fields.InputDataFields.groundtruth_classes,
-                          groundtruth_label_weights,
-                          groundtruth_label_confidences,
-                          multiclass_scores,
-                          groundtruth_instance_masks,
-                          groundtruth_keypoints,
-                          groundtruth_keypoint_visibilities),
+      random_crop_image: (fields.InputDataFields.image,
+                          fields.InputDataFields.groundtruth_boxes,
+                          fields.InputDataFields.groundtruth_classes,
+                          groundtruth_label_weights,
+                          groundtruth_label_confidences,
+                          multiclass_scores,
+                          groundtruth_instance_masks,
+                          groundtruth_keypoints,
+                          groundtruth_keypoint_visibilities,
+                          groundtruth_dp_num_points,
+                          groundtruth_dp_part_ids,
+                          groundtruth_dp_surface_coords),
       random_pad_image: (fields.InputDataFields.image,
                          fields.InputDataFields.groundtruth_boxes,
                          groundtruth_instance_masks,
-                         groundtruth_keypoints),
+                         groundtruth_keypoints,
+                         groundtruth_dp_surface_coords),
       random_absolute_pad_image: (fields.InputDataFields.image,
                                   fields.InputDataFields.groundtruth_boxes,
                                   groundtruth_instance_masks,
-                                  groundtruth_keypoints),
+                                  groundtruth_keypoints,
+                                  groundtruth_dp_surface_coords),
       random_crop_pad_image: (fields.InputDataFields.image,
                               fields.InputDataFields.groundtruth_boxes,
                               fields.InputDataFields.groundtruth_classes,
@@ -4211,6 +4485,12 @@ def get_default_func_arg_map(include_label_weights=True,
           fields.InputDataFields.groundtruth_classes,
           groundtruth_label_weights,
           groundtruth_instance_masks,
           groundtruth_keypoints),
+      random_scale_crop_and_pad_to_square: (
+          fields.InputDataFields.image,
+          fields.InputDataFields.groundtruth_boxes,
+          fields.InputDataFields.groundtruth_classes,
+          groundtruth_label_weights,
+          groundtruth_instance_masks,
+          groundtruth_keypoints),
   }

   return prep_func_arg_map
research/object_detection/core/preprocessor_test.py
@@ -119,6 +119,24 @@ class PreprocessorTest(test_case.TestCase, parameterized.TestCase):
     ])
     return tf.constant(keypoints, dtype=tf.float32)

+  def createTestDensePose(self):
+    dp_num_points = tf.constant([1, 3], dtype=tf.int32)
+    dp_part_ids = tf.constant(
+        [[4, 0, 0],
+         [1, 0, 5]], dtype=tf.int32)
+    dp_surface_coords = tf.constant(
+        [
+            # Instance 0.
+            [[0.1, 0.2, 0.6, 0.7],
+             [0.0, 0.0, 0.0, 0.0],
+             [0.0, 0.0, 0.0, 0.0]],
+            # Instance 1.
+            [[0.8, 0.9, 0.2, 0.4],
+             [0.1, 0.3, 0.2, 0.8],
+             [0.6, 1.0, 0.3, 0.4]],
+        ], dtype=tf.float32)
+    return dp_num_points, dp_part_ids, dp_surface_coords
+
   def createKeypointFlipPermutation(self):
     return [0, 2, 1]
@@ -694,51 +712,6 @@ class PreprocessorTest(test_case.TestCase, parameterized.TestCase):
         test_masks=True,
         test_keypoints=True)

-  def testRunRandomHorizontalFlipWithMaskAndKeypoints(self):
-    def graph_fn():
-      preprocess_options = [(preprocessor.random_horizontal_flip, {})]
-      image_height = 3
-      image_width = 3
-      images = tf.random_uniform([1, image_height, image_width, 3])
-      boxes = self.createTestBoxes()
-      masks = self.createTestMasks()
-      keypoints, keypoint_visibilities = self.createTestKeypoints()
-      keypoint_flip_permutation = self.createKeypointFlipPermutation()
-      tensor_dict = {
-          fields.InputDataFields.image: images,
-          fields.InputDataFields.groundtruth_boxes: boxes,
-          fields.InputDataFields.groundtruth_instance_masks: masks,
-          fields.InputDataFields.groundtruth_keypoints: keypoints,
-          fields.InputDataFields.groundtruth_keypoint_visibilities:
-              keypoint_visibilities
-      }
-      preprocess_options = [(preprocessor.random_horizontal_flip, {
-          'keypoint_flip_permutation': keypoint_flip_permutation})]
-      preprocessor_arg_map = preprocessor.get_default_func_arg_map(
-          include_instance_masks=True,
-          include_keypoints=True,
-          include_keypoint_visibilities=True)
-      tensor_dict = preprocessor.preprocess(
-          tensor_dict, preprocess_options, func_arg_map=preprocessor_arg_map)
-      boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes]
-      masks = tensor_dict[fields.InputDataFields.groundtruth_instance_masks]
-      keypoints = tensor_dict[fields.InputDataFields.groundtruth_keypoints]
-      keypoint_visibilities = tensor_dict[
-          fields.InputDataFields.groundtruth_keypoint_visibilities]
-      return [boxes, masks, keypoints, keypoint_visibilities]
-
-    boxes, masks, keypoints, keypoint_visibilities = self.execute_cpu(
-        graph_fn, [])
-    self.assertIsNotNone(boxes)
-    self.assertIsNotNone(masks)
-    self.assertIsNotNone(keypoints)
-    self.assertIsNotNone(keypoint_visibilities)
-
   def testRandomVerticalFlip(self):
@@ -1886,6 +1859,65 @@ class PreprocessorTest(test_case.TestCase, parameterized.TestCase):
     self.assertAllClose(distorted_keypoints_.flatten(),
                         expected_keypoints.flatten())

+  def testRunRandomCropImageWithDensePose(self):
+    def graph_fn():
+      image = self.createColorfulTestImage()
+      boxes = self.createTestBoxes()
+      labels = self.createTestLabels()
+      weights = self.createTestGroundtruthWeights()
+      dp_num_points, dp_part_ids, dp_surface_coords = (
+          self.createTestDensePose())
+      tensor_dict = {
+          fields.InputDataFields.image: image,
+          fields.InputDataFields.groundtruth_boxes: boxes,
+          fields.InputDataFields.groundtruth_classes: labels,
+          fields.InputDataFields.groundtruth_weights: weights,
+          fields.InputDataFields.groundtruth_dp_num_points: dp_num_points,
+          fields.InputDataFields.groundtruth_dp_part_ids: dp_part_ids,
+          fields.InputDataFields.groundtruth_dp_surface_coords:
+              dp_surface_coords
+      }
+      preprocessor_arg_map = preprocessor.get_default_func_arg_map(
+          include_dense_pose=True)
+      preprocessing_options = [(preprocessor.random_crop_image, {})]
+      with mock.patch.object(tf.image, 'sample_distorted_bounding_box'
+                            ) as mock_sample_distorted_bounding_box:
+        mock_sample_distorted_bounding_box.return_value = (
+            tf.constant([6, 40, 0], dtype=tf.int32),
+            tf.constant([134, 340, -1], dtype=tf.int32),
+            tf.constant([[[0.03, 0.1, 0.7, 0.95]]], dtype=tf.float32))
+        distorted_tensor_dict = preprocessor.preprocess(
+            tensor_dict, preprocessing_options,
+            func_arg_map=preprocessor_arg_map)
+        distorted_image = distorted_tensor_dict[fields.InputDataFields.image]
+        distorted_dp_num_points = distorted_tensor_dict[
+            fields.InputDataFields.groundtruth_dp_num_points]
+        distorted_dp_part_ids = distorted_tensor_dict[
+            fields.InputDataFields.groundtruth_dp_part_ids]
+        distorted_dp_surface_coords = distorted_tensor_dict[
+            fields.InputDataFields.groundtruth_dp_surface_coords]
+        return [distorted_image, distorted_dp_num_points,
+                distorted_dp_part_ids, distorted_dp_surface_coords]
+    (distorted_image_, distorted_dp_num_points_, distorted_dp_part_ids_,
+     distorted_dp_surface_coords_) = self.execute_cpu(graph_fn, [])
+    expected_dp_num_points = np.array([1, 1])
+    expected_dp_part_ids = np.array([[4], [0]])
+    expected_dp_surface_coords = np.array([
+        [[0.10447761, 0.1176470, 0.6, 0.7]],
+        [[0.10447761, 0.2352941, 0.2, 0.8]],
+    ])
+    self.assertAllEqual(distorted_image_.shape, [1, 134, 340, 3])
+    self.assertAllEqual(distorted_dp_num_points_, expected_dp_num_points)
+    self.assertAllEqual(distorted_dp_part_ids_, expected_dp_part_ids)
+    self.assertAllClose(distorted_dp_surface_coords_,
+                        expected_dp_surface_coords)
+
   def testRunRetainBoxesAboveThreshold(self):
     def graph_fn():
       boxes = self.createTestBoxes()
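The expected surface coordinates in testRunRandomCropImageWithDensePose follow directly from the mocked crop window [0.03, 0.1, 0.7, 0.95] and the change-of-frame formula; a quick check in plain Python:

# (y, x) of each surviving point, re-expressed relative to the crop window.
ymin, xmin, ymax, xmax = 0.03, 0.1, 0.7, 0.95

def to_crop_frame(y, x):
  return (y - ymin) / (ymax - ymin), (x - xmin) / (xmax - xmin)

print(to_crop_frame(0.1, 0.2))  # (0.10447761..., 0.11764705...), instance 0
print(to_crop_frame(0.1, 0.3))  # (0.10447761..., 0.23529411...), instance 1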
@@ -2276,7 +2308,10 @@ class PreprocessorTest(test_case.TestCase, parameterized.TestCase):
     self.assertTrue(np.all((boxes_[:, 3] - boxes_[:, 1]) >=
                            (padded_boxes_[:, 3] - padded_boxes_[:, 1])))

-  def testRandomPadImageWithKeypointsAndMasks(self):
+  @parameterized.parameters(
+      {'include_dense_pose': False},
+  )
+  def testRandomPadImageWithKeypointsAndMasks(self, include_dense_pose):
     def graph_fn():
       preprocessing_options = [(preprocessor.normalize_image, {
           'original_minval': 0,
@@ -2290,12 +2325,15 @@ class PreprocessorTest(test_case.TestCase, parameterized.TestCase):
       labels = self.createTestLabels()
       masks = self.createTestMasks()
       keypoints, _ = self.createTestKeypoints()
+      _, _, dp_surface_coords = self.createTestDensePose()
       tensor_dict = {
           fields.InputDataFields.image: images,
           fields.InputDataFields.groundtruth_boxes: boxes,
           fields.InputDataFields.groundtruth_classes: labels,
           fields.InputDataFields.groundtruth_instance_masks: masks,
           fields.InputDataFields.groundtruth_keypoints: keypoints,
+          fields.InputDataFields.groundtruth_dp_surface_coords:
+              dp_surface_coords
       }
       tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
       images = tensor_dict[fields.InputDataFields.image]
@@ -2304,7 +2342,8 @@ class PreprocessorTest(test_case.TestCase, parameterized.TestCase):
       func_arg_map = preprocessor.get_default_func_arg_map(
           include_instance_masks=True,
           include_keypoints=True,
-          include_keypoint_visibilities=True)
+          include_keypoint_visibilities=True,
+          include_dense_pose=include_dense_pose)
       padded_tensor_dict = preprocessor.preprocess(tensor_dict,
                                                    preprocessing_options,
                                                    func_arg_map=func_arg_map)
@@ -2323,15 +2362,29 @@ class PreprocessorTest(test_case.TestCase, parameterized.TestCase):
       padded_keypoints_shape = tf.shape(padded_keypoints)
       images_shape = tf.shape(images)
       padded_images_shape = tf.shape(padded_images)
-      return [boxes_shape, padded_boxes_shape, padded_masks_shape,
-              keypoints_shape, padded_keypoints_shape, images_shape,
-              padded_images_shape, boxes, padded_boxes, keypoints,
-              padded_keypoints]
-
-    (boxes_shape_, padded_boxes_shape_, padded_masks_shape_, keypoints_shape_,
-     padded_keypoints_shape_, images_shape_, padded_images_shape_, boxes_,
-     padded_boxes_, keypoints_, padded_keypoints_) = self.execute_cpu(
-         graph_fn, [])
+      outputs = [boxes_shape, padded_boxes_shape, padded_masks_shape,
+                 keypoints_shape, padded_keypoints_shape, images_shape,
+                 padded_images_shape, boxes, padded_boxes, keypoints,
+                 padded_keypoints]
+      if include_dense_pose:
+        padded_dp_surface_coords = padded_tensor_dict[
+            fields.InputDataFields.groundtruth_dp_surface_coords]
+        outputs.extend([dp_surface_coords, padded_dp_surface_coords])
+      return outputs
+
+    outputs = self.execute_cpu(graph_fn, [])
+    boxes_shape_ = outputs[0]
+    padded_boxes_shape_ = outputs[1]
+    padded_masks_shape_ = outputs[2]
+    keypoints_shape_ = outputs[3]
+    padded_keypoints_shape_ = outputs[4]
+    images_shape_ = outputs[5]
+    padded_images_shape_ = outputs[6]
+    boxes_ = outputs[7]
+    padded_boxes_ = outputs[8]
+    keypoints_ = outputs[9]
+    padded_keypoints_ = outputs[10]
+
     self.assertAllEqual(boxes_shape_, padded_boxes_shape_)
     self.assertAllEqual(keypoints_shape_, padded_keypoints_shape_)
     self.assertTrue((images_shape_[1] >= padded_images_shape_[1] * 0.5).all)
@@ -2347,6 +2400,11 @@ class PreprocessorTest(test_case.TestCase, parameterized.TestCase):
                            padded_keypoints_[1, :, 0] - padded_keypoints_[0, :, 0])))
     self.assertTrue(np.all((keypoints_[1, :, 1] - keypoints_[0, :, 1]) >=
                            (padded_keypoints_[1, :, 1] - padded_keypoints_[0, :, 1])))
+    if include_dense_pose:
+      dp_surface_coords = outputs[11]
+      padded_dp_surface_coords = outputs[12]
+      self.assertAllClose(padded_dp_surface_coords[:, :, 2:],
+                          dp_surface_coords[:, :, 2:])

   def testRandomAbsolutePadImage(self):
     height_padding = 10
@@ -3783,6 +3841,90 @@ class PreprocessorTest(test_case.TestCase, parameterized.TestCase):
     size = max(image.shape)
     self.assertAlmostEqual(scale * 256.0, size)

     self.assertAllClose(image[:, :, 0], masks[0, :, :])

+  @parameterized.named_parameters(('scale_0_1', 0.1), ('scale_1_0', 1.0),
+                                  ('scale_2_0', 2.0))
+  def test_random_scale_crop_and_pad_to_square(self, scale):
+    def graph_fn():
+      image = np.random.randn(512, 256, 1)
+      box_centers = [0.25, 0.5, 0.75]
+      box_size = 0.1
+      box_corners = []
+      box_labels = []
+      box_label_weights = []
+      keypoints = []
+      masks = []
+      for center_y in box_centers:
+        for center_x in box_centers:
+          box_corners.append(
+              [center_y - box_size / 2.0, center_x - box_size / 2.0,
+               center_y + box_size / 2.0, center_x + box_size / 2.0])
+          box_labels.append([1])
+          box_label_weights.append([1.])
+          keypoints.append(
+              [[center_y - box_size / 2.0, center_x - box_size / 2.0],
+               [center_y + box_size / 2.0, center_x + box_size / 2.0]])
+          masks.append(image[:, :, 0].reshape(512, 256))
+
+      image = tf.constant(image)
+      boxes = tf.constant(box_corners)
+      labels = tf.constant(box_labels)
+      label_weights = tf.constant(box_label_weights)
+      keypoints = tf.constant(keypoints)
+      masks = tf.constant(np.stack(masks))
+
+      (new_image, new_boxes, _, _, new_masks,
+       new_keypoints) = preprocessor.random_scale_crop_and_pad_to_square(
+           image, boxes, labels, label_weights, masks=masks,
+           keypoints=keypoints, scale_min=scale, scale_max=scale,
+           output_size=512)
+      return new_image, new_boxes, new_masks, new_keypoints
+
+    image, boxes, masks, keypoints = self.execute_cpu(graph_fn, [])
+
+    # Since random_scale_crop_and_pad_to_square may prune and clip boxes,
+    # we only need to find one of the boxes that was not clipped and check
+    # that it matches the expected dimensions. Note, assertAlmostEqual(a, b)
+    # is equivalent to round(a-b, 7) == 0.
+    any_box_has_correct_size = False
+    effective_scale_y = int(scale * 512) / 512.0
+    effective_scale_x = int(scale * 256) / 512.0
+    expected_size_y = 0.1 * effective_scale_y
+    expected_size_x = 0.1 * effective_scale_x
+    for box in boxes:
+      ymin, xmin, ymax, xmax = box
+      any_box_has_correct_size |= (
+          (round(ymin, 7) != 0.0) and (round(xmin, 7) != 0.0) and
+          (round(ymax, 7) != 1.0) and (round(xmax, 7) != 1.0) and
+          (round((ymax - ymin) - expected_size_y, 7) == 0.0) and
+          (round((xmax - xmin) - expected_size_x, 7) == 0.0))
+    self.assertTrue(any_box_has_correct_size)
+
+    # Similar to the approach above where we check for at least one box with
+    # the expected dimensions, we check for at least one pair of keypoints
+    # whose distance matches the expected dimensions.
+    any_keypoint_pair_has_correct_dist = False
+    for keypoint_pair in keypoints:
+      ymin, xmin = keypoint_pair[0]
+      ymax, xmax = keypoint_pair[1]
+      any_keypoint_pair_has_correct_dist |= (
+          (round(ymin, 7) != 0.0) and (round(xmin, 7) != 0.0) and
+          (round(ymax, 7) != 1.0) and (round(xmax, 7) != 1.0) and
+          (round((ymax - ymin) - expected_size_y, 7) == 0.0) and
+          (round((xmax - xmin) - expected_size_x, 7) == 0.0))
+    self.assertTrue(any_keypoint_pair_has_correct_dist)
+
+    self.assertAlmostEqual(512.0, image.shape[0])
+    self.assertAlmostEqual(512.0, image.shape[1])
+
+    self.assertAllClose(image[:, :, 0], masks[0, :, :])
research/object_detection/core/standard_fields.py
@@ -66,6 +66,11 @@ class InputDataFields(object):
     groundtruth_keypoint_weights: groundtruth weight factor for keypoints.
     groundtruth_label_weights: groundtruth label weights.
     groundtruth_weights: groundtruth weight factor for bounding boxes.
+    groundtruth_dp_num_points: The number of DensePose sampled points for each
+      instance.
+    groundtruth_dp_part_ids: Part indices for DensePose points.
+    groundtruth_dp_surface_coords: Image locations and UV coordinates for
+      DensePose points.
     num_groundtruth_boxes: number of groundtruth boxes.
     is_annotated: whether an image has been labeled or not.
     true_image_shapes: true shapes of images in the resized images, as resized
@@ -108,6 +113,9 @@ class InputDataFields(object):
   groundtruth_keypoint_weights = 'groundtruth_keypoint_weights'
   groundtruth_label_weights = 'groundtruth_label_weights'
   groundtruth_weights = 'groundtruth_weights'
+  groundtruth_dp_num_points = 'groundtruth_dp_num_points'
+  groundtruth_dp_part_ids = 'groundtruth_dp_part_ids'
+  groundtruth_dp_surface_coords = 'groundtruth_dp_surface_coords'
   num_groundtruth_boxes = 'num_groundtruth_boxes'
   is_annotated = 'is_annotated'
   true_image_shape = 'true_image_shape'
@@ -133,6 +141,8 @@ class DetectionResultFields(object):
       for detection boxes in the image including background class.
     detection_classes: detection-level class labels.
     detection_masks: contains a segmentation mask for each detection box.
+    detection_surface_coords: contains DensePose surface coordinates for each
+      box.
     detection_boundaries: contains an object boundary for each detection box.
     detection_keypoints: contains detection keypoints for each detection box.
     detection_keypoint_scores: contains detection keypoint scores.
@@ -153,6 +163,7 @@ class DetectionResultFields(object):
   detection_features = 'detection_features'
   detection_classes = 'detection_classes'
   detection_masks = 'detection_masks'
+  detection_surface_coords = 'detection_surface_coords'
   detection_boundaries = 'detection_boundaries'
   detection_keypoints = 'detection_keypoints'
   detection_keypoint_scores = 'detection_keypoint_scores'
@@ -174,7 +185,11 @@ class BoxListFields(object):
     masks: masks per bounding box.
     boundaries: boundaries per bounding box.
     keypoints: keypoints per bounding box.
+    keypoint_visibilities: keypoint visibilities per bounding box.
     keypoint_heatmaps: keypoint heatmaps per bounding box.
+    densepose_num_points: number of DensePose points per bounding box.
+    densepose_part_ids: DensePose part ids per bounding box.
+    densepose_surface_coords: DensePose surface coordinates per bounding box.
     is_crowd: is_crowd annotation per bounding box.
   """
   boxes = 'boxes'
@@ -188,6 +203,9 @@ class BoxListFields(object):
   keypoints = 'keypoints'
   keypoint_visibilities = 'keypoint_visibilities'
   keypoint_heatmaps = 'keypoint_heatmaps'
+  densepose_num_points = 'densepose_num_points'
+  densepose_part_ids = 'densepose_part_ids'
+  densepose_surface_coords = 'densepose_surface_coords'
   is_crowd = 'is_crowd'
   group_of = 'group_of'
research/object_detection/core/target_assigner.py
...
@@ -45,6 +45,7 @@ from object_detection.box_coders import mean_stddev_box_coder
 from object_detection.core import box_coder
 from object_detection.core import box_list
 from object_detection.core import box_list_ops
+from object_detection.core import densepose_ops
 from object_detection.core import keypoint_ops
 from object_detection.core import matcher as mat
 from object_detection.core import region_similarity_calculator as sim_calc
...
@@ -799,17 +800,15 @@ def get_batch_predictions_from_indices(batch_predictions, indices):
     function.

   Args:
-    batch_predictions: A tensor of shape [batch_size, height, width, 2] for
-      single class offsets and [batch_size, height, width, class, 2] for
-      multiple classes offsets (e.g. keypoint joint offsets) representing the
-      (height, width) or (y_offset, x_offset) predictions over a batch.
-    indices: A tensor of shape [num_instances, 3] for single class offset and
-      [num_instances, 4] for multiple classes offsets representing the indices
-      in the batch to be penalized in a loss function
+    batch_predictions: A tensor of shape [batch_size, height, width, channels]
+      or [batch_size, height, width, class, channels] for class-specific
+      features (e.g. keypoint joint offsets).
+    indices: A tensor of shape [num_instances, 3] for single class features or
+      [num_instances, 4] for multiple classes features.

   Returns:
-    values: A tensor of shape [num_instances, 2] holding the predicted
-      values at the given indices.
+    values: A tensor of shape [num_instances, channels] holding the predicted
+      values at the given indices.
   """
   return tf.gather_nd(batch_predictions, indices)
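A quick sketch (not part of the commit; shapes are made up for the example) of what this function computes: tf.gather_nd simply pulls one channels-vector per index row.

import tensorflow as tf

batch_predictions = tf.reshape(
    tf.range(2 * 4 * 4 * 3, dtype=tf.float32),
    [2, 4, 4, 3])  # [batch_size, height, width, channels]
indices = tf.constant([[0, 1, 2], [1, 3, 0]])  # [num_instances, 3]
values = tf.gather_nd(batch_predictions, indices)  # shape [2, 3]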
...
@@ -1601,6 +1600,17 @@ class CenterNetKeypointTargetAssigner(object):
     return (batch_indices, batch_offsets, batch_weights)


def _resize_masks(masks, height, width, method):
  # Resize segmentation masks to conform to output dimensions. Use TF2
  # image resize because TF1's version is buggy:
  # https://yaqs.corp.google.com/eng/q/4970450458378240
  masks = tf2.image.resize(
      masks[:, :, :, tf.newaxis],
      size=(height, width),
      method=method)
  return masks[:, :, :, 0]
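A minimal usage sketch (not in the commit; it assumes the module's tf/tf2 imports and uses the 'nearest' resize method for illustration) showing that the helper resizes only the spatial dimensions and keeps the instance dimension:

import numpy as np

masks = tf.constant(np.ones((3, 64, 64), dtype=np.float32))  # [instances, h, w]
resized = _resize_masks(masks, 16, 16, method='nearest')
# resized.shape == (3, 16, 16)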
class CenterNetMaskTargetAssigner(object):
  """Wrapper to compute targets for segmentation masks."""
...
@@ -1642,13 +1652,9 @@ class CenterNetMaskTargetAssigner(object):
     segmentation_targets_list = []
     for gt_masks, gt_classes in zip(gt_masks_list, gt_classes_list):
-      # Resize segmentation masks to conform to output dimensions. Use TF2
-      # image resize because TF1's version is buggy:
-      # https://yaqs.corp.google.com/eng/q/4970450458378240
-      gt_masks = tf2.image.resize(
-          gt_masks[:, :, :, tf.newaxis],
-          size=(output_height, output_width),
-          method=mask_resize_method)
+      gt_masks = _resize_masks(gt_masks, output_height, output_width,
+                               mask_resize_method)
+      gt_masks = gt_masks[:, :, :, tf.newaxis]
       gt_classes_reshaped = tf.reshape(gt_classes, [-1, 1, 1, num_classes])
       # Shape: [h, w, num_classes].
       segmentations_for_image = tf.reduce_max(
...
@@ -1657,3 +1663,235 @@ class CenterNetMaskTargetAssigner(object):
    segmentation_target = tf.stack(segmentation_targets_list, axis=0)
    return segmentation_target


class CenterNetDensePoseTargetAssigner(object):
  """Wrapper to compute targets for DensePose task."""

  def __init__(self, stride, num_parts=24):
    self._stride = stride
    self._num_parts = num_parts

  def assign_part_and_coordinate_targets(self,
                                         height,
                                         width,
                                         gt_dp_num_points_list,
                                         gt_dp_part_ids_list,
                                         gt_dp_surface_coords_list,
                                         gt_weights_list=None):
    """Returns the DensePose part_id and coordinate targets and their indices.

    The returned values are expected to be used with predicted tensors
    of size (batch_size, height//self._stride, width//self._stride, 2). The
    predicted values at the relevant indices can be retrieved with the
    get_batch_predictions_from_indices function.

    Args:
      height: int, height of input to the model. This is used to determine the
        height of the output.
      width: int, width of the input to the model. This is used to determine
        the width of the output.
      gt_dp_num_points_list: a list of 1-D tf.int32 tensors of shape
        [num_boxes] containing the number of DensePose sampled points per box.
      gt_dp_part_ids_list: a list of 2-D tf.int32 tensors of shape
        [num_boxes, max_sampled_points] containing the DensePose part ids
        (0-indexed) for each sampled point. Note that there may be padding, as
        boxes may contain a different number of sampled points.
      gt_dp_surface_coords_list: a list of 3-D tf.float32 tensors of shape
        [num_boxes, max_sampled_points, 4] containing the DensePose surface
        coordinates (normalized) for each sampled point. Note that there may
        be padding.
      gt_weights_list: A list of 1-D tensors with shape [num_boxes]
        corresponding to the weight of each groundtruth detection box.

    Returns:
      batch_indices: an integer tensor of shape [num_total_points, 4] holding
        the indices inside the predicted tensor which should be penalized. The
        first column indicates the index along the batch dimension and the
        second and third columns indicate the index along the y and x
        dimensions respectively. The fourth column is the part index.
      batch_part_ids: an int tensor of shape [num_total_points, num_parts]
        holding 1-hot encodings of parts for each sampled point.
      batch_surface_coords: a float tensor of shape [num_total_points, 2]
        holding the expected (v, u) coordinates for each sampled point.
      batch_weights: a float tensor of shape [num_total_points] indicating the
        weight of each prediction.
      Note that num_total_points = batch_size * num_boxes * max_sampled_points.
    """
    if gt_weights_list is None:
      gt_weights_list = [None] * len(gt_dp_num_points_list)

    batch_indices = []
    batch_part_ids = []
    batch_surface_coords = []
    batch_weights = []
    for i, (num_points, part_ids, surface_coords, weights) in enumerate(
        zip(gt_dp_num_points_list, gt_dp_part_ids_list,
            gt_dp_surface_coords_list, gt_weights_list)):
      num_boxes, max_sampled_points = (
          shape_utils.combined_static_and_dynamic_shape(part_ids))
      part_ids_flattened = tf.reshape(part_ids, [-1])
      part_ids_one_hot = tf.one_hot(part_ids_flattened, depth=self._num_parts)
      # Get DensePose coordinates in the output space.
      surface_coords_abs = densepose_ops.to_absolute_coordinates(
          surface_coords, height // self._stride, width // self._stride)
      surface_coords_abs = tf.reshape(surface_coords_abs, [-1, 4])
      # Each tensor has shape [num_boxes * max_sampled_points].
      yabs, xabs, v, u = tf.unstack(surface_coords_abs, axis=-1)

      # Get the indices (in output space) for the DensePose coordinates. Note
      # that if self._stride is larger than 1, this will have the effect of
      # reducing spatial resolution of the groundtruth points.
      indices_y = tf.cast(yabs, tf.int32)
      indices_x = tf.cast(xabs, tf.int32)

      # Assign ones if weights are not provided.
      if weights is None:
        weights = tf.ones(num_boxes, dtype=tf.float32)
      # Create per-point weights.
      weights_per_point = tf.reshape(
          tf.tile(weights[:, tf.newaxis], multiples=[1, max_sampled_points]),
          shape=[-1])
      # Mask out invalid (i.e. padded) DensePose points.
      num_points_tiled = tf.tile(num_points[:, tf.newaxis],
                                 multiples=[1, max_sampled_points])
      range_tiled = tf.tile(tf.range(max_sampled_points)[tf.newaxis, :],
                            multiples=[num_boxes, 1])
      valid_points = tf.math.less(range_tiled, num_points_tiled)
      valid_points = tf.cast(tf.reshape(valid_points, [-1]), dtype=tf.float32)
      weights_per_point = weights_per_point * valid_points

      # Shape of [num_boxes * max_sampled_points] integer tensor filled with
      # current batch index.
      batch_index = i * tf.ones_like(indices_y, dtype=tf.int32)
      batch_indices.append(
          tf.stack([batch_index, indices_y, indices_x, part_ids_flattened],
                   axis=1))
      batch_part_ids.append(part_ids_one_hot)
      batch_surface_coords.append(tf.stack([v, u], axis=1))
      batch_weights.append(weights_per_point)

    batch_indices = tf.concat(batch_indices, axis=0)
    batch_part_ids = tf.concat(batch_part_ids, axis=0)
    batch_surface_coords = tf.concat(batch_surface_coords, axis=0)
    batch_weights = tf.concat(batch_weights, axis=0)
    return batch_indices, batch_part_ids, batch_surface_coords, batch_weights
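The index arithmetic above can be checked by hand. This sketch is illustrative only, mirroring the first case in the target assigner test further down:

import math

height, width, stride = 120, 80, 4
y, x = 0.11, 0.2  # normalized (y, x) of one sampled point
index_y = math.floor(y * (height // stride))  # floor(0.11 * 30) = 3
index_x = math.floor(x * (width // stride))   # floor(0.2 * 20) = 4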
def filter_mask_overlap_min_area(masks):
  """If a pixel belongs to 2 instances, remove it from the larger instance."""

  num_instances = tf.shape(masks)[0]

  def _filter_min_area():
    """Helper function to filter non empty masks."""
    areas = tf.reduce_sum(masks, axis=[1, 2], keepdims=True)
    per_pixel_area = masks * areas
    # Make sure background is ignored in argmin.
    per_pixel_area = (masks * per_pixel_area +
                      (1 - masks) * per_pixel_area.dtype.max)
    min_index = tf.cast(tf.argmin(per_pixel_area, axis=0), tf.int32)

    filtered_masks = (
        tf.range(num_instances)[:, tf.newaxis, tf.newaxis]
        ==
        min_index[tf.newaxis, :, :]
    )

    return tf.cast(filtered_masks, tf.float32) * masks

  return tf.cond(num_instances > 0, _filter_min_area, lambda: masks)


def filter_mask_overlap(masks, method='min_area'):

  if method == 'min_area':
    return filter_mask_overlap_min_area(masks)
  else:
    raise ValueError('Unknown mask overlap filter type - {}'.format(method))
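A worked example of the min-area rule (illustrative, not from the commit; it assumes the module's tf import): the pixel shared by a small and a large instance stays with the smaller one.

import numpy as np

small = np.array([[1, 0], [0, 0]], dtype=np.float32)  # area 1
large = np.array([[1, 1], [1, 1]], dtype=np.float32)  # area 4
filtered = filter_mask_overlap_min_area(
    tf.constant(np.stack([small, large])))
# filtered[0] -> [[1, 0], [0, 0]] (keeps the shared pixel)
# filtered[1] -> [[0, 1], [1, 1]] (loses the shared pixel)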
class CenterNetCornerOffsetTargetAssigner(object):
  """Wrapper to compute corner offsets for boxes using masks."""

  def __init__(self, stride, overlap_resolution='min_area'):
    """Initializes the corner offset target assigner.

    Args:
      stride: int, the stride of the network in output pixels.
      overlap_resolution: string, specifies how we handle overlapping
        instance masks. Currently only 'min_area' is supported which assigns
        overlapping pixels to the instance with the minimum area.
    """
    self._stride = stride
    self._overlap_resolution = overlap_resolution

  def assign_corner_offset_targets(self, gt_boxes_list, gt_masks_list):
    """Computes the corner offset targets and foreground map.

    For each pixel that is part of any object's foreground, this function
    computes the relative offsets to the top-left and bottom-right corners of
    that instance's bounding box. It also returns a foreground map to indicate
    which pixels contain valid corner offsets.

    Args:
      gt_boxes_list: A list of float tensors with shape [num_boxes, 4]
        representing the groundtruth detection bounding boxes for each sample
        in the batch. The coordinates are expected in normalized coordinates.
      gt_masks_list: A list of float tensors with shape [num_boxes,
        input_height, input_width] with values in {0, 1} representing instance
        masks for each object.

    Returns:
      corner_offsets: A float tensor of shape [batch_size, height, width, 4]
        containing, in order, the (y, x) offsets to the top left corner and
        the (y, x) offsets to the bottom right corner for each foreground
        pixel.
      foreground: A float tensor of shape [batch_size, height, width] in which
        each pixel is set to 1 if it is a part of any instance's foreground
        (and thus contains valid corner offsets) and 0 otherwise.
    """
    _, input_height, input_width = (
        shape_utils.combined_static_and_dynamic_shape(gt_masks_list[0]))
    output_height = input_height // self._stride
    output_width = input_width // self._stride
    y_grid, x_grid = tf.meshgrid(
        tf.range(output_height), tf.range(output_width), indexing='ij')
    y_grid, x_grid = tf.cast(y_grid, tf.float32), tf.cast(x_grid, tf.float32)

    corner_targets = []
    foreground_targets = []
    for gt_masks, gt_boxes in zip(gt_masks_list, gt_boxes_list):
      gt_masks = _resize_masks(gt_masks, output_height, output_width,
                               method=ResizeMethod.NEAREST_NEIGHBOR)
      gt_masks = filter_mask_overlap(gt_masks, self._overlap_resolution)

      ymin, xmin, ymax, xmax = tf.unstack(gt_boxes, axis=1)
      ymin, ymax = ymin * output_height, ymax * output_height
      xmin, xmax = xmin * output_width, xmax * output_width

      top_y = ymin[:, tf.newaxis, tf.newaxis] - y_grid[tf.newaxis]
      left_x = xmin[:, tf.newaxis, tf.newaxis] - x_grid[tf.newaxis]
      bottom_y = ymax[:, tf.newaxis, tf.newaxis] - y_grid[tf.newaxis]
      right_x = xmax[:, tf.newaxis, tf.newaxis] - x_grid[tf.newaxis]

      foreground_target = tf.cast(tf.reduce_sum(gt_masks, axis=0) > 0.5,
                                  tf.float32)
      foreground_targets.append(foreground_target)

      corner_target = tf.stack([
          tf.reduce_sum(top_y * gt_masks, axis=0),
          tf.reduce_sum(left_x * gt_masks, axis=0),
          tf.reduce_sum(bottom_y * gt_masks, axis=0),
          tf.reduce_sum(right_x * gt_masks, axis=0),
      ], axis=2)

      corner_targets.append(corner_target)

    return (tf.stack(corner_targets, axis=0),
            tf.stack(foreground_targets, axis=0))
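Worked arithmetic (illustrative) for the offset definition above, matching the single-object test that follows: with stride 1 and normalized box (0., 0., 1., 1.) on a 4x4 output map, a foreground pixel at (y, x) = (1, 1) gets

# (ymin - y, xmin - x, ymax - y, xmax - x) in output pixels:
offsets = (0 - 1, 0 - 1, 4 - 1, 4 - 1)  # == (-1, -1, 3, 3)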
research/object_detection/core/target_assigner_test.py
...
@@ -1906,6 +1906,274 @@ class CenterNetMaskTargetAssignerTest(test_case.TestCase):
        expected_seg_target, segmentation_target)
class CenterNetDensePoseTargetAssignerTest(test_case.TestCase):

  def test_assign_part_and_coordinate_targets(self):
    def graph_fn():
      gt_dp_num_points_list = [
          # Example 0.
          tf.constant([2, 0, 3], dtype=tf.int32),
          # Example 1.
          tf.constant([1, 1], dtype=tf.int32),
      ]
      gt_dp_part_ids_list = [
          # Example 0.
          tf.constant([[1, 6, 0], [0, 0, 0], [0, 2, 3]], dtype=tf.int32),
          # Example 1.
          tf.constant([[7, 0, 0], [0, 0, 0]], dtype=tf.int32),
      ]
      gt_dp_surface_coords_list = [
          # Example 0.
          tf.constant(
              [[[0.11, 0.2, 0.3, 0.4],  # Box 0.
                [0.6, 0.4, 0.1, 0.0],
                [0.0, 0.0, 0.0, 0.0]],
               [[0.0, 0.0, 0.0, 0.0],  # Box 1.
                [0.0, 0.0, 0.0, 0.0],
                [0.0, 0.0, 0.0, 0.0]],
               [[0.22, 0.1, 0.6, 0.8],  # Box 2.
                [0.0, 0.4, 0.5, 1.0],
                [0.3, 0.2, 0.4, 0.1]]],
              dtype=tf.float32),
          # Example 1.
          tf.constant(
              [[[0.5, 0.5, 0.3, 1.0],  # Box 0.
                [0.0, 0.0, 0.0, 0.0],
                [0.0, 0.0, 0.0, 0.0]],
               [[0.2, 0.2, 0.5, 0.8],  # Box 1.
                [0.0, 0.0, 0.0, 0.0],
                [0.0, 0.0, 0.0, 0.0]]],
              dtype=tf.float32),
      ]
      gt_weights_list = [
          # Example 0.
          tf.constant([1.0, 1.0, 0.5], dtype=tf.float32),
          # Example 1.
          tf.constant([0.0, 1.0], dtype=tf.float32),
      ]
      cn_assigner = targetassigner.CenterNetDensePoseTargetAssigner(stride=4)
      batch_indices, batch_part_ids, batch_surface_coords, batch_weights = (
          cn_assigner.assign_part_and_coordinate_targets(
              height=120,
              width=80,
              gt_dp_num_points_list=gt_dp_num_points_list,
              gt_dp_part_ids_list=gt_dp_part_ids_list,
              gt_dp_surface_coords_list=gt_dp_surface_coords_list,
              gt_weights_list=gt_weights_list))
      return batch_indices, batch_part_ids, batch_surface_coords, batch_weights

    batch_indices, batch_part_ids, batch_surface_coords, batch_weights = (
        self.execute(graph_fn, []))

    expected_batch_indices = np.array([
        # Example 0. e.g.
        # The first set of indices is calculated as follows:
        # floor(0.11*120/4) = 3, floor(0.2*80/4) = 4.
        [0, 3, 4, 1], [0, 18, 8, 6], [0, 0, 0, 0], [0, 0, 0, 0],
        [0, 0, 0, 0], [0, 0, 0, 0], [0, 6, 2, 0], [0, 0, 8, 2],
        [0, 9, 4, 3],
        # Example 1.
        [1, 15, 10, 7], [1, 0, 0, 0], [1, 0, 0, 0], [1, 6, 4, 0],
        [1, 0, 0, 0], [1, 0, 0, 0]
    ], dtype=np.int32)
    expected_batch_part_ids = tf.one_hot(
        [1, 6, 0, 0, 0, 0, 0, 2, 3, 7, 0, 0, 0, 0, 0], depth=24).numpy()
    expected_batch_surface_coords = np.array([
        # Box 0.
        [0.3, 0.4], [0.1, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0],
        [0.0, 0.0], [0.6, 0.8], [0.5, 1.0], [0.4, 0.1],
        # Box 1.
        [0.3, 1.0], [0.0, 0.0], [0.0, 0.0], [0.5, 0.8], [0.0, 0.0],
        [0.0, 0.0],
    ], np.float32)
    expected_batch_weights = np.array([
        # Box 0.
        1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.5, 0.5,
        # Box 1.
        0.0, 0.0, 0.0, 1.0, 0.0, 0.0
    ], dtype=np.float32)
    self.assertAllEqual(expected_batch_indices, batch_indices)
    self.assertAllEqual(expected_batch_part_ids, batch_part_ids)
    self.assertAllClose(expected_batch_surface_coords, batch_surface_coords)
    self.assertAllClose(expected_batch_weights, batch_weights)
class CornerOffsetTargetAssignerTest(test_case.TestCase):

  def test_filter_overlap_min_area_empty(self):
    """Test that empty masks work on CPU."""
    def graph_fn(masks):
      return targetassigner.filter_mask_overlap_min_area(masks)

    masks = self.execute_cpu(graph_fn,
                             [np.zeros((0, 5, 5), dtype=np.float32)])
    self.assertEqual(masks.shape, (0, 5, 5))

  def test_filter_overlap_min_area(self):
    """Test the object with min. area is selected instead of overlap."""
    def graph_fn(masks):
      return targetassigner.filter_mask_overlap_min_area(masks)

    masks = np.zeros((3, 4, 4), dtype=np.float32)
    masks[0, :2, :2] = 1.0
    masks[1, :3, :3] = 1.0
    masks[2, 3, 3] = 1.0

    masks = self.execute(graph_fn, [masks])
    self.assertAllClose(masks[0],
                        [[1, 1, 0, 0],
                         [1, 1, 0, 0],
                         [0, 0, 0, 0],
                         [0, 0, 0, 0]])
    self.assertAllClose(masks[1],
                        [[0, 0, 1, 0],
                         [0, 0, 1, 0],
                         [1, 1, 1, 0],
                         [0, 0, 0, 0]])
    self.assertAllClose(masks[2],
                        [[0, 0, 0, 0],
                         [0, 0, 0, 0],
                         [0, 0, 0, 0],
                         [0, 0, 0, 1]])

  def test_assign_corner_offset_single_object(self):
    """Test that corner offsets are correct with a single object."""
    assigner = targetassigner.CenterNetCornerOffsetTargetAssigner(stride=1)

    def graph_fn():
      boxes = [tf.constant([[0., 0., 1., 1.]])]
      mask = np.zeros((1, 4, 4), dtype=np.float32)
      mask[0, 1:3, 1:3] = 1.0
      masks = [tf.constant(mask)]
      return assigner.assign_corner_offset_targets(boxes, masks)

    corner_offsets, foreground = self.execute(graph_fn, [])
    self.assertAllClose(foreground[0],
                        [[0, 0, 0, 0],
                         [0, 1, 1, 0],
                         [0, 1, 1, 0],
                         [0, 0, 0, 0]])
    self.assertAllClose(corner_offsets[0, :, :, 0],
                        [[0, 0, 0, 0],
                         [0, -1, -1, 0],
                         [0, -2, -2, 0],
                         [0, 0, 0, 0]])
    self.assertAllClose(corner_offsets[0, :, :, 1],
                        [[0, 0, 0, 0],
                         [0, -1, -2, 0],
                         [0, -1, -2, 0],
                         [0, 0, 0, 0]])
    self.assertAllClose(corner_offsets[0, :, :, 2],
                        [[0, 0, 0, 0],
                         [0, 3, 3, 0],
                         [0, 2, 2, 0],
                         [0, 0, 0, 0]])
    self.assertAllClose(corner_offsets[0, :, :, 3],
                        [[0, 0, 0, 0],
                         [0, 3, 2, 0],
                         [0, 3, 2, 0],
                         [0, 0, 0, 0]])

  def test_assign_corner_offset_multiple_objects(self):
    """Test corner offsets are correct with multiple objects."""
    assigner = targetassigner.CenterNetCornerOffsetTargetAssigner(stride=1)

    def graph_fn():
      boxes = [
          tf.constant([[0., 0., 1., 1.], [0., 0., 0., 0.]]),
          tf.constant([[0., 0., .25, .25], [.25, .25, 1., 1.]])
      ]
      mask1 = np.zeros((2, 4, 4), dtype=np.float32)
      mask1[0, 0, 0] = 1.0
      mask1[0, 3, 3] = 1.0
      mask2 = np.zeros((2, 4, 4), dtype=np.float32)
      mask2[0, :2, :2] = 1.0
      mask2[1, 1:, 1:] = 1.0
      masks = [tf.constant(mask1), tf.constant(mask2)]
      return assigner.assign_corner_offset_targets(boxes, masks)

    corner_offsets, foreground = self.execute(graph_fn, [])
    self.assertEqual(corner_offsets.shape, (2, 4, 4, 4))
    self.assertEqual(foreground.shape, (2, 4, 4))

    self.assertAllClose(foreground[0],
                        [[1, 0, 0, 0],
                         [0, 0, 0, 0],
                         [0, 0, 0, 0],
                         [0, 0, 0, 1]])
    self.assertAllClose(corner_offsets[0, :, :, 0],
                        [[0, 0, 0, 0],
                         [0, 0, 0, 0],
                         [0, 0, 0, 0],
                         [0, 0, 0, -3]])
    self.assertAllClose(corner_offsets[0, :, :, 1],
                        [[0, 0, 0, 0],
                         [0, 0, 0, 0],
                         [0, 0, 0, 0],
                         [0, 0, 0, -3]])
    self.assertAllClose(corner_offsets[0, :, :, 2],
                        [[4, 0, 0, 0],
                         [0, 0, 0, 0],
                         [0, 0, 0, 0],
                         [0, 0, 0, 1]])
    self.assertAllClose(corner_offsets[0, :, :, 3],
                        [[4, 0, 0, 0],
                         [0, 0, 0, 0],
                         [0, 0, 0, 0],
                         [0, 0, 0, 1]])

    self.assertAllClose(foreground[1],
                        [[1, 1, 0, 0],
                         [1, 1, 1, 1],
                         [0, 1, 1, 1],
                         [0, 1, 1, 1]])
    self.assertAllClose(corner_offsets[1, :, :, 0],
                        [[0, 0, 0, 0],
                         [-1, -1, 0, 0],
                         [0, -1, -1, -1],
                         [0, -2, -2, -2]])
    self.assertAllClose(corner_offsets[1, :, :, 1],
                        [[0, -1, 0, 0],
                         [0, -1, -1, -2],
                         [0, 0, -1, -2],
                         [0, 0, -1, -2]])
    self.assertAllClose(corner_offsets[1, :, :, 2],
                        [[1, 1, 0, 0],
                         [0, 0, 3, 3],
                         [0, 2, 2, 2],
                         [0, 1, 1, 1]])
    self.assertAllClose(corner_offsets[1, :, :, 3],
                        [[1, 0, 0, 0],
                         [1, 0, 2, 1],
                         [0, 3, 2, 1],
                         [0, 3, 2, 1]])

  def test_assign_corner_offsets_no_objects(self):
    """Test assignment works with empty input on cpu."""
    assigner = targetassigner.CenterNetCornerOffsetTargetAssigner(stride=1)

    def graph_fn():
      boxes = [tf.zeros((0, 4), dtype=tf.float32)]
      masks = [tf.zeros((0, 5, 5), dtype=tf.float32)]
      return assigner.assign_corner_offset_targets(boxes, masks)

    corner_offsets, foreground = self.execute_cpu(graph_fn, [])
    self.assertAllClose(corner_offsets, np.zeros((1, 5, 5, 4)))
    self.assertAllClose(foreground, np.zeros((1, 5, 5)))


if __name__ == '__main__':
  tf.enable_v2_behavior()
  tf.test.main()
research/object_detection/data_decoders/tf_example_decoder.py
...
@@ -30,6 +30,7 @@ from object_detection.core import data_decoder
 from object_detection.core import standard_fields as fields
 from object_detection.protos import input_reader_pb2
 from object_detection.utils import label_map_util
+from object_detection.utils import shape_utils

 # pylint: disable=g-import-not-at-top
 try:
...
@@ -170,7 +171,8 @@ class TfExampleDecoder(data_decoder.DataDecoder):
                num_additional_channels=0,
                load_multiclass_scores=False,
                load_context_features=False,
-               expand_hierarchy_labels=False):
+               expand_hierarchy_labels=False,
+               load_dense_pose=False):
     """Constructor sets keys_to_features and items_to_handlers.

     Args:
...
@@ -201,6 +203,7 @@ class TfExampleDecoder(data_decoder.DataDecoder):
         account the provided hierarchy in the label_map_proto_file. For
         positive classes, the labels are extended to ancestor. For negative
         classes, the labels are expanded to descendants.
+      load_dense_pose: Whether to load DensePose annotations.

     Raises:
       ValueError: If `instance_mask_type` option is not one of
...
@@ -371,6 +374,34 @@ class TfExampleDecoder(data_decoder.DataDecoder):
              self._decode_png_instance_masks))
    else:
      raise ValueError('Did not recognize the `instance_mask_type` option.')
    if load_dense_pose:
      self.keys_to_features['image/object/densepose/num'] = (
          tf.VarLenFeature(tf.int64))
      self.keys_to_features['image/object/densepose/part_index'] = (
          tf.VarLenFeature(tf.int64))
      self.keys_to_features['image/object/densepose/x'] = (
          tf.VarLenFeature(tf.float32))
      self.keys_to_features['image/object/densepose/y'] = (
          tf.VarLenFeature(tf.float32))
      self.keys_to_features['image/object/densepose/u'] = (
          tf.VarLenFeature(tf.float32))
      self.keys_to_features['image/object/densepose/v'] = (
          tf.VarLenFeature(tf.float32))
      self.items_to_handlers[
          fields.InputDataFields.groundtruth_dp_num_points] = (
              slim_example_decoder.Tensor('image/object/densepose/num'))
      self.items_to_handlers[fields.InputDataFields.groundtruth_dp_part_ids] = (
          slim_example_decoder.ItemHandlerCallback(
              ['image/object/densepose/part_index',
               'image/object/densepose/num'],
              self._dense_pose_part_indices))
      self.items_to_handlers[
          fields.InputDataFields.groundtruth_dp_surface_coords] = (
              slim_example_decoder.ItemHandlerCallback(
                  ['image/object/densepose/x', 'image/object/densepose/y',
                   'image/object/densepose/u', 'image/object/densepose/v',
                   'image/object/densepose/num'],
                  self._dense_pose_surface_coordinates))

    if label_map_proto_file:
      # If the label_map_proto is provided, try to use it in conjunction with
      # the class text, and fall back to a materialized ID.
...
@@ -547,6 +578,14 @@ class TfExampleDecoder(data_decoder.DataDecoder):
      group_of = fields.InputDataFields.groundtruth_group_of
      tensor_dict[group_of] = tf.cast(tensor_dict[group_of], dtype=tf.bool)

    if fields.InputDataFields.groundtruth_dp_num_points in tensor_dict:
      tensor_dict[fields.InputDataFields.groundtruth_dp_num_points] = tf.cast(
          tensor_dict[fields.InputDataFields.groundtruth_dp_num_points],
          dtype=tf.int32)
      tensor_dict[fields.InputDataFields.groundtruth_dp_part_ids] = tf.cast(
          tensor_dict[fields.InputDataFields.groundtruth_dp_part_ids],
          dtype=tf.int32)

    return tensor_dict

  def _reshape_keypoints(self, keys_to_tensors):
...
lambda
:
tf
.
map_fn
(
decode_png_mask
,
png_masks
,
dtype
=
tf
.
float32
),
lambda
:
tf
.
zeros
(
tf
.
cast
(
tf
.
stack
([
0
,
height
,
width
]),
dtype
=
tf
.
int32
)))
def
_dense_pose_part_indices
(
self
,
keys_to_tensors
):
"""Creates a tensor that contains part indices for each DensePose point.
Args:
keys_to_tensors: a dictionary from keys to tensors.
Returns:
A 2-D int32 tensor of shape [num_instances, num_points] where each element
contains the DensePose part index (0-23). The value `num_points`
corresponds to the maximum number of sampled points across all instances
in the image. Note that instances with less sampled points will be padded
with zeros in the last dimension.
"""
num_points_per_instances
=
keys_to_tensors
[
'image/object/densepose/num'
]
part_index
=
keys_to_tensors
[
'image/object/densepose/part_index'
]
if
isinstance
(
num_points_per_instances
,
tf
.
SparseTensor
):
num_points_per_instances
=
tf
.
sparse_tensor_to_dense
(
num_points_per_instances
)
if
isinstance
(
part_index
,
tf
.
SparseTensor
):
part_index
=
tf
.
sparse_tensor_to_dense
(
part_index
)
part_index
=
tf
.
cast
(
part_index
,
dtype
=
tf
.
int32
)
max_points_per_instance
=
tf
.
cast
(
tf
.
math
.
reduce_max
(
num_points_per_instances
),
dtype
=
tf
.
int32
)
num_points_cumulative
=
tf
.
concat
([
[
0
],
tf
.
math
.
cumsum
(
num_points_per_instances
)],
axis
=
0
)
def
pad_parts_tensor
(
instance_ind
):
points_range_start
=
num_points_cumulative
[
instance_ind
]
points_range_end
=
num_points_cumulative
[
instance_ind
+
1
]
part_inds
=
part_index
[
points_range_start
:
points_range_end
]
return
shape_utils
.
pad_or_clip_nd
(
part_inds
,
output_shape
=
[
max_points_per_instance
])
return
tf
.
map_fn
(
pad_parts_tensor
,
tf
.
range
(
tf
.
size
(
num_points_per_instances
)),
dtype
=
tf
.
int32
)
def
_dense_pose_surface_coordinates
(
self
,
keys_to_tensors
):
"""Creates a tensor that contains surface coords for each DensePose point.
Args:
keys_to_tensors: a dictionary from keys to tensors.
Returns:
A 3-D float32 tensor of shape [num_instances, num_points, 4] where each
point contains (y, x, v, u) data for each sampled DensePose point. The
(y, x) coordinate has normalized image locations for the point, and (v, u)
contains the surface coordinate (also normalized) for the part. The value
`num_points` corresponds to the maximum number of sampled points across
all instances in the image. Note that instances with less sampled points
will be padded with zeros in dim=1.
"""
num_points_per_instances
=
keys_to_tensors
[
'image/object/densepose/num'
]
dp_y
=
keys_to_tensors
[
'image/object/densepose/y'
]
dp_x
=
keys_to_tensors
[
'image/object/densepose/x'
]
dp_v
=
keys_to_tensors
[
'image/object/densepose/v'
]
dp_u
=
keys_to_tensors
[
'image/object/densepose/u'
]
if
isinstance
(
num_points_per_instances
,
tf
.
SparseTensor
):
num_points_per_instances
=
tf
.
sparse_tensor_to_dense
(
num_points_per_instances
)
if
isinstance
(
dp_y
,
tf
.
SparseTensor
):
dp_y
=
tf
.
sparse_tensor_to_dense
(
dp_y
)
if
isinstance
(
dp_x
,
tf
.
SparseTensor
):
dp_x
=
tf
.
sparse_tensor_to_dense
(
dp_x
)
if
isinstance
(
dp_v
,
tf
.
SparseTensor
):
dp_v
=
tf
.
sparse_tensor_to_dense
(
dp_v
)
if
isinstance
(
dp_u
,
tf
.
SparseTensor
):
dp_u
=
tf
.
sparse_tensor_to_dense
(
dp_u
)
max_points_per_instance
=
tf
.
cast
(
tf
.
math
.
reduce_max
(
num_points_per_instances
),
dtype
=
tf
.
int32
)
num_points_cumulative
=
tf
.
concat
([
[
0
],
tf
.
math
.
cumsum
(
num_points_per_instances
)],
axis
=
0
)
def
pad_surface_coordinates_tensor
(
instance_ind
):
"""Pads DensePose surface coordinates for each instance."""
points_range_start
=
num_points_cumulative
[
instance_ind
]
points_range_end
=
num_points_cumulative
[
instance_ind
+
1
]
y
=
dp_y
[
points_range_start
:
points_range_end
]
x
=
dp_x
[
points_range_start
:
points_range_end
]
v
=
dp_v
[
points_range_start
:
points_range_end
]
u
=
dp_u
[
points_range_start
:
points_range_end
]
# Create [num_points_i, 4] tensor, where num_points_i is the number of
# sampled points for instance i.
unpadded_tensor
=
tf
.
stack
([
y
,
x
,
v
,
u
],
axis
=
1
)
return
shape_utils
.
pad_or_clip_nd
(
unpadded_tensor
,
output_shape
=
[
max_points_per_instance
,
4
])
return
tf
.
map_fn
(
pad_surface_coordinates_tensor
,
tf
.
range
(
tf
.
size
(
num_points_per_instances
)),
dtype
=
tf
.
float32
)
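The flat-to-padded conversion that both handlers implement can be sketched in plain NumPy (illustrative only; the values are taken from the decoder test below):

import numpy as np

num = [0, 4, 2]                    # points per instance
part_index = [2, 2, 3, 4, 2, 9]    # flat list over all instances
starts = np.concatenate([[0], np.cumsum(num)])
padded = np.zeros((len(num), max(num)), dtype=np.int32)
for i in range(len(num)):
  vals = part_index[starts[i]:starts[i + 1]]
  padded[i, :len(vals)] = vals
# padded -> [[0, 0, 0, 0], [2, 2, 3, 4], [2, 9, 0, 0]]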
  def _expand_image_label_hierarchy(self, image_classes, image_confidences):
    """Expand image level labels according to the hierarchy.
...
research/object_detection/data_decoders/tf_example_decoder_test.py
...
@@ -1096,8 +1096,8 @@ class TfExampleDecoderTest(test_case.TestCase):
       return example_decoder.decode(tf.convert_to_tensor(example))

     tensor_dict = self.execute_cpu(graph_fn, [])
-    self.assertTrue(
-        fields.InputDataFields.groundtruth_instance_masks not in tensor_dict)
+    self.assertNotIn(fields.InputDataFields.groundtruth_instance_masks,
+                     tensor_dict)

   def testDecodeImageLabels(self):
     image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
...
@@ -1116,8 +1116,7 @@ class TfExampleDecoderTest(test_case.TestCase):
       return example_decoder.decode(tf.convert_to_tensor(example))

     tensor_dict = self.execute_cpu(graph_fn_1, [])
-    self.assertTrue(
-        fields.InputDataFields.groundtruth_image_classes in tensor_dict)
+    self.assertIn(fields.InputDataFields.groundtruth_image_classes,
+                  tensor_dict)
     self.assertAllEqual(
         tensor_dict[fields.InputDataFields.groundtruth_image_classes],
         np.array([1, 2]))
...
@@ -1152,8 +1151,7 @@ class TfExampleDecoderTest(test_case.TestCase):
       return example_decoder.decode(tf.convert_to_tensor(example))

     tensor_dict = self.execute_cpu(graph_fn_2, [])
-    self.assertTrue(
-        fields.InputDataFields.groundtruth_image_classes in tensor_dict)
+    self.assertIn(fields.InputDataFields.groundtruth_image_classes,
+                  tensor_dict)
     self.assertAllEqual(
         tensor_dict[fields.InputDataFields.groundtruth_image_classes],
         np.array([1, 3]))
...
@@ -1345,6 +1343,93 @@ class TfExampleDecoderTest(test_case.TestCase):
        expected_image_confidence,
        tensor_dict[fields.InputDataFields.groundtruth_image_confidences])

  def testDecodeDensePose(self):
    image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
    encoded_jpeg, _ = self._create_encoded_and_decoded_data(
        image_tensor, 'jpeg')
    bbox_ymins = [0.0, 4.0, 2.0]
    bbox_xmins = [1.0, 5.0, 8.0]
    bbox_ymaxs = [2.0, 6.0, 1.0]
    bbox_xmaxs = [3.0, 7.0, 3.3]
    densepose_num = [0, 4, 2]
    densepose_part_index = [2, 2, 3, 4, 2, 9]
    densepose_x = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6]
    densepose_y = [0.9, 0.8, 0.7, 0.6, 0.5, 0.4]
    densepose_u = [0.01, 0.02, 0.03, 0.04, 0.05, 0.06]
    densepose_v = [0.99, 0.98, 0.97, 0.96, 0.95, 0.94]

    def graph_fn():
      example = tf.train.Example(
          features=tf.train.Features(
              feature={
                  'image/encoded':
                      dataset_util.bytes_feature(encoded_jpeg),
                  'image/format':
                      dataset_util.bytes_feature(six.b('jpeg')),
                  'image/object/bbox/ymin':
                      dataset_util.float_list_feature(bbox_ymins),
                  'image/object/bbox/xmin':
                      dataset_util.float_list_feature(bbox_xmins),
                  'image/object/bbox/ymax':
                      dataset_util.float_list_feature(bbox_ymaxs),
                  'image/object/bbox/xmax':
                      dataset_util.float_list_feature(bbox_xmaxs),
                  'image/object/densepose/num':
                      dataset_util.int64_list_feature(densepose_num),
                  'image/object/densepose/part_index':
                      dataset_util.int64_list_feature(densepose_part_index),
                  'image/object/densepose/x':
                      dataset_util.float_list_feature(densepose_x),
                  'image/object/densepose/y':
                      dataset_util.float_list_feature(densepose_y),
                  'image/object/densepose/u':
                      dataset_util.float_list_feature(densepose_u),
                  'image/object/densepose/v':
                      dataset_util.float_list_feature(densepose_v),
              })).SerializeToString()

      example_decoder = tf_example_decoder.TfExampleDecoder(
          load_dense_pose=True)
      output = example_decoder.decode(tf.convert_to_tensor(example))
      dp_num_points = output[fields.InputDataFields.groundtruth_dp_num_points]
      dp_part_ids = output[fields.InputDataFields.groundtruth_dp_part_ids]
      dp_surface_coords = output[
          fields.InputDataFields.groundtruth_dp_surface_coords]
      return dp_num_points, dp_part_ids, dp_surface_coords

    dp_num_points, dp_part_ids, dp_surface_coords = self.execute_cpu(
        graph_fn, [])

    expected_dp_num_points = [0, 4, 2]
    expected_dp_part_ids = [
        [0, 0, 0, 0],
        [2, 2, 3, 4],
        [2, 9, 0, 0]
    ]
    expected_dp_surface_coords = np.array(
        [
            # Instance 0 (no points).
            [[0., 0., 0., 0.], [0., 0., 0., 0.],
             [0., 0., 0., 0.], [0., 0., 0., 0.]],
            # Instance 1 (4 points).
            [[0.9, 0.1, 0.99, 0.01], [0.8, 0.2, 0.98, 0.02],
             [0.7, 0.3, 0.97, 0.03], [0.6, 0.4, 0.96, 0.04]],
            # Instance 2 (2 points).
            [[0.5, 0.5, 0.95, 0.05], [0.4, 0.6, 0.94, 0.06],
             [0., 0., 0., 0.], [0., 0., 0., 0.]],
        ], dtype=np.float32)
    self.assertAllEqual(dp_num_points, expected_dp_num_points)
    self.assertAllEqual(dp_part_ids, expected_dp_part_ids)
    self.assertAllClose(dp_surface_coords, expected_dp_surface_coords)


if __name__ == '__main__':
  tf.test.main()
research/object_detection/dataset_tools/context_rcnn/add_context_to_examples.py
...
@@ -43,70 +43,22 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

+import argparse
 import copy
 import datetime
 import io
 import itertools
 import json
 import os
-from absl import app
-from absl import flags
-import apache_beam as beam
 import numpy as np
 import PIL.Image
 import six
-import tensorflow as tf
-from apache_beam import runners
-
-flags.DEFINE_string('input_tfrecord', None, 'TFRecord containing images in '
-                    'tf.Example format for object detection, with bounding '
-                    'boxes and contextual feature embeddings.')
-flags.DEFINE_string('output_tfrecord', None,
-                    'TFRecord containing images in tf.Example format, with '
-                    'added contextual memory banks.')
-flags.DEFINE_string('sequence_key', None, 'Key to use when grouping sequences: '
-                    'so far supports `image/seq_id` and `image/location`.')
-flags.DEFINE_string('time_horizon', None, 'What time horizon to use when '
-                    'splitting the data, if any. Options are: `year`, `month`, '
-                    '`week`, `day `, `hour`, `minute`, `None`.')
-flags.DEFINE_integer('subsample_context_features_rate', 0, 'Whether to '
-                     'subsample the context_features, and if so how many to '
-                     'sample. If the rate is set to X, it will sample context '
-                     'from 1 out of every X images. Default is sampling from '
-                     'every image, which is X=0.')
-flags.DEFINE_boolean('reduce_image_size', True, 'downsamples images to '
-                     'have longest side max_image_dimension, maintaining '
-                     'aspect ratio')
-flags.DEFINE_integer('max_image_dimension', 1024, 'sets max image dimension')
-flags.DEFINE_boolean('add_context_features', True, 'adds a memory bank of '
-                     'embeddings to each clip')
-flags.DEFINE_boolean('sorted_image_ids', True, 'whether the image source_ids '
-                     'are sortable to deal with date_captured tie-breaks')
-flags.DEFINE_string('image_ids_to_keep', 'All', 'path to .json list of image '
-                    'ids to keep, used for ground truth eval creation')
-flags.DEFINE_boolean('keep_context_features_image_id_list', False, 'Whether or '
-                     'not to keep a list of the image_ids corresponding to the '
-                     'memory bank')
-flags.DEFINE_boolean('keep_only_positives', False, 'Whether or not to '
-                     'keep only positive boxes based on score')
-flags.DEFINE_boolean('keep_only_positives_gt', False, 'Whether or not to '
-                     'keep only positive boxes based on gt class')
-flags.DEFINE_float('context_features_score_threshold', 0.7, 'What score '
-                   'threshold to use for boxes in context_features')
-flags.DEFINE_integer('max_num_elements_in_context_features', 2000, 'Sets max '
-                     'num elements per memory bank')
-flags.DEFINE_integer('num_shards', 0, 'Number of output shards.')
-flags.DEFINE_string('output_type', 'tf_sequence_example', 'Output type, one of '
-                    '`tf_example`, `tf_sequence_example`')
-flags.DEFINE_integer('max_clip_length', None, 'Max length for sequence '
-                     'example outputs.')
-FLAGS = flags.FLAGS
-
-DEFAULT_FEATURE_LENGTH = 2057
+import tensorflow.compat.v1 as tf

+try:
+  import apache_beam as beam  # pylint:disable=g-import-not-at-top
+except ModuleNotFoundError:
+  pass


 class ReKeyDataFn(beam.DoFn):
...
@@ -406,7 +358,8 @@ class GenerateContextFn(beam.DoFn):
                keep_only_positives_gt=False,
                max_num_elements_in_context_features=5000,
                pad_context_features=False,
-               output_type='tf_example',
-               max_clip_length=None):
+               output_type='tf_example',
+               max_clip_length=None,
+               context_feature_length=2057):
     """Initialization function.

     Args:
...
@@ -432,6 +385,8 @@ class GenerateContextFn(beam.DoFn):
       output_type: What type of output, one of `tf_example`,
         `tf_sequence_example`
       max_clip_length: The maximum length of a sequence example, before
         splitting into multiple
+      context_feature_length: The length of the context feature embeddings
+        stored in the input data.
     """
     self._session = None
     self._num_examples_processed = beam.metrics.Metrics.counter(
...
@@ -456,6 +411,7 @@ class GenerateContextFn(beam.DoFn):
     self._context_features_score_threshold = context_features_score_threshold
     self._max_num_elements_in_context_features = (
         max_num_elements_in_context_features)
+    self._context_feature_length = context_feature_length
     self._images_kept = beam.metrics.Metrics.counter(
         'sequence_data_generation', 'images_kept')
...
@@ -506,9 +462,9 @@ class GenerateContextFn(beam.DoFn):
         context_features_image_id_list.append(example_image_id)

       if not example_embedding:
-        example_embedding.append(np.zeros(DEFAULT_FEATURE_LENGTH))
+        example_embedding.append(np.zeros(self._context_feature_length))

-      feature_length = DEFAULT_FEATURE_LENGTH
+      feature_length = self._context_feature_length

       # If the example_list is not empty and image/embedding_length is in the
       # feature dict, feature_length will be assigned to that. Otherwise, it
       # will
...
@@ -703,7 +659,8 @@ class GenerateContextFn(beam.DoFn):
     return list_of_examples


-def construct_pipeline(input_tfrecord,
+def construct_pipeline(pipeline,
+                       input_tfrecord,
                        output_tfrecord,
                        sequence_key,
                        time_horizon=None,
...
@@ -720,10 +677,12 @@ def construct_pipeline(input_tfrecord,
                        max_num_elements_in_context_features=5000,
                        num_shards=0,
                        output_type='tf_example',
-                       max_clip_length=None):
+                       max_clip_length=None,
+                       context_feature_length=2057):
   """Returns a beam pipeline to run object detection inference.

   Args:
+    pipeline: Initialized beam pipeline.
     input_tfrecord: A TFRecord of tf.train.Example protos containing images.
     output_tfrecord: A TFRecord of tf.train.Example protos that contain images
       in the input TFRecord and the detections from the model.
...
@@ -755,91 +714,224 @@ def construct_pipeline(input_tfrecord,
     output_type: What type of output, one of `tf_example`,
       `tf_sequence_example`
     max_clip_length: The maximum length of a sequence example, before
       splitting into multiple
+    context_feature_length: The length of the context feature embeddings
+      stored in the input data.
   """
-  def pipeline(root):
-    if output_type == 'tf_example':
-      coder = beam.coders.ProtoCoder(tf.train.Example)
-    elif output_type == 'tf_sequence_example':
-      coder = beam.coders.ProtoCoder(tf.train.SequenceExample)
-    else:
-      raise ValueError('Unsupported output type.')
-    input_collection = (
-        root
-        | 'ReadInputTFRecord' >> beam.io.tfrecordio.ReadFromTFRecord(
-            input_tfrecord, coder=beam.coders.BytesCoder()))
-    rekey_collection = input_collection | 'RekeyExamples' >> beam.ParDo(
-        ReKeyDataFn(sequence_key, time_horizon,
-                    reduce_image_size, max_image_dimension))
-    grouped_collection = (
-        rekey_collection | 'GroupBySequenceKey' >> beam.GroupByKey())
-    grouped_collection = (
-        grouped_collection | 'ReshuffleGroups' >> beam.Reshuffle())
-    ordered_collection = (
-        grouped_collection | 'OrderByFrameNumber' >> beam.ParDo(
-            SortGroupedDataFn(sequence_key, sorted_image_ids,
-                              max_num_elements_in_context_features)))
-    ordered_collection = (
-        ordered_collection | 'ReshuffleSortedGroups' >> beam.Reshuffle())
-    output_collection = (
-        ordered_collection | 'AddContextToExamples' >> beam.ParDo(
-            GenerateContextFn(
-                sequence_key, add_context_features, image_ids_to_keep,
-                keep_context_features_image_id_list=(
-                    keep_context_features_image_id_list),
-                subsample_context_features_rate=subsample_context_features_rate,
-                keep_only_positives=keep_only_positives,
-                keep_only_positives_gt=keep_only_positives_gt,
-                context_features_score_threshold=(
-                    context_features_score_threshold),
-                max_num_elements_in_context_features=(
-                    max_num_elements_in_context_features),
-                output_type=output_type,
-                max_clip_length=max_clip_length)))
-    output_collection = (
-        output_collection | 'ReshuffleExamples' >> beam.Reshuffle())
-    _ = output_collection | 'WritetoDisk' >> beam.io.tfrecordio.WriteToTFRecord(
-        output_tfrecord,
-        num_shards=num_shards,
-        coder=coder)
-  return pipeline
+  if output_type == 'tf_example':
+    coder = beam.coders.ProtoCoder(tf.train.Example)
+  elif output_type == 'tf_sequence_example':
+    coder = beam.coders.ProtoCoder(tf.train.SequenceExample)
+  else:
+    raise ValueError('Unsupported output type.')
+  input_collection = (
+      pipeline
+      | 'ReadInputTFRecord' >> beam.io.tfrecordio.ReadFromTFRecord(
+          input_tfrecord, coder=beam.coders.BytesCoder()))
+  rekey_collection = input_collection | 'RekeyExamples' >> beam.ParDo(
+      ReKeyDataFn(sequence_key, time_horizon,
+                  reduce_image_size, max_image_dimension))
+  grouped_collection = (
+      rekey_collection | 'GroupBySequenceKey' >> beam.GroupByKey())
+  grouped_collection = (
+      grouped_collection | 'ReshuffleGroups' >> beam.Reshuffle())
+  ordered_collection = (
+      grouped_collection | 'OrderByFrameNumber' >> beam.ParDo(
+          SortGroupedDataFn(sequence_key, sorted_image_ids,
+                            max_num_elements_in_context_features)))
+  ordered_collection = (
+      ordered_collection | 'ReshuffleSortedGroups' >> beam.Reshuffle())
+  output_collection = (
+      ordered_collection | 'AddContextToExamples' >> beam.ParDo(
+          GenerateContextFn(
+              sequence_key, add_context_features, image_ids_to_keep,
+              keep_context_features_image_id_list=(
+                  keep_context_features_image_id_list),
+              subsample_context_features_rate=subsample_context_features_rate,
+              keep_only_positives=keep_only_positives,
+              keep_only_positives_gt=keep_only_positives_gt,
+              context_features_score_threshold=(
+                  context_features_score_threshold),
+              max_num_elements_in_context_features=(
+                  max_num_elements_in_context_features),
+              output_type=output_type,
+              max_clip_length=max_clip_length,
+              context_feature_length=context_feature_length)))
+  output_collection = (
+      output_collection | 'ReshuffleExamples' >> beam.Reshuffle())
+  _ = output_collection | 'WritetoDisk' >> beam.io.tfrecordio.WriteToTFRecord(
+      output_tfrecord,
+      num_shards=num_shards,
+      coder=coder)
+
+
+def parse_args(argv):
+  """Command-line argument parser.
+
+  Args:
+    argv: command line arguments
+  Returns:
+    beam_args: Arguments for the beam pipeline.
+    pipeline_args: Arguments for the pipeline options, such as runner type.
+  """
+  parser = argparse.ArgumentParser()
+  parser.add_argument(
+      '--input_tfrecord',
+      dest='input_tfrecord',
+      required=True,
+      help='TFRecord containing images in tf.Example format for object '
+      'detection, with bounding boxes and contextual feature embeddings.')
+  parser.add_argument(
+      '--output_tfrecord',
+      dest='output_tfrecord',
+      required=True,
+      help='TFRecord containing images in tf.Example format, with added '
+      'contextual memory banks.')
+  parser.add_argument(
+      '--sequence_key',
+      dest='sequence_key',
+      default='image/location',
+      help='Key to use when grouping sequences: so far supports '
+      '`image/seq_id` and `image/location`.')
+  parser.add_argument(
+      '--context_feature_length',
+      dest='context_feature_length',
+      default=2057,
+      help='The length of the context feature embeddings stored in the '
+      'input data.')
+  parser.add_argument(
+      '--time_horizon',
+      dest='time_horizon',
+      default=None,
+      help='What time horizon to use when splitting the data, if any. '
+      'Options are: `year`, `month`, `week`, `day `, `hour`, `minute`, '
+      '`None`.')
+  parser.add_argument(
+      '--subsample_context_features_rate',
+      dest='subsample_context_features_rate',
+      default=0,
+      help='Whether to subsample the context_features, and if so how many '
+      'to sample. If the rate is set to X, it will sample context from 1 '
+      'out of every X images. Default is sampling from every image, which '
+      'is X=0.')
+  parser.add_argument(
+      '--reduce_image_size',
+      dest='reduce_image_size',
+      default=True,
+      help='Downsamples images to have longest side max_image_dimension, '
+      'maintaining aspect ratio.')
+  parser.add_argument(
+      '--max_image_dimension',
+      dest='max_image_dimension',
+      default=1024,
+      help='Sets max image dimension for resizing.')
+  parser.add_argument(
+      '--add_context_features',
+      dest='add_context_features',
+      default=True,
+      help='Adds a memory bank of embeddings to each clip.')
+  parser.add_argument(
+      '--sorted_image_ids',
+      dest='sorted_image_ids',
+      default=True,
+      help='Whether the image source_ids are sortable to deal with '
+      'date_captured tie-breaks.')
+  parser.add_argument(
+      '--image_ids_to_keep',
+      dest='image_ids_to_keep',
+      default='All',
+      help='Path to .json list of image ids to keep, used for ground truth '
+      'eval creation.')
+  parser.add_argument(
+      '--keep_context_features_image_id_list',
+      dest='keep_context_features_image_id_list',
+      default=False,
+      help='Whether or not to keep a list of the image_ids corresponding '
+      'to the memory bank.')
+  parser.add_argument(
+      '--keep_only_positives',
+      dest='keep_only_positives',
+      default=False,
+      help='Whether or not to keep only positive boxes based on score.')
+  parser.add_argument(
+      '--context_features_score_threshold',
+      dest='context_features_score_threshold',
+      default=0.7,
+      help='What score threshold to use for boxes in context_features, '
+      'when `keep_only_positives` is set to `True`.')
+  parser.add_argument(
+      '--keep_only_positives_gt',
+      dest='keep_only_positives_gt',
+      default=False,
+      help='Whether or not to keep only positive boxes based on gt class.')
+  parser.add_argument(
+      '--max_num_elements_in_context_features',
+      dest='max_num_elements_in_context_features',
+      default=2000,
+      help='Sets max number of context feature elements per memory bank. '
+      'If the number of images in the context group is greater than '
+      '`max_num_elements_in_context_features`, the context group will be '
+      'split.')
+  parser.add_argument(
+      '--output_type',
+      dest='output_type',
+      default='tf_example',
+      help='Output type, one of `tf_example`, `tf_sequence_example`.')
+  parser.add_argument(
+      '--max_clip_length',
+      dest='max_clip_length',
+      default=None,
+      help='Max length for sequence example outputs.')
+  parser.add_argument(
+      '--num_shards',
+      dest='num_shards',
+      default=0,
+      help='Number of output shards.')
+  beam_args, pipeline_args = parser.parse_known_args(argv)
+  return beam_args, pipeline_args


-# must create before flags are used
-runner = runners.DirectRunner()
-
-
-def main(_):
-  """Runs the Beam pipeline that builds context features.
-
-  Args:
-    _: unused
-  """
-  dirname = os.path.dirname(FLAGS.output_tfrecord)
-  tf.io.gfile.makedirs(dirname)
-  runner.run(
-      construct_pipeline(FLAGS.input_tfrecord, FLAGS.output_tfrecord,
-                         FLAGS.sequence_key, FLAGS.time_horizon,
-                         FLAGS.subsample_context_features_rate,
-                         FLAGS.reduce_image_size, FLAGS.max_image_dimension,
-                         FLAGS.add_context_features, FLAGS.sorted_image_ids,
-                         FLAGS.image_ids_to_keep,
-                         FLAGS.keep_context_features_image_id_list,
-                         FLAGS.keep_only_positives,
-                         FLAGS.context_features_score_threshold,
-                         FLAGS.keep_only_positives_gt,
-                         FLAGS.max_num_elements_in_context_features,
-                         FLAGS.num_shards, FLAGS.output_type,
-                         FLAGS.max_clip_length))
+def main(argv=None, save_main_session=True):
+  """Runs the Beam pipeline that performs inference.
+
+  Args:
+    argv: Command line arguments.
+    save_main_session: Whether to save the main session.
+  """
+  args, pipeline_args = parse_args(argv)
+
+  pipeline_options = beam.options.pipeline_options.PipelineOptions(
+      pipeline_args)
+  pipeline_options.view_as(
+      beam.options.pipeline_options.SetupOptions).save_main_session = (
+          save_main_session)
+
+  dirname = os.path.dirname(args.output_tfrecord)
+  tf.io.gfile.makedirs(dirname)
+
+  p = beam.Pipeline(options=pipeline_options)
+  construct_pipeline(
+      p,
+      args.input_tfrecord,
+      args.output_tfrecord,
+      args.sequence_key,
+      args.time_horizon,
+      args.subsample_context_features_rate,
+      args.reduce_image_size,
+      args.max_image_dimension,
+      args.add_context_features,
+      args.sorted_image_ids,
+      args.image_ids_to_keep,
+      args.keep_context_features_image_id_list,
+      args.keep_only_positives,
+      args.context_features_score_threshold,
+      args.keep_only_positives_gt,
+      args.max_num_elements_in_context_features,
+      args.output_type,
+      args.max_clip_length,
+      args.context_feature_length)
+  p.run()


 if __name__ == '__main__':
-  flags.mark_flags_as_required(['input_tfrecord', 'output_tfrecord'])
-  app.run(main)
+  main()
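For reference, a hedged sketch of driving the rewritten construct_pipeline programmatically, mirroring the updated tests below (the module must be imported; the paths are placeholders):

import apache_beam as beam

pipeline_options = beam.options.pipeline_options.PipelineOptions(
    runner='DirectRunner')
p = beam.Pipeline(options=pipeline_options)
construct_pipeline(
    p,
    '/tmp/input.tfrecord',   # placeholder input path
    '/tmp/output.tfrecord',  # placeholder output path
    b'image/seq_id')
p.run()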
research/object_detection/dataset_tools/context_rcnn/add_context_to_examples_tf1_test.py
...
@@ -22,13 +22,19 @@ import datetime
 import os
 import tempfile
 import unittest
 import numpy as np
 import six
 import tensorflow.compat.v1 as tf

 from object_detection.dataset_tools.context_rcnn import add_context_to_examples
 from object_detection.utils import tf_version
-from apache_beam import runners
+
+try:
+  import apache_beam as beam  # pylint:disable=g-import-not-at-top
+except ModuleNotFoundError:
+  pass


 @contextlib.contextmanager
...
@@ -200,7 +206,7 @@ class GenerateContextDataTest(tf.test.TestCase):
         seq_feature_dict['region/label/string'].feature[1].bytes_list.value[:])

   def assert_expected_key(self, key):
-    self.assertAllEqual(key, '01')
+    self.assertAllEqual(key, b'01')

   def assert_sorted(self, example_collection):
     example_list = list(example_collection)
...
@@ -329,19 +335,22 @@ class GenerateContextDataTest(tf.test.TestCase):
     with InMemoryTFRecord(
         [self._create_first_tf_example(),
          self._create_second_tf_example()]) as input_tfrecord:
-      runner = runners.DirectRunner()
       temp_dir = tempfile.mkdtemp(dir=os.environ.get('TEST_TMPDIR'))
       output_tfrecord = os.path.join(temp_dir, 'output_tfrecord')
       sequence_key = six.ensure_binary('image/seq_id')
       max_num_elements = 10
       num_shards = 1
-      pipeline = add_context_to_examples.construct_pipeline(
-          input_tfrecord,
-          output_tfrecord,
-          sequence_key,
-          max_num_elements_in_context_features=max_num_elements,
-          num_shards=num_shards)
-      runner.run(pipeline)
+      pipeline_options = beam.options.pipeline_options.PipelineOptions(
+          runner='DirectRunner')
+      p = beam.Pipeline(options=pipeline_options)
+      add_context_to_examples.construct_pipeline(
+          p,
+          input_tfrecord,
+          output_tfrecord,
+          sequence_key,
+          max_num_elements_in_context_features=max_num_elements,
+          num_shards=num_shards)
+      p.run()
       filenames = tf.io.gfile.glob(output_tfrecord + '-?????-of-?????')
       actual_output = []
       record_iterator = tf.python_io.tf_record_iterator(path=filenames[0])
...
@@ -355,20 +364,23 @@ class GenerateContextDataTest(tf.test.TestCase):
     with InMemoryTFRecord(
         [self._create_first_tf_example(),
          self._create_second_tf_example()]) as input_tfrecord:
-      runner = runners.DirectRunner()
       temp_dir = tempfile.mkdtemp(dir=os.environ.get('TEST_TMPDIR'))
       output_tfrecord = os.path.join(temp_dir, 'output_tfrecord')
       sequence_key = six.ensure_binary('image/seq_id')
       max_num_elements = 10
       num_shards = 1
-      pipeline = add_context_to_examples.construct_pipeline(
-          input_tfrecord,
-          output_tfrecord,
-          sequence_key,
-          max_num_elements_in_context_features=max_num_elements,
-          num_shards=num_shards,
-          output_type='tf_sequence_example')
-      runner.run(pipeline)
+      pipeline_options = beam.options.pipeline_options.PipelineOptions(
+          runner='DirectRunner')
+      p = beam.Pipeline(options=pipeline_options)
+      add_context_to_examples.construct_pipeline(
+          p,
+          input_tfrecord,
+          output_tfrecord,
+          sequence_key,
+          max_num_elements_in_context_features=max_num_elements,
+          num_shards=num_shards,
+          output_type='tf_sequence_example')
+      p.run()
       filenames = tf.io.gfile.glob(output_tfrecord + '-?????-of-?????')
       actual_output = []
       record_iterator = tf.python_io.tf_record_iterator(
...
research/object_detection/dataset_tools/context_rcnn/create_cococameratraps_tfexample_main.py
...
@@ -33,31 +33,21 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

+import argparse
 import hashlib
 import io
 import json
 import logging
 import os
-from absl import app
-from absl import flags
-import apache_beam as beam
 import numpy as np
 import PIL.Image
 import tensorflow.compat.v1 as tf
-from apache_beam import runners
 from object_detection.utils import dataset_util

-flags.DEFINE_string('image_directory', None, 'Directory where images are '
-                    'stored')
-flags.DEFINE_string('output_tfrecord_prefix', None,
-                    'TFRecord containing images in tf.Example format.')
-flags.DEFINE_string('input_annotations_file', None, 'Path to Coco-CameraTraps '
-                    'style annotations file')
-flags.DEFINE_integer('num_images_per_shard', 200,
-                     'The number of images to be stored in each shard.')
-FLAGS = flags.FLAGS
+try:
+  import apache_beam as beam  # pylint:disable=g-import-not-at-top
+except ModuleNotFoundError:
+  pass


 class ParseImage(beam.DoFn):
...
@@ -243,13 +233,14 @@ class ParseImage(beam.DoFn):
     return [(example)]


-def _load_json_data(data_file):
+def load_json_data(data_file):
   with tf.io.gfile.GFile(data_file, 'r') as fid:
     data_dict = json.load(fid)
   return data_dict


-def create_pipeline(image_directory,
+def create_pipeline(pipeline,
+                    image_directory,
                     input_annotations_file,
                     output_tfrecord_prefix=None,
                     num_images_per_shard=200,
...
@@ -257,68 +248,97 @@ def create_pipeline(image_directory,
   """Creates a beam pipeline for producing a COCO-CameraTraps Image dataset.

   Args:
+    pipeline: Initialized beam pipeline.
     image_directory: Path to image directory
     input_annotations_file: Path to a coco-cameratraps annotation file
     output_tfrecord_prefix: Absolute path for tfrecord outputs. Final files will
       be named {output_tfrecord_prefix}@N.
     num_images_per_shard: The number of images to store in each shard
     keep_bboxes: Whether to keep any bounding boxes that exist in the json file
-
-  Returns:
-    A Beam pipeline.
   """
   logging.info('Reading data from COCO-CameraTraps Dataset.')
-  data = _load_json_data(input_annotations_file)
+  data = load_json_data(input_annotations_file)
   num_shards = int(np.ceil(float(len(data['images'])) / num_images_per_shard))
-  def pipeline(root):
-    """Builds beam pipeline."""
-    image_examples = (
-        root
-        | ('CreateCollections') >> beam.Create(
-            [im['id'] for im in data['images']])
-        | ('ParseImage') >> beam.ParDo(ParseImage(
-            image_directory, data['images'], data['annotations'],
-            data['categories'], keep_bboxes=keep_bboxes)))
-    _ = (image_examples
-         | ('Reshuffle') >> beam.Reshuffle()
-         | ('WriteTfImageExample') >> beam.io.tfrecordio.WriteToTFRecord(
-             output_tfrecord_prefix,
-             num_shards=num_shards,
-             coder=beam.coders.ProtoCoder(tf.train.Example)))
-  return pipeline
+  image_examples = (
+      pipeline
+      | ('CreateCollections') >> beam.Create(
+          [im['id'] for im in data['images']])
+      | ('ParseImage') >> beam.ParDo(ParseImage(
+          image_directory, data['images'], data['annotations'],
+          data['categories'], keep_bboxes=keep_bboxes)))
+  _ = (image_examples
+       | ('Reshuffle') >> beam.Reshuffle()
+       | ('WriteTfImageExample') >> beam.io.tfrecordio.WriteToTFRecord(
+           output_tfrecord_prefix,
+           num_shards=num_shards,
+           coder=beam.coders.ProtoCoder(tf.train.Example)))
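The shard count above is plain ceiling division: with num_images_per_shard=200, a dataset of 1,001 images yields ceil(1001 / 200) = 6 shards. A tiny check of that arithmetic (the image counts are illustrative):

    import numpy as np

    num_images_per_shard = 200
    for n_images in (1, 200, 201, 1001):
      num_shards = int(np.ceil(float(n_images) / num_images_per_shard))
      print(n_images, '->', num_shards)  # 1 -> 1, 200 -> 1, 201 -> 2, 1001 -> 6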

-def main(_):
+def parse_args(argv):
+  """Command-line argument parser.
+
+  Args:
+    argv: command line arguments
+  Returns:
+    beam_args: Arguments for the beam pipeline.
+    pipeline_args: Arguments for the pipeline options, such as runner type.
+  """
+  parser = argparse.ArgumentParser()
+  parser.add_argument(
+      '--image_directory',
+      dest='image_directory',
+      required=True,
+      help='Path to the directory where the images are stored.')
+  parser.add_argument(
+      '--output_tfrecord_prefix',
+      dest='output_tfrecord_prefix',
+      required=True,
+      help='Path and prefix to store TFRecords containing images in tf.Example'
+      'format.')
+  parser.add_argument(
+      '--input_annotations_file',
+      dest='input_annotations_file',
+      required=True,
+      help='Path to Coco-CameraTraps style annotations file.')
+  parser.add_argument(
+      '--num_images_per_shard',
+      dest='num_images_per_shard',
+      default=200,
+      help='The number of images to be stored in each output shard.')
+  beam_args, pipeline_args = parser.parse_known_args(argv)
+  return beam_args, pipeline_args


+def main(argv=None, save_main_session=True):
   """Runs the Beam pipeline that performs inference.

   Args:
-    _: unused
+    argv: Command line arguments.
+    save_main_session: Whether to save the main session.
   """
-  # must create before flags are used
-  runner = runners.DirectRunner()
-  dirname = os.path.dirname(FLAGS.output_tfrecord_prefix)
+  args, pipeline_args = parse_args(argv)
+  pipeline_options = beam.options.pipeline_options.PipelineOptions(
+      pipeline_args)
+  pipeline_options.view_as(
+      beam.options.pipeline_options.SetupOptions).save_main_session = (
+          save_main_session)
+  dirname = os.path.dirname(args.output_tfrecord_prefix)
   tf.io.gfile.makedirs(dirname)
-  runner.run(create_pipeline(
-      image_directory=FLAGS.image_directory,
-      input_annotations_file=FLAGS.input_annotations_file,
-      output_tfrecord_prefix=FLAGS.output_tfrecord_prefix,
-      num_images_per_shard=FLAGS.num_images_per_shard))
+  p = beam.Pipeline(options=pipeline_options)
+  create_pipeline(
+      pipeline=p,
+      image_directory=args.image_directory,
+      input_annotations_file=args.input_annotations_file,
+      output_tfrecord_prefix=args.output_tfrecord_prefix,
+      num_images_per_shard=args.num_images_per_shard)
+  p.run()


 if __name__ == '__main__':
-  flags.mark_flags_as_required([
-      'image_directory',
-      'input_annotations_file',
-      'output_tfrecord_prefix'
-  ])
-  app.run(main)
+  main()
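parse_known_args is what lets one command line feed two consumers: arguments the tool defines land in beam_args, while everything unrecognized (e.g. --runner=DataflowRunner) passes through untouched for Beam's PipelineOptions. A small sketch of that split (the argument values are illustrative):

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--image_directory', required=True)

    argv = ['--image_directory', '/data/images', '--runner=DirectRunner']
    args, pipeline_args = parser.parse_known_args(argv)
    print(args.image_directory)  # /data/images
    print(pipeline_args)         # ['--runner=DirectRunner'] -> PipelineOptions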
research/object_detection/dataset_tools/context_rcnn/create_cococameratraps_tfexample_tf1_test.py
View file @ 31ca3b97
...
...
@@ -21,13 +21,18 @@ import json
 import os
 import tempfile
 import unittest
 import numpy as np
 from PIL import Image
 import tensorflow.compat.v1 as tf

 from object_detection.dataset_tools.context_rcnn import create_cococameratraps_tfexample_main
 from object_detection.utils import tf_version
-from apache_beam import runners
+
+try:
+  import apache_beam as beam  # pylint:disable=g-import-not-at-top
+except ModuleNotFoundError:
+  pass


 @unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
...
...
@@ -95,13 +100,13 @@ class CreateCOCOCameraTrapsTfexampleTest(tf.test.TestCase):
                         .int64_list.value, [1])
     self.assertAllEqual(example.features.feature['image/object/class/text']
-                        .bytes_list.value, ['animal'])
+                        .bytes_list.value, [b'animal'])
     self.assertAllClose(example.features.feature['image/class/label']
                         .int64_list.value, [1])
     self.assertAllEqual(example.features.feature['image/class/text']
-                        .bytes_list.value, ['animal'])
+                        .bytes_list.value, [b'animal'])
     # Check other essential attributes.
     self.assertAllEqual(
...
...
@@ -112,7 +117,7 @@ class CreateCOCOCameraTrapsTfexampleTest(tf.test.TestCase):
                         [self.IMAGE_WIDTH])
     self.assertAllEqual(
         example.features.feature['image/source_id'].bytes_list.value,
-        ['im_0'])
+        [b'im_0'])
     self.assertTrue(
         example.features.feature['image/encoded'].bytes_list.value)
...
...
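The 'animal' to b'animal' and 'im_0' to b'im_0' changes in these assertions reflect Python 3 semantics: tf.train.BytesList stores bytes, so round-tripped values compare unequal to str. A short demonstration of why the expected values must be bytes literals:

    import tensorflow.compat.v1 as tf

    feature = tf.train.Feature(
        bytes_list=tf.train.BytesList(value=['animal'.encode('utf8')]))
    value = feature.bytes_list.value[0]
    print(value == b'animal')  # True
    print(value == 'animal')   # False under Python 3: bytes != str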
@@ -134,13 +139,13 @@ class CreateCOCOCameraTrapsTfexampleTest(tf.test.TestCase):
                         .int64_list.value, [1])
     self.assertAllEqual(example.features.feature['image/object/class/text']
-                        .bytes_list.value, ['animal'])
+                        .bytes_list.value, [b'animal'])
     self.assertAllClose(example.features.feature['image/class/label']
                         .int64_list.value, [1])
     self.assertAllEqual(example.features.feature['image/class/text']
-                        .bytes_list.value, ['animal'])
+                        .bytes_list.value, [b'animal'])
     # Check other essential attributes.
     self.assertAllEqual(
...
...
@@ -151,21 +156,23 @@ class CreateCOCOCameraTrapsTfexampleTest(tf.test.TestCase):
                         [self.IMAGE_WIDTH])
     self.assertAllEqual(
         example.features.feature['image/source_id'].bytes_list.value,
-        ['im_0'])
+        [b'im_0'])
     self.assertTrue(
         example.features.feature['image/encoded'].bytes_list.value)

   def test_beam_pipeline(self):
-    runner = runners.DirectRunner()
     num_frames = 1
     temp_dir = tempfile.mkdtemp(dir=os.environ.get('TEST_TMPDIR'))
     json_path = self._create_json_file(temp_dir, num_frames)
     output_tfrecord = temp_dir + '/output'
     self._write_random_images_to_directory(temp_dir, num_frames)
-    pipeline = create_cococameratraps_tfexample_main.create_pipeline(
-        temp_dir, json_path,
-        output_tfrecord_prefix=output_tfrecord)
-    runner.run(pipeline)
+    pipeline_options = beam.options.pipeline_options.PipelineOptions(
+        runner='DirectRunner')
+    p = beam.Pipeline(options=pipeline_options)
+    create_cococameratraps_tfexample_main.create_pipeline(
+        p, temp_dir, json_path,
+        output_tfrecord_prefix=output_tfrecord)
+    p.run()
     filenames = tf.io.gfile.glob(output_tfrecord + '-?????-of-?????')
     actual_output = []
     record_iterator = tf.python_io.tf_record_iterator(path=filenames[0])
...
...
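WriteToTFRecord with num_shards=N produces files named prefix-SSSSS-of-NNNNN, which is why these tests glob with '-?????-of-?????'. A small sketch of matching those shard names (the prefix is illustrative):

    import tensorflow.compat.v1 as tf

    output_tfrecord = '/tmp/output'  # illustrative prefix
    # A 2-shard write yields: /tmp/output-00000-of-00002, /tmp/output-00001-of-00002
    filenames = tf.io.gfile.glob(output_tfrecord + '-?????-of-?????')
    for filename in sorted(filenames):
      print(filename)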
@@ -176,17 +183,19 @@ class CreateCOCOCameraTrapsTfexampleTest(tf.test.TestCase):
                         actual_output[0]))

   def test_beam_pipeline_bbox(self):
-    runner = runners.DirectRunner()
     num_frames = 1
     temp_dir = tempfile.mkdtemp(dir=os.environ.get('TEST_TMPDIR'))
     json_path = self._create_json_file(temp_dir, num_frames,
                                        keep_bboxes=True)
     output_tfrecord = temp_dir + '/output'
     self._write_random_images_to_directory(temp_dir, num_frames)
-    pipeline = create_cococameratraps_tfexample_main.create_pipeline(
-        temp_dir, json_path,
-        output_tfrecord_prefix=output_tfrecord,
-        keep_bboxes=True)
-    runner.run(pipeline)
+    pipeline_options = beam.options.pipeline_options.PipelineOptions(
+        runner='DirectRunner')
+    p = beam.Pipeline(options=pipeline_options)
+    create_cococameratraps_tfexample_main.create_pipeline(
+        p, temp_dir, json_path,
+        output_tfrecord_prefix=output_tfrecord,
+        keep_bboxes=True)
+    p.run()
     filenames = tf.io.gfile.glob(output_tfrecord + '-?????-of-?????')
     actual_output = []
     record_iterator = tf.python_io.tf_record_iterator(path=filenames[0])
...
...
research/object_detection/dataset_tools/context_rcnn/generate_detection_data.py
View file @ 31ca3b97
...
...
@@ -45,26 +45,14 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
+import argparse
 import os
 import threading
-from absl import app
-from absl import flags
-import apache_beam as beam
 import tensorflow.compat.v1 as tf
-from apache_beam import runners
-flags.DEFINE_string('detection_input_tfrecord', None, 'TFRecord containing '
-                    'images in tf.Example format for object detection.')
-flags.DEFINE_string('detection_output_tfrecord', None,
-                    'TFRecord containing detections in tf.Example format.')
-flags.DEFINE_string('detection_model_dir', None, 'Path to directory containing'
-                    'an object detection SavedModel.')
-flags.DEFINE_float('confidence_threshold', 0.9,
-                   'Min confidence to keep bounding boxes')
-flags.DEFINE_integer('num_shards', 0, 'Number of output shards.')
-FLAGS = flags.FLAGS
+
+try:
+  import apache_beam as beam  # pylint:disable=g-import-not-at-top
+except ModuleNotFoundError:
+  pass


 class GenerateDetectionDataFn(beam.DoFn):
...
...
@@ -205,58 +193,103 @@ class GenerateDetectionDataFn(beam.DoFn):
     return [example]


-def construct_pipeline(input_tfrecord, output_tfrecord, model_dir,
+def construct_pipeline(pipeline, input_tfrecord, output_tfrecord, model_dir,
                        confidence_threshold, num_shards):
   """Returns a Beam pipeline to run object detection inference.

   Args:
+    pipeline: Initialized beam pipeline.
     input_tfrecord: A TFRecord of tf.train.Example protos containing images.
     output_tfrecord: A TFRecord of tf.train.Example protos that contain images
       in the input TFRecord and the detections from the model.
     model_dir: Path to `saved_model` to use for inference.
     confidence_threshold: Threshold to use when keeping detection results.
     num_shards: The number of output shards.
-
-  Returns:
-    pipeline: A Beam pipeline.
   """
-  def pipeline(root):
-    input_collection = (
-        root | 'ReadInputTFRecord' >> beam.io.tfrecordio.ReadFromTFRecord(
-            input_tfrecord,
-            coder=beam.coders.BytesCoder()))
-    output_collection = input_collection | 'RunInference' >> beam.ParDo(
-        GenerateDetectionDataFn(model_dir, confidence_threshold))
-    output_collection = output_collection | 'Reshuffle' >> beam.Reshuffle()
-    _ = output_collection | 'WritetoDisk' >> beam.io.tfrecordio.WriteToTFRecord(
-        output_tfrecord,
-        num_shards=num_shards,
-        coder=beam.coders.ProtoCoder(tf.train.Example))
-  return pipeline
+  input_collection = (
+      pipeline | 'ReadInputTFRecord' >> beam.io.tfrecordio.ReadFromTFRecord(
+          input_tfrecord,
+          coder=beam.coders.BytesCoder()))
+  output_collection = input_collection | 'RunInference' >> beam.ParDo(
+      GenerateDetectionDataFn(model_dir, confidence_threshold))
+  output_collection = output_collection | 'Reshuffle' >> beam.Reshuffle()
+  _ = output_collection | 'WritetoDisk' >> beam.io.tfrecordio.WriteToTFRecord(
+      output_tfrecord,
+      num_shards=num_shards,
+      coder=beam.coders.ProtoCoder(tf.train.Example))


-def main(_):
+def parse_args(argv):
+  """Command-line argument parser.
+
+  Args:
+    argv: command line arguments
+  Returns:
+    beam_args: Arguments for the beam pipeline.
+    pipeline_args: Arguments for the pipeline options, such as runner type.
+  """
+  parser = argparse.ArgumentParser()
+  parser.add_argument(
+      '--detection_input_tfrecord',
+      dest='detection_input_tfrecord',
+      required=True,
+      help='TFRecord containing images in tf.Example format for object '
+      'detection.')
+  parser.add_argument(
+      '--detection_output_tfrecord',
+      dest='detection_output_tfrecord',
+      required=True,
+      help='TFRecord containing detections in tf.Example format.')
+  parser.add_argument(
+      '--detection_model_dir',
+      dest='detection_model_dir',
+      required=True,
+      help='Path to directory containing an object detection SavedModel.')
+  parser.add_argument(
+      '--confidence_threshold',
+      dest='confidence_threshold',
+      default=0.9,
+      help='Min confidence to keep bounding boxes.')
+  parser.add_argument(
+      '--num_shards',
+      dest='num_shards',
+      default=0,
+      help='Number of output shards.')
+  beam_args, pipeline_args = parser.parse_known_args(argv)
+  return beam_args, pipeline_args


+def main(argv=None, save_main_session=True):
   """Runs the Beam pipeline that performs inference.

   Args:
-    _: unused
+    argv: Command line arguments.
+    save_main_session: Whether to save the main session.
   """
-  # must create before flags are used
-  runner = runners.DirectRunner()
-  dirname = os.path.dirname(FLAGS.detection_output_tfrecord)
+  args, pipeline_args = parse_args(argv)
+  pipeline_options = beam.options.pipeline_options.PipelineOptions(
+      pipeline_args)
+  pipeline_options.view_as(
+      beam.options.pipeline_options.SetupOptions).save_main_session = (
+          save_main_session)
+  dirname = os.path.dirname(args.detection_output_tfrecord)
   tf.io.gfile.makedirs(dirname)
-  runner.run(construct_pipeline(
-      FLAGS.detection_input_tfrecord,
-      FLAGS.detection_output_tfrecord,
-      FLAGS.detection_model_dir,
-      FLAGS.confidence_threshold,
-      FLAGS.num_shards))
+  p = beam.Pipeline(options=pipeline_options)
+  construct_pipeline(
+      p,
+      args.detection_input_tfrecord,
+      args.detection_output_tfrecord,
+      args.detection_model_dir,
+      args.confidence_threshold,
+      args.num_shards)
+  p.run()


 if __name__ == '__main__':
-  flags.mark_flags_as_required([
-      'detection_input_tfrecord',
-      'detection_output_tfrecord',
-      'detection_model_dir'
-  ])
-  app.run(main)
+  main()
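save_main_session=True matters for distributed runners: DoFns defined at module scope may reference module-level imports, and pickling the main session is how those globals reach remote workers. A minimal sketch of setting it through PipelineOptions (the flag list is illustrative):

    import apache_beam as beam
    from apache_beam.options.pipeline_options import PipelineOptions, SetupOptions

    pipeline_args = ['--runner=DirectRunner']  # illustrative pass-through args
    pipeline_options = PipelineOptions(pipeline_args)
    # Pickle the main session so module-level state is available on workers.
    pipeline_options.view_as(SetupOptions).save_main_session = True
    p = beam.Pipeline(options=pipeline_options)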
research/object_detection/dataset_tools/context_rcnn/generate_detection_data_tf1_test.py
View file @ 31ca3b97
...
...
@@ -32,13 +32,17 @@ from object_detection.core import model
 from object_detection.dataset_tools.context_rcnn import generate_detection_data
 from object_detection.protos import pipeline_pb2
 from object_detection.utils import tf_version
-from apache_beam import runners

 if six.PY2:
   import mock  # pylint: disable=g-import-not-at-top
 else:
   mock = unittest.mock

+try:
+  import apache_beam as beam  # pylint:disable=g-import-not-at-top
+except ModuleNotFoundError:
+  pass


 class FakeModel(model.DetectionModel):
   """A Fake Detection model with expected output nodes from post-processing."""
...
...
@@ -67,6 +71,9 @@ class FakeModel(model.DetectionModel):
   def restore_map(self, checkpoint_path, fine_tune_checkpoint_type):
     pass

+  def restore_from_objects(self, fine_tune_checkpoint_type):
+    pass

   def loss(self, prediction_dict, true_image_shapes):
     pass
...
...
@@ -243,16 +250,18 @@ class GenerateDetectionDataTest(tf.test.TestCase):
   def test_beam_pipeline(self):
     with InMemoryTFRecord([self._create_tf_example()]) as input_tfrecord:
-      runner = runners.DirectRunner()
       temp_dir = tempfile.mkdtemp(dir=os.environ.get('TEST_TMPDIR'))
       output_tfrecord = os.path.join(temp_dir, 'output_tfrecord')
       saved_model_path = self._export_saved_model()
       confidence_threshold = 0.8
       num_shards = 1
-      pipeline = generate_detection_data.construct_pipeline(
-          input_tfrecord, output_tfrecord, saved_model_path,
-          confidence_threshold, num_shards)
-      runner.run(pipeline)
+      pipeline_options = beam.options.pipeline_options.PipelineOptions(
+          runner='DirectRunner')
+      p = beam.Pipeline(options=pipeline_options)
+      generate_detection_data.construct_pipeline(
+          p, input_tfrecord, output_tfrecord, saved_model_path,
+          confidence_threshold, num_shards)
+      p.run()
       filenames = tf.io.gfile.glob(output_tfrecord + '-?????-of-?????')
       actual_output = []
       record_iterator = tf.python_io.tf_record_iterator(path=filenames[0])
...
...
research/object_detection/dataset_tools/context_rcnn/generate_embedding_data.py
View file @ 31ca3b97
...
...
@@ -34,7 +34,8 @@ python tensorflow_models/object_detection/export_inference_graph.py \
     --input_type tf_example \
     --pipeline_config_path path/to/faster_rcnn_model.config \
     --trained_checkpoint_prefix path/to/model.ckpt \
-    --output_directory path/to/exported_model_directory
+    --output_directory path/to/exported_model_directory \
+    --additional_output_tensor_names detection_features

 python generate_embedding_data.py \
     --alsologtostderr \
...
...
@@ -47,34 +48,19 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
+import argparse
 import datetime
 import os
 import threading
-from absl import app
-from absl import flags
-import apache_beam as beam
 import numpy as np
 import six
 import tensorflow.compat.v1 as tf
-from apache_beam import runners
-flags.DEFINE_string('embedding_input_tfrecord', None, 'TFRecord containing'
-                    'images in tf.Example format for object detection.')
-flags.DEFINE_string('embedding_output_tfrecord', None,
-                    'TFRecord containing embeddings in tf.Example format.')
-flags.DEFINE_string('embedding_model_dir', None, 'Path to directory containing'
-                    'an object detection SavedModel with'
-                    'detection_box_classifier_features in the output.')
-flags.DEFINE_integer('top_k_embedding_count', 1,
-                     'The number of top k embeddings to add to the memory bank.')
-flags.DEFINE_integer('bottom_k_embedding_count', 0,
-                     'The number of bottom k embeddings to add to the memory '
-                     'bank.')
-flags.DEFINE_integer('num_shards', 0, 'Number of output shards.')
-FLAGS = flags.FLAGS
+
+try:
+  import apache_beam as beam  # pylint:disable=g-import-not-at-top
+except ModuleNotFoundError:
+  pass


 class GenerateEmbeddingDataFn(beam.DoFn):
...
...
@@ -321,12 +307,13 @@ class GenerateEmbeddingDataFn(beam.DoFn):
     return [example]


-def construct_pipeline(input_tfrecord, output_tfrecord, model_dir,
+def construct_pipeline(pipeline, input_tfrecord, output_tfrecord, model_dir,
                        top_k_embedding_count, bottom_k_embedding_count,
                        num_shards):
   """Returns a beam pipeline to run object detection inference.

   Args:
+    pipeline: Initialized beam pipeline.
     input_tfrecord: A TFRecord of tf.train.Example protos containing images.
     output_tfrecord: A TFRecord of tf.train.Example protos that contain images
       in the input TFRecord and the detections from the model.
...
...
@@ -335,44 +322,98 @@ def construct_pipeline(input_tfrecord, output_tfrecord, model_dir,
     bottom_k_embedding_count: The number of low-confidence embeddings to store.
     num_shards: The number of output shards.
   """
-  def pipeline(root):
-    input_collection = (
-        root | 'ReadInputTFRecord' >> beam.io.tfrecordio.ReadFromTFRecord(
-            input_tfrecord,
-            coder=beam.coders.BytesCoder()))
-    output_collection = input_collection | 'ExtractEmbedding' >> beam.ParDo(
-        GenerateEmbeddingDataFn(model_dir, top_k_embedding_count,
-                                bottom_k_embedding_count))
-    output_collection = output_collection | 'Reshuffle' >> beam.Reshuffle()
-    _ = output_collection | 'WritetoDisk' >> beam.io.tfrecordio.WriteToTFRecord(
-        output_tfrecord,
-        num_shards=num_shards,
-        coder=beam.coders.ProtoCoder(tf.train.Example))
-  return pipeline
+  input_collection = (
+      pipeline | 'ReadInputTFRecord' >> beam.io.tfrecordio.ReadFromTFRecord(
+          input_tfrecord,
+          coder=beam.coders.BytesCoder()))
+  output_collection = input_collection | 'ExtractEmbedding' >> beam.ParDo(
+      GenerateEmbeddingDataFn(model_dir, top_k_embedding_count,
+                              bottom_k_embedding_count))
+  output_collection = output_collection | 'Reshuffle' >> beam.Reshuffle()
+  _ = output_collection | 'WritetoDisk' >> beam.io.tfrecordio.WriteToTFRecord(
+      output_tfrecord,
+      num_shards=num_shards,
+      coder=beam.coders.ProtoCoder(tf.train.Example))


-def main(_):
+def parse_args(argv):
+  """Command-line argument parser.
+
+  Args:
+    argv: command line arguments
+  Returns:
+    beam_args: Arguments for the beam pipeline.
+    pipeline_args: Arguments for the pipeline options, such as runner type.
+  """
+  parser = argparse.ArgumentParser()
+  parser.add_argument(
+      '--embedding_input_tfrecord',
+      dest='embedding_input_tfrecord',
+      required=True,
+      help='TFRecord containing images in tf.Example format for object '
+      'detection.')
+  parser.add_argument(
+      '--embedding_output_tfrecord',
+      dest='embedding_output_tfrecord',
+      required=True,
+      help='TFRecord containing embeddings in tf.Example format.')
+  parser.add_argument(
+      '--embedding_model_dir',
+      dest='embedding_model_dir',
+      required=True,
+      help='Path to directory containing an object detection SavedModel with'
+      'detection_box_classifier_features in the output.')
+  parser.add_argument(
+      '--top_k_embedding_count',
+      dest='top_k_embedding_count',
+      default=1,
+      help='The number of top k embeddings to add to the memory bank.')
+  parser.add_argument(
+      '--bottom_k_embedding_count',
+      dest='bottom_k_embedding_count',
+      default=0,
+      help='The number of bottom k embeddings to add to the memory bank.')
+  parser.add_argument(
+      '--num_shards',
+      dest='num_shards',
+      default=0,
+      help='Number of output shards.')
+  beam_args, pipeline_args = parser.parse_known_args(argv)
+  return beam_args, pipeline_args


+def main(argv=None, save_main_session=True):
   """Runs the Beam pipeline that performs inference.

   Args:
-    _: unused
+    argv: Command line arguments.
+    save_main_session: Whether to save the main session.
   """
-  # must create before flags are used
-  runner = runners.DirectRunner()
-  dirname = os.path.dirname(FLAGS.embedding_output_tfrecord)
+  args, pipeline_args = parse_args(argv)
+  pipeline_options = beam.options.pipeline_options.PipelineOptions(
+      pipeline_args)
+  pipeline_options.view_as(
+      beam.options.pipeline_options.SetupOptions).save_main_session = (
+          save_main_session)
+  dirname = os.path.dirname(args.embedding_output_tfrecord)
   tf.io.gfile.makedirs(dirname)
-  runner.run(construct_pipeline(
-      FLAGS.embedding_input_tfrecord,
-      FLAGS.embedding_output_tfrecord,
-      FLAGS.embedding_model_dir,
-      FLAGS.top_k_embedding_count,
-      FLAGS.bottom_k_embedding_count,
-      FLAGS.num_shards))
+  p = beam.Pipeline(options=pipeline_options)
+  construct_pipeline(
+      p,
+      args.embedding_input_tfrecord,
+      args.embedding_output_tfrecord,
+      args.embedding_model_dir,
+      args.top_k_embedding_count,
+      args.bottom_k_embedding_count,
+      args.num_shards)
+  p.run()


 if __name__ == '__main__':
-  flags.mark_flags_as_required([
-      'embedding_input_tfrecord',
-      'embedding_output_tfrecord',
-      'embedding_model_dir'
-  ])
-  app.run(main)
+  main()
research/object_detection/dataset_tools/context_rcnn/generate_embedding_data_tf1_test.py
View file @ 31ca3b97
...
...
@@ -30,13 +30,18 @@ from object_detection.core import model
 from object_detection.dataset_tools.context_rcnn import generate_embedding_data
 from object_detection.protos import pipeline_pb2
 from object_detection.utils import tf_version
-from apache_beam import runners

 if six.PY2:
   import mock  # pylint: disable=g-import-not-at-top
 else:
   mock = unittest.mock

+try:
+  import apache_beam as beam  # pylint:disable=g-import-not-at-top
+except ModuleNotFoundError:
+  pass


 class FakeModel(model.DetectionModel):
   """A Fake Detection model with expected output nodes from post-processing."""
...
...
@@ -73,6 +78,9 @@ class FakeModel(model.DetectionModel):
   def restore_map(self, checkpoint_path, fine_tune_checkpoint_type):
     pass

+  def restore_from_objects(self, fine_tune_checkpoint_type):
+    pass

   def loss(self, prediction_dict, true_image_shapes):
     pass
...
...
@@ -236,13 +244,13 @@ class GenerateEmbeddingData(tf.test.TestCase):
                         .int64_list.value, [5])
     self.assertAllEqual(example.features.feature['image/object/class/text']
-                        .bytes_list.value, ['hyena'])
+                        .bytes_list.value, [b'hyena'])
     self.assertAllClose(example.features.feature['image/class/label']
                         .int64_list.value, [5])
     self.assertAllEqual(example.features.feature['image/class/text']
-                        .bytes_list.value, ['hyena'])
+                        .bytes_list.value, [b'hyena'])
     # Check other essential attributes.
     self.assertAllEqual(
...
...
@@ -251,7 +259,7 @@ class GenerateEmbeddingData(tf.test.TestCase):
         example.features.feature['image/width'].int64_list.value, [600])
     self.assertAllEqual(
         example.features.feature['image/source_id'].bytes_list.value,
-        ['image_id'])
+        [b'image_id'])
     self.assertTrue(
         example.features.feature['image/encoded'].bytes_list.value)
...
...
@@ -268,7 +276,7 @@ class GenerateEmbeddingData(tf.test.TestCase):
                         .int64_list.value, [5])
     self.assertAllEqual(
         tf.train.Example.FromString(
            generated_example).features.feature['image/object/class/text']
-        .bytes_list.value, ['hyena'])
+        .bytes_list.value, [b'hyena'])
     output = inference_fn.process(generated_example)
     output_example = output[0]
     self.assert_expected_example(output_example)
...
...
@@ -304,24 +312,26 @@ class GenerateEmbeddingData(tf.test.TestCase):
         .feature['image/object/class/label'].int64_list.value, [5])
     self.assertAllEqual(
         tf.train.Example.FromString(generated_example).features
-        .feature['image/object/class/text'].bytes_list.value, ['hyena'])
+        .feature['image/object/class/text'].bytes_list.value, [b'hyena'])
     output = inference_fn.process(generated_example)
     output_example = output[0]
     self.assert_expected_example(output_example, botk=True)

   def test_beam_pipeline(self):
     with InMemoryTFRecord([self._create_tf_example()]) as input_tfrecord:
-      runner = runners.DirectRunner()
       temp_dir = tempfile.mkdtemp(dir=os.environ.get('TEST_TMPDIR'))
       output_tfrecord = os.path.join(temp_dir, 'output_tfrecord')
       saved_model_path = self._export_saved_model()
       top_k_embedding_count = 1
       bottom_k_embedding_count = 0
       num_shards = 1
-      pipeline = generate_embedding_data.construct_pipeline(
-          input_tfrecord, output_tfrecord, saved_model_path,
-          top_k_embedding_count, bottom_k_embedding_count, num_shards)
-      runner.run(pipeline)
+      pipeline_options = beam.options.pipeline_options.PipelineOptions(
+          runner='DirectRunner')
+      p = beam.Pipeline(options=pipeline_options)
+      generate_embedding_data.construct_pipeline(
+          p, input_tfrecord, output_tfrecord, saved_model_path,
+          top_k_embedding_count, bottom_k_embedding_count, num_shards)
+      p.run()
       filenames = tf.io.gfile.glob(output_tfrecord + '-?????-of-?????')
       actual_output = []
...
...
research/object_detection/dataset_tools/create_coco_tf_record.py
View file @ 31ca3b97
...
...
@@ -14,6 +14,9 @@
 # ==============================================================================
 r"""Convert raw COCO dataset to TFRecord for object_detection.

+This tool supports data generation for object detection (boxes, masks),
+keypoint detection, and DensePose.
+
 Please note that this tool creates sharded output files.

 Example usage:
...
...
@@ -63,7 +66,18 @@ tf.flags.DEFINE_string('train_keypoint_annotations_file', '',
                        'Training annotations JSON file.')
 tf.flags.DEFINE_string('val_keypoint_annotations_file', '',
                        'Validation annotations JSON file.')
+# DensePose is only available for coco 2014.
+tf.flags.DEFINE_string('train_densepose_annotations_file', '',
+                       'Training annotations JSON file for DensePose.')
+tf.flags.DEFINE_string('val_densepose_annotations_file', '',
+                       'Validation annotations JSON file for DensePose.')
 tf.flags.DEFINE_string('output_dir', '/tmp/', 'Output data directory.')
+# Whether to only produce images/annotations on person class (for keypoint /
+# densepose task).
+tf.flags.DEFINE_boolean('remove_non_person_annotations', False, 'Whether to '
+                        'remove all annotations for non-person objects.')
+tf.flags.DEFINE_boolean('remove_non_person_images', False, 'Whether to '
+                        'remove all examples that do not contain a person.')

 FLAGS = flags.FLAGS
...
...
@@ -77,13 +91,33 @@ _COCO_KEYPOINT_NAMES = [
     b'left_knee', b'right_knee', b'left_ankle', b'right_ankle'
 ]

+_COCO_PART_NAMES = [
+    b'torso_back', b'torso_front', b'right_hand', b'left_hand', b'left_foot',
+    b'right_foot', b'right_upper_leg_back', b'left_upper_leg_back',
+    b'right_upper_leg_front', b'left_upper_leg_front', b'right_lower_leg_back',
+    b'left_lower_leg_back', b'right_lower_leg_front', b'left_lower_leg_front',
+    b'left_upper_arm_back', b'right_upper_arm_back', b'left_upper_arm_front',
+    b'right_upper_arm_front', b'left_lower_arm_back', b'right_lower_arm_back',
+    b'left_lower_arm_front', b'right_lower_arm_front', b'right_face',
+    b'left_face',
+]
+
+_DP_PART_ID_OFFSET = 1
+
+
+def clip_to_unit(x):
+  return min(max(x, 0.0), 1.0)


 def create_tf_example(image,
                       annotations_list,
                       image_dir,
                       category_index,
                       include_masks=False,
-                      keypoint_annotations_dict=None):
+                      keypoint_annotations_dict=None,
+                      densepose_annotations_dict=None,
+                      remove_non_person_annotations=False,
+                      remove_non_person_images=False):
   """Converts image and annotations to a tf.Example proto.

   Args:
...
...
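Two small details above do real work later: clip_to_unit clamps DensePose point coordinates into [0, 1] before they are scaled into the image frame, and _DP_PART_ID_OFFSET shifts COCO's 1-based dp_I part labels to 0-based indices into _COCO_PART_NAMES. A quick illustration:

    def clip_to_unit(x):
      return min(max(x, 0.0), 1.0)

    _DP_PART_ID_OFFSET = 1

    print(clip_to_unit(-0.2), clip_to_unit(0.5), clip_to_unit(1.7))  # 0.0 0.5 1.0
    # COCO dp_I part id 1 ('torso_back') maps to list index 0.
    print(int(1 - _DP_PART_ID_OFFSET))  # 0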
@@ -108,10 +142,23 @@ def create_tf_example(image,
       dictionary with keys: [u'keypoints', u'num_keypoints'] representing the
       keypoint information for this person object annotation. If None, then
       no keypoint annotations will be populated.
+    densepose_annotations_dict: A dictionary that maps from annotation_id to a
+      dictionary with keys: [u'dp_I', u'dp_x', u'dp_y', 'dp_U', 'dp_V']
+      representing part surface coordinates. For more information see
+      http://densepose.org/.
+    remove_non_person_annotations: Whether to remove any annotations that are
+      not the "person" class.
+    remove_non_person_images: Whether to remove any images that do not contain
+      at least one "person" annotation.

   Returns:
     key: SHA256 hash of the image.
     example: The converted tf.Example
     num_annotations_skipped: Number of (invalid) annotations that were ignored.
+    num_keypoint_annotation_skipped: Number of keypoint annotations that were
+      skipped.
+    num_densepose_annotation_skipped: Number of DensePose annotations that were
+      skipped.

   Raises:
     ValueError: if the image pointed to by data['filename'] is not a valid JPEG
...
...
@@ -146,6 +193,16 @@ def create_tf_example(image,
   num_annotations_skipped = 0
   num_keypoint_annotation_used = 0
   num_keypoint_annotation_skipped = 0
+  dp_part_index = []
+  dp_x = []
+  dp_y = []
+  dp_u = []
+  dp_v = []
+  dp_num_points = []
+  densepose_keys = ['dp_I', 'dp_U', 'dp_V', 'dp_x', 'dp_y', 'bbox']
+  include_densepose = densepose_annotations_dict is not None
+  num_densepose_annotation_used = 0
+  num_densepose_annotation_skipped = 0
   for object_annotations in annotations_list:
     (x, y, width, height) = tuple(object_annotations['bbox'])
     if width <= 0 or height <= 0:
...
...
@@ -154,14 +211,18 @@ def create_tf_example(image,
     if x + width > image_width or y + height > image_height:
       num_annotations_skipped += 1
       continue
+    category_id = int(object_annotations['category_id'])
+    category_name = category_index[category_id]['name'].encode('utf8')
+    if remove_non_person_annotations and category_name != b'person':
+      num_annotations_skipped += 1
+      continue
     xmin.append(float(x) / image_width)
     xmax.append(float(x + width) / image_width)
     ymin.append(float(y) / image_height)
     ymax.append(float(y + height) / image_height)
     is_crowd.append(object_annotations['iscrowd'])
-    category_id = int(object_annotations['category_id'])
     category_ids.append(category_id)
-    category_names.append(category_index[category_id]['name'].encode('utf8'))
+    category_names.append(category_name)
     area.append(object_annotations['area'])

     if include_masks:
...
...
@@ -197,6 +258,40 @@ def create_tf_example(image,
       keypoints_visibility.extend([0] * len(_COCO_KEYPOINT_NAMES))
       keypoints_name.extend(_COCO_KEYPOINT_NAMES)
       num_keypoints.append(0)
+    if include_densepose:
+      annotation_id = object_annotations['id']
+      if (annotation_id in densepose_annotations_dict and
+          all(key in densepose_annotations_dict[annotation_id]
+              for key in densepose_keys)):
+        dp_annotations = densepose_annotations_dict[annotation_id]
+        num_densepose_annotation_used += 1
+        dp_num_points.append(len(dp_annotations['dp_I']))
+        dp_part_index.extend([int(i - _DP_PART_ID_OFFSET)
+                              for i in dp_annotations['dp_I']])
+        # DensePose surface coordinates are defined on a [256, 256] grid
+        # relative to each instance box (i.e. absolute coordinates in range
+        # [0., 256.]). The following converts the coordinates
+        # so that they are expressed in normalized image coordinates.
+        dp_x_box_rel = [
+            clip_to_unit(val / 256.) for val in dp_annotations['dp_x']]
+        dp_x_norm = [(float(x) + x_box_rel * width) / image_width
+                     for x_box_rel in dp_x_box_rel]
+        dp_y_box_rel = [
+            clip_to_unit(val / 256.) for val in dp_annotations['dp_y']]
+        dp_y_norm = [(float(y) + y_box_rel * height) / image_height
+                     for y_box_rel in dp_y_box_rel]
+        dp_x.extend(dp_x_norm)
+        dp_y.extend(dp_y_norm)
+        dp_u.extend(dp_annotations['dp_U'])
+        dp_v.extend(dp_annotations['dp_V'])
+      else:
+        dp_num_points.append(0)

+  if (remove_non_person_images and
+      not any(name == b'person' for name in category_names)):
+    return (key, None, num_annotations_skipped,
+            num_keypoint_annotation_skipped, num_densepose_annotation_skipped)
   feature_dict = {
       'image/height': dataset_util.int64_feature(image_height),
...
...
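The coordinate conversion above is worth seeing with numbers: a dp_x of 128 on the [0, 256] box-relative grid is 0.5 of the box width; for a box at x = 64 with width 128 in a 256-pixel-wide image, that lands at (64 + 0.5 * 128) / 256 = 0.5 in normalized image coordinates. A standalone sketch of the same math (all values are illustrative):

    def clip_to_unit(v):
      return min(max(v, 0.0), 1.0)

    x, width, image_width = 64.0, 128.0, 256.0  # illustrative box and image
    dp_x = [0.0, 128.0, 256.0]                  # box-relative points on [0, 256]

    dp_x_box_rel = [clip_to_unit(val / 256.) for val in dp_x]
    dp_x_norm = [(x + x_box_rel * width) / image_width
                 for x_box_rel in dp_x_box_rel]
    print(dp_x_norm)  # [0.25, 0.5, 0.75]: the box spans [0.25, 0.75] of the image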
@@ -243,15 +338,34 @@ def create_tf_example(image,
         dataset_util.bytes_list_feature(keypoints_name))
     num_keypoint_annotation_skipped = (
         len(keypoint_annotations_dict) - num_keypoint_annotation_used)
+  if include_densepose:
+    feature_dict['image/object/densepose/num'] = (
+        dataset_util.int64_list_feature(dp_num_points))
+    feature_dict['image/object/densepose/part_index'] = (
+        dataset_util.int64_list_feature(dp_part_index))
+    feature_dict['image/object/densepose/x'] = (
+        dataset_util.float_list_feature(dp_x))
+    feature_dict['image/object/densepose/y'] = (
+        dataset_util.float_list_feature(dp_y))
+    feature_dict['image/object/densepose/u'] = (
+        dataset_util.float_list_feature(dp_u))
+    feature_dict['image/object/densepose/v'] = (
+        dataset_util.float_list_feature(dp_v))
+    num_densepose_annotation_skipped = (
+        len(densepose_annotations_dict) - num_densepose_annotation_used)

   example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
-  return key, example, num_annotations_skipped, num_keypoint_annotation_skipped
+  return (key, example, num_annotations_skipped,
+          num_keypoint_annotation_skipped, num_densepose_annotation_skipped)


 def _create_tf_record_from_coco_annotations(annotations_file, image_dir,
                                             output_path, include_masks,
                                             num_shards,
-                                            keypoint_annotations_file=''):
+                                            keypoint_annotations_file='',
+                                            densepose_annotations_file='',
+                                            remove_non_person_annotations=False,
+                                            remove_non_person_images=False):
   """Loads COCO annotation json files and converts to tf.Record format.

   Args:
...
...
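All six DensePose fields above land in the flat tf.Example feature map as parallel lists, with image/object/densepose/num recording how many points belong to each box so the flat lists can be re-split per instance. A minimal sketch of that encoding, built directly with the proto API rather than the repo's dataset_util helpers (the values are illustrative):

    import tensorflow.compat.v1 as tf

    # Two boxes: the first contributes 2 surface points, the second 1.
    dp_num_points = [2, 1]
    dp_u = [0.1, 0.2, 0.3]  # flat, concatenated across boxes

    feature_dict = {
        'image/object/densepose/num': tf.train.Feature(
            int64_list=tf.train.Int64List(value=dp_num_points)),
        'image/object/densepose/u': tf.train.Feature(
            float_list=tf.train.FloatList(value=dp_u)),
    }
    example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
    print(example.features.feature['image/object/densepose/num'].int64_list.value)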
@@ -264,6 +378,12 @@ def _create_tf_record_from_coco_annotations(annotations_file, image_dir,
     keypoint_annotations_file: JSON file containing the person keypoint
       annotations. If empty, then no person keypoint annotations will be
       generated.
+    densepose_annotations_file: JSON file containing the DensePose annotations.
+      If empty, then no DensePose annotations will be generated.
+    remove_non_person_annotations: Whether to remove any annotations that are
+      not the "person" class.
+    remove_non_person_images: Whether to remove any images that do not contain
+      at least one "person" annotation.
   """
   with contextlib2.ExitStack() as tf_record_close_stack, \
       tf.gfile.GFile(annotations_file, 'r') as fid:
...
...
@@ -288,7 +408,8 @@ def _create_tf_record_from_coco_annotations(annotations_file, image_dir,
       if image_id not in annotations_index:
        missing_annotation_count += 1
        annotations_index[image_id] = []
-    logging.info('%d images are missing annotations.', missing_annotation_count)
+    logging.info('%d images are missing annotations.',
+                 missing_annotation_count)

    keypoint_annotations_index = {}
    if keypoint_annotations_file:
...
...
@@ -301,8 +422,20 @@ def _create_tf_record_from_coco_annotations(annotations_file, image_dir,
          keypoint_annotations_index[image_id] = {}
        keypoint_annotations_index[image_id][annotation['id']] = annotation

+    densepose_annotations_index = {}
+    if densepose_annotations_file:
+      with tf.gfile.GFile(densepose_annotations_file, 'r') as fid:
+        densepose_groundtruth_data = json.load(fid)
+      if 'annotations' in densepose_groundtruth_data:
+        for annotation in densepose_groundtruth_data['annotations']:
+          image_id = annotation['image_id']
+          if image_id not in densepose_annotations_index:
+            densepose_annotations_index[image_id] = {}
+          densepose_annotations_index[image_id][annotation['id']] = annotation

    total_num_annotations_skipped = 0
    total_num_keypoint_annotations_skipped = 0
+    total_num_densepose_annotations_skipped = 0
    for idx, image in enumerate(images):
      if idx % 100 == 0:
        logging.info('On image %d of %d', idx, len(images))
...
...
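The keypoint and DensePose indices built above share one two-level shape, index[image_id][annotation_id] = annotation, so the per-image lookup during example creation is a constant-time dict access. A minimal sketch of building such an index from a COCO-style annotation list (the records are illustrative):

    annotations = [  # illustrative COCO-style records
        {'id': 1000, 'image_id': 11, 'dp_I': [1, 2]},
        {'id': 1001, 'image_id': 11, 'dp_I': [3]},
        {'id': 2000, 'image_id': 12, 'dp_I': [4]},
    ]

    index = {}
    for annotation in annotations:
      image_id = annotation['image_id']
      if image_id not in index:
        index[image_id] = {}
      index[image_id][annotation['id']] = annotation

    print(sorted(index[11]))  # [1000, 1001]: both annotations for image 11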
@@ -312,19 +445,31 @@ def _create_tf_record_from_coco_annotations(annotations_file, image_dir,
        keypoint_annotations_dict = {}
        if image['id'] in keypoint_annotations_index:
          keypoint_annotations_dict = keypoint_annotations_index[image['id']]
-      (_, tf_example, num_annotations_skipped,
-       num_keypoint_annotations_skipped) = create_tf_example(
-           image, annotations_list, image_dir, category_index, include_masks,
-           keypoint_annotations_dict)
+      densepose_annotations_dict = None
+      if densepose_annotations_file:
+        densepose_annotations_dict = {}
+        if image['id'] in densepose_annotations_index:
+          densepose_annotations_dict = densepose_annotations_index[image['id']]
+      (_, tf_example, num_annotations_skipped, num_keypoint_annotations_skipped,
+       num_densepose_annotations_skipped) = create_tf_example(
+           image, annotations_list, image_dir, category_index, include_masks,
+           keypoint_annotations_dict, densepose_annotations_dict,
+           remove_non_person_annotations, remove_non_person_images)
      total_num_annotations_skipped += num_annotations_skipped
      total_num_keypoint_annotations_skipped += num_keypoint_annotations_skipped
+      total_num_densepose_annotations_skipped += (
+          num_densepose_annotations_skipped)
      shard_idx = idx % num_shards
-      output_tfrecords[shard_idx].write(tf_example.SerializeToString())
+      if tf_example:
+        output_tfrecords[shard_idx].write(tf_example.SerializeToString())
    logging.info('Finished writing, skipped %d annotations.',
                 total_num_annotations_skipped)
    if keypoint_annotations_file:
      logging.info('Finished writing, skipped %d keypoint annotations.',
                   total_num_keypoint_annotations_skipped)
+    if densepose_annotations_file:
+      logging.info('Finished writing, skipped %d DensePose annotations.',
+                   total_num_densepose_annotations_skipped)


 def main(_):
...
...
@@ -347,20 +492,26 @@ def main(_):
       train_output_path,
       FLAGS.include_masks,
       num_shards=100,
-      keypoint_annotations_file=FLAGS.train_keypoint_annotations_file)
+      keypoint_annotations_file=FLAGS.train_keypoint_annotations_file,
+      densepose_annotations_file=FLAGS.train_densepose_annotations_file,
+      remove_non_person_annotations=FLAGS.remove_non_person_annotations,
+      remove_non_person_images=FLAGS.remove_non_person_images)
   _create_tf_record_from_coco_annotations(
       FLAGS.val_annotations_file,
       FLAGS.val_image_dir,
       val_output_path,
       FLAGS.include_masks,
-      num_shards=100,
-      keypoint_annotations_file=FLAGS.val_keypoint_annotations_file)
+      num_shards=50,
+      keypoint_annotations_file=FLAGS.val_keypoint_annotations_file,
+      densepose_annotations_file=FLAGS.val_densepose_annotations_file,
+      remove_non_person_annotations=FLAGS.remove_non_person_annotations,
+      remove_non_person_images=FLAGS.remove_non_person_images)
   _create_tf_record_from_coco_annotations(
       FLAGS.testdev_annotations_file,
       FLAGS.test_image_dir,
       testdev_output_path,
       FLAGS.include_masks,
-      num_shards=100)
+      num_shards=50)


 if __name__ == '__main__':
...
...
research/object_detection/dataset_tools/create_coco_tf_record_test.py
View file @ 31ca3b97
...
...
@@ -89,7 +89,7 @@ class CreateCocoTFRecordTest(tf.test.TestCase):
     }

     (_, example, num_annotations_skipped,
-     _) = create_coco_tf_record.create_tf_example(
+     _, _) = create_coco_tf_record.create_tf_example(
         image, annotations_list, image_dir, category_index)

     self.assertEqual(num_annotations_skipped, 0)
...
...
@@ -156,7 +156,7 @@ class CreateCocoTFRecordTest(tf.test.TestCase):
     }

     (_, example, num_annotations_skipped,
-     _) = create_coco_tf_record.create_tf_example(
+     _, _) = create_coco_tf_record.create_tf_example(
         image, annotations_list, image_dir, category_index, include_masks=True)

     self.assertEqual(num_annotations_skipped, 0)
...
...
@@ -259,14 +259,14 @@ class CreateCocoTFRecordTest(tf.test.TestCase):
         }
     }

-    (_, example, _,
-     num_keypoint_annotation_skipped) = create_coco_tf_record.create_tf_example(
-        image, annotations_list, image_dir, category_index, include_masks=False,
-        keypoint_annotations_dict=keypoint_annotations_dict)
+    _, example, _, num_keypoint_annotation_skipped, _ = (
+        create_coco_tf_record.create_tf_example(
+            image, annotations_list, image_dir, category_index,
+            include_masks=False,
+            keypoint_annotations_dict=keypoint_annotations_dict))

     self.assertEqual(num_keypoint_annotation_skipped, 0)
     self._assertProtoEqual(
...
...
@@ -310,6 +310,132 @@ class CreateCocoTFRecordTest(tf.test.TestCase):
         example.features.feature['image/object/keypoint/visibility']
         .int64_list.value, vv)

+  def test_create_tf_example_with_dense_pose(self):
+    image_dir = self.get_temp_dir()
+    image_file_name = 'tmp_image.jpg'
+    image_data = np.random.randint(
+        low=0, high=256, size=(256, 256, 3)).astype(np.uint8)
+    save_path = os.path.join(image_dir, image_file_name)
+    image = PIL.Image.fromarray(image_data, 'RGB')
+    image.save(save_path)
+
+    image = {
+        'file_name': image_file_name,
+        'height': 256,
+        'width': 256,
+        'id': 11,
+    }
+
+    min_x, min_y = 64, 64
+    max_x, max_y = 128, 128
+    keypoints = []
+    num_visible_keypoints = 0
+    xv = []
+    yv = []
+    vv = []
+    for _ in range(17):
+      xc = min_x + int(np.random.rand() * (max_x - min_x))
+      yc = min_y + int(np.random.rand() * (max_y - min_y))
+      vis = np.random.randint(0, 3)
+      xv.append(xc)
+      yv.append(yc)
+      vv.append(vis)
+      keypoints.extend([xc, yc, vis])
+      num_visible_keypoints += (vis > 0)
+
+    annotations_list = [{
+        'area': 0.5,
+        'iscrowd': False,
+        'image_id': 11,
+        'bbox': [64, 64, 128, 128],
+        'category_id': 1,
+        'id': 1000
+    }]
+
+    num_points = 45
+    dp_i = np.random.randint(1, 25, (num_points,)).astype(np.float32)
+    dp_u = np.random.randn(num_points)
+    dp_v = np.random.randn(num_points)
+    dp_x = np.random.rand(num_points) * 256.
+    dp_y = np.random.rand(num_points) * 256.
+    densepose_annotations_dict = {
+        1000: {
+            'dp_I': dp_i,
+            'dp_U': dp_u,
+            'dp_V': dp_v,
+            'dp_x': dp_x,
+            'dp_y': dp_y,
+            'bbox': [64, 64, 128, 128],
+        }
+    }
+
+    category_index = {1: {'name': 'person', 'id': 1}}
+
+    _, example, _, _, num_densepose_annotation_skipped = (
+        create_coco_tf_record.create_tf_example(
+            image, annotations_list, image_dir, category_index,
+            include_masks=False,
+            densepose_annotations_dict=densepose_annotations_dict))
+
+    self.assertEqual(num_densepose_annotation_skipped, 0)
+    self._assertProtoEqual(
+        example.features.feature['image/height'].int64_list.value, [256])
+    self._assertProtoEqual(
+        example.features.feature['image/width'].int64_list.value, [256])
+    self._assertProtoEqual(
+        example.features.feature['image/filename'].bytes_list.value,
+        [six.b(image_file_name)])
+    self._assertProtoEqual(
+        example.features.feature['image/source_id'].bytes_list.value,
+        [six.b(str(image['id']))])
+    self._assertProtoEqual(
+        example.features.feature['image/format'].bytes_list.value,
+        [six.b('jpeg')])
+    self._assertProtoEqual(
+        example.features.feature['image/object/bbox/xmin'].float_list.value,
+        [0.25])
+    self._assertProtoEqual(
+        example.features.feature['image/object/bbox/ymin'].float_list.value,
+        [0.25])
+    self._assertProtoEqual(
+        example.features.feature['image/object/bbox/xmax'].float_list.value,
+        [0.75])
+    self._assertProtoEqual(
+        example.features.feature['image/object/bbox/ymax'].float_list.value,
+        [0.75])
+    self._assertProtoEqual(
+        example.features.feature['image/object/class/text'].bytes_list.value,
+        [six.b('person')])
+    self._assertProtoEqual(
+        example.features.feature['image/object/densepose/num']
+        .int64_list.value, [num_points])
+    self.assertAllEqual(
+        example.features.feature['image/object/densepose/part_index']
+        .int64_list.value,
+        dp_i.astype(np.int64) - create_coco_tf_record._DP_PART_ID_OFFSET)
+    self.assertAllClose(
+        example.features.feature['image/object/densepose/u']
+        .float_list.value, dp_u)
+    self.assertAllClose(
+        example.features.feature['image/object/densepose/v']
+        .float_list.value, dp_v)
+    expected_dp_x = (64 + dp_x * 128. / 256.) / 256.
+    expected_dp_y = (64 + dp_y * 128. / 256.) / 256.
+    self.assertAllClose(
+        example.features.feature['image/object/densepose/x']
+        .float_list.value, expected_dp_x)
+    self.assertAllClose(
+        example.features.feature['image/object/densepose/y']
+        .float_list.value, expected_dp_y)

   def test_create_sharded_tf_record(self):
     tmp_dir = self.get_temp_dir()
     image_paths = ['tmp1_image.jpg', 'tmp2_image.jpg']
...
...