Unverified commit 451906e4, authored by pkulzc and committed by GitHub

Release MobileDet code and model, and require tf_slim installation for OD API. (#8562)



* Merged commit includes the following changes:
311933687  by Sergio Guadarrama:

    Removes spurious use of tf.compat.v2, which results in spurious tf.compat.v1.compat.v2. Adds a basic test to nasnet_utils.
    Replaces all remaining "import tensorflow as tf" with "import tensorflow.compat.v1 as tf".
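
    For reference, a minimal sketch of the resulting import pattern (illustrative, not an excerpt from this change; the explicit disable call applies to TF1.x binaries and tests, per change 299221609 below):

```python
# Before the migration, files used:  import tensorflow as tf
import tensorflow.compat.v1 as tf

# TF1.x binaries and tests disable v2 behaviors explicitly.
tf.disable_v2_behavior()
```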

--
311766063  by Sergio Guadarrama:

    Removes explicit tf.compat.v1 in all call sites (we already import tf.compat.v1, so this code was doing tf.compat.v1.compat.v1). The existing code worked in the latest versions of TensorFlow (2.2 and 1.15), but not in 1.14 or 2.0.0a; this CL fixes it.

--
311624958  by Sergio Guadarrama:

    Updates the README that doesn't render properly in GitHub documentation

--
310980959  by Sergio Guadarrama:

    Moves research_models/slim off tf.contrib.slim/layers/framework to tf_slim

--
310263156  by Sergio Guadarrama:

    Adds model breakdown for MobilenetV3

--
308640516  by Sergio Guadarrama:

    Internal change

308244396  by Sergio Guadarrama:

    GroupNormalization support for MobilenetV3.

--
307475800  by Sergio Guadarrama:

    Internal change

--
302077708  by Sergio Guadarrama:

    Remove `disable_tf2` behavior from slim py_library targets

--
301208453  by Sergio Guadarrama:

    Automated refactoring to make code Python 3 compatible.

--
300816672  by Sergio Guadarrama:

    Internal change

299433840  by Sergio Guadarrama:

    Internal change

299221609  by Sergio Guadarrama:

    Explicitly disable TensorFlow v2 behaviors for all TF1.x binaries and tests

--
299179617  by Sergio Guadarrama:

    Internal change

299040784  by Sergio Guadarrama:

    Internal change

299036699  by Sergio Guadarrama:

    Internal change

298736510  by Sergio Guadarrama:

    Internal change

298732599  by Sergio Guadarrama:

    Internal change

298729507  by Sergio Guadarrama:

    Internal change

298253328  by Sergio Guadarrama:

    Internal change

297788346  by Sergio Guadarrama:

    Internal change

297785278  by Sergio Guadarrama:

    Internal change

297783127  by Sergio Guadarrama:

    Internal change

297725870  by Sergio Guadarrama:

    Internal change

297721811  by Sergio Guadarrama:

    Internal change

297711347  by Sergio Guadarrama:

    Internal change

297708059  by Sergio Guadarrama:

    Internal change

297701831  by Sergio Guadarrama:

    Internal change

297700038  by Sergio Guadarrama:

    Internal change

297670468  by Sergio Guadarrama:

    Internal change.

--
297350326  by Sergio Guadarrama:

    Explicitly replace "import tensorflow" with "tensorflow.compat.v1" for TF2.x migration

--
297201668  by Sergio Guadarrama:

    Explicitly replace "import tensorflow" with "tensorflow.compat.v1" for TF2.x migration

--
294483372  by Sergio Guadarrama:

    Internal change

PiperOrigin-RevId: 311933687

* Merged commit includes the following changes:
312578615  by Menglong Zhu:

    Modify the LSTM feature extractors to be Python 3 compatible.

--
311264357  by Menglong Zhu:

    Removes contrib.slim

--
308957207  by Menglong Zhu:

    Automated refactoring to make code Python 3 compatible.

--
306976470  by yongzhe:

    Internal change

306777559  by Menglong Zhu:

    Internal change

--
299232507  by lzyuan:

    Internal update.

--
299221735  by lzyuan:

    Add small epsilon on max_range for quantize_op to prevent range collapse.
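
    A hedged sketch of the idea (the helper and names below are hypothetical, not the actual change): when min_range equals max_range the quantization range collapses to a point, so the maximum is nudged up by a small epsilon.

```python
import tensorflow.compat.v1 as tf

def stable_quant_range(min_range, max_range, epsilon=1e-6):
  # Hypothetical helper: keep max_range strictly above min_range so the
  # quantization range cannot collapse when all values are identical.
  return min_range, tf.maximum(max_range, min_range + epsilon)
```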

--

PiperOrigin-RevId: 312578615

* Merged commit includes the following changes:
310447280  by lzc:

    Internal changes.

--

PiperOrigin-RevId: 310447280
Co-authored-by: Sergio Guadarrama <sguada@google.com>
Co-authored-by: Menglong Zhu <menglong@google.com>
parent 73b5be67
......@@ -15,7 +15,7 @@
"""Tests for object_detection.core.matcher."""
import numpy as np
import tensorflow as tf
import tensorflow.compat.v1 as tf
from object_detection.core import matcher
from object_detection.utils import test_case
......
......@@ -36,7 +36,7 @@ from abc import ABCMeta
from abc import abstractmethod
import six
import tensorflow as tf
import tensorflow.compat.v1 as tf
from object_detection.utils import ops
......
......@@ -16,7 +16,7 @@
"""Tests for google3.research.vale.object_detection.minibatch_sampler."""
import numpy as np
import tensorflow as tf
import tensorflow.compat.v1 as tf
from object_detection.core import minibatch_sampler
from object_detection.utils import test_case
......
......@@ -60,7 +60,7 @@ from __future__ import print_function
import abc
import six
import tensorflow as tf
import tensorflow.compat.v1 as tf
from object_detection.core import standard_fields as fields
......@@ -68,7 +68,7 @@ from object_detection.core import standard_fields as fields
# If using a new enough version of TensorFlow, detection models should be a
# tf module or keras model for tracking.
try:
_BaseClass = tf.Module
_BaseClass = tf.keras.layers.Layer
except AttributeError:
_BaseClass = object
......@@ -90,6 +90,8 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
self._num_classes = num_classes
self._groundtruth_lists = {}
super(DetectionModel, self).__init__()
@property
def num_classes(self):
return self._num_classes
......@@ -295,6 +297,7 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
groundtruth_weights_list=None,
groundtruth_confidences_list=None,
groundtruth_is_crowd_list=None,
groundtruth_group_of_list=None,
groundtruth_area_list=None,
is_annotated_list=None,
groundtruth_labeled_classes=None):
......@@ -328,14 +331,16 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
boxes.
groundtruth_is_crowd_list: A list of 1-D tf.bool tensors of shape
[num_boxes] containing is_crowd annotations.
groundtruth_group_of_list: A list of 1-D tf.bool tensors of shape
[num_boxes] containing group_of annotations.
groundtruth_area_list: A list of 1-D tf.float32 tensors of shape
[num_boxes] containing the area (in the original absolute coordinates)
of the annotations.
is_annotated_list: A list of scalar tf.bool tensors indicating whether
images have been labeled or not.
groundtruth_labeled_classes: A list of 1-D tf.float32 tensors of shape
[num_classes], containing label indices (1-indexed) of the classes that
are exhaustively annotated.
[num_classes], containing label indices encoded as k-hot of the classes
that are exhaustively annotated.
"""
self._groundtruth_lists[fields.BoxListFields.boxes] = groundtruth_boxes_list
self._groundtruth_lists[
......@@ -359,6 +364,9 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
if groundtruth_is_crowd_list:
self._groundtruth_lists[
fields.BoxListFields.is_crowd] = groundtruth_is_crowd_list
if groundtruth_group_of_list:
self._groundtruth_lists[
fields.BoxListFields.group_of] = groundtruth_group_of_list
if groundtruth_area_list:
self._groundtruth_lists[
fields.InputDataFields.groundtruth_area] = groundtruth_area_list
......@@ -418,3 +426,20 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
A list of update operators.
"""
pass
def call(self, images):
"""Returns detections from a batch of images.
This method calls the preprocess, predict and postprocess functions
sequentially and returns the output.
Args:
images: a [batch_size, height, width, channels] float tensor.
Returns:
detections: The dict of tensors returned by the postprocess function.
"""
preprocessed_images, shapes = self.preprocess(images)
prediction_dict = self.predict(preprocessed_images, shapes)
return self.postprocess(prediction_dict, shapes)
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for model API."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow.compat.v1 as tf
from object_detection.core import model
from object_detection.utils import test_case
class FakeModel(model.DetectionModel):
def __init__(self):
# sub-networks containing weights of different shapes.
self._network1 = tf.keras.Sequential([
tf.keras.layers.Conv2D(8, 1)
])
self._network2 = tf.keras.Sequential([
tf.keras.layers.Conv2D(16, 1)
])
super(FakeModel, self).__init__(num_classes=0)
def preprocess(self, images):
return images, tf.shape(images)
def predict(self, images, shapes):
return {'prediction': self._network2(self._network1(images))}
def postprocess(self, prediction_dict, shapes):
return prediction_dict
def loss(self):
return tf.constant(0.0)
def updates(self):
return []
def restore_map(self):
return {}
def regularization_losses(self):
return []
class ModelTest(test_case.TestCase):
def test_model_call(self):
detection_model = FakeModel()
def graph_fn():
return detection_model(tf.zeros((1, 128, 128, 3)))
result = self.execute(graph_fn, [])
self.assertEqual(result['prediction'].shape,
(1, 128, 128, 16))
def test_freeze(self):
detection_model = FakeModel()
detection_model(tf.zeros((1, 128, 128, 3)))
net1_var_shapes = [tuple(var.get_shape().as_list()) for var in
detection_model._network1.trainable_variables]
del detection_model
detection_model = FakeModel()
detection_model._network2.trainable = False
detection_model(tf.zeros((1, 128, 128, 3)))
var_shapes = [tuple(var.get_shape().as_list()) for var in
detection_model._network1.trainable_variables]
self.assertEqual(set(net1_var_shapes), set(var_shapes))
if __name__ == '__main__':
tf.test.main()
......@@ -15,7 +15,7 @@
"""Tests for tensorflow_models.object_detection.core.post_processing."""
import numpy as np
import tensorflow as tf
import tensorflow.compat.v1 as tf
from object_detection.core import post_processing
from object_detection.core import standard_fields as fields
from object_detection.utils import test_case
......
......@@ -22,7 +22,7 @@ import collections
import numpy as np
from six.moves import range
from six.moves import zip
import tensorflow as tf
import tensorflow.compat.v1 as tf
from object_detection.core import box_list
from object_detection.core import box_list_ops
......@@ -396,6 +396,7 @@ def multiclass_non_max_suppression(boxes,
use_partitioned_nms=False,
additional_fields=None,
soft_nms_sigma=0.0,
use_hard_nms=False,
scope=None):
"""Multi-class version of non maximum suppression.
......@@ -450,6 +451,7 @@ def multiclass_non_max_suppression(boxes,
`soft_nms_sigma=0.0` (which is default), we fall back to standard (hard)
NMS. Soft NMS is currently only supported when pad_to_max_output_size is
False.
use_hard_nms: Enforce the usage of hard NMS.
scope: name scope.
Returns:
......@@ -536,7 +538,7 @@ def multiclass_non_max_suppression(boxes,
max_selection_size = tf.minimum(max_size_per_class,
boxlist_and_class_scores.num_boxes())
if (hasattr(tf.image, 'non_max_suppression_with_scores') and
tf.compat.forward_compatible(2019, 6, 6)):
tf.compat.forward_compatible(2019, 6, 6) and not use_hard_nms):
(selected_indices, selected_scores
) = tf.image.non_max_suppression_with_scores(
boxlist_and_class_scores.get(),
......@@ -852,7 +854,8 @@ def batch_multiclass_non_max_suppression(boxes,
use_class_agnostic_nms=False,
max_classes_per_detection=1,
use_dynamic_map_fn=False,
use_combined_nms=False):
use_combined_nms=False,
use_hard_nms=False):
"""Multi-class version of non maximum suppression that operates on a batch.
This op is similar to `multiclass_non_max_suppression` but operates on a batch
......@@ -923,6 +926,7 @@ def batch_multiclass_non_max_suppression(boxes,
calling this function.
Masks and additional fields are not supported.
See argument checks in the code below for unsupported arguments.
use_hard_nms: Enforce the usage of hard NMS.
Returns:
'nmsed_boxes': A [batch_size, max_detections, 4] float32 tensor
......@@ -966,11 +970,9 @@ def batch_multiclass_non_max_suppression(boxes,
'clip_window is not supported by combined_nms unless it is'
' [0. 0. 1. 1.] for each image.')
if additional_fields is not None:
tf.logging.warning(
'additional_fields is not supported by combined_nms.')
tf.logging.warning('additional_fields is not supported by combined_nms.')
if parallel_iterations != 32:
tf.logging.warning(
'Number of batch items to be processed in parallel is'
tf.logging.warning('Number of batch items to be processed in parallel is'
' not configurable by combined_nms.')
if max_classes_per_detection > 1:
tf.logging.warning(
......@@ -1009,7 +1011,7 @@ def batch_multiclass_non_max_suppression(boxes,
# in _single_image_nms_fn(). The dictionary is thus a sorted version of
# additional_fields.
if additional_fields is None:
ordered_additional_fields = {}
ordered_additional_fields = collections.OrderedDict()
else:
ordered_additional_fields = collections.OrderedDict(
sorted(additional_fields.items(), key=lambda item: item[0]))
......@@ -1159,7 +1161,8 @@ def batch_multiclass_non_max_suppression(boxes,
pad_to_max_output_size=use_static_shapes,
use_partitioned_nms=use_partitioned_nms,
additional_fields=per_image_additional_fields,
soft_nms_sigma=soft_nms_sigma)
soft_nms_sigma=soft_nms_sigma,
use_hard_nms=use_hard_nms)
if not use_static_shapes:
nmsed_boxlist = box_list_ops.pad_or_clip_box_list(
......
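A hedged usage sketch of the new use_hard_nms flag (tensor values and thresholds are placeholders, not from this diff). Setting use_hard_nms=True bypasses the tf.image.non_max_suppression_with_scores path even when it is available; the same flag is threaded through batch_multiclass_non_max_suppression.

```python
import tensorflow.compat.v1 as tf
from object_detection.core import post_processing

boxes = tf.zeros([100, 1, 4])          # [k, q, 4]; q=1 shares boxes across classes
scores = tf.random.uniform([100, 5])   # [k, num_classes]
nmsed_boxlist = post_processing.multiclass_non_max_suppression(
    boxes, scores,
    score_thresh=0.5,
    iou_thresh=0.6,
    max_size_per_class=10,
    use_hard_nms=True)  # force standard (hard) NMS
```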
......@@ -14,7 +14,7 @@
# ==============================================================================
"""Provides functions to prefetch tensors to feed into models."""
import tensorflow as tf
import tensorflow.compat.v1 as tf
from object_detection.utils import tf_version
if not tf_version.is_tf1():
......
......@@ -19,12 +19,12 @@ from __future__ import division
from __future__ import print_function
from six.moves import range
import tensorflow as tf
import tensorflow.compat.v1 as tf
# pylint: disable=g-bad-import-order,
from object_detection.core import prefetcher
from tensorflow.contrib import slim as contrib_slim
slim = contrib_slim
import tf_slim as slim
# pylint: disable=g-bad-import-order
......
......@@ -74,7 +74,7 @@ import sys
import six
from six.moves import range
from six.moves import zip
import tensorflow as tf
import tensorflow.compat.v1 as tf
from tensorflow.python.ops import control_flow_ops
from object_detection.core import box_list
......@@ -1603,6 +1603,7 @@ def random_crop_image(image,
def random_pad_image(image,
boxes,
masks=None,
keypoints=None,
min_image_size=None,
max_image_size=None,
......@@ -1625,6 +1626,9 @@ def random_pad_image(image,
Boxes are in normalized form meaning their coordinates vary
between [0, 1].
Each row is in the form of [ymin, xmin, ymax, xmax].
masks: (optional) rank 3 float32 tensor with shape
[N, height, width] containing instance masks. The masks
are of the same height, width as the input `image`.
keypoints: (optional) rank 3 float32 tensor with shape
[N, num_keypoints, 2]. The keypoints are in y-x normalized
coordinates.
......@@ -1648,6 +1652,8 @@ def random_pad_image(image,
boxes: boxes which is the same rank as input boxes. Boxes are in normalized
form.
if masks is not None, the function also returns:
masks: rank 3 float32 tensor with shape [N, new_height, new_width]
if keypoints is not None, the function also returns:
keypoints: rank 3 float32 tensor with shape [N, num_keypoints, 2]
"""
......@@ -1728,6 +1734,15 @@ def random_pad_image(image,
result = [new_image, new_boxes]
if masks is not None:
new_masks = tf.image.pad_to_bounding_box(
masks[:, :, :, tf.newaxis],
offset_height=offset_height,
offset_width=offset_width,
target_height=target_height,
target_width=target_width)[:, :, :, 0]
result.append(new_masks)
if keypoints is not None:
new_keypoints = keypoint_ops.change_coordinate_frame(keypoints, new_window)
result.append(new_keypoints)
......@@ -1737,6 +1752,7 @@ def random_pad_image(image,
def random_absolute_pad_image(image,
boxes,
masks=None,
keypoints=None,
max_height_padding=None,
max_width_padding=None,
......@@ -1756,6 +1772,9 @@ def random_absolute_pad_image(image,
Boxes are in normalized form meaning their coordinates vary
between [0, 1].
Each row is in the form of [ymin, xmin, ymax, xmax].
masks: (optional) rank 3 float32 tensor with shape
[N, height, width] containing instance masks. The masks
are of the same height, width as the input `image`.
keypoints: (optional) rank 3 float32 tensor with shape
[N, num_keypoints, 2]. The keypoints are in y-x normalized
coordinates.
......@@ -1778,6 +1797,10 @@ def random_absolute_pad_image(image,
image: Image shape will be [new_height, new_width, channels].
boxes: boxes which is the same rank as input boxes. Boxes are in normalized
form.
if masks is not None, the function also returns:
masks: rank 3 float32 tensor with shape [N, new_height, new_width]
if keypoints is not None, the function also returns:
keypoints: rank 3 float32 tensor with shape [N, num_keypoints, 2]
"""
min_image_size = tf.shape(image)[:2]
max_image_size = min_image_size + tf.cast(
......@@ -1785,6 +1808,7 @@ def random_absolute_pad_image(image,
return random_pad_image(
image,
boxes,
masks=masks,
keypoints=keypoints,
min_image_size=min_image_size,
max_image_size=max_image_size,
......@@ -4060,10 +4084,12 @@ def get_default_func_arg_map(include_label_weights=True,
groundtruth_keypoint_visibilities),
random_pad_image:
(fields.InputDataFields.image,
fields.InputDataFields.groundtruth_boxes, groundtruth_keypoints),
fields.InputDataFields.groundtruth_boxes, groundtruth_instance_masks,
groundtruth_keypoints),
random_absolute_pad_image:
(fields.InputDataFields.image,
fields.InputDataFields.groundtruth_boxes, groundtruth_keypoints),
fields.InputDataFields.groundtruth_boxes, groundtruth_instance_masks,
groundtruth_keypoints),
random_crop_pad_image: (fields.InputDataFields.image,
fields.InputDataFields.groundtruth_boxes,
fields.InputDataFields.groundtruth_classes,
......
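A hedged sketch of the new masks passthrough in random_pad_image (shapes and values are illustrative). The padded masks come back with the same spatial size as the padded image:

```python
import tensorflow.compat.v1 as tf
from object_detection.core import preprocessor

image = tf.zeros([128, 128, 3], dtype=tf.float32)
boxes = tf.constant([[0.1, 0.1, 0.5, 0.5]], dtype=tf.float32)
masks = tf.zeros([1, 128, 128], dtype=tf.float32)  # [N, height, width]

# Returns [new_image, new_boxes, new_masks] when masks are provided.
new_image, new_boxes, new_masks = preprocessor.random_pad_image(
    image, boxes, masks=masks)
```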
......@@ -24,7 +24,7 @@ import numpy as np
import six
from six.moves import range
from six.moves import zip
import tensorflow as tf
import tensorflow.compat.v1 as tf
from object_detection.core import preprocessor
from object_detection.core import preprocessor_cache
......@@ -2268,7 +2268,7 @@ class PreprocessorTest(test_case.TestCase, parameterized.TestCase):
self.assertTrue(np.all((boxes_[:, 3] - boxes_[:, 1]) >= (
padded_boxes_[:, 3] - padded_boxes_[:, 1])))
def testRandomPadImageWithKeypoints(self):
def testRandomPadImageWithKeypointsAndMasks(self):
def graph_fn():
preprocessing_options = [(preprocessor.normalize_image, {
'original_minval': 0,
......@@ -2280,45 +2280,57 @@ class PreprocessorTest(test_case.TestCase, parameterized.TestCase):
images = self.createTestImages()
boxes = self.createTestBoxes()
labels = self.createTestLabels()
masks = self.createTestMasks()
keypoints, _ = self.createTestKeypoints()
tensor_dict = {
fields.InputDataFields.image: images,
fields.InputDataFields.groundtruth_boxes: boxes,
fields.InputDataFields.groundtruth_classes: labels,
fields.InputDataFields.groundtruth_instance_masks: masks,
fields.InputDataFields.groundtruth_keypoints: keypoints,
}
tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
images = tensor_dict[fields.InputDataFields.image]
preprocessing_options = [(preprocessor.random_pad_image, {})]
func_arg_map = preprocessor.get_default_func_arg_map(
include_instance_masks=True,
include_keypoints=True,
include_keypoint_visibilities=True)
padded_tensor_dict = preprocessor.preprocess(tensor_dict,
preprocessing_options)
preprocessing_options,
func_arg_map=func_arg_map)
padded_images = padded_tensor_dict[fields.InputDataFields.image]
padded_boxes = padded_tensor_dict[
fields.InputDataFields.groundtruth_boxes]
padded_masks = padded_tensor_dict[
fields.InputDataFields.groundtruth_instance_masks]
padded_keypoints = padded_tensor_dict[
fields.InputDataFields.groundtruth_keypoints]
boxes_shape = tf.shape(boxes)
padded_boxes_shape = tf.shape(padded_boxes)
padded_masks_shape = tf.shape(padded_masks)
keypoints_shape = tf.shape(keypoints)
padded_keypoints_shape = tf.shape(padded_keypoints)
images_shape = tf.shape(images)
padded_images_shape = tf.shape(padded_images)
return [boxes_shape, padded_boxes_shape, keypoints_shape,
padded_keypoints_shape, images_shape, padded_images_shape, boxes,
padded_boxes, keypoints, padded_keypoints]
(boxes_shape_, padded_boxes_shape_, keypoints_shape_,
padded_keypoints_shape_, images_shape_, padded_images_shape_, boxes_,
padded_boxes_, keypoints_, padded_keypoints_) = self.execute_cpu(graph_fn,
[])
return [boxes_shape, padded_boxes_shape, padded_masks_shape,
keypoints_shape, padded_keypoints_shape, images_shape,
padded_images_shape, boxes, padded_boxes, keypoints,
padded_keypoints]
(boxes_shape_, padded_boxes_shape_, padded_masks_shape_,
keypoints_shape_, padded_keypoints_shape_, images_shape_,
padded_images_shape_, boxes_, padded_boxes_,
keypoints_, padded_keypoints_) = self.execute_cpu(graph_fn, [])
self.assertAllEqual(boxes_shape_, padded_boxes_shape_)
self.assertAllEqual(keypoints_shape_, padded_keypoints_shape_)
self.assertTrue((images_shape_[1] >= padded_images_shape_[1] * 0.5).all())
self.assertTrue((images_shape_[2] >= padded_images_shape_[2] * 0.5).all())
self.assertTrue((images_shape_[1] <= padded_images_shape_[1]).all())
self.assertTrue((images_shape_[2] <= padded_images_shape_[2]).all())
self.assertAllEqual(padded_masks_shape_[1:3], padded_images_shape_[1:3])
self.assertTrue(np.all((boxes_[:, 2] - boxes_[:, 0]) >= (
padded_boxes_[:, 2] - padded_boxes_[:, 0])))
self.assertTrue(np.all((boxes_[:, 3] - boxes_[:, 1]) >= (
......
......@@ -26,7 +26,7 @@ from abc import ABCMeta
from abc import abstractmethod
import six
import tensorflow as tf
import tensorflow.compat.v1 as tf
from object_detection.core import box_list_ops
from object_detection.core import standard_fields as fields
......
......@@ -14,7 +14,7 @@
# ==============================================================================
"""Tests for region_similarity_calculator."""
import tensorflow as tf
import tensorflow.compat.v1 as tf
from object_detection.core import box_list
from object_detection.core import region_similarity_calculator
......
......@@ -76,6 +76,9 @@ class InputDataFields(object):
context_features, used for reshaping.
valid_context_size: the valid context size, used in filtering the padded
context features.
image_format: format for the images, used to decode
image_height: height of images, used to decode
image_width: width of images, used to decode
"""
image = 'image'
image_additional_channels = 'image_additional_channels'
......@@ -112,6 +115,10 @@ class InputDataFields(object):
context_features = 'context_features'
context_feature_length = 'context_feature_length'
valid_context_size = 'valid_context_size'
image_timestamps = 'image_timestamps'
image_format = 'image_format'
image_height = 'image_height'
image_width = 'image_width'
class DetectionResultFields(object):
......@@ -182,6 +189,7 @@ class BoxListFields(object):
keypoint_visibilities = 'keypoint_visibilities'
keypoint_heatmaps = 'keypoint_heatmaps'
is_crowd = 'is_crowd'
group_of = 'group_of'
class PredictionFields(object):
......@@ -279,3 +287,14 @@ class TfExampleFields(object):
detection_bbox_ymax = 'image/detection/bbox/ymax'
detection_bbox_xmax = 'image/detection/bbox/xmax'
detection_score = 'image/detection/score'
# Sequence fields for SequenceExample inputs.
# All others are considered context fields.
SEQUENCE_FIELDS = [InputDataFields.image,
InputDataFields.source_id,
InputDataFields.groundtruth_boxes,
InputDataFields.num_groundtruth_boxes,
InputDataFields.groundtruth_classes,
InputDataFields.groundtruth_weights,
InputDataFields.source_id,
InputDataFields.is_annotated]
......@@ -37,7 +37,8 @@ from __future__ import print_function
from six.moves import range
from six.moves import zip
import tensorflow as tf
import tensorflow.compat.v1 as tf
import tensorflow.compat.v2 as tf2
from object_detection.box_coders import faster_rcnn_box_coder
from object_detection.box_coders import mean_stddev_box_coder
......@@ -54,6 +55,8 @@ from object_detection.utils import shape_utils
from object_detection.utils import target_assigner_utils as ta_utils
ResizeMethod = tf2.image.ResizeMethod
_DEFAULT_KEYPOINT_OFFSET_STD_DEV = 1.0
......
......@@ -15,7 +15,7 @@
"""Tests for object_detection.core.target_assigner."""
import numpy as np
import tensorflow as tf
import tensorflow.compat.v1 as tf
from object_detection.box_coders import keypoint_box_coder
from object_detection.box_coders import mean_stddev_box_coder
......@@ -1230,6 +1230,6 @@ class CreateTargetAssignerTest(tf.test.TestCase):
if __name__ == '__main__':
tf.enable_v2_behavior()
tf.test.main()
item {
id: 1
name: 'human'
}
item {
id: 2
name: 'gazelleGrants'
}
item {
id: 3
name: 'reedbuck'
}
item {
id: 4
name: 'dikDik'
}
item {
id: 5
name: 'zebra'
}
item {
id: 6
name: 'porcupine'
}
item {
id: 7
name: 'gazelleThomsons'
}
item {
id: 8
name: 'hyenaSpotted'
}
item {
id: 9
name: 'warthog'
}
item {
id: 10
name: 'impala'
}
item {
id: 11
name: 'elephant'
}
item {
id: 12
name: 'giraffe'
}
item {
id: 13
name: 'mongoose'
}
item {
id: 14
name: 'buffalo'
}
item {
id: 15
name: 'hartebeest'
}
item {
id: 16
name: 'guineaFowl'
}
item {
id: 17
name: 'wildebeest'
}
item {
id: 18
name: 'leopard'
}
item {
id: 19
name: 'ostrich'
}
item {
id: 20
name: 'lionFemale'
}
item {
id: 21
name: 'koriBustard'
}
item {
id: 22
name: 'otherBird'
}
item {
id: 23
name: 'batEaredFox'
}
item {
id: 24
name: 'bushbuck'
}
item {
id: 25
name: 'jackal'
}
item {
id: 26
name: 'cheetah'
}
item {
id: 27
name: 'eland'
}
item {
id: 28
name: 'aardwolf'
}
item {
id: 29
name: 'hippopotamus'
}
item {
id: 30
name: 'hyenaStriped'
}
item {
id: 31
name: 'aardvark'
}
item {
id: 32
name: 'hare'
}
item {
id: 33
name: 'baboon'
}
item {
id: 34
name: 'vervetMonkey'
}
item {
id: 35
name: 'waterbuck'
}
item {
id: 36
name: 'secretaryBird'
}
item {
id: 37
name: 'serval'
}
item {
id: 38
name: 'lionMale'
}
item {
id: 39
name: 'topi'
}
item {
id: 40
name: 'honeyBadger'
}
item {
id: 41
name: 'rodents'
}
item {
id: 42
name: 'wildcat'
}
item {
id: 43
name: 'civet'
}
item {
id: 44
name: 'genet'
}
item {
id: 45
name: 'caracal'
}
item {
id: 46
name: 'rhinoceros'
}
item {
id: 47
name: 'reptiles'
}
item {
id: 48
name: 'zorilla'
}
......@@ -24,8 +24,8 @@ from __future__ import print_function
import enum
import numpy as np
from six.moves import zip
import tensorflow as tf
import tensorflow.compat.v1 as tf
from tf_slim import tfexample_decoder as slim_example_decoder
from object_detection.core import data_decoder
from object_detection.core import standard_fields as fields
from object_detection.protos import input_reader_pb2
......@@ -34,12 +34,14 @@ from object_detection.utils import label_map_util
# pylint: disable=g-import-not-at-top
try:
from tensorflow.contrib import lookup as contrib_lookup
from tensorflow.contrib.slim import tfexample_decoder as slim_example_decoder
except ImportError:
# TF 2.0 doesn't ship with contrib.
pass
# pylint: enable=g-import-not-at-top
_LABEL_OFFSET = 1
class Visibility(enum.Enum):
"""Visibility definitions.
......@@ -167,7 +169,8 @@ class TfExampleDecoder(data_decoder.DataDecoder):
num_keypoints=0,
num_additional_channels=0,
load_multiclass_scores=False,
load_context_features=False):
load_context_features=False,
expand_hierarchy_labels=False):
"""Constructor sets keys_to_features and items_to_handlers.
Args:
......@@ -193,12 +196,18 @@ class TfExampleDecoder(data_decoder.DataDecoder):
boxes.
load_context_features: Whether to load information from context_features,
to provide additional context to a detection model for training and/or
inference
inference.
expand_hierarchy_labels: Expands the object and image labels taking into
account the provided hierarchy in the label_map_proto_file. For positive
classes, the labels are extended to ancestors. For negative classes,
the labels are expanded to descendants.
Raises:
ValueError: If `instance_mask_type` option is not one of
input_reader_pb2.DEFAULT, input_reader_pb2.NUMERICAL, or
input_reader_pb2.PNG_MASKS.
ValueError: If `expand_labels_hierarchy` is True, but the
`label_map_proto_file` is not provided.
"""
# TODO(rathodv): delete unused `use_display_name` argument once we change
# other decoders to handle label maps similarly.
......@@ -385,6 +394,20 @@ class TfExampleDecoder(data_decoder.DataDecoder):
self.items_to_handlers[
fields.InputDataFields.groundtruth_image_classes] = image_label_handler
self._expand_hierarchy_labels = expand_hierarchy_labels
self._ancestors_lut = None
self._descendants_lut = None
if expand_hierarchy_labels:
if label_map_proto_file:
ancestors_lut, descendants_lut = (
label_map_util.get_label_map_hierarchy_lut(label_map_proto_file,
True))
self._ancestors_lut = tf.constant(ancestors_lut, dtype=tf.int64)
self._descendants_lut = tf.constant(descendants_lut, dtype=tf.int64)
else:
raise ValueError('In order to expand labels, the label_map_proto_file '
'has to be provided.')
def decode(self, tf_example_string_tensor):
"""Decodes serialized tensorflow example and returns a tensor dictionary.
......@@ -432,7 +455,7 @@ class TfExampleDecoder(data_decoder.DataDecoder):
tensor of shape [None, num_keypoints] containing keypoint visibilities.
fields.InputDataFields.groundtruth_instance_masks - 3D float32 tensor of
shape [None, None, None] containing instance masks.
fields.InputDataFields.groundtruth_image_classes - 1D uint64 of shape
fields.InputDataFields.groundtruth_image_classes - 1D int64 of shape
[None] containing classes for the boxes.
fields.InputDataFields.multiclass_scores - 1D float32 tensor of shape
[None * num_classes] containing flattened multiclass scores for
......@@ -484,6 +507,46 @@ class TfExampleDecoder(data_decoder.DataDecoder):
tensor_dict[gt_kpt_fld],
np.nan * tf.ones_like(tensor_dict[gt_kpt_fld]))
if self._expand_hierarchy_labels:
input_fields = fields.InputDataFields
image_classes, image_confidences = self._expand_image_label_hierarchy(
tensor_dict[input_fields.groundtruth_image_classes],
tensor_dict[input_fields.groundtruth_image_confidences])
tensor_dict[input_fields.groundtruth_image_classes] = image_classes
tensor_dict[input_fields.groundtruth_image_confidences] = (
image_confidences)
box_fields = [
fields.InputDataFields.groundtruth_group_of,
fields.InputDataFields.groundtruth_is_crowd,
fields.InputDataFields.groundtruth_difficult,
fields.InputDataFields.groundtruth_area,
fields.InputDataFields.groundtruth_boxes,
fields.InputDataFields.groundtruth_weights,
]
def expand_field(field_name):
return self._expansion_box_field_labels(
tensor_dict[input_fields.groundtruth_classes],
tensor_dict[field_name])
# pylint: disable=cell-var-from-loop
for field in box_fields:
if field in tensor_dict:
tensor_dict[field] = tf.cond(
tf.size(tensor_dict[field]) > 0, lambda: expand_field(field),
lambda: tensor_dict[field])
# pylint: enable=cell-var-from-loop
tensor_dict[input_fields.groundtruth_classes] = (
self._expansion_box_field_labels(
tensor_dict[input_fields.groundtruth_classes],
tensor_dict[input_fields.groundtruth_classes], True))
if fields.InputDataFields.groundtruth_group_of in tensor_dict:
group_of = fields.InputDataFields.groundtruth_group_of
tensor_dict[group_of] = tf.cast(tensor_dict[group_of], dtype=tf.bool)
return tensor_dict
def _reshape_keypoints(self, keys_to_tensors):
......@@ -633,3 +696,69 @@ class TfExampleDecoder(data_decoder.DataDecoder):
tf.greater(tf.size(png_masks), 0),
lambda: tf.map_fn(decode_png_mask, png_masks, dtype=tf.float32),
lambda: tf.zeros(tf.cast(tf.stack([0, height, width]), dtype=tf.int32)))
def _expand_image_label_hierarchy(self, image_classes, image_confidences):
"""Expand image level labels according to the hierarchy.
Args:
image_classes: Int64 tensor with the image level class ids for a sample.
image_confidences: Float tensor signaling whether a class id is present in
the image (1.0) or not present (0.0).
Returns:
new_image_classes: Int64 tensor equal to expanding image_classes.
new_image_confidences: Float tensor equal to expanding image_confidences.
"""
def expand_labels(relation_tensor, confidence_value):
"""Expand to ancestors or descendants depending on arguments."""
mask = tf.equal(image_confidences, confidence_value)
target_image_classes = tf.boolean_mask(image_classes, mask)
expanded_indices = tf.reduce_any((tf.gather(
relation_tensor, target_image_classes - _LABEL_OFFSET, axis=0) > 0),
axis=0)
expanded_indices = tf.where(expanded_indices)[:, 0] + _LABEL_OFFSET
new_groundtruth_image_classes = (
tf.concat([
tf.boolean_mask(image_classes, tf.logical_not(mask)),
expanded_indices,
],
axis=0))
new_groundtruth_image_confidences = (
tf.concat([
tf.boolean_mask(image_confidences, tf.logical_not(mask)),
tf.ones([tf.shape(expanded_indices)[0]],
dtype=image_confidences.dtype) * confidence_value,
],
axis=0))
return new_groundtruth_image_classes, new_groundtruth_image_confidences
image_classes, image_confidences = expand_labels(self._ancestors_lut, 1.0)
new_image_classes, new_image_confidences = expand_labels(
self._descendants_lut, 0.0)
return new_image_classes, new_image_confidences
def _expansion_box_field_labels(self,
object_classes,
object_field,
copy_class_id=False):
"""Expand the labels of a specific object field according to the hierarchy.
Args:
object_classes: Int64 tensor with the class id for each element in
object_field.
object_field: Tensor to be expanded.
copy_class_id: Boolean to choose whether to use class id values in the
output tensor instead of replicating the original values.
Returns:
A tensor with the result of expanding object_field.
"""
expanded_indices = tf.gather(
self._ancestors_lut, object_classes - _LABEL_OFFSET, axis=0)
if copy_class_id:
new_object_field = tf.where(expanded_indices > 0)[:, 1] + _LABEL_OFFSET
else:
new_object_field = tf.repeat(
object_field, tf.reduce_sum(expanded_indices, axis=1), axis=0)
return new_object_field
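A hedged usage sketch of the new expand_hierarchy_labels option (the label map path is hypothetical; the label map must encode the class hierarchy consumed by label_map_util.get_label_map_hierarchy_lut):

```python
import tensorflow.compat.v1 as tf
from object_detection.data_decoders import tf_example_decoder

decoder = tf_example_decoder.TfExampleDecoder(
    label_map_proto_file='path/to/hierarchical_label_map.pbtxt',
    expand_hierarchy_labels=True)
serialized = tf.placeholder(tf.string, shape=[])
# Positive labels gain their ancestors; negative labels gain descendants.
tensor_dict = decoder.decode(serialized)
```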
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Sequence example decoder for object detection."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from six.moves import zip
import tensorflow.compat.v1 as tf
from tf_slim import tfexample_decoder as slim_example_decoder
from object_detection.core import data_decoder
from object_detection.core import standard_fields as fields
from object_detection.utils import label_map_util
# pylint: disable=g-import-not-at-top
try:
from tensorflow.contrib import lookup as contrib_lookup
except ImportError:
# TF 2.0 doesn't ship with contrib.
pass
# pylint: enable=g-import-not-at-top
class _ClassTensorHandler(slim_example_decoder.Tensor):
"""An ItemHandler to fetch class ids from class text."""
def __init__(self,
tensor_key,
label_map_proto_file,
shape_keys=None,
shape=None,
default_value=''):
"""Initializes the LookupTensor handler.
Simply calls a vocabulary (most often, a label mapping) lookup.
Args:
tensor_key: the name of the `TFExample` feature to read the tensor from.
label_map_proto_file: File path to a text format LabelMapProto message
mapping class text to id.
shape_keys: Optional name or list of names of the TF-Example feature in
which the tensor shape is stored. If a list, then each corresponds to
one dimension of the shape.
shape: Optional output shape of the `Tensor`. If provided, the `Tensor` is
reshaped accordingly.
default_value: The value used when the `tensor_key` is not found in a
particular `TFExample`.
Raises:
ValueError: if both `shape_keys` and `shape` are specified.
"""
name_to_id = label_map_util.get_label_map_dict(
label_map_proto_file, use_display_name=False)
# We use a default_value of -1, but we expect all labels to be contained
# in the label map.
try:
# Dynamically try to load the tf v2 lookup, falling back to contrib
lookup = tf.compat.v2.lookup
hash_table_class = tf.compat.v2.lookup.StaticHashTable
except AttributeError:
lookup = contrib_lookup
hash_table_class = contrib_lookup.HashTable
name_to_id_table = hash_table_class(
initializer=lookup.KeyValueTensorInitializer(
keys=tf.constant(list(name_to_id.keys())),
values=tf.constant(list(name_to_id.values()), dtype=tf.int64)),
default_value=-1)
self._name_to_id_table = name_to_id_table
super(_ClassTensorHandler, self).__init__(tensor_key, shape_keys, shape,
default_value)
def tensors_to_item(self, keys_to_tensors):
unmapped_tensor = super(_ClassTensorHandler,
self).tensors_to_item(keys_to_tensors)
return self._name_to_id_table.lookup(unmapped_tensor)
class TfSequenceExampleDecoder(data_decoder.DataDecoder):
"""Tensorflow Sequence Example proto decoder for Object Detection.
Sequence examples contain sequences of images which share common
features. The structure of TfSequenceExamples can be seen in
dataset_tools/seq_example_util.py
For the TFODAPI, the following fields are required:
Shared features:
'image/format'
'image/height'
'image/width'
Features with an entry for each image, where bounding box features can
be empty lists if the image does not contain any objects:
'image/encoded'
'image/source_id'
'region/bbox/xmin'
'region/bbox/xmax'
'region/bbox/ymin'
'region/bbox/ymax'
'region/label/string'
Optionally, the sequence example can include context_features for use in
Context R-CNN (see https://arxiv.org/abs/1912.03538):
'image/context_features'
'image/context_feature_length'
"""
def __init__(self,
label_map_proto_file,
load_context_features=False,
use_display_name=False,
fully_annotated=False):
"""Constructs `TfSequenceExampleDecoder` object.
Args:
label_map_proto_file: a file path to a
object_detection.protos.StringIntLabelMap proto. The
label map will be used to map IDs of 'region/label/string'.
It is assumed that 'region/label/string' will be in the data.
load_context_features: Whether to load information from context_features,
to provide additional context to a detection model for training and/or
inference
use_display_name: whether or not to use the `display_name` for label
mapping (instead of `name`). Only used if label_map_proto_file is
provided.
fully_annotated: If True, will assume that every frame (whether it has
boxes or not), has been fully annotated. If False, a
'region/is_annotated' field must be provided in the dataset which
indicates which frames have annotations. Default False.
"""
# Specifies how the tf.SequenceExamples are decoded.
self._context_keys_to_features = {
'image/format': tf.FixedLenFeature((), tf.string, default_value='jpeg'),
'image/height': tf.FixedLenFeature((), tf.int64),
'image/width': tf.FixedLenFeature((), tf.int64),
}
self._sequence_keys_to_feature_lists = {
'image/encoded': tf.FixedLenSequenceFeature([], dtype=tf.string),
'image/source_id': tf.FixedLenSequenceFeature([], dtype=tf.string),
'region/bbox/xmin': tf.VarLenFeature(dtype=tf.float32),
'region/bbox/xmax': tf.VarLenFeature(dtype=tf.float32),
'region/bbox/ymin': tf.VarLenFeature(dtype=tf.float32),
'region/bbox/ymax': tf.VarLenFeature(dtype=tf.float32),
'region/label/string': tf.VarLenFeature(dtype=tf.string),
'region/label/confidence': tf.VarLenFeature(dtype=tf.float32),
}
self._items_to_handlers = {
# Context.
fields.InputDataFields.image_height:
slim_example_decoder.Tensor('image/height'),
fields.InputDataFields.image_width:
slim_example_decoder.Tensor('image/width'),
# Sequence.
fields.InputDataFields.num_groundtruth_boxes:
slim_example_decoder.NumBoxesSequence('region/bbox/xmin'),
fields.InputDataFields.groundtruth_boxes:
slim_example_decoder.BoundingBoxSequence(
prefix='region/bbox/', default_value=0.0),
fields.InputDataFields.groundtruth_weights:
slim_example_decoder.Tensor('region/label/confidence'),
}
# If the dataset is sparsely annotated, parse sequence features which
# indicate which frames have been labeled.
if not fully_annotated:
self._sequence_keys_to_feature_lists['region/is_annotated'] = (
tf.FixedLenSequenceFeature([], dtype=tf.int64))
self._items_to_handlers[fields.InputDataFields.is_annotated] = (
slim_example_decoder.Tensor('region/is_annotated'))
self._items_to_handlers[fields.InputDataFields.image] = (
slim_example_decoder.Tensor('image/encoded'))
self._items_to_handlers[fields.InputDataFields.source_id] = (
slim_example_decoder.Tensor('image/source_id'))
label_handler = _ClassTensorHandler(
'region/label/string', label_map_proto_file, default_value='')
self._items_to_handlers[
fields.InputDataFields.groundtruth_classes] = label_handler
if load_context_features:
self._context_keys_to_features['image/context_features'] = (
tf.VarLenFeature(dtype=tf.float32))
self._items_to_handlers[fields.InputDataFields.context_features] = (
slim_example_decoder.ItemHandlerCallback(
['image/context_features', 'image/context_feature_length'],
self._reshape_context_features))
self._context_keys_to_features['image/context_feature_length'] = (
tf.FixedLenFeature((), tf.int64))
self._items_to_handlers[fields.InputDataFields.context_feature_length] = (
slim_example_decoder.Tensor('image/context_feature_length'))
self._fully_annotated = fully_annotated
def decode(self, tf_seq_example_string_tensor):
"""Decodes serialized `tf.SequenceExample`s and returns a tensor dictionary.
Args:
tf_seq_example_string_tensor: a string tensor holding a serialized
`tf.SequenceExample`.
Returns:
A list of dictionaries with (at least) the following tensors:
fields.InputDataFields.source_id: a [num_frames] string tensor with a
unique ID for each frame.
fields.InputDataFields.num_groundtruth_boxes: a [num_frames] int32 tensor
specifying the number of boxes in each frame.
fields.InputDataFields.groundtruth_boxes: a [num_frames, num_boxes, 4]
float32 tensor with bounding boxes for each frame. Note that num_boxes
is the maximum boxes seen in any individual frame. Any frames with fewer
boxes are padded with 0.0.
fields.InputDataFields.groundtruth_classes: a [num_frames, num_boxes]
int32 tensor with class indices for each box in each frame.
fields.InputDataFields.groundtruth_weights: a [num_frames, num_boxes]
float32 tensor with weights of the groundtruth boxes.
fields.InputDataFields.is_annotated: a [num_frames] bool tensor specifying
whether the image was annotated or not. If False, the corresponding
entries in the groundtruth tensor will be ignored.
fields.InputDataFields.context_features - 1D float32 tensor of shape
[context_feature_length * num_context_features]
fields.InputDataFields.context_feature_length - int32 tensor specifying
the length of each feature in context_features
fields.InputDataFields.image: a [num_frames] string tensor with
the encoded images.
"""
serialized_example = tf.reshape(tf_seq_example_string_tensor, shape=[])
decoder = slim_example_decoder.TFSequenceExampleDecoder(
self._context_keys_to_features, self._sequence_keys_to_feature_lists,
self._items_to_handlers)
keys = decoder.list_items()
tensors = decoder.decode(serialized_example, items=keys)
tensor_dict = dict(list(zip(keys, tensors)))
tensor_dict[fields.InputDataFields.groundtruth_boxes].set_shape(
[None, None, 4])
tensor_dict[fields.InputDataFields.num_groundtruth_boxes] = tf.cast(
tensor_dict[fields.InputDataFields.num_groundtruth_boxes],
dtype=tf.int32)
tensor_dict[fields.InputDataFields.groundtruth_classes] = tf.cast(
tensor_dict[fields.InputDataFields.groundtruth_classes], dtype=tf.int32)
tensor_dict[fields.InputDataFields.original_image_spatial_shape] = tf.cast(
tf.stack([
tensor_dict[fields.InputDataFields.image_height],
tensor_dict[fields.InputDataFields.image_width]
]),
dtype=tf.int32)
tensor_dict.pop(fields.InputDataFields.image_height)
tensor_dict.pop(fields.InputDataFields.image_width)
def default_groundtruth_weights():
"""Produces weights of 1.0 for each valid box, and 0.0 otherwise."""
num_boxes_per_frame = tensor_dict[
fields.InputDataFields.num_groundtruth_boxes]
max_num_boxes = tf.reduce_max(num_boxes_per_frame)
num_boxes_per_frame_tiled = tf.tile(
tf.expand_dims(num_boxes_per_frame, axis=-1),
multiples=tf.stack([1, max_num_boxes]))
range_tiled = tf.tile(
tf.expand_dims(tf.range(max_num_boxes), axis=0),
multiples=tf.stack([tf.shape(num_boxes_per_frame)[0], 1]))
return tf.cast(
tf.greater(num_boxes_per_frame_tiled, range_tiled), tf.float32)
tensor_dict[fields.InputDataFields.groundtruth_weights] = tf.cond(
tf.greater(
tf.size(tensor_dict[fields.InputDataFields.groundtruth_weights]),
0), lambda: tensor_dict[fields.InputDataFields.groundtruth_weights],
default_groundtruth_weights)
if self._fully_annotated:
tensor_dict[fields.InputDataFields.is_annotated] = tf.ones_like(
tensor_dict[fields.InputDataFields.num_groundtruth_boxes],
dtype=tf.bool)
else:
tensor_dict[fields.InputDataFields.is_annotated] = tf.cast(
tensor_dict[fields.InputDataFields.is_annotated], dtype=tf.bool)
return tensor_dict
def _reshape_context_features(self, keys_to_tensors):
"""Reshape context features.
The instance context_features are reshaped to
[num_context_features, context_feature_length]
Args:
keys_to_tensors: a dictionary from keys to tensors.
Returns:
A 2-D float tensor of shape [num_context_features, context_feature_length]
"""
context_feature_length = keys_to_tensors['image/context_feature_length']
to_shape = tf.cast(tf.stack([-1, context_feature_length]), tf.int32)
context_features = keys_to_tensors['image/context_features']
if isinstance(context_features, tf.SparseTensor):
context_features = tf.sparse_tensor_to_dense(context_features)
context_features = tf.reshape(context_features, to_shape)
return context_features
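A hedged sketch of decoding a serialized tf.SequenceExample with the new decoder (the label map path is hypothetical, and the module path assumes this file lands in object_detection/data_decoders):

```python
import tensorflow.compat.v1 as tf
from object_detection.data_decoders import tf_sequence_example_decoder

decoder = tf_sequence_example_decoder.TfSequenceExampleDecoder(
    label_map_proto_file='path/to/label_map.pbtxt')
serialized = tf.placeholder(tf.string, shape=[])
tensor_dict = decoder.decode(serialized)
# e.g. tensor_dict['groundtruth_boxes'] has shape [num_frames, num_boxes, 4].
```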