Unverified Commit 451906e4 authored by pkulzc's avatar pkulzc Committed by GitHub
Browse files

Release MobileDet code and model, and require tf_slim installation for OD API. (#8562)

* Merged commit includes the following changes:
311933687  by Sergio Guadarrama:

    Removes spurious use of tf.compat.v2, which results in spurious tf.compat.v1.compat.v2. Adds basic test to nasnet_utils.
    Replaces all remaining import tensorflow as tf with import tensorflow.compat.v1 as tf

--
311766063  by Sergio Guadarrama:

    Removes explicit tf.compat.v1 in all call sites (we already import tf.compat.v1, so this code was doing tf.compat.v1.compat.v1). The existing code worked in the latest versions of tensorflow (2.2 and 1.15), but not in 1.14 or 2.0.0a; this CL fixes it.

--
311624958  by Sergio Guadarrama:

    Updates README that doesn't render properly in github documentation

--
310980959  by Sergio Guadarrama:

    Moves research_models/slim off tf.contrib.slim/layers/framework to tf_slim

--
310263156  by Sergio Guadarrama:

    Adds model breakdown for MobilenetV3

--
308640...
parent 73b5be67
......@@ -15,7 +15,7 @@
"""Tests for object_detection.core.matcher."""
import numpy as np
import tensorflow as tf
import tensorflow.compat.v1 as tf
from object_detection.core import matcher
from object_detection.utils import test_case
......
......@@ -36,7 +36,7 @@ from abc import ABCMeta
from abc import abstractmethod
import six
import tensorflow as tf
import tensorflow.compat.v1 as tf
from object_detection.utils import ops
......
......@@ -16,7 +16,7 @@
"""Tests for google3.research.vale.object_detection.minibatch_sampler."""
import numpy as np
import tensorflow as tf
import tensorflow.compat.v1 as tf
from object_detection.core import minibatch_sampler
from object_detection.utils import test_case
......
......@@ -60,7 +60,7 @@ from __future__ import print_function
import abc
import six
import tensorflow as tf
import tensorflow.compat.v1 as tf
from object_detection.core import standard_fields as fields
......@@ -68,7 +68,7 @@ from object_detection.core import standard_fields as fields
# If using a new enough version of TensorFlow, detection models should be a
# tf module or keras model for tracking.
try:
_BaseClass = tf.Module
_BaseClass = tf.keras.layers.Layer
except AttributeError:
_BaseClass = object
......@@ -90,6 +90,8 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
self._num_classes = num_classes
self._groundtruth_lists = {}
super(DetectionModel, self).__init__()
@property
def num_classes(self):
return self._num_classes
......@@ -295,6 +297,7 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
groundtruth_weights_list=None,
groundtruth_confidences_list=None,
groundtruth_is_crowd_list=None,
groundtruth_group_of_list=None,
groundtruth_area_list=None,
is_annotated_list=None,
groundtruth_labeled_classes=None):
......@@ -328,14 +331,16 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
boxes.
groundtruth_is_crowd_list: A list of 1-D tf.bool tensors of shape
[num_boxes] containing is_crowd annotations.
groundtruth_group_of_list: A list of 1-D tf.bool tensors of shape
[num_boxes] containing group_of annotations.
groundtruth_area_list: A list of 1-D tf.float32 tensors of shape
[num_boxes] containing the area (in the original absolute coordinates)
of the annotations.
is_annotated_list: A list of scalar tf.bool tensors indicating whether
images have been labeled or not.
groundtruth_labeled_classes: A list of 1-D tf.float32 tensors of shape
[num_classes], containing label indices (1-indexed) of the classes that
are exhaustively annotated.
[num_classes], containing label indices encoded as k-hot of the classes
that are exhaustively annotated.
"""
self._groundtruth_lists[fields.BoxListFields.boxes] = groundtruth_boxes_list
self._groundtruth_lists[
......@@ -359,6 +364,9 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
if groundtruth_is_crowd_list:
self._groundtruth_lists[
fields.BoxListFields.is_crowd] = groundtruth_is_crowd_list
if groundtruth_group_of_list:
self._groundtruth_lists[
fields.BoxListFields.group_of] = groundtruth_group_of_list
if groundtruth_area_list:
self._groundtruth_lists[
fields.InputDataFields.groundtruth_area] = groundtruth_area_list
......@@ -418,3 +426,20 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
A list of update operators.
"""
pass
def call(self, images):
  """Returns detections from a batch of images.

  This method calls the preprocess, predict and postprocess functions
  sequentially and returns the output.

  Args:
    images: a [batch_size, height, width, channels] float tensor.

  Returns:
    detections: The dict of tensors returned by the postprocess function.
  """
  preprocessed_images, shapes = self.preprocess(images)
  prediction_dict = self.predict(preprocessed_images, shapes)
  return self.postprocess(prediction_dict, shapes)
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for model API."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow.compat.v1 as tf
from object_detection.core import model
from object_detection.utils import test_case
class FakeModel(model.DetectionModel):
  """Minimal DetectionModel used to exercise the model API."""

  def __init__(self):
    # Two sub-networks whose weights have distinct shapes, so tests can
    # tell their variables apart.
    self._network1 = tf.keras.Sequential([tf.keras.layers.Conv2D(8, 1)])
    self._network2 = tf.keras.Sequential([tf.keras.layers.Conv2D(16, 1)])
    super(FakeModel, self).__init__(num_classes=0)

  def preprocess(self, images):
    return images, tf.shape(images)

  def predict(self, images, shapes):
    outputs = self._network1(images)
    outputs = self._network2(outputs)
    return {'prediction': outputs}

  def postprocess(self, prediction_dict, shapes):
    return prediction_dict

  def loss(self):
    return tf.constant(0.0)

  def updates(self):
    return []

  def restore_map(self):
    return {}

  def regularization_losses(self):
    return []
class ModelTest(test_case.TestCase):
  """Tests for the DetectionModel keras-layer behavior."""

  def test_model_call(self):
    model_under_test = FakeModel()

    def graph_fn():
      return model_under_test(tf.zeros((1, 128, 128, 3)))

    outputs = self.execute(graph_fn, [])
    self.assertEqual(outputs['prediction'].shape, (1, 128, 128, 16))

  def test_freeze(self):
    # Record network1's trainable variable shapes on a fresh model.
    detection_model = FakeModel()
    detection_model(tf.zeros((1, 128, 128, 3)))
    expected_shapes = [tuple(v.get_shape().as_list())
                       for v in detection_model._network1.trainable_variables]
    del detection_model

    # Freezing network2 must leave network1's trainable variables intact.
    detection_model = FakeModel()
    detection_model._network2.trainable = False
    detection_model(tf.zeros((1, 128, 128, 3)))
    actual_shapes = [tuple(v.get_shape().as_list())
                     for v in detection_model._network1.trainable_variables]
    self.assertEqual(set(expected_shapes), set(actual_shapes))
if __name__ == '__main__':
tf.test.main()
......@@ -15,7 +15,7 @@
"""Tests for tensorflow_models.object_detection.core.post_processing."""
import numpy as np
import tensorflow as tf
import tensorflow.compat.v1 as tf
from object_detection.core import post_processing
from object_detection.core import standard_fields as fields
from object_detection.utils import test_case
......
......@@ -22,7 +22,7 @@ import collections
import numpy as np
from six.moves import range
from six.moves import zip
import tensorflow as tf
import tensorflow.compat.v1 as tf
from object_detection.core import box_list
from object_detection.core import box_list_ops
......@@ -396,6 +396,7 @@ def multiclass_non_max_suppression(boxes,
use_partitioned_nms=False,
additional_fields=None,
soft_nms_sigma=0.0,
use_hard_nms=False,
scope=None):
"""Multi-class version of non maximum suppression.
......@@ -450,6 +451,7 @@ def multiclass_non_max_suppression(boxes,
`soft_nms_sigma=0.0` (which is default), we fall back to standard (hard)
NMS. Soft NMS is currently only supported when pad_to_max_output_size is
False.
use_hard_nms: Enforce the usage of hard NMS.
scope: name scope.
Returns:
......@@ -536,7 +538,7 @@ def multiclass_non_max_suppression(boxes,
max_selection_size = tf.minimum(max_size_per_class,
boxlist_and_class_scores.num_boxes())
if (hasattr(tf.image, 'non_max_suppression_with_scores') and
tf.compat.forward_compatible(2019, 6, 6)):
tf.compat.forward_compatible(2019, 6, 6) and not use_hard_nms):
(selected_indices, selected_scores
) = tf.image.non_max_suppression_with_scores(
boxlist_and_class_scores.get(),
......@@ -852,7 +854,8 @@ def batch_multiclass_non_max_suppression(boxes,
use_class_agnostic_nms=False,
max_classes_per_detection=1,
use_dynamic_map_fn=False,
use_combined_nms=False):
use_combined_nms=False,
use_hard_nms=False):
"""Multi-class version of non maximum suppression that operates on a batch.
This op is similar to `multiclass_non_max_suppression` but operates on a batch
......@@ -923,6 +926,7 @@ def batch_multiclass_non_max_suppression(boxes,
calling this function.
Masks and additional fields are not supported.
See argument checks in the code below for unsupported arguments.
use_hard_nms: Enforce the usage of hard NMS.
Returns:
'nmsed_boxes': A [batch_size, max_detections, 4] float32 tensor
......@@ -966,12 +970,10 @@ def batch_multiclass_non_max_suppression(boxes,
'clip_window is not supported by combined_nms unless it is'
' [0. 0. 1. 1.] for each image.')
if additional_fields is not None:
tf.logging.warning(
'additional_fields is not supported by combined_nms.')
tf.logging.warning('additional_fields is not supported by combined_nms.')
if parallel_iterations != 32:
tf.logging.warning(
'Number of batch items to be processed in parallel is'
' not configurable by combined_nms.')
tf.logging.warning('Number of batch items to be processed in parallel is'
' not configurable by combined_nms.')
if max_classes_per_detection > 1:
tf.logging.warning(
'max_classes_per_detection is not configurable by combined_nms.')
......@@ -1009,7 +1011,7 @@ def batch_multiclass_non_max_suppression(boxes,
# in _single_image_nms_fn(). The dictionary is thus a sorted version of
# additional_fields.
if additional_fields is None:
ordered_additional_fields = {}
ordered_additional_fields = collections.OrderedDict()
else:
ordered_additional_fields = collections.OrderedDict(
sorted(additional_fields.items(), key=lambda item: item[0]))
......@@ -1159,7 +1161,8 @@ def batch_multiclass_non_max_suppression(boxes,
pad_to_max_output_size=use_static_shapes,
use_partitioned_nms=use_partitioned_nms,
additional_fields=per_image_additional_fields,
soft_nms_sigma=soft_nms_sigma)
soft_nms_sigma=soft_nms_sigma,
use_hard_nms=use_hard_nms)
if not use_static_shapes:
nmsed_boxlist = box_list_ops.pad_or_clip_box_list(
......
......@@ -14,7 +14,7 @@
# ==============================================================================
"""Provides functions to prefetch tensors to feed into models."""
import tensorflow as tf
import tensorflow.compat.v1 as tf
from object_detection.utils import tf_version
if not tf_version.is_tf1():
......
......@@ -19,12 +19,12 @@ from __future__ import division
from __future__ import print_function
from six.moves import range
import tensorflow as tf
import tensorflow.compat.v1 as tf
# pylint: disable=g-bad-import-order,
from object_detection.core import prefetcher
from tensorflow.contrib import slim as contrib_slim
slim = contrib_slim
import tf_slim as slim
# pylint: disable=g-bad-import-order
......
......@@ -74,7 +74,7 @@ import sys
import six
from six.moves import range
from six.moves import zip
import tensorflow as tf
import tensorflow.compat.v1 as tf
from tensorflow.python.ops import control_flow_ops
from object_detection.core import box_list
......@@ -1603,6 +1603,7 @@ def random_crop_image(image,
def random_pad_image(image,
boxes,
masks=None,
keypoints=None,
min_image_size=None,
max_image_size=None,
......@@ -1625,6 +1626,9 @@ def random_pad_image(image,
Boxes are in normalized form meaning their coordinates vary
between [0, 1].
Each row is in the form of [ymin, xmin, ymax, xmax].
masks: (optional) rank 3 float32 tensor with shape
[N, height, width] containing instance masks. The masks
are of the same height, width as the input `image`.
keypoints: (optional) rank 3 float32 tensor with shape
[N, num_keypoints, 2]. The keypoints are in y-x normalized
coordinates.
......@@ -1648,6 +1652,8 @@ def random_pad_image(image,
boxes: boxes which is the same rank as input boxes. Boxes are in normalized
form.
if masks is not None, the function also returns:
masks: rank 3 float32 tensor with shape [N, new_height, new_width]
if keypoints is not None, the function also returns:
keypoints: rank 3 float32 tensor with shape [N, num_keypoints, 2]
"""
......@@ -1728,6 +1734,15 @@ def random_pad_image(image,
result = [new_image, new_boxes]
if masks is not None:
new_masks = tf.image.pad_to_bounding_box(
masks[:, :, :, tf.newaxis],
offset_height=offset_height,
offset_width=offset_width,
target_height=target_height,
target_width=target_width)[:, :, :, 0]
result.append(new_masks)
if keypoints is not None:
new_keypoints = keypoint_ops.change_coordinate_frame(keypoints, new_window)
result.append(new_keypoints)
......@@ -1737,6 +1752,7 @@ def random_pad_image(image,
def random_absolute_pad_image(image,
boxes,
masks=None,
keypoints=None,
max_height_padding=None,
max_width_padding=None,
......@@ -1756,6 +1772,9 @@ def random_absolute_pad_image(image,
Boxes are in normalized form meaning their coordinates vary
between [0, 1].
Each row is in the form of [ymin, xmin, ymax, xmax].
masks: (optional) rank 3 float32 tensor with shape
[N, height, width] containing instance masks. The masks
are of the same height, width as the input `image`.
keypoints: (optional) rank 3 float32 tensor with shape
[N, num_keypoints, 2]. The keypoints are in y-x normalized
coordinates.
......@@ -1778,6 +1797,10 @@ def random_absolute_pad_image(image,
image: Image shape will be [new_height, new_width, channels].
boxes: boxes which is the same rank as input boxes. Boxes are in normalized
form.
if masks is not None, the function also returns:
masks: rank 3 float32 tensor with shape [N, new_height, new_width]
if keypoints is not None, the function also returns:
keypoints: rank 3 float32 tensor with shape [N, num_keypoints, 2]
"""
min_image_size = tf.shape(image)[:2]
max_image_size = min_image_size + tf.cast(
......@@ -1785,6 +1808,7 @@ def random_absolute_pad_image(image,
return random_pad_image(
image,
boxes,
masks=masks,
keypoints=keypoints,
min_image_size=min_image_size,
max_image_size=max_image_size,
......@@ -4060,10 +4084,12 @@ def get_default_func_arg_map(include_label_weights=True,
groundtruth_keypoint_visibilities),
random_pad_image:
(fields.InputDataFields.image,
fields.InputDataFields.groundtruth_boxes, groundtruth_keypoints),
fields.InputDataFields.groundtruth_boxes, groundtruth_instance_masks,
groundtruth_keypoints),
random_absolute_pad_image:
(fields.InputDataFields.image,
fields.InputDataFields.groundtruth_boxes, groundtruth_keypoints),
fields.InputDataFields.groundtruth_boxes, groundtruth_instance_masks,
groundtruth_keypoints),
random_crop_pad_image: (fields.InputDataFields.image,
fields.InputDataFields.groundtruth_boxes,
fields.InputDataFields.groundtruth_classes,
......
......@@ -24,7 +24,7 @@ import numpy as np
import six
from six.moves import range
from six.moves import zip
import tensorflow as tf
import tensorflow.compat.v1 as tf
from object_detection.core import preprocessor
from object_detection.core import preprocessor_cache
......@@ -2268,7 +2268,7 @@ class PreprocessorTest(test_case.TestCase, parameterized.TestCase):
self.assertTrue(np.all((boxes_[:, 3] - boxes_[:, 1]) >= (
padded_boxes_[:, 3] - padded_boxes_[:, 1])))
def testRandomPadImageWithKeypoints(self):
def testRandomPadImageWithKeypointsAndMasks(self):
def graph_fn():
preprocessing_options = [(preprocessor.normalize_image, {
'original_minval': 0,
......@@ -2280,45 +2280,57 @@ class PreprocessorTest(test_case.TestCase, parameterized.TestCase):
images = self.createTestImages()
boxes = self.createTestBoxes()
labels = self.createTestLabels()
masks = self.createTestMasks()
keypoints, _ = self.createTestKeypoints()
tensor_dict = {
fields.InputDataFields.image: images,
fields.InputDataFields.groundtruth_boxes: boxes,
fields.InputDataFields.groundtruth_classes: labels,
fields.InputDataFields.groundtruth_instance_masks: masks,
fields.InputDataFields.groundtruth_keypoints: keypoints,
}
tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
images = tensor_dict[fields.InputDataFields.image]
preprocessing_options = [(preprocessor.random_pad_image, {})]
func_arg_map = preprocessor.get_default_func_arg_map(
include_instance_masks=True,
include_keypoints=True,
include_keypoint_visibilities=True)
padded_tensor_dict = preprocessor.preprocess(tensor_dict,
preprocessing_options)
preprocessing_options,
func_arg_map=func_arg_map)
padded_images = padded_tensor_dict[fields.InputDataFields.image]
padded_boxes = padded_tensor_dict[
fields.InputDataFields.groundtruth_boxes]
padded_masks = padded_tensor_dict[
fields.InputDataFields.groundtruth_instance_masks]
padded_keypoints = padded_tensor_dict[
fields.InputDataFields.groundtruth_keypoints]
boxes_shape = tf.shape(boxes)
padded_boxes_shape = tf.shape(padded_boxes)
padded_masks_shape = tf.shape(padded_masks)
keypoints_shape = tf.shape(keypoints)
padded_keypoints_shape = tf.shape(padded_keypoints)
images_shape = tf.shape(images)
padded_images_shape = tf.shape(padded_images)
return [boxes_shape, padded_boxes_shape, keypoints_shape,
padded_keypoints_shape, images_shape, padded_images_shape, boxes,
padded_boxes, keypoints, padded_keypoints]
(boxes_shape_, padded_boxes_shape_, keypoints_shape_,
padded_keypoints_shape_, images_shape_, padded_images_shape_, boxes_,
padded_boxes_, keypoints_, padded_keypoints_) = self.execute_cpu(graph_fn,
[])
return [boxes_shape, padded_boxes_shape, padded_masks_shape,
keypoints_shape, padded_keypoints_shape, images_shape,
padded_images_shape, boxes, padded_boxes, keypoints,
padded_keypoints]
(boxes_shape_, padded_boxes_shape_, padded_masks_shape_,
keypoints_shape_, padded_keypoints_shape_, images_shape_,
padded_images_shape_, boxes_, padded_boxes_,
keypoints_, padded_keypoints_) = self.execute_cpu(graph_fn, [])
self.assertAllEqual(boxes_shape_, padded_boxes_shape_)
self.assertAllEqual(keypoints_shape_, padded_keypoints_shape_)
self.assertTrue((images_shape_[1] >= padded_images_shape_[1] * 0.5).all)
self.assertTrue((images_shape_[2] >= padded_images_shape_[2] * 0.5).all)
self.assertTrue((images_shape_[1] <= padded_images_shape_[1]).all)
self.assertTrue((images_shape_[2] <= padded_images_shape_[2]).all)
self.assertAllEqual(padded_masks_shape_[1:3], padded_images_shape_[1:3])
self.assertTrue(np.all((boxes_[:, 2] - boxes_[:, 0]) >= (
padded_boxes_[:, 2] - padded_boxes_[:, 0])))
self.assertTrue(np.all((boxes_[:, 3] - boxes_[:, 1]) >= (
......
......@@ -26,7 +26,7 @@ from abc import ABCMeta
from abc import abstractmethod
import six
import tensorflow as tf
import tensorflow.compat.v1 as tf
from object_detection.core import box_list_ops
from object_detection.core import standard_fields as fields
......
......@@ -14,7 +14,7 @@
# ==============================================================================
"""Tests for region_similarity_calculator."""
import tensorflow as tf
import tensorflow.compat.v1 as tf
from object_detection.core import box_list
from object_detection.core import region_similarity_calculator
......
......@@ -76,6 +76,9 @@ class InputDataFields(object):
context_features, used for reshaping.
valid_context_size: the valid context size, used in filtering the padded
context features.
image_format: format for the images, used to decode
image_height: height of images, used to decode
image_width: width of images, used to decode
"""
image = 'image'
image_additional_channels = 'image_additional_channels'
......@@ -112,6 +115,10 @@ class InputDataFields(object):
context_features = 'context_features'
context_feature_length = 'context_feature_length'
valid_context_size = 'valid_context_size'
image_timestamps = 'image_timestamps'
image_format = 'image_format'
image_height = 'image_height'
image_width = 'image_width'
class DetectionResultFields(object):
......@@ -182,6 +189,7 @@ class BoxListFields(object):
keypoint_visibilities = 'keypoint_visibilities'
keypoint_heatmaps = 'keypoint_heatmaps'
is_crowd = 'is_crowd'
group_of = 'group_of'
class PredictionFields(object):
......@@ -279,3 +287,14 @@ class TfExampleFields(object):
detection_bbox_ymax = 'image/detection/bbox/ymax'
detection_bbox_xmax = 'image/detection/bbox/xmax'
detection_score = 'image/detection/score'
# Sequence fields for SequenceExample inputs.
# All others are considered context fields.
# NOTE: InputDataFields.source_id was previously listed twice; the
# duplicate entry has been removed (membership semantics are unchanged).
SEQUENCE_FIELDS = [InputDataFields.image,
                   InputDataFields.source_id,
                   InputDataFields.groundtruth_boxes,
                   InputDataFields.num_groundtruth_boxes,
                   InputDataFields.groundtruth_classes,
                   InputDataFields.groundtruth_weights,
                   InputDataFields.is_annotated]
......@@ -37,7 +37,8 @@ from __future__ import print_function
from six.moves import range
from six.moves import zip
import tensorflow as tf
import tensorflow.compat.v1 as tf
import tensorflow.compat.v2 as tf2
from object_detection.box_coders import faster_rcnn_box_coder
from object_detection.box_coders import mean_stddev_box_coder
......@@ -54,6 +55,8 @@ from object_detection.utils import shape_utils
from object_detection.utils import target_assigner_utils as ta_utils
ResizeMethod = tf2.image.ResizeMethod
_DEFAULT_KEYPOINT_OFFSET_STD_DEV = 1.0
......
......@@ -15,7 +15,7 @@
"""Tests for object_detection.core.target_assigner."""
import numpy as np
import tensorflow as tf
import tensorflow.compat.v1 as tf
from object_detection.box_coders import keypoint_box_coder
from object_detection.box_coders import mean_stddev_box_coder
......@@ -1230,6 +1230,6 @@ class CreateTargetAssignerTest(tf.test.TestCase):
if __name__ == '__main__':
tf.enable_v2_behavior()
tf.test.main()
item {
id: 1
name: 'human'
}
item {
id: 2
name: 'gazelleGrants'
}
item {
id: 3
name: 'reedbuck'
}
item {
id: 4
name: 'dikDik'
}
item {
id: 5
name: 'zebra'
}
item {
id: 6
name: 'porcupine'
}
item {
id: 7
name: 'gazelleThomsons'
}
item {
id: 8
name: 'hyenaSpotted'
}
item {
id: 9
name: 'warthog'
}
item {
id: 10
name: 'impala'
}
item {
id: 11
name: 'elephant'
}
item {
id: 12
name: 'giraffe'
}
item {
id: 13
name: 'mongoose'
}
item {
id: 14
name: 'buffalo'
}
item {
id: 15
name: 'hartebeest'
}
item {
id: 16
name: 'guineaFowl'
}
item {
id: 17
name: 'wildebeest'
}
item {
id: 18
name: 'leopard'
}
item {
id: 19
name: 'ostrich'
}
item {
id: 20
name: 'lionFemale'
}
item {
id: 21
name: 'koriBustard'
}
item {
id: 22
name: 'otherBird'
}
item {
id: 23
name: 'batEaredFox'
}
item {
id: 24
name: 'bushbuck'
}
item {
id: 25
name: 'jackal'
}
item {
id: 26
name: 'cheetah'
}
item {
id: 27
name: 'eland'
}
item {
id: 28
name: 'aardwolf'
}
item {
id: 29
name: 'hippopotamus'
}
item {
id: 30
name: 'hyenaStriped'
}
item {
id: 31
name: 'aardvark'
}
item {
id: 32
name: 'hare'
}
item {
id: 33
name: 'baboon'
}
item {
id: 34
name: 'vervetMonkey'
}
item {
id: 35
name: 'waterbuck'
}
item {
id: 36
name: 'secretaryBird'
}
item {
id: 37
name: 'serval'
}
item {
id: 38
name: 'lionMale'
}
item {
id: 39
name: 'topi'
}
item {
id: 40
name: 'honeyBadger'
}
item {
id: 41
name: 'rodents'
}
item {
id: 42
name: 'wildcat'
}
item {
id: 43
name: 'civet'
}
item {
id: 44
name: 'genet'
}
item {
id: 45
name: 'caracal'
}
item {
id: 46
name: 'rhinoceros'
}
item {
id: 47
name: 'reptiles'
}
item {
id: 48
name: 'zorilla'
}
......@@ -24,8 +24,8 @@ from __future__ import print_function
import enum
import numpy as np
from six.moves import zip
import tensorflow as tf
import tensorflow.compat.v1 as tf
from tf_slim import tfexample_decoder as slim_example_decoder
from object_detection.core import data_decoder
from object_detection.core import standard_fields as fields
from object_detection.protos import input_reader_pb2
......@@ -34,12 +34,14 @@ from object_detection.utils import label_map_util
# pylint: disable=g-import-not-at-top
try:
from tensorflow.contrib import lookup as contrib_lookup
from tensorflow.contrib.slim import tfexample_decoder as slim_example_decoder
except ImportError:
# TF 2.0 doesn't ship with contrib.
pass
# pylint: enable=g-import-not-at-top
_LABEL_OFFSET = 1
class Visibility(enum.Enum):
"""Visibility definitions.
......@@ -167,7 +169,8 @@ class TfExampleDecoder(data_decoder.DataDecoder):
num_keypoints=0,
num_additional_channels=0,
load_multiclass_scores=False,
load_context_features=False):
load_context_features=False,
expand_hierarchy_labels=False):
"""Constructor sets keys_to_features and items_to_handlers.
Args:
......@@ -193,12 +196,18 @@ class TfExampleDecoder(data_decoder.DataDecoder):
boxes.
load_context_features: Whether to load information from context_features,
to provide additional context to a detection model for training and/or
inference
inference.
expand_hierarchy_labels: Expands the object and image labels taking into
account the provided hierarchy in the label_map_proto_file. For positive
classes, the labels are extended to ancestor. For negative classes,
the labels are expanded to descendants.
Raises:
ValueError: If `instance_mask_type` option is not one of
input_reader_pb2.DEFAULT, input_reader_pb2.NUMERICAL, or
input_reader_pb2.PNG_MASKS.
ValueError: If `expand_labels_hierarchy` is True, but the
`label_map_proto_file` is not provided.
"""
# TODO(rathodv): delete unused `use_display_name` argument once we change
# other decoders to handle label maps similarly.
......@@ -385,6 +394,20 @@ class TfExampleDecoder(data_decoder.DataDecoder):
self.items_to_handlers[
fields.InputDataFields.groundtruth_image_classes] = image_label_handler
self._expand_hierarchy_labels = expand_hierarchy_labels
self._ancestors_lut = None
self._descendants_lut = None
if expand_hierarchy_labels:
if label_map_proto_file:
ancestors_lut, descendants_lut = (
label_map_util.get_label_map_hierarchy_lut(label_map_proto_file,
True))
self._ancestors_lut = tf.constant(ancestors_lut, dtype=tf.int64)
self._descendants_lut = tf.constant(descendants_lut, dtype=tf.int64)
else:
raise ValueError('In order to expand labels, the label_map_proto_file '
'has to be provided.')
def decode(self, tf_example_string_tensor):
"""Decodes serialized tensorflow example and returns a tensor dictionary.
......@@ -432,7 +455,7 @@ class TfExampleDecoder(data_decoder.DataDecoder):
tensor of shape [None, num_keypoints] containing keypoint visibilities.
fields.InputDataFields.groundtruth_instance_masks - 3D float32 tensor of
shape [None, None, None] containing instance masks.
fields.InputDataFields.groundtruth_image_classes - 1D uint64 of shape
fields.InputDataFields.groundtruth_image_classes - 1D int64 of shape
[None] containing classes for the boxes.
fields.InputDataFields.multiclass_scores - 1D float32 tensor of shape
[None * num_classes] containing flattened multiclass scores for
......@@ -484,6 +507,46 @@ class TfExampleDecoder(data_decoder.DataDecoder):
tensor_dict[gt_kpt_fld],
np.nan * tf.ones_like(tensor_dict[gt_kpt_fld]))
if self._expand_hierarchy_labels:
input_fields = fields.InputDataFields
image_classes, image_confidences = self._expand_image_label_hierarchy(
tensor_dict[input_fields.groundtruth_image_classes],
tensor_dict[input_fields.groundtruth_image_confidences])
tensor_dict[input_fields.groundtruth_image_classes] = image_classes
tensor_dict[input_fields.groundtruth_image_confidences] = (
image_confidences)
box_fields = [
fields.InputDataFields.groundtruth_group_of,
fields.InputDataFields.groundtruth_is_crowd,
fields.InputDataFields.groundtruth_difficult,
fields.InputDataFields.groundtruth_area,
fields.InputDataFields.groundtruth_boxes,
fields.InputDataFields.groundtruth_weights,
]
def expand_field(field_name):
return self._expansion_box_field_labels(
tensor_dict[input_fields.groundtruth_classes],
tensor_dict[field_name])
# pylint: disable=cell-var-from-loop
for field in box_fields:
if field in tensor_dict:
tensor_dict[field] = tf.cond(
tf.size(tensor_dict[field]) > 0, lambda: expand_field(field),
lambda: tensor_dict[field])
# pylint: enable=cell-var-from-loop
tensor_dict[input_fields.groundtruth_classes] = (
self._expansion_box_field_labels(
tensor_dict[input_fields.groundtruth_classes],
tensor_dict[input_fields.groundtruth_classes], True))
if fields.InputDataFields.groundtruth_group_of in tensor_dict:
group_of = fields.InputDataFields.groundtruth_group_of
tensor_dict[group_of] = tf.cast(tensor_dict[group_of], dtype=tf.bool)
return tensor_dict
def _reshape_keypoints(self, keys_to_tensors):
......@@ -633,3 +696,69 @@ class TfExampleDecoder(data_decoder.DataDecoder):
tf.greater(tf.size(png_masks), 0),
lambda: tf.map_fn(decode_png_mask, png_masks, dtype=tf.float32),
lambda: tf.zeros(tf.cast(tf.stack([0, height, width]), dtype=tf.int32)))
def _expand_image_label_hierarchy(self, image_classes, image_confidences):
  """Expand image level labels according to the hierarchy.

  Positive labels (confidence 1.0) are expanded to their ancestor classes
  via `self._ancestors_lut`; negative labels (confidence 0.0) are expanded
  to their descendant classes via `self._descendants_lut`.

  Args:
    image_classes: Int64 tensor with the image level class ids for a sample.
    image_confidences: Float tensor signaling whether a class id is present in
      the image (1.0) or not present (0.0).

  Returns:
    new_image_classes: Int64 tensor equal to expanding image_classes.
    new_image_confidences: Float tensor equal to expanding image_confidences.
  """

  def expand_labels(relation_tensor, confidence_value):
    """Expand to ancestors or descendants depending on arguments."""
    # Select only the labels whose confidence matches `confidence_value`.
    mask = tf.equal(image_confidences, confidence_value)
    target_image_classes = tf.boolean_mask(image_classes, mask)
    # Labels are 1-indexed while LUT rows are 0-indexed (_LABEL_OFFSET);
    # a positive LUT entry marks a related class for any selected label.
    expanded_indices = tf.reduce_any((tf.gather(
        relation_tensor, target_image_classes - _LABEL_OFFSET, axis=0) > 0),
                                     axis=0)
    expanded_indices = tf.where(expanded_indices)[:, 0] + _LABEL_OFFSET
    # Keep the non-selected labels as-is and append the expanded class ids.
    new_groundtruth_image_classes = (
        tf.concat([
            tf.boolean_mask(image_classes, tf.logical_not(mask)),
            expanded_indices,
        ],
                  axis=0))
    # Every appended class id gets the confidence value being expanded.
    new_groundtruth_image_confidences = (
        tf.concat([
            tf.boolean_mask(image_confidences, tf.logical_not(mask)),
            tf.ones([tf.shape(expanded_indices)[0]],
                    dtype=image_confidences.dtype) * confidence_value,
        ],
                  axis=0))
    return new_groundtruth_image_classes, new_groundtruth_image_confidences

  # First expand positives to ancestors, then negatives to descendants.
  image_classes, image_confidences = expand_labels(self._ancestors_lut, 1.0)
  new_image_classes, new_image_confidences = expand_labels(
      self._descendants_lut, 0.0)
  return new_image_classes, new_image_confidences
def _expansion_box_field_labels(self,
                                object_classes,
                                object_field,
                                copy_class_id=False):
  """Expand the labels of a specific object field according to the hierarchy.

  Args:
    object_classes: Int64 tensor with the class id for each element in
      object_field.
    object_field: Tensor to be expanded.
    copy_class_id: Boolean to choose whether to use class id values in the
      output tensor instead of replicating the original values.

  Returns:
    A tensor with the result of expanding object_field.
  """
  # One LUT row per object; positive entries mark the object's ancestor
  # classes (1-indexed labels are shifted by _LABEL_OFFSET into the LUT).
  expanded_indices = tf.gather(
      self._ancestors_lut, object_classes - _LABEL_OFFSET, axis=0)
  if copy_class_id:
    # Emit the expanded class ids themselves: the column index of each
    # positive entry, shifted back to 1-indexed label space.
    new_object_field = tf.where(expanded_indices > 0)[:, 1] + _LABEL_OFFSET
  else:
    # Replicate each object's field value once per ancestor class.
    # NOTE(review): assumes LUT entries are 0/1 so the row sum equals the
    # ancestor count — confirm against get_label_map_hierarchy_lut.
    new_object_field = tf.repeat(
        object_field, tf.reduce_sum(expanded_indices, axis=1), axis=0)
  return new_object_field
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment