"vscode:/vscode.git/clone" did not exist on "75f1f45396d4b8984caadef349f7226434d94485"
Unverified commit 8518d053, authored by pkulzc, committed by GitHub

Open source MnasFPN and minor fixes to OD API (#8484)

310447280  by lzc:

    Internal change

310420845  by Zhichao Lu:

    Open source the internal Context RCNN code.

--
310362339  by Zhichao Lu:

    Internal change

310259448  by lzc:

    Update required TF version for OD API.

--
310252159  by Zhichao Lu:

    Port patch_ops_test to TF1/TF2 as TPUs.

--
310247180  by Zhichao Lu:

    Ignore keypoint heatmap loss in the regions/bounding boxes with target keypoint
    class but no valid keypoint annotations.

--
310178294  by Zhichao Lu:

    Open source MnasFPN
    https://arxiv.org/abs/1912.01106

--
310094222  by lzc:

    Internal changes.

--
310085250  by lzc:

    Internal Change.

--
310016447  by huizhongc:

    Remove unrecognized classes from labeled_classes.

--
310009470  by rathodv:

    Mark batcher.py as TF1 only.

--
310001984  by rathodv:

    Update core/preprocessor.py to be compatible with TF1/TF2.

--
309455035  by Zhi...
parent ac5fff19
@@ -42,6 +42,8 @@ class InputDataFields(object):
     filename: original filename of the dataset (without common path).
     groundtruth_image_classes: image-level class labels.
     groundtruth_image_confidences: image-level class confidences.
+    groundtruth_labeled_classes: image-level annotation that indicates the
+      classes for which an image has been labeled.
     groundtruth_boxes: coordinates of the ground truth boxes in the image.
     groundtruth_classes: box-level class labels.
     groundtruth_confidences: box-level class confidences. The shape should be
@@ -61,6 +63,7 @@ class InputDataFields(object):
     groundtruth_instance_classes: instance mask-level class labels.
     groundtruth_keypoints: ground truth keypoints.
     groundtruth_keypoint_visibilities: ground truth keypoint visibilities.
+    groundtruth_keypoint_weights: groundtruth weight factor for keypoints.
     groundtruth_label_weights: groundtruth label weights.
     groundtruth_weights: groundtruth weight factor for bounding boxes.
     num_groundtruth_boxes: number of groundtruth boxes.
@@ -68,6 +71,11 @@ class InputDataFields(object):
     true_image_shapes: true shapes of images in the resized images, as resized
       images can be padded with zeros.
     multiclass_scores: the label score per class for each box.
+    context_features: a flattened list of contextual features.
+    context_feature_length: the fixed length of each feature in
+      context_features, used for reshaping.
+    valid_context_size: the valid context size, used in filtering the padded
+      context features.
   """
   image = 'image'
   image_additional_channels = 'image_additional_channels'
@@ -78,6 +86,7 @@ class InputDataFields(object):
   filename = 'filename'
   groundtruth_image_classes = 'groundtruth_image_classes'
   groundtruth_image_confidences = 'groundtruth_image_confidences'
+  groundtruth_labeled_classes = 'groundtruth_labeled_classes'
   groundtruth_boxes = 'groundtruth_boxes'
   groundtruth_classes = 'groundtruth_classes'
   groundtruth_confidences = 'groundtruth_confidences'
@@ -93,12 +102,16 @@ class InputDataFields(object):
   groundtruth_instance_classes = 'groundtruth_instance_classes'
   groundtruth_keypoints = 'groundtruth_keypoints'
   groundtruth_keypoint_visibilities = 'groundtruth_keypoint_visibilities'
+  groundtruth_keypoint_weights = 'groundtruth_keypoint_weights'
   groundtruth_label_weights = 'groundtruth_label_weights'
   groundtruth_weights = 'groundtruth_weights'
   num_groundtruth_boxes = 'num_groundtruth_boxes'
   is_annotated = 'is_annotated'
   true_image_shape = 'true_image_shape'
   multiclass_scores = 'multiclass_scores'
+  context_features = 'context_features'
+  context_feature_length = 'context_feature_length'
+  valid_context_size = 'valid_context_size'


 class DetectionResultFields(object):
@@ -115,6 +128,7 @@ class DetectionResultFields(object):
     detection_masks: contains a segmentation mask for each detection box.
     detection_boundaries: contains an object boundary for each detection box.
     detection_keypoints: contains detection keypoints for each detection box.
+    detection_keypoint_scores: contains detection keypoint scores.
     num_detections: number of detections in the batch.
     raw_detection_boxes: contains decoded detection boxes without Non-Max
       suppression.
@@ -134,6 +148,7 @@ class DetectionResultFields(object):
   detection_masks = 'detection_masks'
   detection_boundaries = 'detection_boundaries'
   detection_keypoints = 'detection_keypoints'
+  detection_keypoint_scores = 'detection_keypoint_scores'
   num_detections = 'num_detections'
   raw_detection_boxes = 'raw_detection_boxes'
   raw_detection_scores = 'raw_detection_scores'
@@ -164,6 +179,7 @@ class BoxListFields(object):
   masks = 'masks'
   boundaries = 'boundaries'
   keypoints = 'keypoints'
+  keypoint_visibilities = 'keypoint_visibilities'
   keypoint_heatmaps = 'keypoint_heatmaps'
   is_crowd = 'is_crowd'
@@ -201,6 +217,7 @@ class TfExampleFields(object):
     source_id: original source of the image
     image_class_text: image-level label in text format
     image_class_label: image-level label in numerical format
+    image_class_confidence: image-level confidence of the label
     object_class_text: labels in text format, e.g. ["person", "cat"]
     object_class_label: labels in numbers, e.g. [16, 8]
     object_bbox_xmin: xmin coordinates of groundtruth box, e.g. 10, 30
@@ -237,6 +254,7 @@ class TfExampleFields(object):
   source_id = 'image/source_id'
   image_class_text = 'image/class/text'
   image_class_label = 'image/class/label'
+  image_class_confidence = 'image/class/confidence'
   object_class_text = 'image/object/class/text'
   object_class_label = 'image/object/class/label'
   object_bbox_ymin = 'image/object/bbox/ymin'
...
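A note on the three new context fields: they travel together. Below is a minimal sketch (not part of this commit; the function name drop_padded_context is hypothetical) of dropping padded rows using valid_context_size, assuming context_features has already been reshaped to [max_context_size, context_feature_length] and that padding rows follow the valid rows:

import tensorflow as tf
from object_detection.core import standard_fields as fields

def drop_padded_context(tensor_dict, max_context_size):
  # valid_context_size is a scalar count of real (non-padding) context rows.
  feats = tensor_dict[fields.InputDataFields.context_features]
  valid = tensor_dict[fields.InputDataFields.valid_context_size]
  # Boolean mask of shape [max_context_size]: True for the first `valid` rows.
  mask = tf.sequence_mask(valid, maxlen=max_context_size)
  return tf.boolean_mask(feats, mask)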
@@ -44,12 +44,17 @@ from object_detection.box_coders import mean_stddev_box_coder
 from object_detection.core import box_coder
 from object_detection.core import box_list
 from object_detection.core import box_list_ops
+from object_detection.core import keypoint_ops
 from object_detection.core import matcher as mat
 from object_detection.core import region_similarity_calculator as sim_calc
 from object_detection.core import standard_fields as fields
 from object_detection.matchers import argmax_matcher
 from object_detection.matchers import bipartite_matcher
 from object_detection.utils import shape_utils
+from object_detection.utils import target_assigner_utils as ta_utils
+
+_DEFAULT_KEYPOINT_OFFSET_STD_DEV = 1.0


 class TargetAssigner(object):
...
@@ -25,6 +25,7 @@ from object_detection.core import standard_fields as fields
 from object_detection.core import target_assigner as targetassigner
 from object_detection.matchers import argmax_matcher
 from object_detection.matchers import bipartite_matcher
+from object_detection.utils import np_box_ops
 from object_detection.utils import test_case
@@ -65,10 +66,10 @@ class TargetAssignerTest(test_case.TestCase):
     self.assertAllClose(cls_weights_out, exp_cls_weights)
     self.assertAllClose(reg_targets_out, exp_reg_targets)
     self.assertAllClose(reg_weights_out, exp_reg_weights)
-    self.assertEquals(cls_targets_out.dtype, np.float32)
-    self.assertEquals(cls_weights_out.dtype, np.float32)
-    self.assertEquals(reg_targets_out.dtype, np.float32)
-    self.assertEquals(reg_weights_out.dtype, np.float32)
+    self.assertEqual(cls_targets_out.dtype, np.float32)
+    self.assertEqual(cls_weights_out.dtype, np.float32)
+    self.assertEqual(reg_targets_out.dtype, np.float32)
+    self.assertEqual(reg_weights_out.dtype, np.float32)

   def test_assign_class_agnostic_with_ignored_matches(self):
     # Note: test is very similar to above. The third box matched with an IOU
@@ -108,10 +109,10 @@ class TargetAssignerTest(test_case.TestCase):
     self.assertAllClose(cls_weights_out, exp_cls_weights)
     self.assertAllClose(reg_targets_out, exp_reg_targets)
     self.assertAllClose(reg_weights_out, exp_reg_weights)
-    self.assertEquals(cls_targets_out.dtype, np.float32)
-    self.assertEquals(cls_weights_out.dtype, np.float32)
-    self.assertEquals(reg_targets_out.dtype, np.float32)
-    self.assertEquals(reg_weights_out.dtype, np.float32)
+    self.assertEqual(cls_targets_out.dtype, np.float32)
+    self.assertEqual(cls_weights_out.dtype, np.float32)
+    self.assertEqual(reg_targets_out.dtype, np.float32)
+    self.assertEqual(reg_weights_out.dtype, np.float32)

   def test_assign_agnostic_with_keypoints(self):
     def graph_fn(anchor_means, groundtruth_box_corners,
@@ -158,10 +159,10 @@ class TargetAssignerTest(test_case.TestCase):
     self.assertAllClose(cls_weights_out, exp_cls_weights)
     self.assertAllClose(reg_targets_out, exp_reg_targets)
     self.assertAllClose(reg_weights_out, exp_reg_weights)
-    self.assertEquals(cls_targets_out.dtype, np.float32)
-    self.assertEquals(cls_weights_out.dtype, np.float32)
-    self.assertEquals(reg_targets_out.dtype, np.float32)
-    self.assertEquals(reg_weights_out.dtype, np.float32)
+    self.assertEqual(cls_targets_out.dtype, np.float32)
+    self.assertEqual(cls_weights_out.dtype, np.float32)
+    self.assertEqual(reg_targets_out.dtype, np.float32)
+    self.assertEqual(reg_weights_out.dtype, np.float32)

   def test_assign_class_agnostic_with_keypoints_and_ignored_matches(self):
     # Note: test is very similar to above. The third box matched with an IOU
@@ -213,10 +214,10 @@ class TargetAssignerTest(test_case.TestCase):
     self.assertAllClose(cls_weights_out, exp_cls_weights)
     self.assertAllClose(reg_targets_out, exp_reg_targets)
     self.assertAllClose(reg_weights_out, exp_reg_weights)
-    self.assertEquals(cls_targets_out.dtype, np.float32)
-    self.assertEquals(cls_weights_out.dtype, np.float32)
-    self.assertEquals(reg_targets_out.dtype, np.float32)
-    self.assertEquals(reg_weights_out.dtype, np.float32)
+    self.assertEqual(cls_targets_out.dtype, np.float32)
+    self.assertEqual(cls_weights_out.dtype, np.float32)
+    self.assertEqual(reg_targets_out.dtype, np.float32)
+    self.assertEqual(reg_weights_out.dtype, np.float32)

   def test_assign_multiclass(self):
@@ -271,10 +272,10 @@ class TargetAssignerTest(test_case.TestCase):
     self.assertAllClose(cls_weights_out, exp_cls_weights)
     self.assertAllClose(reg_targets_out, exp_reg_targets)
     self.assertAllClose(reg_weights_out, exp_reg_weights)
-    self.assertEquals(cls_targets_out.dtype, np.float32)
-    self.assertEquals(cls_weights_out.dtype, np.float32)
-    self.assertEquals(reg_targets_out.dtype, np.float32)
-    self.assertEquals(reg_weights_out.dtype, np.float32)
+    self.assertEqual(cls_targets_out.dtype, np.float32)
+    self.assertEqual(cls_weights_out.dtype, np.float32)
+    self.assertEqual(reg_targets_out.dtype, np.float32)
+    self.assertEqual(reg_weights_out.dtype, np.float32)

   def test_assign_multiclass_with_groundtruth_weights(self):
@@ -379,10 +380,10 @@ class TargetAssignerTest(test_case.TestCase):
     self.assertAllClose(cls_weights_out, exp_cls_weights)
     self.assertAllClose(reg_targets_out, exp_reg_targets)
     self.assertAllClose(reg_weights_out, exp_reg_weights)
-    self.assertEquals(cls_targets_out.dtype, np.float32)
-    self.assertEquals(cls_weights_out.dtype, np.float32)
-    self.assertEquals(reg_targets_out.dtype, np.float32)
-    self.assertEquals(reg_weights_out.dtype, np.float32)
+    self.assertEqual(cls_targets_out.dtype, np.float32)
+    self.assertEqual(cls_weights_out.dtype, np.float32)
+    self.assertEqual(reg_targets_out.dtype, np.float32)
+    self.assertEqual(reg_weights_out.dtype, np.float32)

   def test_assign_empty_groundtruth(self):
@@ -431,10 +432,10 @@ class TargetAssignerTest(test_case.TestCase):
     self.assertAllClose(cls_weights_out, exp_cls_weights)
     self.assertAllClose(reg_targets_out, exp_reg_targets)
     self.assertAllClose(reg_weights_out, exp_reg_weights)
-    self.assertEquals(cls_targets_out.dtype, np.float32)
-    self.assertEquals(cls_weights_out.dtype, np.float32)
-    self.assertEquals(reg_targets_out.dtype, np.float32)
-    self.assertEquals(reg_weights_out.dtype, np.float32)
+    self.assertEqual(cls_targets_out.dtype, np.float32)
+    self.assertEqual(cls_weights_out.dtype, np.float32)
+    self.assertEqual(reg_targets_out.dtype, np.float32)
+    self.assertEqual(reg_weights_out.dtype, np.float32)

   def test_raises_error_on_incompatible_groundtruth_boxes_and_labels(self):
     similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
@@ -1228,5 +1229,7 @@ class CreateTargetAssignerTest(tf.test.TestCase):
         stage='invalid_stage')

 if __name__ == '__main__':
   tf.test.main()
item: {
  id: 1
  name: 'face'
  display_name: 'face'
  keypoints {
    id: 0
    label: "left_eye_center"
  }
  keypoints {
    id: 1
    label: "right_eye_center"
  }
  keypoints {
    id: 2
    label: "nose_tip"
  }
  keypoints {
    id: 3
    label: "mouth_center"
  }
  keypoints {
    id: 4
    label: "left_ear_tragion"
  }
  keypoints {
    id: 5
    label: "right_ear_tragion"
  }
}

item: {
  id: 2
  name: 'Person'
  display_name: 'PERSON'
  keypoints {
    id: 6
    label: "NOSE_TIP"
  }
  keypoints {
    id: 7
    label: "LEFT_EYE"
  }
  keypoints {
    id: 8
    label: "RIGHT_EYE"
  }
  keypoints {
    id: 9
    label: "LEFT_EAR_TRAGION"
  }
  keypoints {
    id: 10
    label: "RIGHT_EAR_TRAGION"
  }
  keypoints {
    id: 11
    label: "LEFT_SHOULDER"
  }
  keypoints {
    id: 12
    label: "RIGHT_SHOULDER"
  }
  keypoints {
    id: 13
    label: "LEFT_ELBOW"
  }
  keypoints {
    id: 14
    label: "RIGHT_ELBOW"
  }
  keypoints {
    id: 15
    label: "LEFT_WRIST"
  }
  keypoints {
    id: 16
    label: "RIGHT_WRIST"
  }
  keypoints {
    id: 17
    label: "LEFT_HIP"
  }
  keypoints {
    id: 18
    label: "RIGHT_HIP"
  }
  keypoints {
    id: 19
    label: "LEFT_KNEE"
  }
  keypoints {
    id: 20
    label: "RIGHT_KNEE"
  }
  keypoints {
    id: 21
    label: "LEFT_ANKLE"
  }
  keypoints {
    id: 22
    label: "RIGHT_ANKLE"
  }
}
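For reference, a hedged sketch of reading a keypoint-aware label map like the one above (the path is hypothetical, and this assumes the StringIntLabelMapItem proto in this release carries the repeated keypoints field shown in the text format):

from google.protobuf import text_format
from object_detection.protos import string_int_label_map_pb2

def load_keypoint_label_map(path):
  label_map = string_int_label_map_pb2.StringIntLabelMap()
  with open(path, 'r') as f:
    text_format.Merge(f.read(), label_map)
  # Map each class name to its {keypoint id: keypoint label} table.
  return {item.name: {kp.id: kp.label for kp in item.keypoints}
          for item in label_map.item}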
@@ -21,6 +21,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

+import enum
+import numpy as np
 from six.moves import zip
 import tensorflow as tf
@@ -29,7 +31,27 @@ from object_detection.core import standard_fields as fields
 from object_detection.protos import input_reader_pb2
 from object_detection.utils import label_map_util

-slim_example_decoder = tf.contrib.slim.tfexample_decoder
+# pylint: disable=g-import-not-at-top
+try:
+  from tensorflow.contrib import lookup as contrib_lookup
+  from tensorflow.contrib.slim import tfexample_decoder as slim_example_decoder
+except ImportError:
+  # TF 2.0 doesn't ship with contrib.
+  pass
+# pylint: enable=g-import-not-at-top
+
+
+class Visibility(enum.Enum):
+  """Visibility definitions.
+
+  This follows the MS Coco convention (http://cocodataset.org/#format-data).
+  """
+  # Keypoint is not labeled.
+  UNLABELED = 0
+  # Keypoint is labeled but falls outside the object segment (e.g. occluded).
+  NOT_VISIBLE = 1
+  # Keypoint is labeled and visible.
+  VISIBLE = 2


 class _ClassTensorHandler(slim_example_decoder.Tensor):
@@ -69,8 +91,8 @@ class _ClassTensorHandler(slim_example_decoder.Tensor):
       lookup = tf.compat.v2.lookup
       hash_table_class = tf.compat.v2.lookup.StaticHashTable
     except AttributeError:
-      lookup = tf.contrib.lookup
-      hash_table_class = tf.contrib.lookup.HashTable
+      lookup = contrib_lookup
+      hash_table_class = contrib_lookup.HashTable
     name_to_id_table = hash_table_class(
         initializer=lookup.KeyValueTensorInitializer(
             keys=tf.constant(list(name_to_id.keys())),
@@ -144,7 +166,8 @@ class TfExampleDecoder(data_decoder.DataDecoder):
                dct_method='',
                num_keypoints=0,
                num_additional_channels=0,
-               load_multiclass_scores=False):
+               load_multiclass_scores=False,
+               load_context_features=False):
     """Constructor sets keys_to_features and items_to_handlers.

     Args:
@@ -168,6 +191,9 @@ class TfExampleDecoder(data_decoder.DataDecoder):
       num_additional_channels: how many additional channels to use.
       load_multiclass_scores: Whether to load multiclass scores associated with
         boxes.
+      load_context_features: Whether to load information from context_features,
+        to provide additional context to a detection model for training and/or
+        inference.

     Raises:
       ValueError: If `instance_mask_type` option is not one of
@@ -197,6 +223,8 @@ class TfExampleDecoder(data_decoder.DataDecoder):
             tf.VarLenFeature(tf.string),
         'image/class/label':
             tf.VarLenFeature(tf.int64),
+        'image/class/confidence':
+            tf.VarLenFeature(tf.float32),
         # Object boxes and classes.
         'image/object/bbox/xmin':
             tf.VarLenFeature(tf.float32),
@@ -253,6 +281,9 @@ class TfExampleDecoder(data_decoder.DataDecoder):
             slim_example_decoder.Tensor('image/key/sha256')),
         fields.InputDataFields.filename: (
             slim_example_decoder.Tensor('image/filename')),
+        # Image-level labels.
+        fields.InputDataFields.groundtruth_image_confidences: (
+            slim_example_decoder.Tensor('image/class/confidence')),
         # Object boxes and classes.
         fields.InputDataFields.groundtruth_boxes: (
             slim_example_decoder.BoundingBox(['ymin', 'xmin', 'ymax', 'xmax'],
@@ -274,6 +305,20 @@ class TfExampleDecoder(data_decoder.DataDecoder):
           'image/object/class/multiclass_scores'] = tf.VarLenFeature(tf.float32)
       self.items_to_handlers[fields.InputDataFields.multiclass_scores] = (
           slim_example_decoder.Tensor('image/object/class/multiclass_scores'))
+    if load_context_features:
+      self.keys_to_features[
+          'image/context_features'] = tf.VarLenFeature(tf.float32)
+      self.items_to_handlers[fields.InputDataFields.context_features] = (
+          slim_example_decoder.ItemHandlerCallback(
+              ['image/context_features', 'image/context_feature_length'],
+              self._reshape_context_features))
+
+      self.keys_to_features[
+          'image/context_feature_length'] = tf.FixedLenFeature((), tf.int64)
+      self.items_to_handlers[fields.InputDataFields.context_feature_length] = (
+          slim_example_decoder.Tensor('image/context_feature_length'))
+
     if num_additional_channels > 0:
       self.keys_to_features[
           'image/additional_channels/encoded'] = tf.FixedLenFeature(
@@ -287,10 +332,17 @@ class TfExampleDecoder(data_decoder.DataDecoder):
           tf.VarLenFeature(tf.float32))
       self.keys_to_features['image/object/keypoint/y'] = (
           tf.VarLenFeature(tf.float32))
+      self.keys_to_features['image/object/keypoint/visibility'] = (
+          tf.VarLenFeature(tf.int64))
       self.items_to_handlers[fields.InputDataFields.groundtruth_keypoints] = (
           slim_example_decoder.ItemHandlerCallback(
               ['image/object/keypoint/y', 'image/object/keypoint/x'],
               self._reshape_keypoints))
+      kpt_vis_field = fields.InputDataFields.groundtruth_keypoint_visibilities
+      self.items_to_handlers[kpt_vis_field] = (
+          slim_example_decoder.ItemHandlerCallback(
+              ['image/object/keypoint/x', 'image/object/keypoint/visibility'],
+              self._reshape_keypoint_visibilities))
     if load_instance_masks:
       if instance_mask_type in (input_reader_pb2.DEFAULT,
                                 input_reader_pb2.NUMERICAL_MASKS):
@@ -363,6 +415,9 @@ class TfExampleDecoder(data_decoder.DataDecoder):
         [None] indicating if the boxes enclose a crowd.

       Optional:
+      fields.InputDataFields.groundtruth_image_confidences - 1D float tensor of
+        shape [None] indicating if a class is present in the image (1.0) or
+        a class is not present in the image (0.0).
       fields.InputDataFields.image_additional_channels - 3D uint8 tensor of
         shape [None, None, num_additional_channels]. 1st dim is height; 2nd dim
         is width; 3rd dim is the number of additional channels.
@@ -371,8 +426,10 @@ class TfExampleDecoder(data_decoder.DataDecoder):
       fields.InputDataFields.groundtruth_group_of - 1D bool tensor of shape
         [None] indicating if the boxes represent `group_of` instances.
       fields.InputDataFields.groundtruth_keypoints - 3D float32 tensor of
-        shape [None, None, 2] containing keypoints, where the coordinates of
-        the keypoints are ordered (y, x).
+        shape [None, num_keypoints, 2] containing keypoints, where the
+        coordinates of the keypoints are ordered (y, x).
+      fields.InputDataFields.groundtruth_keypoint_visibilities - 2D bool
+        tensor of shape [None, num_keypoints] containing keypoint visibilities.
       fields.InputDataFields.groundtruth_instance_masks - 3D float32 tensor of
         shape [None, None, None] containing instance masks.
       fields.InputDataFields.groundtruth_image_classes - 1D uint64 of shape
@@ -380,6 +437,10 @@ class TfExampleDecoder(data_decoder.DataDecoder):
       fields.InputDataFields.multiclass_scores - 1D float32 tensor of shape
         [None * num_classes] containing flattened multiclass scores for
         groundtruth boxes.
+      fields.InputDataFields.context_features - 1D float32 tensor of shape
+        [context_feature_length * num_context_features].
+      fields.InputDataFields.context_feature_length - int32 tensor specifying
+        the length of each feature in context_features.
     """
     serialized_example = tf.reshape(tf_example_string_tensor, shape=[])
     decoder = slim_example_decoder.TFExampleDecoder(self.keys_to_features,
@@ -410,20 +471,34 @@ class TfExampleDecoder(data_decoder.DataDecoder):
             tensor_dict[fields.InputDataFields.groundtruth_weights])[0],
         0), lambda: tensor_dict[fields.InputDataFields.groundtruth_weights],
         default_groundtruth_weights)
+
+    if fields.InputDataFields.groundtruth_keypoints in tensor_dict:
+      # Set all keypoints that are not labeled to NaN.
+      gt_kpt_fld = fields.InputDataFields.groundtruth_keypoints
+      gt_kpt_vis_fld = fields.InputDataFields.groundtruth_keypoint_visibilities
+      visibilities_tiled = tf.tile(
+          tf.expand_dims(tensor_dict[gt_kpt_vis_fld], -1),
+          [1, 1, 2])
+      tensor_dict[gt_kpt_fld] = tf.where(
+          visibilities_tiled,
+          tensor_dict[gt_kpt_fld],
+          np.nan * tf.ones_like(tensor_dict[gt_kpt_fld]))
+
     return tensor_dict

   def _reshape_keypoints(self, keys_to_tensors):
     """Reshape keypoints.

-    The instance segmentation masks are reshaped to [num_instances,
-    num_keypoints, 2].
+    The keypoints are reshaped to [num_instances, num_keypoints, 2].

     Args:
-      keys_to_tensors: a dictionary from keys to tensors.
+      keys_to_tensors: a dictionary from keys to tensors. Expected keys are:
+        'image/object/keypoint/x'
+        'image/object/keypoint/y'

     Returns:
       A 3-D float tensor of shape [num_instances, num_keypoints, 2] with values
-        in {0, 1}.
+        in [0, 1].
     """
     y = keys_to_tensors['image/object/keypoint/y']
     if isinstance(y, tf.SparseTensor):
@@ -437,6 +512,54 @@ class TfExampleDecoder(data_decoder.DataDecoder):
     keypoints = tf.reshape(keypoints, [-1, self._num_keypoints, 2])
     return keypoints

+  def _reshape_keypoint_visibilities(self, keys_to_tensors):
+    """Reshape keypoint visibilities.
+
+    The keypoint visibilities are reshaped to [num_instances, num_keypoints].
+
+    The raw keypoint visibilities are expected to conform to the MS Coco
+    definition. See Visibility enum.
+
+    The returned boolean is True for the labeled case (either
+    Visibility.NOT_VISIBLE or Visibility.VISIBLE). These are the same
+    categories that COCO uses to evaluate keypoint detection performance:
+    http://cocodataset.org/#keypoints-eval
+
+    If image/object/keypoint/visibility is not provided, visibilities will be
+    set to True for finite keypoint coordinate values, and False if the
+    coordinates are NaN.
+
+    Args:
+      keys_to_tensors: a dictionary from keys to tensors. Expected keys are:
+        'image/object/keypoint/x'
+        'image/object/keypoint/visibility'
+
+    Returns:
+      A 2-D bool tensor of shape [num_instances, num_keypoints] with values
+        in {0, 1}. 1 if the keypoint is labeled, 0 otherwise.
+    """
+    x = keys_to_tensors['image/object/keypoint/x']
+    vis = keys_to_tensors['image/object/keypoint/visibility']
+    if isinstance(vis, tf.SparseTensor):
+      vis = tf.sparse_tensor_to_dense(vis)
+    if isinstance(x, tf.SparseTensor):
+      x = tf.sparse_tensor_to_dense(x)
+
+    default_vis = tf.where(
+        tf.math.is_nan(x),
+        Visibility.UNLABELED.value * tf.ones_like(x, dtype=tf.int64),
+        Visibility.VISIBLE.value * tf.ones_like(x, dtype=tf.int64))
+    # Use visibility if provided, otherwise use the default visibility.
+    vis = tf.cond(tf.equal(tf.size(x), tf.size(vis)),
+                  true_fn=lambda: vis,
+                  false_fn=lambda: default_vis)
+
+    vis = tf.math.logical_or(
+        tf.math.equal(vis, Visibility.NOT_VISIBLE.value),
+        tf.math.equal(vis, Visibility.VISIBLE.value))
+    vis = tf.reshape(vis, [-1, self._num_keypoints])
+    return vis
+
   def _reshape_instance_masks(self, keys_to_tensors):
     """Reshape instance segmentation masks.
@@ -460,6 +583,26 @@ class TfExampleDecoder(data_decoder.DataDecoder):
           tf.cast(tf.greater(masks, 0.0), dtype=tf.float32), to_shape)
       return tf.cast(masks, tf.float32)

+  def _reshape_context_features(self, keys_to_tensors):
+    """Reshape context features.
+
+    The instance context_features are reshaped to
+    [num_context_features, context_feature_length].
+
+    Args:
+      keys_to_tensors: a dictionary from keys to tensors.
+
+    Returns:
+      A 2-D float tensor of shape [num_context_features,
+        context_feature_length].
+    """
+    context_feature_length = keys_to_tensors['image/context_feature_length']
+    to_shape = tf.cast(tf.stack([-1, context_feature_length]), tf.int32)
+    context_features = keys_to_tensors['image/context_features']
+    if isinstance(context_features, tf.SparseTensor):
+      context_features = tf.sparse_tensor_to_dense(context_features)
+    context_features = tf.reshape(context_features, to_shape)
+    return context_features
+
   def _decode_png_instance_masks(self, keys_to_tensors):
     """Decode PNG instance segmentation masks and stack into dense tensor.
...
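Taken together, the decoder changes are exercised like this minimal usage sketch (mirroring the tests below; `serialized` stands in for a serialized tf.Example string carrying the keys introduced above):

import tensorflow as tf
from object_detection.core import standard_fields as fields
from object_detection.data_decoders import tf_example_decoder

decoder = tf_example_decoder.TfExampleDecoder(
    num_keypoints=17,            # also wires up the visibility handler
    load_context_features=True)  # parses image/context_features(_length)
tensor_dict = decoder.decode(tf.convert_to_tensor(serialized))

# Unlabeled keypoints come back as NaN; visibilities as a bool tensor.
keypoints = tensor_dict[fields.InputDataFields.groundtruth_keypoints]
visibilities = tensor_dict[
    fields.InputDataFields.groundtruth_keypoint_visibilities]
context = tensor_dict[fields.InputDataFields.context_features]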
@@ -24,8 +24,6 @@ from object_detection.data_decoders import tf_example_decoder
 from object_detection.protos import input_reader_pb2
 from object_detection.utils import dataset_util

-slim_example_decoder = tf.contrib.slim.tfexample_decoder
-

 class TfExampleDecoderTest(tf.test.TestCase):
@@ -265,6 +263,68 @@ class TfExampleDecoderTest(tf.test.TestCase):
         tensor_dict[fields.InputDataFields.groundtruth_boxes])

   def testDecodeKeypoint(self):
+    image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
+    encoded_jpeg = self._EncodeImage(image_tensor)
+    bbox_ymins = [0.0, 4.0]
+    bbox_xmins = [1.0, 5.0]
+    bbox_ymaxs = [2.0, 6.0]
+    bbox_xmaxs = [3.0, 7.0]
+    keypoint_ys = [0.0, 1.0, 2.0, 3.0, 4.0, 5.0]
+    keypoint_xs = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]
+    keypoint_visibility = [1, 2, 0, 1, 0, 2]
+    example = tf.train.Example(
+        features=tf.train.Features(
+            feature={
+                'image/encoded':
+                    dataset_util.bytes_feature(encoded_jpeg),
+                'image/format':
+                    dataset_util.bytes_feature(six.b('jpeg')),
+                'image/object/bbox/ymin':
+                    dataset_util.float_list_feature(bbox_ymins),
+                'image/object/bbox/xmin':
+                    dataset_util.float_list_feature(bbox_xmins),
+                'image/object/bbox/ymax':
+                    dataset_util.float_list_feature(bbox_ymaxs),
+                'image/object/bbox/xmax':
+                    dataset_util.float_list_feature(bbox_xmaxs),
+                'image/object/keypoint/y':
+                    dataset_util.float_list_feature(keypoint_ys),
+                'image/object/keypoint/x':
+                    dataset_util.float_list_feature(keypoint_xs),
+                'image/object/keypoint/visibility':
+                    dataset_util.int64_list_feature(keypoint_visibility),
+            })).SerializeToString()
+
+    example_decoder = tf_example_decoder.TfExampleDecoder(num_keypoints=3)
+    tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
+
+    self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_boxes]
+                         .get_shape().as_list()), [None, 4])
+    self.assertAllEqual(
+        (tensor_dict[fields.InputDataFields.groundtruth_keypoints].get_shape()
+         .as_list()), [2, 3, 2])
+    with self.test_session() as sess:
+      tensor_dict = sess.run(tensor_dict)
+
+    expected_boxes = np.vstack([bbox_ymins, bbox_xmins, bbox_ymaxs,
+                                bbox_xmaxs]).transpose()
+    self.assertAllEqual(expected_boxes,
+                        tensor_dict[fields.InputDataFields.groundtruth_boxes])
+    expected_keypoints = [
+        [[0.0, 1.0], [1.0, 2.0], [np.nan, np.nan]],
+        [[3.0, 4.0], [np.nan, np.nan], [5.0, 6.0]]]
+    self.assertAllClose(
+        expected_keypoints,
+        tensor_dict[fields.InputDataFields.groundtruth_keypoints])
+    expected_visibility = (
+        (np.array(keypoint_visibility) > 0).reshape((2, 3)))
+    self.assertAllEqual(
+        expected_visibility,
+        tensor_dict[fields.InputDataFields.groundtruth_keypoint_visibilities])
+
+  def testDecodeKeypointNoVisibilities(self):
     image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
     encoded_jpeg = self._EncodeImage(image_tensor)
     bbox_ymins = [0.0, 4.0]
@@ -316,6 +376,11 @@ class TfExampleDecoderTest(tf.test.TestCase):
         expected_keypoints,
         tensor_dict[fields.InputDataFields.groundtruth_keypoints])
+    expected_visibility = np.ones((2, 3))
+    self.assertAllEqual(
+        expected_visibility,
+        tensor_dict[fields.InputDataFields.groundtruth_keypoint_visibilities])

   def testDecodeDefaultGroundtruthWeights(self):
     image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
     encoded_jpeg = self._EncodeImage(image_tensor)
@@ -841,6 +906,34 @@ class TfExampleDecoderTest(tf.test.TestCase):
     self.assertAllEqual(object_weights,
                         tensor_dict[fields.InputDataFields.groundtruth_weights])

+  def testDecodeClassConfidence(self):
+    image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
+    encoded_jpeg = self._EncodeImage(image_tensor)
+    class_confidence = [0.0, 1.0, 0.0]
+
+    example = tf.train.Example(
+        features=tf.train.Features(
+            feature={
+                'image/encoded':
+                    dataset_util.bytes_feature(encoded_jpeg),
+                'image/format':
+                    dataset_util.bytes_feature(six.b('jpeg')),
+                'image/class/confidence':
+                    dataset_util.float_list_feature(class_confidence),
+            })).SerializeToString()
+
+    example_decoder = tf_example_decoder.TfExampleDecoder()
+    tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
+    self.assertAllEqual(
+        (tensor_dict[fields.InputDataFields.groundtruth_image_confidences]
+         .get_shape().as_list()), [3])
+    with self.test_session() as sess:
+      tensor_dict = sess.run(tensor_dict)
+
+    self.assertAllEqual(
+        class_confidence,
+        tensor_dict[fields.InputDataFields.groundtruth_image_confidences])
+
   def testDecodeInstanceSegmentation(self):
     num_instances = 4
     image_height = 5
@@ -992,6 +1085,87 @@ class TfExampleDecoderTest(tf.test.TestCase):
         tensor_dict[fields.InputDataFields.groundtruth_image_classes],
         np.array([1, 3]))

+  def testDecodeContextFeatures(self):
+    image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
+    encoded_jpeg = self._EncodeImage(image_tensor)
+    bbox_ymins = [0.0, 4.0]
+    bbox_xmins = [1.0, 5.0]
+    bbox_ymaxs = [2.0, 6.0]
+    bbox_xmaxs = [3.0, 7.0]
+    num_features = 8
+    context_feature_length = 10
+    context_features = np.random.random(num_features * context_feature_length)
+    example = tf.train.Example(
+        features=tf.train.Features(
+            feature={
+                'image/encoded':
+                    dataset_util.bytes_feature(encoded_jpeg),
+                'image/format':
+                    dataset_util.bytes_feature(six.b('jpeg')),
+                'image/context_features':
+                    dataset_util.float_list_feature(context_features),
+                'image/context_feature_length':
+                    dataset_util.int64_feature(context_feature_length),
+                'image/object/bbox/ymin':
+                    dataset_util.float_list_feature(bbox_ymins),
+                'image/object/bbox/xmin':
+                    dataset_util.float_list_feature(bbox_xmins),
+                'image/object/bbox/ymax':
+                    dataset_util.float_list_feature(bbox_ymaxs),
+                'image/object/bbox/xmax':
+                    dataset_util.float_list_feature(bbox_xmaxs),
+            })).SerializeToString()
+
+    example_decoder = tf_example_decoder.TfExampleDecoder(
+        load_context_features=True)
+    tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
+    with self.test_session() as sess:
+      tensor_dict = sess.run(tensor_dict)
+
+    self.assertAllClose(
+        context_features.reshape(num_features, context_feature_length),
+        tensor_dict[fields.InputDataFields.context_features])
+    self.assertAllEqual(
+        context_feature_length,
+        tensor_dict[fields.InputDataFields.context_feature_length])
+
+  def testContextFeaturesNotAvailableByDefault(self):
+    image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
+    encoded_jpeg = self._EncodeImage(image_tensor)
+    bbox_ymins = [0.0, 4.0]
+    bbox_xmins = [1.0, 5.0]
+    bbox_ymaxs = [2.0, 6.0]
+    bbox_xmaxs = [3.0, 7.0]
+    num_features = 10
+    context_feature_length = 10
+    context_features = np.random.random(num_features * context_feature_length)
+    example = tf.train.Example(
+        features=tf.train.Features(
+            feature={
+                'image/encoded':
+                    dataset_util.bytes_feature(encoded_jpeg),
+                'image/format':
+                    dataset_util.bytes_feature(six.b('jpeg')),
+                'image/context_features':
+                    dataset_util.float_list_feature(context_features),
+                'image/context_feature_length':
+                    dataset_util.int64_feature(context_feature_length),
+                'image/object/bbox/ymin':
+                    dataset_util.float_list_feature(bbox_ymins),
+                'image/object/bbox/xmin':
+                    dataset_util.float_list_feature(bbox_xmins),
+                'image/object/bbox/ymax':
+                    dataset_util.float_list_feature(bbox_ymaxs),
+                'image/object/bbox/xmax':
+                    dataset_util.float_list_feature(bbox_xmaxs),
+            })).SerializeToString()
+
+    example_decoder = tf_example_decoder.TfExampleDecoder()
+    tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
+    with self.test_session() as sess:
+      tensor_dict = sess.run(tensor_dict)
+    self.assertNotIn(fields.InputDataFields.context_features,
+                     tensor_dict)

 if __name__ == '__main__':
   tf.test.main()
...@@ -12,7 +12,6 @@ ...@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# ============================================================================== # ==============================================================================
r"""Convert raw COCO dataset to TFRecord for object_detection. r"""Convert raw COCO dataset to TFRecord for object_detection.
Please note that this tool creates sharded output files. Please note that this tool creates sharded output files.
...@@ -34,6 +33,7 @@ from __future__ import print_function ...@@ -34,6 +33,7 @@ from __future__ import print_function
import hashlib import hashlib
import io import io
import json import json
import logging
import os import os
import contextlib2 import contextlib2
import numpy as np import numpy as np
...@@ -46,57 +46,69 @@ from object_detection.dataset_tools import tf_record_creation_util ...@@ -46,57 +46,69 @@ from object_detection.dataset_tools import tf_record_creation_util
from object_detection.utils import dataset_util from object_detection.utils import dataset_util
from object_detection.utils import label_map_util from object_detection.utils import label_map_util
flags = tf.app.flags flags = tf.app.flags
tf.flags.DEFINE_boolean('include_masks', False, tf.flags.DEFINE_boolean(
'Whether to include instance segmentations masks ' 'include_masks', False, 'Whether to include instance segmentations masks '
'(PNG encoded) in the result. default: False.') '(PNG encoded) in the result. default: False.')
tf.flags.DEFINE_string('train_image_dir', '', tf.flags.DEFINE_string('train_image_dir', '', 'Training image directory.')
'Training image directory.') tf.flags.DEFINE_string('val_image_dir', '', 'Validation image directory.')
tf.flags.DEFINE_string('val_image_dir', '', tf.flags.DEFINE_string('test_image_dir', '', 'Test image directory.')
'Validation image directory.')
tf.flags.DEFINE_string('test_image_dir', '',
'Test image directory.')
tf.flags.DEFINE_string('train_annotations_file', '', tf.flags.DEFINE_string('train_annotations_file', '',
'Training annotations JSON file.') 'Training annotations JSON file.')
tf.flags.DEFINE_string('val_annotations_file', '', tf.flags.DEFINE_string('val_annotations_file', '',
'Validation annotations JSON file.') 'Validation annotations JSON file.')
tf.flags.DEFINE_string('testdev_annotations_file', '', tf.flags.DEFINE_string('testdev_annotations_file', '',
'Test-dev annotations JSON file.') 'Test-dev annotations JSON file.')
tf.flags.DEFINE_string('train_keypoint_annotations_file', '',
'Training annotations JSON file.')
tf.flags.DEFINE_string('val_keypoint_annotations_file', '',
'Validation annotations JSON file.')
tf.flags.DEFINE_string('output_dir', '/tmp/', 'Output data directory.') tf.flags.DEFINE_string('output_dir', '/tmp/', 'Output data directory.')
FLAGS = flags.FLAGS FLAGS = flags.FLAGS
tf.logging.set_verbosity(tf.logging.INFO) logger = tf.get_logger()
logger.setLevel(logging.INFO)
_COCO_KEYPOINT_NAMES = [
b'nose', b'left_eye', b'right_eye', b'left_ear', b'right_ear',
b'left_shoulder', b'right_shoulder', b'left_elbow', b'right_elbow',
b'left_wrist', b'right_wrist', b'left_hip', b'right_hip',
b'left_knee', b'right_knee', b'left_ankle', b'right_ankle'
]
def create_tf_example(image, def create_tf_example(image,
annotations_list, annotations_list,
image_dir, image_dir,
category_index, category_index,
include_masks=False): include_masks=False,
keypoint_annotations_dict=None):
"""Converts image and annotations to a tf.Example proto. """Converts image and annotations to a tf.Example proto.
Args: Args:
image: dict with keys: image: dict with keys: [u'license', u'file_name', u'coco_url', u'height',
[u'license', u'file_name', u'coco_url', u'height', u'width', u'width', u'date_captured', u'flickr_url', u'id']
u'date_captured', u'flickr_url', u'id']
annotations_list: annotations_list:
list of dicts with keys: list of dicts with keys: [u'segmentation', u'area', u'iscrowd',
[u'segmentation', u'area', u'iscrowd', u'image_id', u'image_id', u'bbox', u'category_id', u'id'] Notice that bounding box
u'bbox', u'category_id', u'id'] coordinates in the official COCO dataset are given as [x, y, width,
Notice that bounding box coordinates in the official COCO dataset are height] tuples using absolute coordinates where x, y represent the
given as [x, y, width, height] tuples using absolute coordinates where top-left (0-indexed) corner. This function converts to the format
x, y represent the top-left (0-indexed) corner. This function converts expected by the Tensorflow Object Detection API (which is which is
to the format expected by the Tensorflow Object Detection API (which is [ymin, xmin, ymax, xmax] with coordinates normalized relative to image
which is [ymin, xmin, ymax, xmax] with coordinates normalized relative size).
to image size).
image_dir: directory containing the image files. image_dir: directory containing the image files.
category_index: a dict containing COCO category information keyed category_index: a dict containing COCO category information keyed by the
by the 'id' field of each category. See the 'id' field of each category. See the label_map_util.create_category_index
label_map_util.create_category_index function. function.
include_masks: Whether to include instance segmentations masks include_masks: Whether to include instance segmentations masks
(PNG encoded) in the result. default: False. (PNG encoded) in the result. default: False.
keypoint_annotations_dict: A dictionary that maps from annotation_id to a
dictionary with keys: [u'keypoints', u'num_keypoints'] represeting the
keypoint information for this person object annotation. If None, then
no keypoint annotations will be populated.
Returns: Returns:
example: The converted tf.Example example: The converted tf.Example
num_annotations_skipped: Number of (invalid) annotations that were ignored. num_annotations_skipped: Number of (invalid) annotations that were ignored.
...@@ -125,7 +137,15 @@ def create_tf_example(image, ...@@ -125,7 +137,15 @@ def create_tf_example(image,
category_ids = [] category_ids = []
area = [] area = []
encoded_mask_png = [] encoded_mask_png = []
keypoints_x = []
keypoints_y = []
keypoints_visibility = []
keypoints_name = []
num_keypoints = []
include_keypoint = keypoint_annotations_dict is not None
num_annotations_skipped = 0 num_annotations_skipped = 0
num_keypoint_annotation_used = 0
num_keypoint_annotation_skipped = 0
for object_annotations in annotations_list: for object_annotations in annotations_list:
(x, y, width, height) = tuple(object_annotations['bbox']) (x, y, width, height) = tuple(object_annotations['bbox'])
if width <= 0 or height <= 0: if width <= 0 or height <= 0:
...@@ -154,6 +174,29 @@ def create_tf_example(image, ...@@ -154,6 +174,29 @@ def create_tf_example(image,
output_io = io.BytesIO() output_io = io.BytesIO()
pil_image.save(output_io, format='PNG') pil_image.save(output_io, format='PNG')
encoded_mask_png.append(output_io.getvalue()) encoded_mask_png.append(output_io.getvalue())
if include_keypoint:
annotation_id = object_annotations['id']
if annotation_id in keypoint_annotations_dict:
num_keypoint_annotation_used += 1
keypoint_annotations = keypoint_annotations_dict[annotation_id]
keypoints = keypoint_annotations['keypoints']
num_kpts = keypoint_annotations['num_keypoints']
keypoints_x_abs = keypoints[::3]
keypoints_x.extend(
[float(x_abs) / image_width for x_abs in keypoints_x_abs])
keypoints_y_abs = keypoints[1::3]
keypoints_y.extend(
[float(y_abs) / image_height for y_abs in keypoints_y_abs])
keypoints_visibility.extend(keypoints[2::3])
keypoints_name.extend(_COCO_KEYPOINT_NAMES)
num_keypoints.append(num_kpts)
else:
keypoints_x.extend([0.0] * len(_COCO_KEYPOINT_NAMES))
keypoints_y.extend([0.0] * len(_COCO_KEYPOINT_NAMES))
keypoints_visibility.extend([0] * len(_COCO_KEYPOINT_NAMES))
keypoints_name.extend(_COCO_KEYPOINT_NAMES)
num_keypoints.append(0)
feature_dict = { feature_dict = {
'image/height': 'image/height':
dataset_util.int64_feature(image_height), dataset_util.int64_feature(image_height),
...@@ -187,12 +230,28 @@ def create_tf_example(image, ...@@ -187,12 +230,28 @@ def create_tf_example(image,
if include_masks: if include_masks:
feature_dict['image/object/mask'] = ( feature_dict['image/object/mask'] = (
dataset_util.bytes_list_feature(encoded_mask_png)) dataset_util.bytes_list_feature(encoded_mask_png))
if include_keypoint:
feature_dict['image/object/keypoint/x'] = (
dataset_util.float_list_feature(keypoints_x))
feature_dict['image/object/keypoint/y'] = (
dataset_util.float_list_feature(keypoints_y))
feature_dict['image/object/keypoint/num'] = (
dataset_util.int64_list_feature(num_keypoints))
feature_dict['image/object/keypoint/visibility'] = (
dataset_util.int64_list_feature(keypoints_visibility))
feature_dict['image/object/keypoint/text'] = (
dataset_util.bytes_list_feature(keypoints_name))
num_keypoint_annotation_skipped = (
len(keypoint_annotations_dict) - num_keypoint_annotation_used)
example = tf.train.Example(features=tf.train.Features(feature=feature_dict)) example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
return key, example, num_annotations_skipped return key, example, num_annotations_skipped, num_keypoint_annotation_skipped
def _create_tf_record_from_coco_annotations( def _create_tf_record_from_coco_annotations(annotations_file, image_dir,
annotations_file, image_dir, output_path, include_masks, num_shards): output_path, include_masks,
num_shards,
keypoint_annotations_file=''):
"""Loads COCO annotation json files and converts to tf.Record format. """Loads COCO annotation json files and converts to tf.Record format.
Args: Args:
...@@ -202,6 +261,9 @@ def _create_tf_record_from_coco_annotations( ...@@ -202,6 +261,9 @@ def _create_tf_record_from_coco_annotations(
include_masks: Whether to include instance segmentations masks include_masks: Whether to include instance segmentations masks
(PNG encoded) in the result. default: False. (PNG encoded) in the result. default: False.
num_shards: number of output file shards. num_shards: number of output file shards.
keypoint_annotations_file: JSON file containing the person keypoint
annotations. If empty, then no person keypoint annotations will be
generated.
""" """
with contextlib2.ExitStack() as tf_record_close_stack, \ with contextlib2.ExitStack() as tf_record_close_stack, \
tf.gfile.GFile(annotations_file, 'r') as fid: tf.gfile.GFile(annotations_file, 'r') as fid:
...@@ -214,8 +276,7 @@ def _create_tf_record_from_coco_annotations( ...@@ -214,8 +276,7 @@ def _create_tf_record_from_coco_annotations(
annotations_index = {} annotations_index = {}
if 'annotations' in groundtruth_data: if 'annotations' in groundtruth_data:
tf.logging.info( logging.info('Found groundtruth annotations. Building annotations index.')
'Found groundtruth annotations. Building annotations index.')
for annotation in groundtruth_data['annotations']: for annotation in groundtruth_data['annotations']:
image_id = annotation['image_id'] image_id = annotation['image_id']
if image_id not in annotations_index: if image_id not in annotations_index:
...@@ -227,21 +288,43 @@ def _create_tf_record_from_coco_annotations( ...@@ -227,21 +288,43 @@ def _create_tf_record_from_coco_annotations(
if image_id not in annotations_index: if image_id not in annotations_index:
missing_annotation_count += 1 missing_annotation_count += 1
annotations_index[image_id] = [] annotations_index[image_id] = []
tf.logging.info('%d images are missing annotations.', logging.info('%d images are missing annotations.', missing_annotation_count)
missing_annotation_count)
    keypoint_annotations_index = {}
    if keypoint_annotations_file:
      with tf.gfile.GFile(keypoint_annotations_file, 'r') as kid:
        keypoint_groundtruth_data = json.load(kid)
      if 'annotations' in keypoint_groundtruth_data:
        for annotation in keypoint_groundtruth_data['annotations']:
          image_id = annotation['image_id']
          if image_id not in keypoint_annotations_index:
            keypoint_annotations_index[image_id] = {}
          keypoint_annotations_index[image_id][annotation['id']] = annotation

    total_num_annotations_skipped = 0
    total_num_keypoint_annotations_skipped = 0
    for idx, image in enumerate(images):
      if idx % 100 == 0:
        logging.info('On image %d of %d', idx, len(images))
      annotations_list = annotations_index[image['id']]
      keypoint_annotations_dict = None
      if keypoint_annotations_file:
        keypoint_annotations_dict = {}
        if image['id'] in keypoint_annotations_index:
          keypoint_annotations_dict = keypoint_annotations_index[image['id']]
      (_, tf_example, num_annotations_skipped,
       num_keypoint_annotations_skipped) = create_tf_example(
           image, annotations_list, image_dir, category_index, include_masks,
           keypoint_annotations_dict)
      total_num_annotations_skipped += num_annotations_skipped
      total_num_keypoint_annotations_skipped += num_keypoint_annotations_skipped
      shard_idx = idx % num_shards
      output_tfrecords[shard_idx].write(tf_example.SerializeToString())
    logging.info('Finished writing, skipped %d annotations.',
                 total_num_annotations_skipped)
    if keypoint_annotations_file:
      logging.info('Finished writing, skipped %d keypoint annotations.',
                   total_num_keypoint_annotations_skipped)
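
The converter above consumes the standard COCO person-keypoints layout: each annotation stores its keypoints as one flat [x1, y1, v1, x2, y2, v2, ...] list. A minimal sketch of that parsing step under those assumptions (the helper name and normalization are illustrative, not part of this file):

import numpy as np

def parse_coco_keypoints(keypoints, image_width, image_height):
  """Splits flat COCO [x, y, v] triplets into normalized coords and visibility."""
  kp = np.asarray(keypoints, dtype=np.float32).reshape(-1, 3)
  xs = kp[:, 0] / image_width   # normalized x in [0, 1]
  ys = kp[:, 1] / image_height  # normalized y in [0, 1]
  # COCO visibility flags: 0 = not labeled, 1 = labeled but occluded,
  # 2 = labeled and visible.
  vs = kp[:, 2].astype(np.int64)
  return xs, ys, vs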

def main(_):
@@ -263,13 +346,15 @@ def main(_):
      FLAGS.train_image_dir,
      train_output_path,
      FLAGS.include_masks,
      num_shards=100,
      keypoint_annotations_file=FLAGS.train_keypoint_annotations_file)
  _create_tf_record_from_coco_annotations(
      FLAGS.val_annotations_file,
      FLAGS.val_image_dir,
      val_output_path,
      FLAGS.include_masks,
      num_shards=100,
      keypoint_annotations_file=FLAGS.val_keypoint_annotations_file)
  _create_tf_record_from_coco_annotations(
      FLAGS.testdev_annotations_file,
      FLAGS.test_image_dir,
...

@@ -20,6 +20,7 @@ import os
import numpy as np
import PIL.Image
import six
import tensorflow as tf

from object_detection.dataset_tools import create_coco_tf_record

@@ -37,6 +38,16 @@ class CreateCocoTFRecordTest(tf.test.TestCase):
    proto_list = [p for p in proto_field]
    self.assertListEqual(proto_list, expectation)
  def _assertProtoClose(self, proto_field, expectation):
    """Helper function to assert if a proto field nearly equals some value.

    Args:
      proto_field: The protobuf field to compare.
      expectation: The expected value of the protobuf field.
    """
    proto_list = [p for p in proto_field]
    self.assertAllClose(proto_list, expectation)

  def test_create_tf_example(self):
    image_file_name = 'tmp_image.jpg'
    image_data = np.random.rand(256, 256, 3)
@@ -78,7 +89,7 @@ class CreateCocoTFRecordTest(tf.test.TestCase):
    }

    (_, example,
     num_annotations_skipped, _) = create_coco_tf_record.create_tf_example(
         image, annotations_list, image_dir, category_index)

    self.assertEqual(num_annotations_skipped, 0)
@@ -88,12 +99,13 @@ class CreateCocoTFRecordTest(tf.test.TestCase):
        example.features.feature['image/width'].int64_list.value, [256])
    self._assertProtoEqual(
        example.features.feature['image/filename'].bytes_list.value,
        [six.b(image_file_name)])
    self._assertProtoEqual(
        example.features.feature['image/source_id'].bytes_list.value,
        [six.b(str(image['id']))])
    self._assertProtoEqual(
        example.features.feature['image/format'].bytes_list.value,
        [six.b('jpeg')])
    self._assertProtoEqual(
        example.features.feature['image/object/bbox/xmin'].float_list.value,
        [0.25])
@@ -108,7 +120,7 @@ class CreateCocoTFRecordTest(tf.test.TestCase):
        [0.75])
    self._assertProtoEqual(
        example.features.feature['image/object/class/text'].bytes_list.value,
        [six.b('cat')])

  def test_create_tf_example_with_instance_masks(self):
    image_file_name = 'tmp_image.jpg'
@@ -144,7 +156,7 @@ class CreateCocoTFRecordTest(tf.test.TestCase):
    }

    (_, example,
     num_annotations_skipped, _) = create_coco_tf_record.create_tf_example(
         image, annotations_list, image_dir, category_index, include_masks=True)

    self.assertEqual(num_annotations_skipped, 0)
@@ -154,12 +166,13 @@ class CreateCocoTFRecordTest(tf.test.TestCase):
        example.features.feature['image/width'].int64_list.value, [8])
    self._assertProtoEqual(
        example.features.feature['image/filename'].bytes_list.value,
        [six.b(image_file_name)])
    self._assertProtoEqual(
        example.features.feature['image/source_id'].bytes_list.value,
        [six.b(str(image['id']))])
    self._assertProtoEqual(
        example.features.feature['image/format'].bytes_list.value,
        [six.b('jpeg')])
    self._assertProtoEqual(
        example.features.feature['image/object/bbox/xmin'].float_list.value,
        [0])
@@ -174,7 +187,7 @@ class CreateCocoTFRecordTest(tf.test.TestCase):
        [1])
    self._assertProtoEqual(
        example.features.feature['image/object/class/text'].bytes_list.value,
        [six.b('dog')])
    encoded_mask_pngs = [
        io.BytesIO(encoded_masks) for encoded_masks in example.features.feature[
            'image/object/mask'].bytes_list.value
@@ -183,13 +196,120 @@ class CreateCocoTFRecordTest(tf.test.TestCase):
        np.array(PIL.Image.open(encoded_mask_png))
        for encoded_mask_png in encoded_mask_pngs
    ]
    self.assertEqual(len(pil_masks), 1)
    self.assertAllEqual(pil_masks[0],
                        [[1, 1, 1, 0, 0, 0, 0, 0], [1, 1, 0, 0, 0, 0, 0, 0],
                         [1, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0],
                         [0, 0, 0, 0, 0, 0, 0, 1], [0, 0, 0, 0, 0, 0, 1, 1],
                         [0, 0, 0, 0, 0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 1, 1]])

  def test_create_tf_example_with_keypoints(self):
    image_dir = self.get_temp_dir()
    image_file_name = 'tmp_image.jpg'
    image_data = np.random.randint(low=0, high=256, size=(256, 256, 3)).astype(
        np.uint8)
    save_path = os.path.join(image_dir, image_file_name)
    image = PIL.Image.fromarray(image_data, 'RGB')
    image.save(save_path)

    image = {
        'file_name': image_file_name,
        'height': 256,
        'width': 256,
        'id': 11,
    }

    min_x, min_y = 64, 64
    max_x, max_y = 128, 128
    keypoints = []
    num_visible_keypoints = 0
    xv = []
    yv = []
    vv = []
    for _ in range(17):
      xc = min_x + int(np.random.rand()*(max_x - min_x))
      yc = min_y + int(np.random.rand()*(max_y - min_y))
      vis = np.random.randint(0, 3)
      xv.append(xc)
      yv.append(yc)
      vv.append(vis)
      keypoints.extend([xc, yc, vis])
      num_visible_keypoints += (vis > 0)

    annotations_list = [{
        'area': 0.5,
        'iscrowd': False,
        'image_id': 11,
        'bbox': [64, 64, 128, 128],
        'category_id': 1,
        'id': 1000
    }]

    keypoint_annotations_dict = {
        1000: {
            'keypoints': keypoints,
            'num_keypoints': num_visible_keypoints
        }
    }

    category_index = {
        1: {
            'name': 'person',
            'id': 1
        }
    }

    (_, example, _,
     num_keypoint_annotation_skipped) = create_coco_tf_record.create_tf_example(
         image,
         annotations_list,
         image_dir,
         category_index,
         include_masks=False,
         keypoint_annotations_dict=keypoint_annotations_dict)

    self.assertEqual(num_keypoint_annotation_skipped, 0)
    self._assertProtoEqual(
        example.features.feature['image/height'].int64_list.value, [256])
    self._assertProtoEqual(
        example.features.feature['image/width'].int64_list.value, [256])
    self._assertProtoEqual(
        example.features.feature['image/filename'].bytes_list.value,
        [six.b(image_file_name)])
    self._assertProtoEqual(
        example.features.feature['image/source_id'].bytes_list.value,
        [six.b(str(image['id']))])
    self._assertProtoEqual(
        example.features.feature['image/format'].bytes_list.value,
        [six.b('jpeg')])
    self._assertProtoEqual(
        example.features.feature['image/object/bbox/xmin'].float_list.value,
        [0.25])
    self._assertProtoEqual(
        example.features.feature['image/object/bbox/ymin'].float_list.value,
        [0.25])
    self._assertProtoEqual(
        example.features.feature['image/object/bbox/xmax'].float_list.value,
        [0.75])
    self._assertProtoEqual(
        example.features.feature['image/object/bbox/ymax'].float_list.value,
        [0.75])
    self._assertProtoEqual(
        example.features.feature['image/object/class/text'].bytes_list.value,
        [six.b('person')])
    self._assertProtoClose(
        example.features.feature['image/object/keypoint/x'].float_list.value,
        np.array(xv, dtype=np.float32) / 256)
    self._assertProtoClose(
        example.features.feature['image/object/keypoint/y'].float_list.value,
        np.array(yv, dtype=np.float32) / 256)
    self._assertProtoEqual(
        example.features.feature['image/object/keypoint/text'].bytes_list.value,
        create_coco_tf_record._COCO_KEYPOINT_NAMES)
    self._assertProtoEqual(
        example.features.feature[
            'image/object/keypoint/visibility'].int64_list.value, vv)
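
The keypoint features asserted above can be parsed back with a matching feature spec. A short sketch, with the keys taken from the assertions; the VarLen shapes here are an assumption for illustration, not the API's canonical decoder:

import tensorflow as tf

_KEYPOINT_FEATURE_SPEC = {
    'image/object/keypoint/x': tf.VarLenFeature(tf.float32),
    'image/object/keypoint/y': tf.VarLenFeature(tf.float32),
    'image/object/keypoint/visibility': tf.VarLenFeature(tf.int64),
}

def parse_keypoint_features(serialized_example):
  # Returns sparse tensors; densify with tf.sparse.to_dense if needed.
  return tf.parse_single_example(serialized_example, _KEYPOINT_FEATURE_SPEC)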

  def test_create_sharded_tf_record(self):
    tmp_dir = self.get_temp_dir()
    image_paths = ['tmp1_image.jpg', 'tmp2_image.jpg']
...
@@ -19,6 +19,7 @@ import os
import numpy as np
import PIL.Image
import six
import tensorflow as tf

from object_detection.dataset_tools import create_kitti_tf_record

@@ -75,12 +76,13 @@ class CreateKittiTFRecordTest(tf.test.TestCase):
        example.features.feature['image/width'].int64_list.value, [256])
    self._assertProtoEqual(
        example.features.feature['image/filename'].bytes_list.value,
        [six.b(save_path)])
    self._assertProtoEqual(
        example.features.feature['image/source_id'].bytes_list.value,
        [six.b(save_path)])
    self._assertProtoEqual(
        example.features.feature['image/format'].bytes_list.value,
        [six.b('png')])
    self._assertProtoEqual(
        example.features.feature['image/object/bbox/xmin'].float_list.value,
        [0.25])
@@ -95,7 +97,7 @@ class CreateKittiTFRecordTest(tf.test.TestCase):
        [0.75])
    self._assertProtoEqual(
        example.features.feature['image/object/class/text'].bytes_list.value,
        [six.b('car')])
    self._assertProtoEqual(
        example.features.feature['image/object/class/label'].int64_list.value,
        [1])
...

@@ -19,6 +19,7 @@ import os
import numpy as np
import PIL.Image
import six
import tensorflow as tf

from object_detection.dataset_tools import create_pascal_tf_record

@@ -80,12 +81,13 @@ class CreatePascalTFRecordTest(tf.test.TestCase):
        example.features.feature['image/width'].int64_list.value, [256])
    self._assertProtoEqual(
        example.features.feature['image/filename'].bytes_list.value,
        [six.b(image_file_name)])
    self._assertProtoEqual(
        example.features.feature['image/source_id'].bytes_list.value,
        [six.b(image_file_name)])
    self._assertProtoEqual(
        example.features.feature['image/format'].bytes_list.value,
        [six.b('jpeg')])
    self._assertProtoEqual(
        example.features.feature['image/object/bbox/xmin'].float_list.value,
        [0.25])
@@ -100,7 +102,7 @@ class CreatePascalTFRecordTest(tf.test.TestCase):
        [0.75])
    self._assertProtoEqual(
        example.features.feature['image/object/class/text'].bytes_list.value,
        [six.b('person')])
    self._assertProtoEqual(
        example.features.feature['image/object/class/label'].int64_list.value,
        [1])
@@ -111,7 +113,8 @@ class CreatePascalTFRecordTest(tf.test.TestCase):
        example.features.feature['image/object/truncated'].int64_list.value,
        [0])
    self._assertProtoEqual(
        example.features.feature['image/object/view'].bytes_list.value,
        [six.b('')])


if __name__ == '__main__':
...
# Lint as: python2, python3
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -32,12 +33,15 @@ oid_hierarchical_labels_expansion.py \
  --annotation_type=<1 (for boxes and segments) or 2 (for image-level labels)>
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import copy
import json

from absl import app
from absl import flags
import six

flags.DEFINE_string(
    'json_hierarchy_file', None,
@@ -136,7 +140,7 @@ class OIDHierarchicalLabelsExpansion(object):
    # Row header is expected to be the following for segments:
    # ImageID,LabelName,ImageWidth,ImageHeight,XMin,XMax,YMin,YMax,
    # IsGroupOf,Mask
    split_csv_row = six.ensure_str(csv_row).split(',')
    result = [csv_row]
    assert split_csv_row[
        labelname_column_index] in self._hierarchy_keyed_child
@@ -165,7 +169,7 @@ class OIDHierarchicalLabelsExpansion(object):
    """
    # Row header is expected to be exactly:
    # ImageID,Source,LabelName,Confidence
    split_csv_row = six.ensure_str(csv_row).split(',')
    result = [csv_row]
    if int(split_csv_row[confidence_column_index]) == 1:
      assert split_csv_row[
...
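
The six helpers introduced above are what keep byte/str handling uniform across Python 2 and 3: ensure_str normalizes bytes to text before the CSV split, and ensure_binary does the reverse before serialization. A quick standalone illustration:

import six

assert six.ensure_str(b'ImageID,LabelName').split(',') == ['ImageID', 'LabelName']
assert six.ensure_binary('jpeg') == b'jpeg'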
@@ -18,6 +18,7 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import six
import tensorflow as tf

from object_detection.core import standard_fields
@@ -61,18 +62,21 @@ def tf_example_from_annotations_data_frame(annotations_data_frame, label_map,
          dataset_util.float_list_feature(
              filtered_data_frame_boxes.XMax.as_matrix()),
      standard_fields.TfExampleFields.object_class_text:
          dataset_util.bytes_list_feature([
              six.ensure_binary(label_text)
              for label_text in filtered_data_frame_boxes.LabelName.as_matrix()
          ]),
      standard_fields.TfExampleFields.object_class_label:
          dataset_util.int64_list_feature(
              filtered_data_frame_boxes.LabelName.map(
                  lambda x: label_map[x]).as_matrix()),
      standard_fields.TfExampleFields.filename:
          dataset_util.bytes_feature(
              six.ensure_binary('{}.jpg'.format(image_id))),
      standard_fields.TfExampleFields.source_id:
          dataset_util.bytes_feature(six.ensure_binary(image_id)),
      standard_fields.TfExampleFields.image_encoded:
          dataset_util.bytes_feature(six.ensure_binary(encoded_image)),
  }

  if 'IsGroupOf' in filtered_data_frame.columns:
@@ -100,7 +104,9 @@ def tf_example_from_annotations_data_frame(annotations_data_frame, label_map,
                image_class_label] = dataset_util.int64_list_feature(
                    filtered_data_frame_labels.LabelName.map(
                        lambda x: label_map[x]).as_matrix())
    feature_map[standard_fields.TfExampleFields
                .image_class_text] = dataset_util.bytes_list_feature([
                    six.ensure_binary(label_text) for label_text in
                    filtered_data_frame_labels.LabelName.as_matrix()
                ])

  return tf.train.Example(features=tf.train.Features(feature=feature_map))
@@ -15,6 +15,7 @@
"""Tests for oid_tfrecord_creation.py."""

import pandas as pd
import six
import tensorflow as tf

from object_detection.dataset_tools import oid_tfrecord_creation

@@ -46,8 +47,7 @@ class TfExampleFromAnnotationsDataFrameTests(tf.test.TestCase):
    tf_example = oid_tfrecord_creation.tf_example_from_annotations_data_frame(
        df[df.ImageID == 'i1'], label_map, 'encoded_image_test')
    self.assertProtoEquals(six.ensure_str("""
      features {
        feature {
          key: "image/encoded"
@@ -94,7 +94,7 @@ class TfExampleFromAnnotationsDataFrameTests(tf.test.TestCase):
        feature {
          key: "image/class/text"
          value { bytes_list { value: ["c"] } } } }
    """), tf_example)

  def test_no_attributes(self):
    label_map, df = create_test_data()
@@ -107,7 +107,7 @@ class TfExampleFromAnnotationsDataFrameTests(tf.test.TestCase):
    tf_example = oid_tfrecord_creation.tf_example_from_annotations_data_frame(
        df[df.ImageID == 'i2'], label_map, 'encoded_image_test')
    self.assertProtoEquals(six.ensure_str("""
      features {
        feature {
          key: "image/encoded"
@@ -136,7 +136,7 @@ class TfExampleFromAnnotationsDataFrameTests(tf.test.TestCase):
        feature {
          key: "image/source_id"
          value { bytes_list { value: "i2" } } } }
    """), tf_example)

  def test_label_filtering(self):
    label_map, df = create_test_data()
@@ -146,7 +146,7 @@ class TfExampleFromAnnotationsDataFrameTests(tf.test.TestCase):
    tf_example = oid_tfrecord_creation.tf_example_from_annotations_data_frame(
        df[df.ImageID == 'i1'], label_map, 'encoded_image_test')
    self.assertProtoEquals(
        six.ensure_str("""
      features {
        feature {
          key: "image/encoded"
@@ -193,7 +193,7 @@ class TfExampleFromAnnotationsDataFrameTests(tf.test.TestCase):
        feature {
          key: "image/class/text"
          value { bytes_list { } } } }
    """), tf_example)


if __name__ == '__main__':
...
# Lint as: python2, python3
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -18,6 +19,7 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from six.moves import range
import tensorflow as tf
...
# Lint as: python2, python3
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -14,8 +15,14 @@
# ==============================================================================
"""Tests for tf_record_creation_util.py."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import contextlib2
import six
from six.moves import range
import tensorflow as tf

from object_detection.dataset_tools import tf_record_creation_util

@@ -29,7 +36,7 @@ class OpenOutputTfrecordsTests(tf.test.TestCase):
        tf_record_close_stack,
        os.path.join(tf.test.get_temp_dir(), 'test.tfrec'), 10)
    for idx in range(10):
      output_tfrecords[idx].write(six.ensure_binary('test_{}'.format(idx)))
    for idx in range(10):
      tf_record_path = '{}-{:05d}-of-00010'.format(
...
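
The shards written round-robin in the test above follow the -NNNNN-of-NNNNN naming produced by open_sharded_output_tfrecords, so they can be globbed and read back. A small sketch (the file pattern is illustrative):

import tensorflow as tf

filenames = tf.gfile.Glob('/tmp/test.tfrec-*-of-00010')
dataset = tf.data.TFRecordDataset(filenames)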
@@ -24,7 +24,8 @@ RUN git clone --depth 1 https://github.com/tensorflow/models.git && \
# Install gcloud and gsutil commands
# https://cloud.google.com/sdk/docs/quickstart-debian-ubuntu
RUN apt-get -y update && apt-get install -y gpg-agent && \
    export CLOUD_SDK_REPO="cloud-sdk-$(lsb_release -c -s)" && \
    echo "deb http://packages.cloud.google.com/apt $CLOUD_SDK_REPO main" | tee -a /etc/apt/sources.list.d/google-cloud-sdk.list && \
    curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add - && \
    apt-get update -y && apt-get install google-cloud-sdk -y
@@ -33,8 +34,10 @@ RUN apt-get -y update && apt-get install -y gpg-agent && export CLOUD_SDK_REPO="
# Install the Tensorflow Object Detection API from here
# https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/installation.md
# Install object detection api dependencies - use non-interactive mode to set
# default tzdata config during installation.
RUN export DEBIAN_FRONTEND=noninteractive && \
    apt-get install -y protobuf-compiler python-pil python-lxml python-tk && \
    pip install Cython && \
    pip install contextlib2 && \
    pip install jupyter && \
...
@@ -26,18 +26,20 @@ import numpy as np
from six.moves import range
import tensorflow as tf

from tensorflow.contrib import slim

from object_detection.core import box_list
from object_detection.core import box_list_ops
from object_detection.core import keypoint_ops
from object_detection.core import standard_fields as fields
from object_detection.metrics import coco_evaluation
from object_detection.protos import eval_pb2
from object_detection.utils import label_map_util
from object_detection.utils import object_detection_evaluation
from object_detection.utils import ops
from object_detection.utils import shape_utils
from object_detection.utils import visualization_utils as vis_utils

EVAL_KEYPOINT_METRIC = 'coco_keypoint_metrics'

# A dictionary of metric names to classes that implement the metric. The classes
# in the dictionary must implement
@@ -45,6 +47,8 @@ slim = tf.contrib.slim
EVAL_METRICS_CLASS_DICT = {
    'coco_detection_metrics':
        coco_evaluation.CocoDetectionEvaluator,
    'coco_keypoint_metrics':
        coco_evaluation.CocoKeypointEvaluator,
    'coco_mask_metrics':
        coco_evaluation.CocoMaskEvaluator,
    'oid_challenge_detection_metrics':
@@ -324,7 +328,7 @@ def _run_checkpoint_once(tensor_dict,
  counters = {'skipped': 0, 'success': 0}
  aggregate_result_losses_dict = collections.defaultdict(list)
  with slim.queues.QueueRunners(sess):
    try:
      for batch in range(int(num_batches)):
        if (batch + 1) % 100 == 0:
@@ -591,6 +595,8 @@ def result_dict_for_single_example(image,
      'groundtruth_group_of': [num_boxes] int64 tensor. (Optional)
      'groundtruth_instance_masks': 3D int64 tensor of instance masks
        (Optional).
      'groundtruth_keypoints': [num_boxes, num_keypoints, 2] float32 tensor with
        keypoints (Optional).
    class_agnostic: Boolean indicating whether the detections are class-agnostic
      (i.e. binary). Default False.
    scale_to_absolute: Boolean indicating whether boxes and keypoints should be
@@ -620,7 +626,8 @@ def result_dict_for_single_example(image,
    'groundtruth_group_of': [num_boxes] int64 tensor. (Optional)
    'groundtruth_instance_masks': 3D int64 tensor of instance masks
      (Optional).
    'groundtruth_keypoints': [num_boxes, num_keypoints, 2] float32 tensor with
      keypoints (Optional).
  """
  if groundtruth:
@@ -675,6 +682,10 @@ def result_dict_for_batched_example(images,
  Note that evaluation tools require classes that are 1-indexed, and so this
  function performs the offset. If `class_agnostic` is True, all output classes
  have label 1.

  The groundtruth coordinates of boxes/keypoints in 'groundtruth' dictionary are
  normalized relative to the (potentially padded) input image, while the
  coordinates in 'detection' dictionary are normalized relative to the true
  image shape.

  Args:
    images: A single 4D uint8 image tensor of shape [batch_size, H, W, C].
@@ -696,6 +707,10 @@ def result_dict_for_batched_example(images,
        tensor. (Optional)
      'groundtruth_instance_masks': 4D int64 tensor of instance
        masks (Optional).
      'groundtruth_keypoints': [batch_size, max_number_of_boxes, num_keypoints,
        2] float32 tensor with keypoints (Optional).
      'groundtruth_keypoint_visibilities': [batch_size, max_number_of_boxes,
        num_keypoints] bool tensor with keypoint visibilities (Optional).
    class_agnostic: Boolean indicating whether the detections are class-agnostic
      (i.e. binary). Default False.
    scale_to_absolute: Boolean indicating whether boxes and keypoints should be
@@ -724,7 +739,11 @@ def result_dict_for_batched_example(images,
    'detection_classes': [batch_size, max_detections] int64 tensor of 1-indexed
      classes.
    'detection_masks': [batch_size, max_detections, H, W] float32 tensor of
      binarized masks, reframed to full image masks. (Optional)
    'detection_keypoints': [batch_size, max_detections, num_keypoints, 2]
      float32 tensor containing keypoint coordinates. (Optional)
    'detection_keypoint_scores': [batch_size, max_detections, num_keypoints]
      float32 tensor containing keypoint scores. (Optional)
    'num_detections': [batch_size] int64 tensor containing number of valid
      detections.
    'groundtruth_boxes': [batch_size, num_boxes, 4] float32 tensor of boxes, in
@@ -739,6 +758,10 @@ def result_dict_for_batched_example(images,
    'groundtruth_group_of': [batch_size, num_boxes] int64 tensor. (Optional)
    'groundtruth_instance_masks': 4D int64 tensor of instance masks
      (Optional).
    'groundtruth_keypoints': [batch_size, num_boxes, num_keypoints, 2] float32
      tensor with keypoints (Optional).
    'groundtruth_keypoint_visibilities': [batch_size, num_boxes, num_keypoints]
      bool tensor with keypoint visibilities (Optional).
    'num_groundtruth_boxes': [batch_size] tensor containing the maximum number
      of groundtruth boxes per image.
@@ -828,6 +851,12 @@ def result_dict_for_batched_example(images,
                _scale_keypoint_to_absolute,
                elems=[detection_keypoints, original_image_spatial_shapes],
                dtype=tf.float32))
      if detection_fields.detection_keypoint_scores in detections:
        output_dict[detection_fields.detection_keypoint_scores] = detections[
            detection_fields.detection_keypoint_scores]
      else:
        output_dict[detection_fields.detection_keypoint_scores] = tf.ones_like(
            detections[detection_fields.detection_keypoints][:, :, :, 0])

  if groundtruth:
    if max_gt_boxes is None:
@@ -866,6 +895,28 @@ def result_dict_for_batched_example(images,
          elems=[groundtruth_boxes, true_image_shapes], dtype=tf.float32)
    output_dict[input_data_fields.groundtruth_boxes] = groundtruth_boxes

    if input_data_fields.groundtruth_keypoints in groundtruth:
      # If groundtruth_keypoints is in the groundtruth dictionary, update the
      # coordinates to conform with the true image shape.
      def _scale_keypoints_to_normalized_true_image(args):
        """Scale keypoint coordinates to be relative to the true image shape."""
        keypoints, true_image_shape = args
        true_image_shape = tf.cast(true_image_shape, tf.float32)
        true_height, true_width = true_image_shape[0], true_image_shape[1]
        normalized_window = tf.stack(
            [0.0, 0.0, true_height / image_height, true_width / image_width])
        return keypoint_ops.change_coordinate_frame(keypoints,
                                                    normalized_window)

      groundtruth_keypoints = groundtruth[
          input_data_fields.groundtruth_keypoints]
      groundtruth_keypoints = shape_utils.static_or_dynamic_map_fn(
          _scale_keypoints_to_normalized_true_image,
          elems=[groundtruth_keypoints, true_image_shapes],
          dtype=tf.float32)
      output_dict[
          input_data_fields.groundtruth_keypoints] = groundtruth_keypoints
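
For intuition, change_coordinate_frame re-expresses coordinates relative to a window by subtracting the window origin and dividing by the window size; with window [0, 0, true_h/padded_h, true_w/padded_w] this turns padded-image coordinates into true-image ones. A NumPy sketch of the same arithmetic (an illustration, not the library implementation; keypoints assumed shape [..., 2] in normalized (y, x)):

import numpy as np

def change_coordinate_frame_np(keypoints, window):
  # window: [y_min, x_min, y_max, x_max] in normalized coordinates.
  origin = np.array([window[0], window[1]])
  size = np.array([window[2] - window[0], window[3] - window[1]])
  return (keypoints - origin) / size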
    if scale_to_absolute:
      groundtruth_boxes = output_dict[input_data_fields.groundtruth_boxes]
      output_dict[input_data_fields.groundtruth_boxes] = (
@@ -873,6 +924,14 @@ def result_dict_for_batched_example(images,
              _scale_box_to_absolute,
              elems=[groundtruth_boxes, original_image_spatial_shapes],
              dtype=tf.float32))
      if input_data_fields.groundtruth_keypoints in groundtruth:
        groundtruth_keypoints = output_dict[
            input_data_fields.groundtruth_keypoints]
        output_dict[input_data_fields.groundtruth_keypoints] = (
            shape_utils.static_or_dynamic_map_fn(
                _scale_keypoint_to_absolute,
                elems=[groundtruth_keypoints, original_image_spatial_shapes],
                dtype=tf.float32))

    # For class-agnostic models, groundtruth classes all become 1.
    if class_agnostic:
@@ -893,6 +952,8 @@ def get_evaluators(eval_config, categories, evaluator_options=None):
    categories: A list of dicts, each of which has the following keys -
      'id': (required) an integer id uniquely identifying this category.
      'name': (required) string representing category name e.g., 'cat', 'dog'.
      'keypoints': (optional) dict mapping this category's keypoints to unique
        ids.
    evaluator_options: A dictionary of metric names (see
      EVAL_METRICS_CLASS_DICT) to `DetectionEvaluator` initialization
      keyword arguments. For example:
@@ -919,6 +980,32 @@ def get_evaluators(eval_config, categories, evaluator_options=None):
    evaluators_list.append(EVAL_METRICS_CLASS_DICT[eval_metric_fn_key](
        categories,
        **kwargs_dict))

  if isinstance(eval_config, eval_pb2.EvalConfig):
    parameterized_metrics = eval_config.parameterized_metric
    for parameterized_metric in parameterized_metrics:
      assert parameterized_metric.HasField('parameterized_metric')
      if parameterized_metric.WhichOneof(
          'parameterized_metric') == EVAL_KEYPOINT_METRIC:
        keypoint_metrics = parameterized_metric.coco_keypoint_metrics
        # Create category to keypoints mapping dict.
        category_keypoints = {}
        class_label = keypoint_metrics.class_label
        category = None
        for cat in categories:
          if cat['name'] == class_label:
            category = cat
            break
        if not category:
          continue
        keypoints_for_this_class = category['keypoints']
        category_keypoints = [{
            'id': keypoints_for_this_class[kp_name],
            'name': kp_name
        } for kp_name in keypoints_for_this_class]
        # Create keypoint evaluator for this category.
        evaluators_list.append(EVAL_METRICS_CLASS_DICT[EVAL_KEYPOINT_METRIC](
            category['id'], category_keypoints, class_label,
            keypoint_metrics.keypoint_label_to_sigmas))

  return evaluators_list
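
This branch is driven by the parameterized_metric field of EvalConfig. A hedged sketch of how a keypoint metric might be configured and parsed, using the field names exercised by the tests later in this diff; the sigma values are made up, and `categories` is assumed to carry a 'keypoints' mapping as described in the docstring above:

from google.protobuf import text_format
from object_detection.protos import eval_pb2

config_text = """
parameterized_metric {
  coco_keypoint_metrics {
    class_label: 'person'
    keypoint_label_to_sigmas { key: 'left_eye' value: 0.1 }
    keypoint_label_to_sigmas { key: 'right_eye' value: 0.2 }
  }
}
"""
eval_config = text_format.Parse(config_text, eval_pb2.EvalConfig())
# evaluators = get_evaluators(eval_config, categories)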
...

@@ -27,6 +27,7 @@ import tensorflow as tf

from object_detection import eval_util
from object_detection.core import standard_fields as fields
from object_detection.metrics import coco_evaluation
from object_detection.protos import eval_pb2
from object_detection.utils import test_case

@@ -38,6 +39,26 @@ class EvalUtilTest(test_case.TestCase, parameterized.TestCase):
            {'id': 2, 'name': 'dog'},
            {'id': 3, 'name': 'cat'}]

  def _get_categories_list_with_keypoints(self):
    return [{
        'id': 1,
        'name': 'person',
        'keypoints': {
            'left_eye': 0,
            'right_eye': 3
        }
    }, {
        'id': 2,
        'name': 'dog',
        'keypoints': {
            'tail_start': 1,
            'mouth': 2
        }
    }, {
        'id': 3,
        'name': 'cat'
    }]

  def _make_evaluation_dict(self,
                            resized_groundtruth_masks=False,
                            batch_size=1,
@@ -61,6 +82,7 @@ class EvalUtilTest(test_case.TestCase, parameterized.TestCase):
    groundtruth_boxes = tf.constant([[0., 0., 1., 1.]])
    groundtruth_classes = tf.constant([1])
    groundtruth_instance_masks = tf.ones(shape=[1, 20, 20], dtype=tf.uint8)
    groundtruth_keypoints = tf.constant([[0.0, 0.0], [0.5, 0.5], [1.0, 1.0]])
    if resized_groundtruth_masks:
      groundtruth_instance_masks = tf.ones(shape=[1, 10, 10], dtype=tf.uint8)
@@ -72,6 +94,9 @@ class EvalUtilTest(test_case.TestCase, parameterized.TestCase):
      groundtruth_instance_masks = tf.tile(
          tf.expand_dims(groundtruth_instance_masks, 0),
          multiples=[batch_size, 1, 1, 1])
      groundtruth_keypoints = tf.tile(
          tf.expand_dims(groundtruth_keypoints, 0),
          multiples=[batch_size, 1, 1])

    detections = {
        detection_fields.detection_boxes: detection_boxes,
@@ -83,6 +108,7 @@ class EvalUtilTest(test_case.TestCase, parameterized.TestCase):
    groundtruth = {
        input_data_fields.groundtruth_boxes: groundtruth_boxes,
        input_data_fields.groundtruth_classes: groundtruth_classes,
        input_data_fields.groundtruth_keypoints: groundtruth_keypoints,
        input_data_fields.groundtruth_instance_masks: groundtruth_instance_masks
    }
    if batch_size > 1:
@@ -255,6 +281,49 @@ class EvalUtilTest(test_case.TestCase, parameterized.TestCase):
    self.assertAlmostEqual(evaluator[1]._recall_lower_bound, 0.0)
    self.assertAlmostEqual(evaluator[1]._recall_upper_bound, 1.0)

  def test_get_evaluator_with_keypoint_metrics(self):
    eval_config = eval_pb2.EvalConfig()
    person_keypoints_metric = eval_config.parameterized_metric.add()
    person_keypoints_metric.coco_keypoint_metrics.class_label = 'person'
    person_keypoints_metric.coco_keypoint_metrics.keypoint_label_to_sigmas[
        'left_eye'] = 0.1
    person_keypoints_metric.coco_keypoint_metrics.keypoint_label_to_sigmas[
        'right_eye'] = 0.2
    dog_keypoints_metric = eval_config.parameterized_metric.add()
    dog_keypoints_metric.coco_keypoint_metrics.class_label = 'dog'
    dog_keypoints_metric.coco_keypoint_metrics.keypoint_label_to_sigmas[
        'tail_start'] = 0.3
    dog_keypoints_metric.coco_keypoint_metrics.keypoint_label_to_sigmas[
        'mouth'] = 0.4
    categories = self._get_categories_list_with_keypoints()
    evaluator = eval_util.get_evaluators(
        eval_config, categories, evaluator_options=None)

    # Verify keypoint evaluator class variables.
    self.assertLen(evaluator, 3)
    self.assertFalse(evaluator[0]._include_metrics_per_category)
    self.assertEqual(evaluator[1]._category_name, 'person')
    self.assertEqual(evaluator[2]._category_name, 'dog')
    self.assertAllEqual(evaluator[1]._keypoint_ids, [0, 3])
    self.assertAllEqual(evaluator[2]._keypoint_ids, [1, 2])
    self.assertAllClose([0.1, 0.2], evaluator[1]._oks_sigmas)
    self.assertAllClose([0.3, 0.4], evaluator[2]._oks_sigmas)

  def test_get_evaluator_with_unmatched_label(self):
    eval_config = eval_pb2.EvalConfig()
    person_keypoints_metric = eval_config.parameterized_metric.add()
    person_keypoints_metric.coco_keypoint_metrics.class_label = 'unmatched'
    person_keypoints_metric.coco_keypoint_metrics.keypoint_label_to_sigmas[
        'kpt'] = 0.1
    categories = self._get_categories_list_with_keypoints()
    evaluator = eval_util.get_evaluators(
        eval_config, categories, evaluator_options=None)
    self.assertLen(evaluator, 1)
    self.assertNotIsInstance(
        evaluator[0], coco_evaluation.CocoKeypointEvaluator)

  def test_padded_image_result_dict(self):
    input_data_fields = fields.InputDataFields
@@ -263,6 +332,8 @@ class EvalUtilTest(test_case.TestCase, parameterized.TestCase):
    detection_boxes = np.array([[[0., 0., 1., 1.]], [[0.0, 0.0, 0.5, 0.5]]],
                               dtype=np.float32)
    detection_keypoints = np.array([[0.0, 0.0], [0.5, 0.5], [1.0, 1.0]],
                                   dtype=np.float32)
    detections = {
        detection_fields.detection_boxes:
            tf.constant(detection_boxes),
@@ -271,7 +342,12 @@ class EvalUtilTest(test_case.TestCase, parameterized.TestCase):
        detection_fields.detection_classes:
            tf.constant([[1], [2]]),
        detection_fields.num_detections:
            tf.constant([1, 1]),
        detection_fields.detection_keypoints:
            tf.tile(
                tf.reshape(
                    tf.constant(detection_keypoints), shape=[1, 1, 3, 2]),
                multiples=[2, 1, 1, 1])
    }

    gt_boxes = detection_boxes
@@ -280,6 +356,11 @@ class EvalUtilTest(test_case.TestCase, parameterized.TestCase):
            tf.constant(gt_boxes),
        input_data_fields.groundtruth_classes:
            tf.constant([[1.], [1.]]),
        input_data_fields.groundtruth_keypoints:
            tf.tile(
                tf.reshape(
                    tf.constant(detection_keypoints), shape=[1, 1, 3, 2]),
                multiples=[2, 1, 1, 1])
    }

    image = tf.zeros((2, 100, 100, 3), dtype=tf.float32)
@@ -299,11 +380,17 @@ class EvalUtilTest(test_case.TestCase, parameterized.TestCase):
    self.assertAllEqual(
        [[[0., 0., 200., 200.]], [[0.0, 0.0, 150., 150.]]],
        result[input_data_fields.groundtruth_boxes])
    self.assertAllClose([[[[0., 0.], [100., 100.], [200., 200.]]],
                         [[[0., 0.], [150., 150.], [300., 300.]]]],
                        result[input_data_fields.groundtruth_keypoints])
    # Predictions from the model are not scaled.
    self.assertAllEqual(
        [[[0., 0., 200., 200.]], [[0.0, 0.0, 75., 150.]]],
        result[detection_fields.detection_boxes])
    self.assertAllClose([[[[0., 0.], [100., 100.], [200., 200.]]],
                         [[[0., 0.], [75., 150.], [150., 300.]]]],
                        result[detection_fields.detection_keypoints])


if __name__ == '__main__':
...
# Lint as: python2, python3
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -108,7 +109,6 @@ from google.protobuf import text_format
from object_detection import exporter
from object_detection.protos import pipeline_pb2

flags = tf.app.flags
flags.DEFINE_string('input_type', 'image_tensor', 'Type of input node. Can be '
...

# Lint as: python2, python3
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
...