Commit c308c03c authored by Mehdi Sharifzadeh, committed by Taylor Robie
Browse files

Mask R-CNN model added to models/research/mlperf_object_detection/Mask_RCNN (#4678)

* Create README.md

* readme changed

* readme changed

* ResNet backbone completed.

* FPN added

* Create README.md

* initial commit

* files removed

* initial commit

* protobuf file removed
parent 32e7d660
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.data_decoders.tf_example_decoder."""
import os
import numpy as np
import tensorflow as tf
from tensorflow.core.example import example_pb2
from tensorflow.core.example import feature_pb2
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import test_util
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import lookup_ops
from tensorflow.python.ops import parsing_ops
from object_detection.core import standard_fields as fields
from object_detection.data_decoders import tf_example_decoder
from object_detection.protos import input_reader_pb2
# Short alias for the TF-Slim tfexample_decoder module; the handler tests in
# this file build their decoders and Tensor handlers through it.
slim_example_decoder = tf.contrib.slim.tfexample_decoder
class TfExampleDecoderTest(tf.test.TestCase):
  """Tests for TfExampleDecoder and the LookupTensor/BackupHandler handlers."""

  def _EncodeImage(self, image_tensor, encoding_type='jpeg'):
    """Returns `image_tensor` encoded as 'jpeg' or 'png' bytes.

    Raises:
      ValueError: if `encoding_type` is neither 'jpeg' nor 'png'.
    """
    with self.test_session():
      if encoding_type == 'jpeg':
        encode_op = tf.image.encode_jpeg(tf.constant(image_tensor))
      elif encoding_type == 'png':
        encode_op = tf.image.encode_png(tf.constant(image_tensor))
      else:
        raise ValueError('Invalid encoding type.')
      return encode_op.eval()

  def _DecodeImage(self, image_encoded, encoding_type='jpeg'):
    """Returns the numpy image decoded from 'jpeg' or 'png' encoded bytes.

    Raises:
      ValueError: if `encoding_type` is neither 'jpeg' nor 'png'.
    """
    with self.test_session():
      if encoding_type == 'jpeg':
        decode_op = tf.image.decode_jpeg(tf.constant(image_encoded))
      elif encoding_type == 'png':
        decode_op = tf.image.decode_png(tf.constant(image_encoded))
      else:
        raise ValueError('Invalid encoding type.')
      return decode_op.eval()

  def _Int64Feature(self, value):
    """Wraps an iterable of ints in a tf.train.Feature."""
    int64_list = tf.train.Int64List(value=value)
    return tf.train.Feature(int64_list=int64_list)

  def _FloatFeature(self, value):
    """Wraps an iterable of floats in a tf.train.Feature."""
    float_list = tf.train.FloatList(value=value)
    return tf.train.Feature(float_list=float_list)

  def _BytesFeature(self, value):
    """Wraps a single bytes value, or a list of them, in a tf.train.Feature."""
    values = value if isinstance(value, list) else [value]
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=values))

  def _Int64FeatureFromList(self, ndarray):
    """Builds an int64 Feature proto from the flattened contents of an array."""
    flat_values = ndarray.flatten().tolist()
    return feature_pb2.Feature(
        int64_list=feature_pb2.Int64List(value=flat_values))

  def _BytesFeatureFromList(self, ndarray):
    """Builds a bytes Feature proto from the flattened contents of an array."""
    flat_values = ndarray.flatten().tolist()
    bytes_list = feature_pb2.BytesList(value=flat_values)
    return feature_pb2.Feature(bytes_list=bytes_list)

  def testDecodeAdditionalChannels(self):
    """Two encoded extra channels are decoded and concatenated on axis 2."""
    image = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
    encoded_image = self._EncodeImage(image)
    extra_channel = np.random.randint(256, size=(4, 5, 1)).astype(np.uint8)
    encoded_extra = self._EncodeImage(extra_channel)
    decoded_extra = self._DecodeImage(encoded_extra)

    feature_map = {
        'image/encoded': self._BytesFeature(encoded_image),
        'image/additional_channels/encoded':
            self._BytesFeatureFromList(np.array([encoded_extra] * 2)),
        'image/format': self._BytesFeature('jpeg'),
        'image/source_id': self._BytesFeature('image_id'),
    }
    serialized = tf.train.Example(
        features=tf.train.Features(feature=feature_map)).SerializeToString()

    decoder = tf_example_decoder.TfExampleDecoder(num_additional_channels=2)
    tensor_dict = decoder.decode(tf.convert_to_tensor(serialized))

    with self.test_session() as sess:
      outputs = sess.run(tensor_dict)
    self.assertAllEqual(
        np.concatenate([decoded_extra] * 2, axis=2),
        outputs[fields.InputDataFields.image_additional_channels])

  def testDecodeExampleWithBranchedBackupHandler(self):
    """BackupHandler yields label ids when present, else looked-up text ids."""
    text_key = 'image/object/class/text'
    label_key = 'image/object/class/label'
    class_names = np.array(['cat', 'dog', 'guinea pig'])

    # Carries both the text and the numeric labels.
    example_text_and_label = example_pb2.Example(
        features=feature_pb2.Features(
            feature={
                text_key: self._BytesFeatureFromList(class_names),
                label_key: self._Int64FeatureFromList(
                    np.array([42, 10, 900])),
            }))
    # Carries only the text labels.
    example_text_only = example_pb2.Example(
        features=feature_pb2.Features(
            feature={
                text_key: self._BytesFeatureFromList(class_names),
            }))
    # Carries only the numeric labels.
    example_label_only = example_pb2.Example(
        features=feature_pb2.Features(
            feature={
                label_key: self._Int64FeatureFromList(
                    np.array([42, 10, 901])),
            }))

    # 'dog' -> 0, 'guinea pig' -> 1, 'cat' -> 2
    table = lookup_ops.index_table_from_tensor(
        constant_op.constant(['dog', 'guinea pig', 'cat']))
    keys_to_features = {
        text_key: parsing_ops.VarLenFeature(dtypes.string),
        label_key: parsing_ops.VarLenFeature(dtypes.int64),
    }
    items_to_handlers = {
        'labels': tf_example_decoder.BackupHandler(
            handler=slim_example_decoder.Tensor(label_key),
            backup=tf_example_decoder.LookupTensor(text_key, table)),
    }
    decoder = slim_example_decoder.TFExampleDecoder(keys_to_features,
                                                    items_to_handlers)

    class_ids_per_example = []
    with self.test_session() as sess:
      sess.run(lookup_ops.tables_initializer())
      for proto in (example_text_and_label, example_text_only,
                    example_label_only):
        serialized = array_ops.reshape(proto.SerializeToString(), shape=[])
        class_ids_per_example.append(decoder.decode(serialized)[0].eval())

    self.assertAllClose([42, 10, 900], class_ids_per_example[0])
    self.assertAllClose([2, 0, 1], class_ids_per_example[1])
    self.assertAllClose([42, 10, 901], class_ids_per_example[2])

  def testDecodeExampleWithBranchedLookup(self):
    """LookupTensor maps class text to indices of the lookup table."""
    text_key = 'image/object/class/text'
    proto = example_pb2.Example(
        features=feature_pb2.Features(
            feature={
                text_key: self._BytesFeatureFromList(
                    np.array(['cat', 'dog', 'guinea pig'])),
            }))
    serialized = proto.SerializeToString()

    # 'dog' -> 0, 'guinea pig' -> 1, 'cat' -> 2
    table = lookup_ops.index_table_from_tensor(
        constant_op.constant(['dog', 'guinea pig', 'cat']))

    with self.test_session() as sess:
      sess.run(lookup_ops.tables_initializer())
      serialized = array_ops.reshape(serialized, shape=[])
      keys_to_features = {
          text_key: parsing_ops.VarLenFeature(dtypes.string),
      }
      items_to_handlers = {
          'labels': tf_example_decoder.LookupTensor(text_key, table),
      }
      decoder = slim_example_decoder.TFExampleDecoder(keys_to_features,
                                                      items_to_handlers)
      class_ids = decoder.decode(serialized)[0].eval()

    self.assertAllClose([2, 0, 1], class_ids)

  def testDecodeJpegImage(self):
    """A JPEG example decodes to the same pixels and keeps its source id."""
    image = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
    encoded_image = self._EncodeImage(image)
    expected_image = self._DecodeImage(encoded_image)
    feature_map = {
        'image/encoded': self._BytesFeature(encoded_image),
        'image/format': self._BytesFeature('jpeg'),
        'image/source_id': self._BytesFeature('image_id'),
    }
    serialized = tf.train.Example(
        features=tf.train.Features(feature=feature_map)).SerializeToString()

    decoder = tf_example_decoder.TfExampleDecoder()
    tensor_dict = decoder.decode(tf.convert_to_tensor(serialized))

    static_shape = (
        tensor_dict[fields.InputDataFields.image].get_shape().as_list())
    self.assertAllEqual(static_shape, [None, None, 3])

    with self.test_session() as sess:
      outputs = sess.run(tensor_dict)
    self.assertAllEqual(expected_image, outputs[fields.InputDataFields.image])
    self.assertEqual('image_id', outputs[fields.InputDataFields.source_id])

  def testDecodeImageKeyAndFilename(self):
    """The sha256 key and the filename are passed through by the decoder."""
    image = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
    encoded_image = self._EncodeImage(image)
    feature_map = {
        'image/encoded': self._BytesFeature(encoded_image),
        'image/key/sha256': self._BytesFeature('abc'),
        'image/filename': self._BytesFeature('filename'),
    }
    serialized = tf.train.Example(
        features=tf.train.Features(feature=feature_map)).SerializeToString()

    decoder = tf_example_decoder.TfExampleDecoder()
    tensor_dict = decoder.decode(tf.convert_to_tensor(serialized))

    with self.test_session() as sess:
      outputs = sess.run(tensor_dict)
    self.assertEqual('abc', outputs[fields.InputDataFields.key])
    self.assertEqual('filename', outputs[fields.InputDataFields.filename])

  def testDecodePngImage(self):
    """A PNG example decodes to the same pixels and keeps its source id."""
    image = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
    encoded_image = self._EncodeImage(image, encoding_type='png')
    expected_image = self._DecodeImage(encoded_image, encoding_type='png')
    feature_map = {
        'image/encoded': self._BytesFeature(encoded_image),
        'image/format': self._BytesFeature('png'),
        'image/source_id': self._BytesFeature('image_id'),
    }
    serialized = tf.train.Example(
        features=tf.train.Features(feature=feature_map)).SerializeToString()

    decoder = tf_example_decoder.TfExampleDecoder()
    tensor_dict = decoder.decode(tf.convert_to_tensor(serialized))

    static_shape = (
        tensor_dict[fields.InputDataFields.image].get_shape().as_list())
    self.assertAllEqual(static_shape, [None, None, 3])

    with self.test_session() as sess:
      outputs = sess.run(tensor_dict)
    self.assertAllEqual(expected_image, outputs[fields.InputDataFields.image])
    self.assertEqual('image_id', outputs[fields.InputDataFields.source_id])

  def testDecodePngInstanceMasks(self):
    """PNG-encoded masks decode to a stacked float32 mask array."""
    image = np.random.randint(256, size=(10, 10, 3)).astype(np.uint8)
    encoded_image = self._EncodeImage(image)
    raw_masks = [
        np.random.randint(0, 2, size=(10, 10, 1)).astype(np.uint8)
        for _ in range(2)
    ]
    encoded_masks = [
        self._EncodeImage(raw_mask, encoding_type='png')
        for raw_mask in raw_masks
    ]
    expected_masks = np.stack(
        [np.squeeze(raw_mask.astype(np.float32)) for raw_mask in raw_masks])
    feature_map = {
        'image/encoded': self._BytesFeature(encoded_image),
        'image/format': self._BytesFeature('jpeg'),
        'image/object/mask': self._BytesFeature(encoded_masks),
    }
    serialized = tf.train.Example(
        features=tf.train.Features(feature=feature_map)).SerializeToString()

    decoder = tf_example_decoder.TfExampleDecoder(
        load_instance_masks=True,
        instance_mask_type=input_reader_pb2.PNG_MASKS)
    tensor_dict = decoder.decode(tf.convert_to_tensor(serialized))

    with self.test_session() as sess:
      outputs = sess.run(tensor_dict)
    self.assertAllEqual(
        expected_masks,
        outputs[fields.InputDataFields.groundtruth_instance_masks])

  def testDecodeEmptyPngInstanceMasks(self):
    """With no masks, the decoded mask array has shape [0, height, width]."""
    image = np.random.randint(256, size=(10, 10, 3)).astype(np.uint8)
    encoded_image = self._EncodeImage(image)
    no_masks = []
    feature_map = {
        'image/encoded': self._BytesFeature(encoded_image),
        'image/format': self._BytesFeature('jpeg'),
        'image/object/mask': self._BytesFeature(no_masks),
        'image/height': self._Int64Feature([10]),
        'image/width': self._Int64Feature([10]),
    }
    serialized = tf.train.Example(
        features=tf.train.Features(feature=feature_map)).SerializeToString()

    decoder = tf_example_decoder.TfExampleDecoder(
        load_instance_masks=True,
        instance_mask_type=input_reader_pb2.PNG_MASKS)
    tensor_dict = decoder.decode(tf.convert_to_tensor(serialized))

    with self.test_session() as sess:
      outputs = sess.run(tensor_dict)
    self.assertAllEqual(
        outputs[fields.InputDataFields.groundtruth_instance_masks].shape,
        [0, 10, 10])

  def testDecodeBoundingBox(self):
    """Box coordinates decode to [ymin, xmin, ymax, xmax] rows."""
    image = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
    encoded_image = self._EncodeImage(image)
    bbox_ymins = [0.0, 4.0]
    bbox_xmins = [1.0, 5.0]
    bbox_ymaxs = [2.0, 6.0]
    bbox_xmaxs = [3.0, 7.0]
    feature_map = {
        'image/encoded': self._BytesFeature(encoded_image),
        'image/format': self._BytesFeature('jpeg'),
        'image/object/bbox/ymin': self._FloatFeature(bbox_ymins),
        'image/object/bbox/xmin': self._FloatFeature(bbox_xmins),
        'image/object/bbox/ymax': self._FloatFeature(bbox_ymaxs),
        'image/object/bbox/xmax': self._FloatFeature(bbox_xmaxs),
    }
    serialized = tf.train.Example(
        features=tf.train.Features(feature=feature_map)).SerializeToString()

    decoder = tf_example_decoder.TfExampleDecoder()
    tensor_dict = decoder.decode(tf.convert_to_tensor(serialized))

    boxes_shape = (
        tensor_dict[fields.InputDataFields.groundtruth_boxes]
        .get_shape().as_list())
    self.assertAllEqual(boxes_shape, [None, 4])

    with self.test_session() as sess:
      outputs = sess.run(tensor_dict)

    expected_boxes = np.vstack(
        [bbox_ymins, bbox_xmins, bbox_ymaxs, bbox_xmaxs]).transpose()
    self.assertAllEqual(expected_boxes,
                        outputs[fields.InputDataFields.groundtruth_boxes])
    self.assertAllEqual(
        2, outputs[fields.InputDataFields.num_groundtruth_boxes])

  @test_util.enable_c_shapes
  def testDecodeKeypoint(self):
    """Keypoints decode to a [num_boxes, num_keypoints, 2] array of (y, x)."""
    image = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
    encoded_image = self._EncodeImage(image)
    bbox_ymins = [0.0, 4.0]
    bbox_xmins = [1.0, 5.0]
    bbox_ymaxs = [2.0, 6.0]
    bbox_xmaxs = [3.0, 7.0]
    keypoint_ys = [0.0, 1.0, 2.0, 3.0, 4.0, 5.0]
    keypoint_xs = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]
    feature_map = {
        'image/encoded': self._BytesFeature(encoded_image),
        'image/format': self._BytesFeature('jpeg'),
        'image/object/bbox/ymin': self._FloatFeature(bbox_ymins),
        'image/object/bbox/xmin': self._FloatFeature(bbox_xmins),
        'image/object/bbox/ymax': self._FloatFeature(bbox_ymaxs),
        'image/object/bbox/xmax': self._FloatFeature(bbox_xmaxs),
        'image/object/keypoint/y': self._FloatFeature(keypoint_ys),
        'image/object/keypoint/x': self._FloatFeature(keypoint_xs),
    }
    serialized = tf.train.Example(
        features=tf.train.Features(feature=feature_map)).SerializeToString()

    decoder = tf_example_decoder.TfExampleDecoder(num_keypoints=3)
    tensor_dict = decoder.decode(tf.convert_to_tensor(serialized))

    boxes_shape = (
        tensor_dict[fields.InputDataFields.groundtruth_boxes]
        .get_shape().as_list())
    self.assertAllEqual(boxes_shape, [None, 4])
    keypoints_shape = (
        tensor_dict[fields.InputDataFields.groundtruth_keypoints]
        .get_shape().as_list())
    self.assertAllEqual(keypoints_shape, [2, 3, 2])

    with self.test_session() as sess:
      outputs = sess.run(tensor_dict)

    expected_boxes = np.vstack(
        [bbox_ymins, bbox_xmins, bbox_ymaxs, bbox_xmaxs]).transpose()
    self.assertAllEqual(expected_boxes,
                        outputs[fields.InputDataFields.groundtruth_boxes])
    self.assertAllEqual(
        2, outputs[fields.InputDataFields.num_groundtruth_boxes])

    expected_keypoints = (
        np.vstack([keypoint_ys, keypoint_xs]).transpose().reshape((2, 3, 2)))
    self.assertAllEqual(
        expected_keypoints,
        outputs[fields.InputDataFields.groundtruth_keypoints])

  def testDecodeDefaultGroundtruthWeights(self):
    """Without a weight feature, every box gets a weight of 1."""
    image = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
    encoded_image = self._EncodeImage(image)
    bbox_ymins = [0.0, 4.0]
    bbox_xmins = [1.0, 5.0]
    bbox_ymaxs = [2.0, 6.0]
    bbox_xmaxs = [3.0, 7.0]
    feature_map = {
        'image/encoded': self._BytesFeature(encoded_image),
        'image/format': self._BytesFeature('jpeg'),
        'image/object/bbox/ymin': self._FloatFeature(bbox_ymins),
        'image/object/bbox/xmin': self._FloatFeature(bbox_xmins),
        'image/object/bbox/ymax': self._FloatFeature(bbox_ymaxs),
        'image/object/bbox/xmax': self._FloatFeature(bbox_xmaxs),
    }
    serialized = tf.train.Example(
        features=tf.train.Features(feature=feature_map)).SerializeToString()

    decoder = tf_example_decoder.TfExampleDecoder()
    tensor_dict = decoder.decode(tf.convert_to_tensor(serialized))

    boxes_shape = (
        tensor_dict[fields.InputDataFields.groundtruth_boxes]
        .get_shape().as_list())
    self.assertAllEqual(boxes_shape, [None, 4])

    with self.test_session() as sess:
      outputs = sess.run(tensor_dict)
    self.assertAllClose(outputs[fields.InputDataFields.groundtruth_weights],
                        np.ones(2, dtype=np.float32))

  @test_util.enable_c_shapes
  def testDecodeObjectLabel(self):
    """Integer class labels are decoded unchanged with a static shape."""
    image = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
    encoded_image = self._EncodeImage(image)
    bbox_classes = [0, 1]
    feature_map = {
        'image/encoded': self._BytesFeature(encoded_image),
        'image/format': self._BytesFeature('jpeg'),
        'image/object/class/label': self._Int64Feature(bbox_classes),
    }
    serialized = tf.train.Example(
        features=tf.train.Features(feature=feature_map)).SerializeToString()

    decoder = tf_example_decoder.TfExampleDecoder()
    tensor_dict = decoder.decode(tf.convert_to_tensor(serialized))

    classes_shape = (
        tensor_dict[fields.InputDataFields.groundtruth_classes]
        .get_shape().as_list())
    self.assertAllEqual(classes_shape, [2])

    with self.test_session() as sess:
      outputs = sess.run(tensor_dict)
    self.assertAllEqual(bbox_classes,
                        outputs[fields.InputDataFields.groundtruth_classes])

  def testDecodeObjectLabelNoText(self):
    """With a label map but no class text, the integer labels are used."""
    image = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
    encoded_image = self._EncodeImage(image)
    bbox_classes = [1, 2]
    feature_map = {
        'image/encoded': self._BytesFeature(encoded_image),
        'image/format': self._BytesFeature('jpeg'),
        'image/object/class/label': self._Int64Feature(bbox_classes),
    }
    serialized = tf.train.Example(
        features=tf.train.Features(feature=feature_map)).SerializeToString()

    label_map_string = """
item {
id:1
name:'cat'
}
item {
id:2
name:'dog'
}
"""
    label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt')
    with tf.gfile.Open(label_map_path, 'wb') as label_map_file:
      label_map_file.write(label_map_string)

    decoder = tf_example_decoder.TfExampleDecoder(
        label_map_proto_file=label_map_path)
    tensor_dict = decoder.decode(tf.convert_to_tensor(serialized))

    classes_shape = (
        tensor_dict[fields.InputDataFields.groundtruth_classes]
        .get_shape().as_list())
    self.assertAllEqual(classes_shape, [None])

    init = tf.tables_initializer()
    with self.test_session() as sess:
      sess.run(init)
      outputs = sess.run(tensor_dict)
    self.assertAllEqual(bbox_classes,
                        outputs[fields.InputDataFields.groundtruth_classes])

  def testDecodeObjectLabelUnrecognizedName(self):
    """Class text missing from the label map decodes to -1."""
    image = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
    encoded_image = self._EncodeImage(image)
    bbox_classes_text = ['cat', 'cheetah']
    feature_map = {
        'image/encoded': self._BytesFeature(encoded_image),
        'image/format': self._BytesFeature('jpeg'),
        'image/object/class/text': self._BytesFeature(bbox_classes_text),
    }
    serialized = tf.train.Example(
        features=tf.train.Features(feature=feature_map)).SerializeToString()

    label_map_string = """
item {
id:2
name:'cat'
}
item {
id:1
name:'dog'
}
"""
    label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt')
    with tf.gfile.Open(label_map_path, 'wb') as label_map_file:
      label_map_file.write(label_map_string)

    decoder = tf_example_decoder.TfExampleDecoder(
        label_map_proto_file=label_map_path)
    tensor_dict = decoder.decode(tf.convert_to_tensor(serialized))

    classes_shape = (
        tensor_dict[fields.InputDataFields.groundtruth_classes]
        .get_shape().as_list())
    self.assertAllEqual(classes_shape, [None])

    with self.test_session() as sess:
      sess.run(tf.tables_initializer())
      outputs = sess.run(tensor_dict)
    self.assertAllEqual([2, -1],
                        outputs[fields.InputDataFields.groundtruth_classes])

  def testDecodeObjectLabelWithMapping(self):
    """Class text is translated to the ids given in the label map."""
    image = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
    encoded_image = self._EncodeImage(image)
    bbox_classes_text = ['cat', 'dog']
    feature_map = {
        'image/encoded': self._BytesFeature(encoded_image),
        'image/format': self._BytesFeature('jpeg'),
        'image/object/class/text': self._BytesFeature(bbox_classes_text),
    }
    serialized = tf.train.Example(
        features=tf.train.Features(feature=feature_map)).SerializeToString()

    label_map_string = """
item {
id:3
name:'cat'
}
item {
id:1
name:'dog'
}
"""
    label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt')
    with tf.gfile.Open(label_map_path, 'wb') as label_map_file:
      label_map_file.write(label_map_string)

    decoder = tf_example_decoder.TfExampleDecoder(
        label_map_proto_file=label_map_path)
    tensor_dict = decoder.decode(tf.convert_to_tensor(serialized))

    classes_shape = (
        tensor_dict[fields.InputDataFields.groundtruth_classes]
        .get_shape().as_list())
    self.assertAllEqual(classes_shape, [None])

    with self.test_session() as sess:
      sess.run(tf.tables_initializer())
      outputs = sess.run(tensor_dict)
    self.assertAllEqual([3, 1],
                        outputs[fields.InputDataFields.groundtruth_classes])

  @test_util.enable_c_shapes
  def testDecodeObjectArea(self):
    """Object areas are decoded unchanged with a static shape."""
    image = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
    encoded_image = self._EncodeImage(image)
    object_area = [100., 174.]
    feature_map = {
        'image/encoded': self._BytesFeature(encoded_image),
        'image/format': self._BytesFeature('jpeg'),
        'image/object/area': self._FloatFeature(object_area),
    }
    serialized = tf.train.Example(
        features=tf.train.Features(feature=feature_map)).SerializeToString()

    decoder = tf_example_decoder.TfExampleDecoder()
    tensor_dict = decoder.decode(tf.convert_to_tensor(serialized))

    area_shape = (
        tensor_dict[fields.InputDataFields.groundtruth_area]
        .get_shape().as_list())
    self.assertAllEqual(area_shape, [2])

    with self.test_session() as sess:
      outputs = sess.run(tensor_dict)
    self.assertAllEqual(object_area,
                        outputs[fields.InputDataFields.groundtruth_area])

  @test_util.enable_c_shapes
  def testDecodeObjectIsCrowd(self):
    """The is_crowd integers decode to booleans."""
    image = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
    encoded_image = self._EncodeImage(image)
    object_is_crowd = [0, 1]
    feature_map = {
        'image/encoded': self._BytesFeature(encoded_image),
        'image/format': self._BytesFeature('jpeg'),
        'image/object/is_crowd': self._Int64Feature(object_is_crowd),
    }
    serialized = tf.train.Example(
        features=tf.train.Features(feature=feature_map)).SerializeToString()

    decoder = tf_example_decoder.TfExampleDecoder()
    tensor_dict = decoder.decode(tf.convert_to_tensor(serialized))

    is_crowd_shape = (
        tensor_dict[fields.InputDataFields.groundtruth_is_crowd]
        .get_shape().as_list())
    self.assertAllEqual(is_crowd_shape, [2])

    with self.test_session() as sess:
      outputs = sess.run(tensor_dict)
    self.assertAllEqual(
        [bool(flag) for flag in object_is_crowd],
        outputs[fields.InputDataFields.groundtruth_is_crowd])

  @test_util.enable_c_shapes
  def testDecodeObjectDifficult(self):
    """The difficult integers decode to booleans."""
    image = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
    encoded_image = self._EncodeImage(image)
    object_difficult = [0, 1]
    feature_map = {
        'image/encoded': self._BytesFeature(encoded_image),
        'image/format': self._BytesFeature('jpeg'),
        'image/object/difficult': self._Int64Feature(object_difficult),
    }
    serialized = tf.train.Example(
        features=tf.train.Features(feature=feature_map)).SerializeToString()

    decoder = tf_example_decoder.TfExampleDecoder()
    tensor_dict = decoder.decode(tf.convert_to_tensor(serialized))

    difficult_shape = (
        tensor_dict[fields.InputDataFields.groundtruth_difficult]
        .get_shape().as_list())
    self.assertAllEqual(difficult_shape, [2])

    with self.test_session() as sess:
      outputs = sess.run(tensor_dict)
    self.assertAllEqual(
        [bool(flag) for flag in object_difficult],
        outputs[fields.InputDataFields.groundtruth_difficult])

  @test_util.enable_c_shapes
  def testDecodeObjectGroupOf(self):
    """The group_of integers decode to booleans."""
    image = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
    encoded_image = self._EncodeImage(image)
    object_group_of = [0, 1]
    feature_map = {
        'image/encoded': self._BytesFeature(encoded_image),
        'image/format': self._BytesFeature('jpeg'),
        'image/object/group_of': self._Int64Feature(object_group_of),
    }
    serialized = tf.train.Example(
        features=tf.train.Features(feature=feature_map)).SerializeToString()

    decoder = tf_example_decoder.TfExampleDecoder()
    tensor_dict = decoder.decode(tf.convert_to_tensor(serialized))

    group_of_shape = (
        tensor_dict[fields.InputDataFields.groundtruth_group_of]
        .get_shape().as_list())
    self.assertAllEqual(group_of_shape, [2])

    with self.test_session() as sess:
      outputs = sess.run(tensor_dict)
    self.assertAllEqual(
        [bool(flag) for flag in object_group_of],
        outputs[fields.InputDataFields.groundtruth_group_of])

  def testDecodeObjectWeight(self):
    """Explicit per-object weights are decoded unchanged."""
    image = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
    encoded_image = self._EncodeImage(image)
    object_weights = [0.75, 1.0]
    feature_map = {
        'image/encoded': self._BytesFeature(encoded_image),
        'image/format': self._BytesFeature('jpeg'),
        'image/object/weight': self._FloatFeature(object_weights),
    }
    serialized = tf.train.Example(
        features=tf.train.Features(feature=feature_map)).SerializeToString()

    decoder = tf_example_decoder.TfExampleDecoder()
    tensor_dict = decoder.decode(tf.convert_to_tensor(serialized))

    weights_shape = (
        tensor_dict[fields.InputDataFields.groundtruth_weights]
        .get_shape().as_list())
    self.assertAllEqual(weights_shape, [None])

    with self.test_session() as sess:
      outputs = sess.run(tensor_dict)
    self.assertAllEqual(
        object_weights,
        outputs[fields.InputDataFields.groundtruth_weights])

  @test_util.enable_c_shapes
  def testDecodeInstanceSegmentation(self):
    """Flattened float masks decode to [num_instances, height, width]."""
    num_instances = 4
    image_height = 5
    image_width = 3

    # Randomly generate image.
    image = np.random.randint(
        256, size=(image_height, image_width, 3)).astype(np.uint8)
    encoded_image = self._EncodeImage(image)

    # Randomly generate instance segmentation masks.
    instance_masks = np.random.randint(
        2, size=(num_instances, image_height, image_width)).astype(np.float32)
    instance_masks_flattened = np.reshape(instance_masks, [-1])

    # Randomly generate class labels for each instance.
    object_classes = np.random.randint(
        100, size=(num_instances)).astype(np.int64)

    feature_map = {
        'image/encoded': self._BytesFeature(encoded_image),
        'image/format': self._BytesFeature('jpeg'),
        'image/height': self._Int64Feature([image_height]),
        'image/width': self._Int64Feature([image_width]),
        'image/object/mask': self._FloatFeature(instance_masks_flattened),
        'image/object/class/label': self._Int64Feature(object_classes),
    }
    serialized = tf.train.Example(
        features=tf.train.Features(feature=feature_map)).SerializeToString()

    decoder = tf_example_decoder.TfExampleDecoder(load_instance_masks=True)
    tensor_dict = decoder.decode(tf.convert_to_tensor(serialized))

    masks_shape = (
        tensor_dict[fields.InputDataFields.groundtruth_instance_masks]
        .get_shape().as_list())
    self.assertAllEqual(masks_shape, [4, 5, 3])
    classes_shape = (
        tensor_dict[fields.InputDataFields.groundtruth_classes]
        .get_shape().as_list())
    self.assertAllEqual(classes_shape, [4])

    with self.test_session() as sess:
      outputs = sess.run(tensor_dict)
    self.assertAllEqual(
        instance_masks.astype(np.float32),
        outputs[fields.InputDataFields.groundtruth_instance_masks])
    self.assertAllEqual(
        object_classes,
        outputs[fields.InputDataFields.groundtruth_classes])

  def testInstancesNotAvailableByDefault(self):
    """Masks are absent from the output unless load_instance_masks is set."""
    num_instances = 4
    image_height = 5
    image_width = 3

    # Randomly generate image.
    image = np.random.randint(
        256, size=(image_height, image_width, 3)).astype(np.uint8)
    encoded_image = self._EncodeImage(image)

    # Randomly generate instance segmentation masks.
    instance_masks = np.random.randint(
        2, size=(num_instances, image_height, image_width)).astype(np.float32)
    instance_masks_flattened = np.reshape(instance_masks, [-1])

    # Randomly generate class labels for each instance.
    object_classes = np.random.randint(
        100, size=(num_instances)).astype(np.int64)

    feature_map = {
        'image/encoded': self._BytesFeature(encoded_image),
        'image/format': self._BytesFeature('jpeg'),
        'image/height': self._Int64Feature([image_height]),
        'image/width': self._Int64Feature([image_width]),
        'image/object/mask': self._FloatFeature(instance_masks_flattened),
        'image/object/class/label': self._Int64Feature(object_classes),
    }
    serialized = tf.train.Example(
        features=tf.train.Features(feature=feature_map)).SerializeToString()

    decoder = tf_example_decoder.TfExampleDecoder()
    tensor_dict = decoder.decode(tf.convert_to_tensor(serialized))
    self.assertTrue(
        fields.InputDataFields.groundtruth_instance_masks not in tensor_dict)
if __name__ == '__main__':
  # Run every test case in this module through the TensorFlow test runner.
  tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Convert raw COCO dataset to TFRecord for object_detection.
Example usage:
python create_coco_tf_record.py --logtostderr \
--train_image_dir="${TRAIN_IMAGE_DIR}" \
--val_image_dir="${VAL_IMAGE_DIR}" \
--test_image_dir="${TEST_IMAGE_DIR}" \
--train_annotations_file="${TRAIN_ANNOTATIONS_FILE}" \
--val_annotations_file="${VAL_ANNOTATIONS_FILE}" \
--testdev_annotations_file="${TESTDEV_ANNOTATIONS_FILE}" \
--output_dir="${OUTPUT_DIR}"
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import hashlib
import io
import json
import os
import numpy as np
import PIL.Image
from pycocotools import mask
import tensorflow as tf
from object_detection.utils import dataset_util
from object_detection.utils import label_map_util
# Command-line flags for the converter.  All of them are registered through
# the single `flags` alias so the definitions and `FLAGS` share one spelling.
flags = tf.app.flags

flags.DEFINE_boolean(
    'include_masks', False,
    'Whether to include instance segmentations masks '
    '(PNG encoded) in the result. default: False.')

# Image directories, one per dataset split.
flags.DEFINE_string('train_image_dir', '', 'Training image directory.')
flags.DEFINE_string('val_image_dir', '', 'Validation image directory.')
flags.DEFINE_string('test_image_dir', '', 'Test image directory.')

# COCO annotation JSON files, one per dataset split.
flags.DEFINE_string(
    'train_annotations_file', '', 'Training annotations JSON file.')
flags.DEFINE_string(
    'val_annotations_file', '', 'Validation annotations JSON file.')
flags.DEFINE_string(
    'testdev_annotations_file', '', 'Test-dev annotations JSON file.')

flags.DEFINE_string('output_dir', '/tmp/', 'Output data directory.')

FLAGS = flags.FLAGS

# Make the INFO-level progress messages logged by this script visible.
tf.logging.set_verbosity(tf.logging.INFO)
def create_tf_example(image,
                      annotations_list,
                      image_dir,
                      category_index,
                      include_masks=False):
  """Converts image and annotations to a tf.Example proto.

  Args:
    image: dict with keys:
      [u'license', u'file_name', u'coco_url', u'height', u'width',
      u'date_captured', u'flickr_url', u'id']
    annotations_list:
      list of dicts with keys:
      [u'segmentation', u'area', u'iscrowd', u'image_id',
      u'bbox', u'category_id', u'id']
      Notice that bounding box coordinates in the official COCO dataset are
      given as [x, y, width, height] tuples using absolute coordinates where
      x, y represent the top-left (0-indexed) corner.  This function converts
      to the format expected by the Tensorflow Object Detection API (which is
      [ymin, xmin, ymax, xmax] with coordinates normalized relative to image
      size).
    image_dir: directory containing the image files.
    category_index: a dict containing COCO category information keyed
      by the 'id' field of each category. See the
      label_map_util.create_category_index function.
    include_masks: Whether to include instance segmentations masks
      (PNG encoded) in the result. default: False.

  Returns:
    key: hex SHA-256 digest of the raw encoded image bytes.
    example: The converted tf.Example.
    num_annotations_skipped: Number of (invalid) annotations that were ignored.

  Raises:
    KeyError: if an annotation's category_id is not a key of category_index.
    IOError: (or an OSError subclass, depending on the PIL version) if PIL
      cannot identify the file contents as an image.  No explicit check that
      the image is a JPEG is performed here.
  """
  image_height = image['height']
  image_width = image['width']
  filename = image['file_name']
  image_id = image['id']

  full_path = os.path.join(image_dir, filename)
  with tf.gfile.GFile(full_path, 'rb') as fid:
    encoded_jpg = fid.read()
  # Opening the bytes with PIL only serves as a sanity check that they form a
  # readable image; the resulting object is not used, and the `image`
  # metadata dict is deliberately left untouched.
  PIL.Image.open(io.BytesIO(encoded_jpg))
  key = hashlib.sha256(encoded_jpg).hexdigest()

  xmin = []
  xmax = []
  ymin = []
  ymax = []
  is_crowd = []
  # NOTE(review): category_names is collected but never written to
  # feature_dict.  It is kept so that an unknown category_id still raises
  # KeyError; confirm whether 'image/object/class/text' should be emitted.
  category_names = []
  category_ids = []
  area = []
  encoded_mask_png = []
  num_annotations_skipped = 0
  for object_annotations in annotations_list:
    (x, y, width, height) = tuple(object_annotations['bbox'])
    # Skip degenerate boxes and boxes reaching past the right/bottom edge.
    if width <= 0 or height <= 0:
      num_annotations_skipped += 1
      continue
    if x + width > image_width or y + height > image_height:
      num_annotations_skipped += 1
      continue

    # Absolute [x, y, width, height] -> normalized corner coordinates.
    xmin.append(float(x) / image_width)
    xmax.append(float(x + width) / image_width)
    ymin.append(float(y) / image_height)
    ymax.append(float(y + height) / image_height)
    is_crowd.append(object_annotations['iscrowd'])
    category_id = int(object_annotations['category_id'])
    category_ids.append(category_id)
    category_names.append(category_index[category_id]['name'].encode('utf8'))
    area.append(object_annotations['area'])

    if include_masks:
      run_len_encoding = mask.frPyObjects(object_annotations['segmentation'],
                                          image_height, image_width)
      binary_mask = mask.decode(run_len_encoding)
      if not object_annotations['iscrowd']:
        # Collapse axis 2 of the decoded mask into a single 2-D mask.
        binary_mask = np.amax(binary_mask, axis=2)
      pil_image = PIL.Image.fromarray(binary_mask)
      output_io = io.BytesIO()
      pil_image.save(output_io, format='PNG')
      encoded_mask_png.append(output_io.getvalue())

  feature_dict = {
      'image/height':
          dataset_util.int64_feature(image_height),
      'image/width':
          dataset_util.int64_feature(image_width),
      'image/filename':
          dataset_util.bytes_feature(filename.encode('utf8')),
      'image/source_id':
          dataset_util.bytes_feature(str(image_id).encode('utf8')),
      'image/key/sha256':
          dataset_util.bytes_feature(key.encode('utf8')),
      'image/encoded':
          dataset_util.bytes_feature(encoded_jpg),
      'image/format':
          dataset_util.bytes_feature('jpeg'.encode('utf8')),
      'image/object/bbox/xmin':
          dataset_util.float_list_feature(xmin),
      'image/object/bbox/xmax':
          dataset_util.float_list_feature(xmax),
      'image/object/bbox/ymin':
          dataset_util.float_list_feature(ymin),
      'image/object/bbox/ymax':
          dataset_util.float_list_feature(ymax),
      'image/object/class/label':
          dataset_util.int64_list_feature(category_ids),
      'image/object/is_crowd':
          dataset_util.int64_list_feature(is_crowd),
      'image/object/area':
          dataset_util.float_list_feature(area),
  }
  if include_masks:
    feature_dict['image/object/mask'] = (
        dataset_util.bytes_list_feature(encoded_mask_png))
  example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
  return key, example, num_annotations_skipped
def _create_tf_record_from_coco_annotations(
    annotations_file, image_dir, output_path, include_masks):
  """Loads a COCO annotation JSON file and converts it to tf.Record format.

  Every image listed in the annotation file produces one serialized
  tf.Example, including images that have no annotations at all.

  Args:
    annotations_file: JSON file containing bounding box annotations.
    image_dir: Directory containing the image files.
    output_path: Path to output tf.Record file.
    include_masks: Whether to include instance segmentation masks
      (PNG encoded) in the result.
  """
  # Only the JSON parsing needs the file handle; release it right away.
  with tf.gfile.GFile(annotations_file, 'r') as fid:
    groundtruth_data = json.load(fid)

  images = groundtruth_data['images']
  category_index = label_map_util.create_category_index(
      groundtruth_data['categories'])

  # Group the annotations by the id of the image they belong to.
  annotations_index = {}
  if 'annotations' in groundtruth_data:
    tf.logging.info(
        'Found groundtruth annotations. Building annotations index.')
    for annotation in groundtruth_data['annotations']:
      annotations_index.setdefault(annotation['image_id'], []).append(
          annotation)

  # Images without any annotation get an empty list so the lookup in the
  # writing loop below never fails.
  missing_annotation_count = 0
  for image in images:
    image_id = image['id']
    if image_id not in annotations_index:
      missing_annotation_count += 1
      annotations_index[image_id] = []
  tf.logging.info('%d images are missing annotations.',
                  missing_annotation_count)

  tf.logging.info('writing to output path: %s', output_path)
  writer = tf.python_io.TFRecordWriter(output_path)
  total_num_annotations_skipped = 0
  try:
    for idx, image in enumerate(images):
      if idx % 100 == 0:
        tf.logging.info('On image %d of %d', idx, len(images))
      annotations_list = annotations_index[image['id']]
      _, tf_example, num_annotations_skipped = create_tf_example(
          image, annotations_list, image_dir, category_index, include_masks)
      total_num_annotations_skipped += num_annotations_skipped
      writer.write(tf_example.SerializeToString())
  finally:
    # Close the record file even when converting an image raises.
    writer.close()
  tf.logging.info('Finished writing, skipped %d annotations.',
                  total_num_annotations_skipped)
def main(_):
  """Converts the COCO train, val and test-dev splits into TFRecord files.

  All image-directory and annotation-file flags must be set. The records are
  written to `FLAGS.output_dir`, which is created when it does not exist.
  """
  assert FLAGS.train_image_dir, '`train_image_dir` missing.'
  assert FLAGS.val_image_dir, '`val_image_dir` missing.'
  assert FLAGS.test_image_dir, '`test_image_dir` missing.'
  assert FLAGS.train_annotations_file, '`train_annotations_file` missing.'
  assert FLAGS.val_annotations_file, '`val_annotations_file` missing.'
  assert FLAGS.testdev_annotations_file, '`testdev_annotations_file` missing.'

  if not tf.gfile.IsDirectory(FLAGS.output_dir):
    tf.gfile.MakeDirs(FLAGS.output_dir)

  # (annotation file, image directory, output record name) for each split,
  # processed in this order.
  splits = (
      (FLAGS.train_annotations_file, FLAGS.train_image_dir,
       'coco_train.record'),
      (FLAGS.val_annotations_file, FLAGS.val_image_dir,
       'coco_val.record'),
      (FLAGS.testdev_annotations_file, FLAGS.test_image_dir,
       'coco_testdev.record'),
  )
  for split_annotations, split_images, record_name in splits:
    _create_tf_record_from_coco_annotations(
        split_annotations,
        split_images,
        os.path.join(FLAGS.output_dir, record_name),
        FLAGS.include_masks)


if __name__ == '__main__':
  tf.app.run()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Test for create_coco_tf_record.py."""
import io
import os
import numpy as np
import PIL.Image
import tensorflow as tf
from object_detection.dataset_tools import create_coco_tf_record
class CreateCocoTFRecordTest(tf.test.TestCase):
  """Unit tests for `create_coco_tf_record.create_tf_example`."""

  def _assertProtoEqual(self, proto_field, expectation):
    """Helper function to assert if a proto field equals some value.

    Args:
      proto_field: The protobuf field to compare.
      expectation: The expected value of the protobuf field.
    """
    self.assertListEqual(list(proto_field), expectation)

  def test_create_tf_example(self):
    """Checks image metadata and normalized boxes when masks are disabled."""
    image_file_name = 'tmp_image.jpg'
    image_data = np.random.rand(256, 256, 3)
    tmp_dir = self.get_temp_dir()
    save_path = os.path.join(tmp_dir, image_file_name)
    image = PIL.Image.fromarray(image_data, 'RGB')
    image.save(save_path)

    image = {
        'file_name': image_file_name,
        'height': 256,
        'width': 256,
        'id': 11,
    }
    annotations_list = [{
        'area': .5,
        'iscrowd': False,
        'image_id': 11,
        'bbox': [64, 64, 128, 128],
        'category_id': 2,
        'id': 1000,
    }]
    image_dir = tmp_dir
    category_index = {
        1: {'name': 'dog', 'id': 1},
        2: {'name': 'cat', 'id': 2},
        3: {'name': 'human', 'id': 3},
    }

    (_, example,
     num_annotations_skipped) = create_coco_tf_record.create_tf_example(
         image, annotations_list, image_dir, category_index)

    self.assertEqual(num_annotations_skipped, 0)
    features = example.features.feature
    self._assertProtoEqual(features['image/height'].int64_list.value, [256])
    self._assertProtoEqual(features['image/width'].int64_list.value, [256])
    # bytes_list values are `bytes`; encode the expectations so the
    # comparison also holds on Python 3.
    self._assertProtoEqual(features['image/filename'].bytes_list.value,
                           [image_file_name.encode('utf8')])
    self._assertProtoEqual(features['image/source_id'].bytes_list.value,
                           [str(image['id']).encode('utf8')])
    self._assertProtoEqual(features['image/format'].bytes_list.value,
                           ['jpeg'.encode('utf8')])
    # bbox is [x, y, width, height] = [64, 64, 128, 128] on a 256x256 image.
    self._assertProtoEqual(
        features['image/object/bbox/xmin'].float_list.value, [0.25])
    self._assertProtoEqual(
        features['image/object/bbox/ymin'].float_list.value, [0.25])
    self._assertProtoEqual(
        features['image/object/bbox/xmax'].float_list.value, [0.75])
    self._assertProtoEqual(
        features['image/object/bbox/ymax'].float_list.value, [0.75])

  def test_create_tf_example_with_instance_masks(self):
    """Checks that polygon segmentations are rasterized into PNG masks."""
    image_file_name = 'tmp_image.jpg'
    image_data = np.random.rand(8, 8, 3)
    tmp_dir = self.get_temp_dir()
    save_path = os.path.join(tmp_dir, image_file_name)
    image = PIL.Image.fromarray(image_data, 'RGB')
    image.save(save_path)

    image = {
        'file_name': image_file_name,
        'height': 8,
        'width': 8,
        'id': 11,
    }
    annotations_list = [{
        'area': .5,
        'iscrowd': False,
        'image_id': 11,
        'bbox': [0, 0, 8, 8],
        'segmentation': [[4, 0, 0, 0, 0, 4], [8, 4, 4, 8, 8, 8]],
        'category_id': 1,
        'id': 1000,
    }]
    image_dir = tmp_dir
    category_index = {
        1: {'name': 'dog', 'id': 1},
    }

    (_, example,
     num_annotations_skipped) = create_coco_tf_record.create_tf_example(
         image, annotations_list, image_dir, category_index,
         include_masks=True)

    self.assertEqual(num_annotations_skipped, 0)
    features = example.features.feature
    self._assertProtoEqual(features['image/height'].int64_list.value, [8])
    self._assertProtoEqual(features['image/width'].int64_list.value, [8])
    # bytes_list values are `bytes`; encode the expectations so the
    # comparison also holds on Python 3.
    self._assertProtoEqual(features['image/filename'].bytes_list.value,
                           [image_file_name.encode('utf8')])
    self._assertProtoEqual(features['image/source_id'].bytes_list.value,
                           [str(image['id']).encode('utf8')])
    self._assertProtoEqual(features['image/format'].bytes_list.value,
                           ['jpeg'.encode('utf8')])
    self._assertProtoEqual(
        features['image/object/bbox/xmin'].float_list.value, [0])
    self._assertProtoEqual(
        features['image/object/bbox/ymin'].float_list.value, [0])
    self._assertProtoEqual(
        features['image/object/bbox/xmax'].float_list.value, [1])
    self._assertProtoEqual(
        features['image/object/bbox/ymax'].float_list.value, [1])

    # Decode every stored PNG mask back into a numpy array.
    encoded_mask_pngs = [
        io.BytesIO(encoded_mask)
        for encoded_mask in features['image/object/mask'].bytes_list.value
    ]
    pil_masks = [
        np.array(PIL.Image.open(encoded_mask_png))
        for encoded_mask_png in encoded_mask_pngs
    ]
    self.assertEqual(len(pil_masks), 1)
    # The two triangles of the segmentation fill opposite corners.
    self.assertAllEqual(pil_masks[0],
                        [[1, 1, 1, 0, 0, 0, 0, 0], [1, 1, 0, 0, 0, 0, 0, 0],
                         [1, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0],
                         [0, 0, 0, 0, 0, 0, 0, 1], [0, 0, 0, 0, 0, 0, 1, 1],
                         [0, 0, 0, 0, 0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 1, 1]])


if __name__ == '__main__':
  tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Convert raw KITTI detection dataset to TFRecord for object_detection.
Converts KITTI detection dataset to TFRecords with a standard format allowing
to use this dataset to train object detectors. The raw dataset can be
downloaded from:
http://kitti.is.tue.mpg.de/kitti/data_object_image_2.zip.
http://kitti.is.tue.mpg.de/kitti/data_object_label_2.zip
Permission can be requested at the main website.
KITTI detection dataset contains 7481 training images. Using this code with
the default settings will set aside the first 500 images as a validation set.
This can be altered using the flags, see details below.
Example usage:
python object_detection/dataset_tools/create_kitti_tf_record.py \
--data_dir=/home/user/kitti \
--output_path=/home/user/kitti.record
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import hashlib
import io
import os
import numpy as np
import PIL.Image as pil
import tensorflow as tf
from object_detection.utils import dataset_util
from object_detection.utils import label_map_util
from object_detection.utils.np_box_ops import iou
# Command-line flags of the KITTI conversion script. Adjacent string literals
# are concatenated verbatim, so every fragment that continues on the next
# line ends with an explicit space.
tf.app.flags.DEFINE_string('data_dir', '', 'Location of root directory for the '
                           'data. Folder structure is assumed to be: '
                           '<data_dir>/training/label_2 (annotations) and '
                           '<data_dir>/data_object_image_2/training/image_2 '
                           '(images).')
tf.app.flags.DEFINE_string('output_path', '', 'Path to which TFRecord files '
                           'will be written. The TFRecord with the training '
                           'set will be located at: '
                           '<output_path>_train.tfrecord. '
                           'And the TFRecord with the validation set will be '
                           'located at: <output_path>_val.tfrecord')
tf.app.flags.DEFINE_string('classes_to_use', 'car,pedestrian,dontcare',
                           'Comma separated list of class names that will be '
                           'used. Adding the dontcare class will remove all '
                           'bboxs in the dontcare regions.')
tf.app.flags.DEFINE_string('label_map_path', 'data/kitti_label_map.pbtxt',
                           'Path to label map proto.')
# The default is an integer literal to match the flag type.
tf.app.flags.DEFINE_integer('validation_set_size', 500, 'Number of images to '
                            'be used as a validation set.')
FLAGS = tf.app.flags.FLAGS
def convert_kitti_to_tfrecords(data_dir, output_path, classes_to_use,
                               label_map_path, validation_set_size):
  """Convert the KITTI detection dataset to TFRecords.

  Args:
    data_dir: The full path to the unzipped folder containing the unzipped data
      from data_object_image_2 and data_object_label_2.zip.
      Folder structure is assumed to be: data_dir/training/label_2 (annotations)
      and data_dir/data_object_image_2/training/image_2 (images).
    output_path: The path to which TFRecord files will be written. The TFRecord
      with the training set will be located at: <output_path>_train.tfrecord
      And the TFRecord with the validation set will be located at:
      <output_path>_val.tfrecord
    classes_to_use: List of strings naming the classes for which data should be
      converted. Use the same names as presented in the KITTI README file.
      Adding dontcare class will remove all other bounding boxes that overlap
      with areas marked as dontcare regions.
    label_map_path: Path to label map proto
    validation_set_size: How many images should be left as the validation set.
      (First `validation_set_size` examples are selected to be in the
      validation set).
  """
  label_map_dict = label_map_util.get_label_map_dict(label_map_path)

  annotation_dir = os.path.join(data_dir, 'training', 'label_2')
  image_dir = os.path.join(data_dir,
                           'data_object_image_2',
                           'training',
                           'image_2')

  train_writer = tf.python_io.TFRecordWriter('%s_train.tfrecord' %
                                             output_path)
  val_writer = tf.python_io.TFRecordWriter('%s_val.tfrecord' %
                                           output_path)

  try:
    images = sorted(tf.gfile.ListDirectory(image_dir))
    for img_name in images:
      # The numeric file stem identifies the frame; the matching label file
      # is that number zero-padded to six digits.
      img_num = int(img_name.split('.')[0])
      is_validation_img = img_num < validation_set_size
      img_anno = read_annotation_file(os.path.join(annotation_dir,
                                                   str(img_num).zfill(6)+'.txt'))

      image_path = os.path.join(image_dir, img_name)

      # Filter all bounding boxes of this frame that are of a legal class, and
      # don't overlap with a dontcare region.
      # TODO(talremez) filter out targets that are truncated or heavily occluded.
      annotation_for_image = filter_annotations(img_anno, classes_to_use)

      example = prepare_example(image_path, annotation_for_image,
                                label_map_dict)
      if is_validation_img:
        val_writer.write(example.SerializeToString())
      else:
        train_writer.write(example.SerializeToString())
  finally:
    # Close both record files even when converting a frame raises.
    train_writer.close()
    val_writer.close()
def prepare_example(image_path, annotations, label_map_dict):
  """Builds the tf.Example proto for one image and its annotations.

  Args:
    image_path: The complete path to image.
    annotations: A dictionary mapping annotation field names (e.g.
      '2d_bbox_left', 'type', 'truncated') to the per-object values of that
      field for every object in the image.
    label_map_dict: A map from string label names to integer ids.

  Returns:
    example: The converted tf.Example.
  """
  with tf.gfile.GFile(image_path, 'rb') as image_file:
    image_bytes = image_file.read()

  # Decode the image only to learn its pixel dimensions.
  pixels = np.asarray(pil.open(io.BytesIO(image_bytes)))
  sha256_key = hashlib.sha256(image_bytes).hexdigest()

  height = int(pixels.shape[0])
  width = int(pixels.shape[1])

  # Normalize the 2D box corners to [0, 1] image coordinates.
  xmin_norm = annotations['2d_bbox_left'] / float(width)
  ymin_norm = annotations['2d_bbox_top'] / float(height)
  xmax_norm = annotations['2d_bbox_right'] / float(width)
  ymax_norm = annotations['2d_bbox_bottom'] / float(height)

  # Every object is written as "not difficult".
  difficult_obj = [0] * len(xmin_norm)

  encoded_path = image_path.encode('utf8')
  class_names = annotations['type']

  feature = {
      'image/height': dataset_util.int64_feature(height),
      'image/width': dataset_util.int64_feature(width),
      'image/filename': dataset_util.bytes_feature(encoded_path),
      'image/source_id': dataset_util.bytes_feature(encoded_path),
      'image/key/sha256': dataset_util.bytes_feature(
          sha256_key.encode('utf8')),
      'image/encoded': dataset_util.bytes_feature(image_bytes),
      'image/format': dataset_util.bytes_feature('png'.encode('utf8')),
      'image/object/bbox/xmin': dataset_util.float_list_feature(xmin_norm),
      'image/object/bbox/xmax': dataset_util.float_list_feature(xmax_norm),
      'image/object/bbox/ymin': dataset_util.float_list_feature(ymin_norm),
      'image/object/bbox/ymax': dataset_util.float_list_feature(ymax_norm),
      'image/object/class/text': dataset_util.bytes_list_feature(
          [name.encode('utf8') for name in class_names]),
      'image/object/class/label': dataset_util.int64_list_feature(
          [label_map_dict[name] for name in class_names]),
      'image/object/difficult': dataset_util.int64_list_feature(
          difficult_obj),
      'image/object/truncated': dataset_util.float_list_feature(
          annotations['truncated']),
      'image/object/alpha': dataset_util.float_list_feature(
          annotations['alpha']),
      'image/object/3d_bbox/height': dataset_util.float_list_feature(
          annotations['3d_bbox_height']),
      'image/object/3d_bbox/width': dataset_util.float_list_feature(
          annotations['3d_bbox_width']),
      'image/object/3d_bbox/length': dataset_util.float_list_feature(
          annotations['3d_bbox_length']),
      'image/object/3d_bbox/x': dataset_util.float_list_feature(
          annotations['3d_bbox_x']),
      'image/object/3d_bbox/y': dataset_util.float_list_feature(
          annotations['3d_bbox_y']),
      'image/object/3d_bbox/z': dataset_util.float_list_feature(
          annotations['3d_bbox_z']),
      'image/object/3d_bbox/rot_y': dataset_util.float_list_feature(
          annotations['3d_bbox_rot_y']),
  }
  return tf.train.Example(features=tf.train.Features(feature=feature))
def filter_annotations(img_all_annotations, used_classes):
  """Keeps only the requested classes and drops boxes in dontcare regions.

  Objects whose class is not in `used_classes` are removed. When
  `used_classes` contains "dontcare", every remaining box that overlaps a
  dontcare box (the dontcare boxes themselves included) is removed as well.

  Args:
    img_all_annotations: A dictionary mapping each annotation field name to an
      array with one entry per object. See documentation of
      read_annotation_file for more details about the format.
    used_classes: A list of strings listing the classes we want to keep.

  Returns:
    img_filtered_annotations: A dictionary with the same keys as the input,
      restricted to the objects that passed the filtering.
  """
  # Positions of the objects whose class was requested.
  kept_positions = [
      position
      for position, class_name in enumerate(img_all_annotations['type'])
      if class_name in used_classes
  ]
  img_filtered_annotations = {
      field: img_all_annotations[field][kept_positions]
      for field in img_all_annotations.keys()
  }

  if 'dontcare' not in used_classes:
    return img_filtered_annotations

  dontcare_positions = [
      position
      for position, class_name in enumerate(img_filtered_annotations['type'])
      if class_name == 'dontcare'
  ]

  # bounding box format [y_min, x_min, y_max, x_max]
  all_boxes = np.stack([img_filtered_annotations['2d_bbox_top'],
                        img_filtered_annotations['2d_bbox_left'],
                        img_filtered_annotations['2d_bbox_bottom'],
                        img_filtered_annotations['2d_bbox_right']],
                       axis=1)
  ious = iou(boxes1=all_boxes,
             boxes2=all_boxes[dontcare_positions])

  # Remove all bounding boxes that overlap with a dontcare region.
  if ious.size > 0:
    keep_mask = np.logical_not(np.amax(ious, axis=1) > 0.0)
    for field in img_all_annotations.keys():
      img_filtered_annotations[field] = (
          img_filtered_annotations[field][keep_mask])

  return img_filtered_annotations
def read_annotation_file(filename):
  """Reads a KITTI annotation file.

  Converts a KITTI annotation file into a dictionary containing all the
  relevant information. Each non-blank line describes one object as
  space-separated fields; blank lines are ignored.

  Args:
    filename: the path to the annotation text file.

  Returns:
    anno: A dictionary mapping each field name to a numpy array holding one
      value per object. See annotation README file for details on the
      different fields.
  """
  with open(filename) as f:
    # Skip blank lines (e.g. a trailing empty line) instead of failing with
    # an IndexError when their missing columns are accessed.
    content = [line.strip().split(' ') for line in f if line.strip()]

  # (field name, column index, converter) for every numeric column.
  numeric_columns = (
      ('truncated', 1, float),
      ('occluded', 2, int),
      ('alpha', 3, float),
      ('2d_bbox_left', 4, float),
      ('2d_bbox_top', 5, float),
      ('2d_bbox_right', 6, float),
      ('2d_bbox_bottom', 7, float),
      ('3d_bbox_height', 8, float),
      ('3d_bbox_width', 9, float),
      ('3d_bbox_length', 10, float),
      ('3d_bbox_x', 11, float),
      ('3d_bbox_y', 12, float),
      ('3d_bbox_z', 13, float),
      ('3d_bbox_rot_y', 14, float),
  )

  anno = {}
  # Class names are lower-cased so they compare equal to names such as
  # 'car' or 'dontcare' used elsewhere in this module.
  anno['type'] = np.array([fields[0].lower() for fields in content])
  for name, column, convert in numeric_columns:
    anno[name] = np.array([convert(fields[column]) for fields in content])

  return anno
def main(_):
  """Runs the KITTI to TFRecord conversion with the command-line flags."""
  # The classes flag is a single comma separated string.
  selected_classes = FLAGS.classes_to_use.split(',')
  convert_kitti_to_tfrecords(
      data_dir=FLAGS.data_dir,
      output_path=FLAGS.output_path,
      classes_to_use=selected_classes,
      label_map_path=FLAGS.label_map_path,
      validation_set_size=FLAGS.validation_set_size)


if __name__ == '__main__':
  tf.app.run()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Test for create_kitti_tf_record.py."""
import os
import numpy as np
import PIL.Image
import tensorflow as tf
from object_detection.dataset_tools import create_kitti_tf_record
class CreateKittiTFRecordTest(tf.test.TestCase):
  """Unit tests for `create_kitti_tf_record.prepare_example`."""

  def _assertProtoEqual(self, proto_field, expectation):
    """Helper function to assert if a proto field equals some value.

    Args:
      proto_field: The protobuf field to compare.
      expectation: The expected value of the protobuf field.
    """
    self.assertListEqual(list(proto_field), expectation)

  def test_dict_to_tf_example(self):
    """Checks every feature written for an image with a single car."""
    image_file_name = 'tmp_image.jpg'
    image_data = np.random.rand(256, 256, 3)
    save_path = os.path.join(self.get_temp_dir(), image_file_name)
    image = PIL.Image.fromarray(image_data, 'RGB')
    image.save(save_path)

    annotations = {}
    annotations['2d_bbox_left'] = np.array([64])
    annotations['2d_bbox_top'] = np.array([64])
    annotations['2d_bbox_right'] = np.array([192])
    annotations['2d_bbox_bottom'] = np.array([192])
    annotations['type'] = ['car']
    annotations['truncated'] = np.array([1])
    annotations['alpha'] = np.array([2])
    annotations['3d_bbox_height'] = np.array([10])
    annotations['3d_bbox_width'] = np.array([11])
    annotations['3d_bbox_length'] = np.array([12])
    annotations['3d_bbox_x'] = np.array([13])
    annotations['3d_bbox_y'] = np.array([14])
    annotations['3d_bbox_z'] = np.array([15])
    annotations['3d_bbox_rot_y'] = np.array([4])

    label_map_dict = {
        'background': 0,
        'car': 1,
    }

    example = create_kitti_tf_record.prepare_example(
        save_path,
        annotations,
        label_map_dict)
    features = example.features.feature

    self._assertProtoEqual(features['image/height'].int64_list.value, [256])
    self._assertProtoEqual(features['image/width'].int64_list.value, [256])
    self._assertProtoEqual(
        features['image/object/class/label'].int64_list.value, [1])

    # bytes_list values are `bytes`; encode the expectations so the
    # comparison also holds on Python 3.
    expected_bytes = (
        ('image/filename', save_path),
        ('image/source_id', save_path),
        ('image/format', 'png'),
        ('image/object/class/text', 'car'),
    )
    for feature_name, text in expected_bytes:
      self._assertProtoEqual(features[feature_name].bytes_list.value,
                             [text.encode('utf8')])

    # The 64..192 pixel box on a 256x256 image normalizes to 0.25..0.75.
    expected_floats = (
        ('image/object/bbox/xmin', [0.25]),
        ('image/object/bbox/ymin', [0.25]),
        ('image/object/bbox/xmax', [0.75]),
        ('image/object/bbox/ymax', [0.75]),
        ('image/object/truncated', [1]),
        ('image/object/alpha', [2]),
        ('image/object/3d_bbox/height', [10]),
        ('image/object/3d_bbox/width', [11]),
        ('image/object/3d_bbox/length', [12]),
        ('image/object/3d_bbox/x', [13]),
        ('image/object/3d_bbox/y', [14]),
        ('image/object/3d_bbox/z', [15]),
        ('image/object/3d_bbox/rot_y', [4]),
    )
    for feature_name, expected in expected_floats:
      self._assertProtoEqual(features[feature_name].float_list.value,
                             expected)


if __name__ == '__main__':
  tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Creates TFRecords of Open Images dataset for object detection.
Example usage:
python object_detection/dataset_tools/create_oid_tf_record.py \
--input_box_annotations_csv=/path/to/input/annotations-human-bbox.csv \
--input_image_label_annotations_csv=/path/to/input/annotations-label.csv \
--input_images_directory=/path/to/input/image_pixels_directory \
--input_label_map=/path/to/input/labels_bbox_545.labelmap \
--output_tf_record_path_prefix=/path/to/output/prefix.tfrecord
CSVs with bounding box annotations and image metadata (including the image URLs)
can be downloaded from the Open Images GitHub repository:
https://github.com/openimages/dataset
This script will include every image found in the input_images_directory in the
output TFRecord, even if the image has no corresponding bounding box annotations
in the input_annotations_csv. If input_image_label_annotations_csv is specified,
it will add image-level labels as well. Note that the information of whether a
label is positively or negatively verified is NOT added to the tfrecord.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import contextlib2
import pandas as pd
import tensorflow as tf
from object_detection.dataset_tools import oid_tfrecord_creation
from object_detection.dataset_tools import tf_record_creation_util
from object_detection.utils import label_map_util
# Command-line flags of the Open Images conversion script. All string flags
# default to None; main() raises ValueError unless input_box_annotations_csv,
# input_images_directory, input_label_map and output_tf_record_path_prefix
# are set. input_image_label_annotations_csv is optional.
tf.flags.DEFINE_string('input_box_annotations_csv', None,
                       'Path to CSV containing image bounding box annotations')
tf.flags.DEFINE_string('input_images_directory', None,
                       'Directory containing the image pixels '
                       'downloaded from the OpenImages GitHub repository.')
tf.flags.DEFINE_string('input_image_label_annotations_csv', None,
                       'Path to CSV containing image-level labels annotations')
tf.flags.DEFINE_string('input_label_map', None, 'Path to the label map proto')
tf.flags.DEFINE_string(
    'output_tf_record_path_prefix', None,
    'Path to the output TFRecord. The shard index and the number of shards '
    'will be appended for each output shard.')
# main() picks the shard of each example as the image id modulo num_shards.
tf.flags.DEFINE_integer('num_shards', 100, 'Number of TFRecord shards')
FLAGS = tf.flags.FLAGS
def main(_):
  """Writes one tf.Example per image of the Open Images input to TFRecords.

  Box annotations, the ids of the `*.jpg` files found in the image directory
  and (optionally) image-level labels are concatenated into one data frame
  and grouped by 'ImageID'; each group is converted to a tf.Example and
  written to the shard selected by the image id.

  Raises:
    ValueError: if one of the required flags is not set.
  """
  tf.logging.set_verbosity(tf.logging.INFO)

  required_flags = [
      'input_box_annotations_csv', 'input_images_directory', 'input_label_map',
      'output_tf_record_path_prefix'
  ]
  for flag_name in required_flags:
    if not getattr(FLAGS, flag_name):
      raise ValueError('Flag --{} is required'.format(flag_name))

  label_map = label_map_util.get_label_map_dict(FLAGS.input_label_map)
  all_box_annotations = pd.read_csv(FLAGS.input_box_annotations_csv)
  if FLAGS.input_image_label_annotations_csv:
    all_label_annotations = pd.read_csv(FLAGS.input_image_label_annotations_csv)
    # Renamed so the image-level confidence lands in its own column when the
    # frames are concatenated below.
    all_label_annotations.rename(
        columns={'Confidence': 'ConfidenceImageLabel'}, inplace=True)
  else:
    all_label_annotations = None
  all_images = tf.gfile.Glob(
      os.path.join(FLAGS.input_images_directory, '*.jpg'))
  all_image_ids = [os.path.splitext(os.path.basename(v))[0] for v in all_images]
  all_image_ids = pd.DataFrame({'ImageID': all_image_ids})
  # Including the bare image ids gives every image on disk its own group,
  # even when it has no annotation rows.
  all_annotations = pd.concat(
      [all_box_annotations, all_image_ids, all_label_annotations])

  tf.logging.log(tf.logging.INFO, 'Found %d images...', len(all_image_ids))

  with contextlib2.ExitStack() as tf_record_close_stack:
    output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords(
        tf_record_close_stack, FLAGS.output_tf_record_path_prefix,
        FLAGS.num_shards)

    for counter, image_data in enumerate(all_annotations.groupby('ImageID')):
      tf.logging.log_every_n(tf.logging.INFO, 'Processed %d images...', 1000,
                             counter)

      image_id, image_annotations = image_data
      # In OID image file names are formed by appending ".jpg" to the image ID.
      image_path = os.path.join(FLAGS.input_images_directory, image_id + '.jpg')
      # Read the JPEG as raw bytes; a text-mode read would try to decode the
      # binary image data as a string.
      with tf.gfile.Open(image_path, 'rb') as image_file:
        encoded_image = image_file.read()

      tf_example = oid_tfrecord_creation.tf_example_from_annotations_data_frame(
          image_annotations, label_map, encoded_image)
      if tf_example:
        # The image id is parsed as a hexadecimal number to pick the shard.
        shard_idx = int(image_id, 16) % FLAGS.num_shards
        output_tfrecords[shard_idx].write(tf_example.SerializeToString())


if __name__ == '__main__':
  tf.app.run()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Convert raw PASCAL dataset to TFRecord for object_detection.
Example usage:
python object_detection/dataset_tools/create_pascal_tf_record.py \
--data_dir=/home/user/VOCdevkit \
--year=VOC2012 \
--output_path=/home/user/pascal.record
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import hashlib
import io
import logging
import os
from lxml import etree
import PIL.Image
import tensorflow as tf
from object_detection.utils import dataset_util
from object_detection.utils import label_map_util
# Command-line flags of the PASCAL VOC conversion script.
flags = tf.app.flags
flags.DEFINE_string('data_dir', '', 'Root directory to raw PASCAL VOC dataset.')
flags.DEFINE_string('set', 'train', 'Convert training set, validation set or '
                    'merged set.')
flags.DEFINE_string('annotations_dir', 'Annotations',
                    '(Relative) path to annotations directory.')
flags.DEFINE_string('year', 'VOC2007', 'Desired challenge year.')
flags.DEFINE_string('output_path', '', 'Path to output TFRecord')
flags.DEFINE_string('label_map_path', 'data/pascal_label_map.pbtxt',
                    'Path to label map proto')
flags.DEFINE_boolean('ignore_difficult_instances', False, 'Whether to ignore '
                     'difficult instances')
FLAGS = flags.FLAGS

# NOTE(review): presumably the accepted values of --set and --year; the code
# that validates them is not visible here - confirm against main().
SETS = ['train', 'val', 'trainval', 'test']
YEARS = ['VOC2007', 'VOC2012', 'merged']
def dict_to_tf_example(data,
                       dataset_directory,
                       label_map_dict,
                       ignore_difficult_instances=False,
                       image_subdirectory='JPEGImages'):
  """Builds a tf.Example from one parsed PASCAL VOC annotation.

  Bounding box coordinates read from the annotation are divided by the image
  width / height recorded in the same annotation, i.e. they are stored
  normalized.

  Args:
    data: dict holding PASCAL XML fields for a single image (obtained by
      running dataset_util.recursive_parse_xml_to_dict).
    dataset_directory: Path to root directory holding PASCAL dataset.
    label_map_dict: A map from string label names to integer ids.
    ignore_difficult_instances: Whether to skip objects flagged as difficult
      (default: False).
    image_subdirectory: String specifying subdirectory within the PASCAL
      dataset directory holding the actual image data.

  Returns:
    example: The converted tf.Example.

  Raises:
    ValueError: if the image pointed to by data['filename'] is not a valid JPEG
  """
  relative_image_path = os.path.join(
      data['folder'], image_subdirectory, data['filename'])
  absolute_image_path = os.path.join(dataset_directory, relative_image_path)
  with tf.gfile.GFile(absolute_image_path, 'rb') as image_file:
    jpeg_bytes = image_file.read()

  # Let PIL sniff the container format so non-JPEG files are rejected early.
  if PIL.Image.open(io.BytesIO(jpeg_bytes)).format != 'JPEG':
    raise ValueError('Image format not JPEG')
  sha256_hex = hashlib.sha256(jpeg_bytes).hexdigest()

  image_width = int(data['size']['width'])
  image_height = int(data['size']['height'])

  # Per-object feature columns; every kept object appends to each of them.
  box_xmin, box_ymin, box_xmax, box_ymax = [], [], [], []
  label_ids, label_names = [], []
  truncated_flags, difficult_flags, views = [], [], []

  for annotation in data.get('object', []):
    is_difficult = bool(int(annotation['difficult']))
    if is_difficult and ignore_difficult_instances:
      continue

    difficult_flags.append(int(is_difficult))
    box_xmin.append(float(annotation['bndbox']['xmin']) / image_width)
    box_ymin.append(float(annotation['bndbox']['ymin']) / image_height)
    box_xmax.append(float(annotation['bndbox']['xmax']) / image_width)
    box_ymax.append(float(annotation['bndbox']['ymax']) / image_height)
    label_names.append(annotation['name'].encode('utf8'))
    label_ids.append(label_map_dict[annotation['name']])
    truncated_flags.append(int(annotation['truncated']))
    views.append(annotation['pose'].encode('utf8'))

  encoded_filename = data['filename'].encode('utf8')
  features = {
      'image/height': dataset_util.int64_feature(image_height),
      'image/width': dataset_util.int64_feature(image_width),
      'image/filename': dataset_util.bytes_feature(encoded_filename),
      'image/source_id': dataset_util.bytes_feature(encoded_filename),
      'image/key/sha256': dataset_util.bytes_feature(
          sha256_hex.encode('utf8')),
      'image/encoded': dataset_util.bytes_feature(jpeg_bytes),
      'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
      'image/object/bbox/xmin': dataset_util.float_list_feature(box_xmin),
      'image/object/bbox/xmax': dataset_util.float_list_feature(box_xmax),
      'image/object/bbox/ymin': dataset_util.float_list_feature(box_ymin),
      'image/object/bbox/ymax': dataset_util.float_list_feature(box_ymax),
      'image/object/class/text': dataset_util.bytes_list_feature(label_names),
      'image/object/class/label': dataset_util.int64_list_feature(label_ids),
      'image/object/difficult': dataset_util.int64_list_feature(
          difficult_flags),
      'image/object/truncated': dataset_util.int64_list_feature(
          truncated_flags),
      'image/object/view': dataset_util.bytes_list_feature(views),
  }
  return tf.train.Example(features=tf.train.Features(feature=features))
def main(_):
  """Converts the requested PASCAL VOC split(s) into one TFRecord file.

  Reads the image list for --set from each selected year, parses the matching
  XML annotation for every listed image and writes one serialized tf.Example
  per image to --output_path.

  Args:
    _: Unused positional arguments handed over by tf.app.run().

  Raises:
    ValueError: if --set is not in SETS or --year is not in YEARS.
  """
  if FLAGS.set not in SETS:
    raise ValueError('set must be in : {}'.format(SETS))
  if FLAGS.year not in YEARS:
    raise ValueError('year must be in : {}'.format(YEARS))

  data_dir = FLAGS.data_dir
  years = ['VOC2007', 'VOC2012']
  if FLAGS.year != 'merged':
    years = [FLAGS.year]

  # Load the label map before opening the writer so that a bad label map path
  # does not leave an empty, unclosed output file behind.
  label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)

  writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
  try:
    for year in years:
      logging.info('Reading from PASCAL %s dataset.', year)
      # NOTE(review): the image list is taken from the 'aeroplane' per-class
      # file; presumably every per-class file enumerates the whole split, so
      # any class would do -- confirm against the dataset layout.
      examples_path = os.path.join(data_dir, year, 'ImageSets', 'Main',
                                   'aeroplane_' + FLAGS.set + '.txt')
      annotations_dir = os.path.join(data_dir, year, FLAGS.annotations_dir)
      examples_list = dataset_util.read_examples_list(examples_path)
      for idx, example in enumerate(examples_list):
        if idx % 100 == 0:
          logging.info('On image %d of %d', idx, len(examples_list))
        path = os.path.join(annotations_dir, example + '.xml')
        with tf.gfile.GFile(path, 'r') as fid:
          xml_str = fid.read()
        xml = etree.fromstring(xml_str)
        data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']

        tf_example = dict_to_tf_example(data, data_dir, label_map_dict,
                                        FLAGS.ignore_difficult_instances)
        writer.write(tf_example.SerializeToString())
  finally:
    # Always flush and release the output file, even if a single annotation
    # fails to parse half way through the conversion.
    writer.close()
# Script entry point: only triggers when this file is executed directly.
# NOTE(review): tf.app.run() presumably parses the flags defined above and
# then invokes main() -- confirm against the TensorFlow version in use.
if __name__ == '__main__':
tf.app.run()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Test for create_pascal_tf_record.py."""
import os
import numpy as np
import PIL.Image
import tensorflow as tf
from object_detection.dataset_tools import create_pascal_tf_record
class CreatePascalTFRecordTest(tf.test.TestCase):
  """Checks the tf.Example produced by create_pascal_tf_record."""

  def _assertProtoEqual(self, proto_field, expectation):
    """Helper function to assert if a proto field equals some value.

    Args:
      proto_field: The protobuf field to compare.
      expectation: The expected value of the protobuf field.
    """
    # Repeated proto fields are not plain lists; materialize before comparing.
    self.assertListEqual(list(proto_field), expectation)

  def test_dict_to_tf_example(self):
    """Converts a one-object annotation and checks every emitted feature."""
    image_file_name = 'tmp_image.jpg'
    # Build the fixture from uint8 pixels: an 'RGB' image stores one byte per
    # channel, so float data would only be reinterpreted as raw bytes.
    image_data = np.random.randint(0, 256, size=(256, 256, 3)).astype(np.uint8)
    save_path = os.path.join(self.get_temp_dir(), image_file_name)
    image = PIL.Image.fromarray(image_data, 'RGB')
    image.save(save_path)

    data = {
        'folder': '',
        'filename': image_file_name,
        'size': {
            'height': 256,
            'width': 256,
        },
        'object': [
            {
                'difficult': 1,
                'bndbox': {
                    'xmin': 64,
                    'ymin': 64,
                    'xmax': 192,
                    'ymax': 192,
                },
                'name': 'person',
                'truncated': 0,
                'pose': '',
            },
        ],
    }

    label_map_dict = {
        'background': 0,
        'person': 1,
        'notperson': 2,
    }

    example = create_pascal_tf_record.dict_to_tf_example(
        data, self.get_temp_dir(), label_map_dict, image_subdirectory='')
    feature = example.features.feature

    # bytes_list values are byte strings; compare against bytes so the
    # assertions hold on Python 3 as well as Python 2.
    encoded_file_name = image_file_name.encode('utf8')

    self._assertProtoEqual(feature['image/height'].int64_list.value, [256])
    self._assertProtoEqual(feature['image/width'].int64_list.value, [256])
    self._assertProtoEqual(
        feature['image/filename'].bytes_list.value, [encoded_file_name])
    self._assertProtoEqual(
        feature['image/source_id'].bytes_list.value, [encoded_file_name])
    self._assertProtoEqual(
        feature['image/format'].bytes_list.value, [b'jpeg'])
    self._assertProtoEqual(
        feature['image/object/bbox/xmin'].float_list.value, [0.25])
    self._assertProtoEqual(
        feature['image/object/bbox/ymin'].float_list.value, [0.25])
    self._assertProtoEqual(
        feature['image/object/bbox/xmax'].float_list.value, [0.75])
    self._assertProtoEqual(
        feature['image/object/bbox/ymax'].float_list.value, [0.75])
    self._assertProtoEqual(
        feature['image/object/class/text'].bytes_list.value, [b'person'])
    self._assertProtoEqual(
        feature['image/object/class/label'].int64_list.value, [1])
    self._assertProtoEqual(
        feature['image/object/difficult'].int64_list.value, [1])
    self._assertProtoEqual(
        feature['image/object/truncated'].int64_list.value, [0])
    self._assertProtoEqual(
        feature['image/object/view'].bytes_list.value, [b''])
# Run the test cases defined above when this file is executed directly.
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Convert the Oxford pet dataset to TFRecord for object_detection.
See: O. M. Parkhi, A. Vedaldi, A. Zisserman, C. V. Jawahar
Cats and Dogs
IEEE Conference on Computer Vision and Pattern Recognition, 2012
http://www.robots.ox.ac.uk/~vgg/data/pets/
Example usage:
python object_detection/dataset_tools/create_pet_tf_record.py \
--data_dir=/home/user/pet \
--output_dir=/home/user/pet/output
"""
import hashlib
import io
import logging
import os
import random
import re
import contextlib2
from lxml import etree
import numpy as np
import PIL.Image
import tensorflow as tf
from object_detection.dataset_tools import tf_record_creation_util
from object_detection.utils import dataset_util
from object_detection.utils import label_map_util
# Command-line flags consumed by main() of the Oxford pet conversion script.
flags = tf.app.flags
flags.DEFINE_string('data_dir', '', 'Root directory to raw pet dataset.')
flags.DEFINE_string('output_dir', '', 'Path to directory to output TFRecords.')
flags.DEFINE_string('label_map_path', 'data/pet_label_map.pbtxt',
'Path to label map proto')
# faces_only also selects the output file names used by main().
flags.DEFINE_boolean('faces_only', True, 'If True, generates bounding boxes '
'for pet faces. Otherwise generates bounding boxes (as '
'well as segmentations for full pet bodies). Note that '
'in the latter case, the resulting files are much larger.')
# mask_type is only consulted by dict_to_tf_example when faces_only is False.
flags.DEFINE_string('mask_type', 'png', 'How to represent instance '
'segmentation masks. Options are "png" or "numerical".')
flags.DEFINE_integer('num_shards', 10, 'Number of TFRecord shards')
FLAGS = flags.FLAGS
def get_class_name_from_filename(file_name):
  """Gets the class name from a file.

  The class name is everything before the trailing "_<number>.jpg" part of
  the file name; the extension is matched case-insensitively.

  Args:
    file_name: The file name to get the class name from.
               ie. "american_pit_bull_terrier_105.jpg"

  Returns:
    A string of the class name.

  Raises:
    ValueError: if file_name does not start with "<class_name>_<number>.jpg".
      create_tf_record already catches ValueError and skips such examples
      with a warning.
  """
  match = re.match(r'([A-Za-z_]+)(_[0-9]+\.jpg)', file_name, re.I)
  if match is None:
    # Without this guard the failure surfaces as an opaque AttributeError on
    # None instead of pointing at the offending file name.
    raise ValueError(
        'Cannot extract class name from file name: {!r}'.format(file_name))
  return match.group(1)
def dict_to_tf_example(data,
                       mask_path,
                       label_map_dict,
                       image_subdirectory,
                       ignore_difficult_instances=False,
                       faces_only=True,
                       mask_type='png'):
  """Builds a tf.Example from one parsed pet annotation and its trimap.

  Bounding box coordinates are divided by the image width / height recorded
  in the annotation, i.e. they are stored normalized.

  Args:
    data: dict holding PASCAL XML fields for a single image (obtained by
      running dataset_util.recursive_parse_xml_to_dict)
    mask_path: String path to PNG encoded mask.
    label_map_dict: A map from string label names to integers ids.
    image_subdirectory: String specifying the directory holding the actual
      image data; data['filename'] is joined onto it.
    ignore_difficult_instances: Whether to skip difficult instances in the
      dataset (default: False).
    faces_only: If True, boxes come from the annotation's 'bndbox' entries.
      Otherwise the box is the extent of the non-background mask pixels and
      a per-object mask feature is added.
    mask_type: 'numerical' or 'png'. 'png' is recommended because it leads to
      smaller file sizes.

  Returns:
    example: The converted tf.Example.

  Raises:
    ValueError: if the image pointed to by data['filename'] is not a valid
      JPEG, or the file at mask_path is not a PNG.
  """
  image_path = os.path.join(image_subdirectory, data['filename'])
  with tf.gfile.GFile(image_path, 'rb') as image_file:
    jpeg_bytes = image_file.read()
  if PIL.Image.open(io.BytesIO(jpeg_bytes)).format != 'JPEG':
    raise ValueError('Image format not JPEG')
  sha256_hex = hashlib.sha256(jpeg_bytes).hexdigest()

  with tf.gfile.GFile(mask_path, 'rb') as mask_file:
    trimap_bytes = mask_file.read()
  trimap = PIL.Image.open(io.BytesIO(trimap_bytes))
  if trimap.format != 'PNG':
    raise ValueError('Mask format not PNG')

  # Trimap value 2 is treated as background; collect the columns / rows that
  # contain at least one non-background pixel.
  trimap_np = np.asarray(trimap)
  foreground_columns = np.where(np.any(trimap_np != 2, axis=0))
  foreground_rows = np.where(np.any(trimap_np != 2, axis=1))

  image_width = int(data['size']['width'])
  image_height = int(data['size']['height'])

  # Per-object feature columns; every kept object appends to each of them.
  box_xmin, box_ymin, box_xmax, box_ymax = [], [], [], []
  label_ids, label_names = [], []
  truncated_flags, difficult_flags, views = [], [], []
  instance_masks = []

  for annotation in data.get('object', []):
    is_difficult = bool(int(annotation['difficult']))
    if is_difficult and ignore_difficult_instances:
      continue
    difficult_flags.append(int(is_difficult))

    if faces_only:
      left = float(annotation['bndbox']['xmin'])
      right = float(annotation['bndbox']['xmax'])
      top = float(annotation['bndbox']['ymin'])
      bottom = float(annotation['bndbox']['ymax'])
    else:
      # Full-body box: tight extent of the non-background mask pixels.
      left = float(np.min(foreground_columns))
      right = float(np.max(foreground_columns))
      top = float(np.min(foreground_rows))
      bottom = float(np.max(foreground_rows))

    box_xmin.append(left / image_width)
    box_ymin.append(top / image_height)
    box_xmax.append(right / image_width)
    box_ymax.append(bottom / image_height)

    # The class is encoded in the image file name, not in the annotation.
    label_name = get_class_name_from_filename(data['filename'])
    label_names.append(label_name.encode('utf8'))
    label_ids.append(label_map_dict[label_name])
    truncated_flags.append(int(annotation['truncated']))
    views.append(annotation['pose'].encode('utf8'))
    if not faces_only:
      instance_masks.append((trimap_np != 2).astype(np.uint8))

  encoded_filename = data['filename'].encode('utf8')
  feature_dict = {
      'image/height': dataset_util.int64_feature(image_height),
      'image/width': dataset_util.int64_feature(image_width),
      'image/filename': dataset_util.bytes_feature(encoded_filename),
      'image/source_id': dataset_util.bytes_feature(encoded_filename),
      'image/key/sha256': dataset_util.bytes_feature(
          sha256_hex.encode('utf8')),
      'image/encoded': dataset_util.bytes_feature(jpeg_bytes),
      'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
      'image/object/bbox/xmin': dataset_util.float_list_feature(box_xmin),
      'image/object/bbox/xmax': dataset_util.float_list_feature(box_xmax),
      'image/object/bbox/ymin': dataset_util.float_list_feature(box_ymin),
      'image/object/bbox/ymax': dataset_util.float_list_feature(box_ymax),
      'image/object/class/text': dataset_util.bytes_list_feature(label_names),
      'image/object/class/label': dataset_util.int64_list_feature(label_ids),
      'image/object/difficult': dataset_util.int64_list_feature(
          difficult_flags),
      'image/object/truncated': dataset_util.int64_list_feature(
          truncated_flags),
      'image/object/view': dataset_util.bytes_list_feature(views),
  }

  if not faces_only:
    if mask_type == 'numerical':
      # All masks flattened into one float list.
      stacked = np.stack(instance_masks).astype(np.float32)
      feature_dict['image/object/mask'] = dataset_util.float_list_feature(
          np.reshape(stacked, [-1]).tolist())
    elif mask_type == 'png':
      # One PNG-encoded byte string per object mask.
      png_blobs = []
      for instance_mask in instance_masks:
        png_buffer = io.BytesIO()
        PIL.Image.fromarray(instance_mask).save(png_buffer, format='PNG')
        png_blobs.append(png_buffer.getvalue())
      feature_dict['image/object/mask'] = dataset_util.bytes_list_feature(
          png_blobs)

  return tf.train.Example(features=tf.train.Features(feature=feature_dict))
def create_tf_record(output_filename,
                     num_shards,
                     label_map_dict,
                     annotations_dir,
                     image_dir,
                     examples,
                     faces_only=True,
                     mask_type='png'):
  """Writes the given examples to a sharded TFRecord output.

  Examples whose XML annotation file is missing, or whose conversion raises
  ValueError, are logged and skipped.

  Args:
    output_filename: Path to where output file is saved.
    num_shards: Number of shards for output file.
    label_map_dict: The label map dictionary.
    annotations_dir: Directory where annotation files are stored; XML files
      are read from its 'xmls' subdirectory and masks from 'trimaps'.
    image_dir: Directory where image files are stored.
    examples: Examples to parse and save to tf record.
    faces_only: If True, generates bounding boxes for pet faces.  Otherwise
      generates bounding boxes (as well as segmentations for full pet bodies).
    mask_type: 'numerical' or 'png'. 'png' is recommended because it leads to
      smaller file sizes.
  """
  with contextlib2.ExitStack() as close_stack:
    shard_writers = tf_record_creation_util.open_sharded_output_tfrecords(
        close_stack, output_filename, num_shards)

    for position, example_name in enumerate(examples):
      if position % 100 == 0:
        logging.info('On image %d of %d', position, len(examples))

      annotation_path = os.path.join(
          annotations_dir, 'xmls', example_name + '.xml')
      trimap_path = os.path.join(
          annotations_dir, 'trimaps', example_name + '.png')

      if not os.path.exists(annotation_path):
        logging.warning('Could not find %s, ignoring example.',
                        annotation_path)
        continue

      with tf.gfile.GFile(annotation_path, 'r') as annotation_file:
        annotation_xml = etree.fromstring(annotation_file.read())
      annotation = dataset_util.recursive_parse_xml_to_dict(
          annotation_xml)['annotation']

      try:
        serializable = dict_to_tf_example(
            annotation,
            trimap_path,
            label_map_dict,
            image_dir,
            faces_only=faces_only,
            mask_type=mask_type)
        if serializable:
          # Shards are filled round-robin by example position.
          shard_writers[position % num_shards].write(
              serializable.SerializeToString())
      except ValueError:
        logging.warning('Invalid example: %s, ignoring.', annotation_path)
# TODO(derekjchow): Add test for pet/PASCAL main files.
def main(_):
  """Splits the pet 'trainval' list 70/30 and writes both TFRecord sets.

  Args:
    _: Unused positional arguments handed over by tf.app.run().
  """
  data_dir = FLAGS.data_dir
  label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)

  logging.info('Reading from Pet dataset.')
  image_dir = os.path.join(data_dir, 'images')
  annotations_dir = os.path.join(data_dir, 'annotations')
  examples_list = dataset_util.read_examples_list(
      os.path.join(annotations_dir, 'trainval.txt'))

  # Test images are not included in the downloaded data set, so we shall perform
  # our own split. The fixed seed keeps the split reproducible between runs.
  random.seed(42)
  random.shuffle(examples_list)
  split_at = int(0.7 * len(examples_list))
  train_examples = examples_list[:split_at]
  val_examples = examples_list[split_at:]
  logging.info('%d training and %d validation examples.',
               len(train_examples), len(val_examples))

  # Output names depend on whether face boxes or full-body masks are written.
  if FLAGS.faces_only:
    record_names = ('pet_faces_train.record', 'pet_faces_val.record')
  else:
    record_names = ('pets_fullbody_with_masks_train.record',
                    'pets_fullbody_with_masks_val.record')
  record_paths = [os.path.join(FLAGS.output_dir, name)
                  for name in record_names]

  for record_path, subset in zip(record_paths,
                                 (train_examples, val_examples)):
    create_tf_record(
        record_path,
        FLAGS.num_shards,
        label_map_dict,
        annotations_dir,
        image_dir,
        subset,
        faces_only=FLAGS.faces_only,
        mask_type=FLAGS.mask_type)
# Script entry point: only triggers when this file is executed directly.
# NOTE(review): tf.app.run() presumably parses the flags defined above and
# then invokes main() -- confirm against the TensorFlow version in use.
if __name__ == '__main__':
tf.app.run()
#!/bin/bash
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
# Script to download and preprocess the MSCOCO data set for detection.
#
# The outputs of this script are TFRecord files containing serialized
# tf.Example protocol buffers. See create_coco_tf_record.py for details of how
# the tf.Example protocol buffers are constructed and see
# http://cocodataset.org/#overview for an overview of the dataset.
#
# usage:
# bash object_detection/dataset_tools/download_and_preprocess_mscoco.sh \
# /tmp/mscoco
# Abort on the first failing command.
set -e

if [ -z "$1" ]; then
  echo "usage download_and_preprocess_mscoco.sh [data dir]" >&2
  # A missing argument is an error: exit non-zero so callers can detect it
  # (a bare `exit` would report the status of the echo above, i.e. success).
  exit 1
fi

# UNZIP holds the extraction command together with its flags; it is expanded
# unquoted later on so that it splits into command and options.
if [ "$(uname)" == "Darwin" ]; then
  UNZIP="tar -xf"
else
  UNZIP="unzip -nq"
fi

# Create the output directories.
OUTPUT_DIR="${1%/}"
SCRATCH_DIR="${OUTPUT_DIR}/raw-data"
mkdir -p "${OUTPUT_DIR}"
mkdir -p "${SCRATCH_DIR}"
# Remember where the script was started; the converter is run from there.
CURRENT_DIR=$(pwd)
# Helper function to download and unpack a .zip file.
#
# Arguments:
#   $1 - base URL the archive is served from
#   $2 - archive file name; downloaded into the current directory unless a
#        file with that name already exists there
function download_and_unzip() {
  local BASE_URL="${1}"
  local FILENAME="${2}"

  if [ ! -f "${FILENAME}" ]; then
    echo "Downloading ${FILENAME} to $(pwd)"
    wget -nd -c "${BASE_URL}/${FILENAME}"
  else
    echo "Skipping download of ${FILENAME}"
  fi
  echo "Unzipping ${FILENAME}"
  # UNZIP is deliberately left unquoted: it contains a command plus flags and
  # must be word-split. The file name is quoted so it stays a single argument.
  ${UNZIP} "${FILENAME}"
}
# All archives are downloaded to and unpacked inside the scratch directory.
# Quoted because the path is derived from the user-supplied argument.
cd "${SCRATCH_DIR}"

# Download the images.
BASE_IMAGE_URL="http://images.cocodataset.org/zips"

TRAIN_IMAGE_FILE="train2017.zip"
download_and_unzip "${BASE_IMAGE_URL}" "${TRAIN_IMAGE_FILE}"
TRAIN_IMAGE_DIR="${SCRATCH_DIR}/train2017"

VAL_IMAGE_FILE="val2017.zip"
download_and_unzip "${BASE_IMAGE_URL}" "${VAL_IMAGE_FILE}"
VAL_IMAGE_DIR="${SCRATCH_DIR}/val2017"

TEST_IMAGE_FILE="test2017.zip"
download_and_unzip "${BASE_IMAGE_URL}" "${TEST_IMAGE_FILE}"
TEST_IMAGE_DIR="${SCRATCH_DIR}/test2017"

# Download the annotations.
BASE_INSTANCES_URL="http://images.cocodataset.org/annotations"
INSTANCES_FILE="annotations_trainval2017.zip"
download_and_unzip "${BASE_INSTANCES_URL}" "${INSTANCES_FILE}"

TRAIN_ANNOTATIONS_FILE="${SCRATCH_DIR}/annotations/instances_train2017.json"
VAL_ANNOTATIONS_FILE="${SCRATCH_DIR}/annotations/instances_val2017.json"

# Download the test image info.
BASE_IMAGE_INFO_URL="http://images.cocodataset.org/annotations"
IMAGE_INFO_FILE="image_info_test2017.zip"
download_and_unzip "${BASE_IMAGE_INFO_URL}" "${IMAGE_INFO_FILE}"

TESTDEV_ANNOTATIONS_FILE="${SCRATCH_DIR}/annotations/image_info_test-dev2017.json"

# Build TFRecords of the image data.
# Return to the starting directory: the converter path below is relative.
cd "${CURRENT_DIR}"
python object_detection/dataset_tools/create_coco_tf_record.py \
  --logtostderr \
  --include_masks \
  --train_image_dir="${TRAIN_IMAGE_DIR}" \
  --val_image_dir="${VAL_IMAGE_DIR}" \
  --test_image_dir="${TEST_IMAGE_DIR}" \
  --train_annotations_file="${TRAIN_ANNOTATIONS_FILE}" \
  --val_annotations_file="${VAL_ANNOTATIONS_FILE}" \
  --testdev_annotations_file="${TESTDEV_ANNOTATIONS_FILE}" \
  --output_dir="${OUTPUT_DIR}"
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""A class and executable to expand hierarchically image-level labels and boxes.
Example usage:
./hierarchical_labels_expansion <path to JSON hierarchy> <input csv file>
<output csv file> [optional]labels_file
"""
import json
import sys
def _update_dict(initial_dict, update):
  """Merges `update` into `initial_dict`, concatenating list values.

  Keys already present in `initial_dict` get the new values appended to
  their list; new keys receive their own copy of the list.

  Args:
    initial_dict: dictionary of lists that is modified in place.
    update: dictionary of lists whose content is merged in.
  """
  # .items() instead of the Python-2-only .iteritems() keeps this runnable on
  # both Python 2 and Python 3.
  for key, value_list in update.items():
    if key in initial_dict:
      initial_dict[key].extend(value_list)
    else:
      # Copy, so that later extend() calls on initial_dict[key] never mutate
      # a list object that is still referenced by `update`.
      initial_dict[key] = list(value_list)


def _build_plain_hierarchy(hierarchy, skip_root=False):
  """Expands tree hierarchy representation to parent-child dictionary.

  Args:
    hierarchy: labels hierarchy as a parsed JSON dict; each node has a
      'LabelName' and optionally a 'Subcategory' list of child nodes.
    skip_root: if true skips root from the processing (done for the case when
      all classes under hierarchy are collected under virtual node).

  Returns:
    keyed_parent - dictionary of parent - all its children nodes.
    keyed_child  - dictionary of children - all its parent nodes
    children - all children of the current node.
  """
  all_children = []
  all_keyed_parent = {}
  all_keyed_child = {}
  if 'Subcategory' in hierarchy:
    for node in hierarchy['Subcategory']:
      keyed_parent, keyed_child, children = _build_plain_hierarchy(node)
      # Update is not done through dict.update() since some children have
      # multiple parents in the hierarchy.
      _update_dict(all_keyed_parent, keyed_parent)
      _update_dict(all_keyed_child, keyed_child)
      all_children.extend(children)

  if not skip_root:
    root_label = hierarchy['LabelName']
    all_keyed_parent[root_label] = all_children
    # Rebinds to a new list, so the entry stored above keeps only descendants.
    all_children = [root_label] + all_children
    # Every node collected so far sits below the current node.
    for ancestors in all_keyed_child.values():
      ancestors.append(root_label)
    all_keyed_child[root_label] = []

  return all_keyed_parent, all_keyed_child, all_children
class OIDHierarchicalLabelsExpansion(object):
  """Expands Open Images annotation rows along the label hierarchy."""

  def __init__(self, hierarchy):
    """Constructor.

    Args:
      hierarchy: labels hierarchy as a parsed JSON dict. Its root node is
        skipped when the lookup tables are built.
    """
    parent_to_children, child_to_parents, _ = _build_plain_hierarchy(
        hierarchy, skip_root=True)
    self._hierarchy_keyed_parent = parent_to_children
    self._hierarchy_keyed_child = child_to_parents

  def expand_boxes_from_csv(self, csv_row):
    """Expands a row containing bounding boxes from CSV file.

    The row is repeated once for every parent recorded for its label, with
    the LabelName column replaced by that parent.

    Args:
      csv_row: a single row of Open Images released groundtruth file.

    Returns:
      a list of strings (including the initial row) corresponding to the ground
      truth expanded to multiple annotation for evaluation with Open Images
      Challenge 2018 metric.
    """
    # Row header is expected to be exactly:
    # ImageID,Source,LabelName,Confidence,XMin,XMax,YMin,YMax,IsOccluded,
    # IsTruncated,IsGroupOf,IsDepiction,IsInside
    columns = csv_row.split(',')
    assert len(columns) == 13
    label = columns[2]
    assert label in self._hierarchy_keyed_child
    expanded = [
        ','.join(columns[:2] + [ancestor] + columns[3:])
        for ancestor in self._hierarchy_keyed_child[label]
    ]
    return [csv_row] + expanded

  def expand_labels_from_csv(self, csv_row):
    """Expands a row containing image-level labels from CSV file.

    A row with Confidence 1 is repeated for every parent recorded for its
    label; any other row is repeated for every child recorded for its label.

    Args:
      csv_row: a single row of Open Images released groundtruth file.

    Returns:
      a list of strings (including the initial row) corresponding to the ground
      truth expanded to multiple annotation for evaluation with Open Images
      Challenge 2018 metric.
    """
    # Row header is expected to be exactly:
    # ImageID,Source,LabelName,Confidence
    columns = csv_row.split(',')
    assert len(columns) == 4
    label = columns[2]
    if int(columns[3]) == 1:
      assert label in self._hierarchy_keyed_child
      related_labels = self._hierarchy_keyed_child[label]
    else:
      assert label in self._hierarchy_keyed_parent
      related_labels = self._hierarchy_keyed_parent[label]
    expanded = [
        ','.join(columns[:2] + [related] + columns[3:])
        for related in related_labels
    ]
    return [csv_row] + expanded
def main(argv):
  """Expands every annotation row of a CSV file along the label hierarchy.

  Args:
    argv: command line arguments:
      argv[1] - path to the JSON label hierarchy,
      argv[2] - input CSV file; its first line is treated as a header and is
        not copied to the output,
      argv[3] - output CSV file,
      argv[4] - optional; the literal 'labels_file' selects image-level label
        expansion instead of bounding box expansion.
  """
  if len(argv) < 4:
    # Written as a function call with a single argument so that it is valid
    # on both Python 2 and Python 3 (a print statement is a SyntaxError on 3).
    print('Missing arguments. \n\n'
          'Usage: ./hierarchical_labels_expansion <path to JSON hierarchy>\n'
          '<input csv file> <output csv file> [optional]labels_file')
    return

  with open(argv[1]) as f:
    hierarchy = json.load(f)
  expansion_generator = OIDHierarchicalLabelsExpansion(hierarchy)

  labels_file = len(argv) > 4 and argv[4] == 'labels_file'
  if labels_file:
    expand_row = expansion_generator.expand_labels_from_csv
  else:
    expand_row = expansion_generator.expand_boxes_from_csv

  with open(argv[2], 'r') as source, open(argv[3], 'w') as target:
    header_skipped = False
    for line in source:
      if not header_skipped:
        header_skipped = True
        continue
      # Strip the line terminator before expanding and add it back on write:
      # otherwise a final line without trailing newline would be written
      # glued together with all of its expanded copies.
      row = line.rstrip('\n')
      target.writelines(expanded + '\n' for expanded in expand_row(row))
# Script entry point: forward the raw command line (including the program
# name at index 0) to main() when this file is executed directly.
if __name__ == '__main__':
main(sys.argv)
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for the OpenImages label expansion (OIDHierarchicalLabelsExpansion)."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from object_detection.dataset_tools import oid_hierarchical_labels_expansion
def create_test_data():
  """Returns a small label hierarchy plus sample bbox and label CSV rows.

  The hierarchy has root 'a' with children 'b', 'c' (children 'd', 'e') and
  'f' (child 'd'), so 'd' has two parents.

  Returns:
    A (hierarchy, bbox_rows, label_rows) tuple.
  """

  def node(label, *children):
    # A node only carries a 'Subcategory' entry when it has children.
    entry = {'LabelName': label}
    if children:
      entry['Subcategory'] = list(children)
    return entry

  hierarchy = node(
      'a',
      node('b'),
      node('c', node('d'), node('e')),
      node('f', node('d')))

  bbox_template = '123,xclick,{},1,{},{},0.1,0.2,1,1,0,0,0'
  bbox_rows = [
      bbox_template.format('b', '0.1', '0.2'),
      bbox_template.format('d', '0.2', '0.3'),
  ]

  label_specs = (('123', 'b', '0'), ('123', 'c', '0'), ('124', 'd', '1'))
  label_rows = [
      '{},verification,{},{}'.format(image_id, label, confidence)
      for image_id, label, confidence in label_specs
  ]
  return hierarchy, bbox_rows, label_rows
class HierarchicalLabelsExpansionTest(tf.test.TestCase):
  """Checks row expansion against the fixture from create_test_data()."""

  def test_bbox_expansion(self):
    """Box rows are repeated once per parent of their label."""
    hierarchy, bbox_rows, _ = create_test_data()
    expander = oid_hierarchical_labels_expansion.OIDHierarchicalLabelsExpansion(
        hierarchy)
    expanded_rows = [
        expanded
        for row in bbox_rows
        for expanded in expander.expand_boxes_from_csv(row)
    ]
    expected_rows = [
        '123,xclick,b,1,0.1,0.2,0.1,0.2,1,1,0,0,0',
        '123,xclick,d,1,0.2,0.3,0.1,0.2,1,1,0,0,0',
        '123,xclick,f,1,0.2,0.3,0.1,0.2,1,1,0,0,0',
        '123,xclick,c,1,0.2,0.3,0.1,0.2,1,1,0,0,0',
    ]
    # Order-insensitive comparison: only the set of rows matters here.
    self.assertItemsEqual(expected_rows, expanded_rows)

  def test_labels_expansion(self):
    """Confidence-0 rows expand to children, confidence-1 rows to parents."""
    hierarchy, _, label_rows = create_test_data()
    expander = oid_hierarchical_labels_expansion.OIDHierarchicalLabelsExpansion(
        hierarchy)
    expanded_rows = [
        expanded
        for row in label_rows
        for expanded in expander.expand_labels_from_csv(row)
    ]
    expected_rows = [
        '123,verification,b,0',
        '123,verification,c,0',
        '123,verification,d,0',
        '123,verification,e,0',
        '124,verification,d,1',
        '124,verification,f,1',
        '124,verification,c,1',
    ]
    self.assertItemsEqual(expected_rows, expanded_rows)
# Run the test cases defined above when this file is executed directly.
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Utilities for creating TFRecords of TF examples for the Open Images dataset.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from object_detection.core import standard_fields
from object_detection.utils import dataset_util
def tf_example_from_annotations_data_frame(annotations_data_frame, label_map,
                                           encoded_image):
  """Populates a TF Example message with image annotations from a data frame.

  Rows whose LabelName is not a key of label_map are ignored. Among the
  remaining rows, those with a non-null YMin are written as box annotations
  and those with a null YMin are written as image-level labels.

  Args:
    annotations_data_frame: Data frame containing the annotations for a single
      image. It must contain at least one row, because the image id is read
      from the ImageID of its first row.
    label_map: String to integer label map.
    encoded_image: The encoded image string.

  Returns:
    The populated tf.train.Example. An Example is returned even when no label
    of the data frame is present in label_map; its per-object features are
    then empty.
  """
  example_fields = standard_fields.TfExampleFields

  known_rows = annotations_data_frame[
      annotations_data_frame.LabelName.isin(label_map)]
  is_image_label = known_rows.YMin.isnull()
  box_rows = known_rows[~is_image_label]
  label_rows = known_rows[is_image_label]

  # The id comes from the unfiltered frame so that it is available even when
  # every annotation row has been filtered out.
  image_id = annotations_data_frame.ImageID.iloc[0]

  # Series.values is used instead of Series.as_matrix(): both yield the
  # underlying array, but as_matrix() is not available in newer pandas.
  feature_map = {
      example_fields.object_bbox_ymin:
          dataset_util.float_list_feature(box_rows.YMin.values),
      example_fields.object_bbox_xmin:
          dataset_util.float_list_feature(box_rows.XMin.values),
      example_fields.object_bbox_ymax:
          dataset_util.float_list_feature(box_rows.YMax.values),
      example_fields.object_bbox_xmax:
          dataset_util.float_list_feature(box_rows.XMax.values),
      example_fields.object_class_text:
          dataset_util.bytes_list_feature(box_rows.LabelName.values),
      example_fields.object_class_label:
          dataset_util.int64_list_feature(
              box_rows.LabelName.map(lambda x: label_map[x]).values),
      example_fields.filename:
          dataset_util.bytes_feature('{}.jpg'.format(image_id)),
      example_fields.source_id:
          dataset_util.bytes_feature(image_id),
      example_fields.image_encoded:
          dataset_util.bytes_feature(encoded_image),
  }

  # Per-box attributes are optional: each one is only written when the
  # corresponding column exists in the data frame.
  optional_box_attributes = (
      ('IsGroupOf', example_fields.object_group_of),
      ('IsOccluded', example_fields.object_occluded),
      ('IsTruncated', example_fields.object_truncated),
      ('IsDepiction', example_fields.object_depiction),
  )
  for column, feature_key in optional_box_attributes:
    if column in known_rows.columns:
      feature_map[feature_key] = dataset_util.int64_list_feature(
          box_rows[column].values.astype(int))

  if 'ConfidenceImageLabel' in label_rows.columns:
    feature_map[example_fields.image_class_label] = (
        dataset_util.int64_list_feature(
            label_rows.LabelName.map(lambda x: label_map[x]).values))
    # No trailing comma here: the map value must be the Feature itself, not a
    # 1-tuple wrapping it.
    feature_map[example_fields.image_class_text] = (
        dataset_util.bytes_list_feature(label_rows.LabelName.values))

  return tf.train.Example(features=tf.train.Features(feature=feature_map))
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for oid_tfrecord_creation.py."""
import pandas as pd
import tensorflow as tf
from object_detection.dataset_tools import oid_tfrecord_creation
def create_test_data():
  """Builds the annotation fixture shared by the tests in this module.

  Returns:
    A (label_map, data_frame) tuple. label_map maps the label names 'a', 'b'
    and 'c' to 0, 1 and 2. data_frame holds seven annotation rows for the
    images 'i1' and 'i2'; the fifth row has no box coordinates and carries a
    ConfidenceImageLabel value instead.
  """
  columns = {
      'ImageID': ['i1', 'i1', 'i1', 'i1', 'i1', 'i2', 'i2'],
      'LabelName': ['a', 'a', 'b', 'b', 'c', 'b', 'c'],
      'YMin': [0.3, 0.6, 0.8, 0.1, None, 0.0, 0.0],
      'XMin': [0.1, 0.3, 0.7, 0.0, None, 0.1, 0.1],
      'XMax': [0.2, 0.3, 0.8, 0.5, None, 0.9, 0.9],
      'YMax': [0.3, 0.6, 1, 0.8, None, 0.8, 0.8],
      'IsOccluded': [0, 1, 1, 0, None, 0, 0],
      'IsTruncated': [0, 0, 0, 1, None, 0, 0],
      'IsGroupOf': [0, 0, 0, 0, None, 0, 1],
      'IsDepiction': [1, 0, 0, 0, None, 0, 0],
      'ConfidenceImageLabel': [None, None, None, None, 0, None, None],
  }
  annotations = pd.DataFrame(data=columns)
  # Labels are numbered in alphabetical order starting at zero.
  label_ids = {name: index for index, name in enumerate(('a', 'b', 'c'))}
  return label_ids, annotations
class TfExampleFromAnnotationsDataFrameTests(tf.test.TestCase):
  """Tests for oid_tfrecord_creation.tf_example_from_annotations_data_frame."""

  def test_simple(self):
    """Image 'i1' with the full label map and all attribute columns."""
    label_map, annotations = create_test_data()
    image_rows = annotations[annotations.ImageID == 'i1']
    example = oid_tfrecord_creation.tf_example_from_annotations_data_frame(
        image_rows, label_map, 'encoded_image_test')
    expected_proto = """
features {
feature {
key: "image/encoded"
value { bytes_list { value: "encoded_image_test" } } }
feature {
key: "image/filename"
value { bytes_list { value: "i1.jpg" } } }
feature {
key: "image/object/bbox/ymin"
value { float_list { value: [0.3, 0.6, 0.8, 0.1] } } }
feature {
key: "image/object/bbox/xmin"
value { float_list { value: [0.1, 0.3, 0.7, 0.0] } } }
feature {
key: "image/object/bbox/ymax"
value { float_list { value: [0.3, 0.6, 1.0, 0.8] } } }
feature {
key: "image/object/bbox/xmax"
value { float_list { value: [0.2, 0.3, 0.8, 0.5] } } }
feature {
key: "image/object/class/label"
value { int64_list { value: [0, 0, 1, 1] } } }
feature {
key: "image/object/class/text"
value { bytes_list { value: ["a", "a", "b", "b"] } } }
feature {
key: "image/source_id"
value { bytes_list { value: "i1" } } }
feature {
key: "image/object/depiction"
value { int64_list { value: [1, 0, 0, 0] } } }
feature {
key: "image/object/group_of"
value { int64_list { value: [0, 0, 0, 0] } } }
feature {
key: "image/object/occluded"
value { int64_list { value: [0, 1, 1, 0] } } }
feature {
key: "image/object/truncated"
value { int64_list { value: [0, 0, 0, 1] } } }
feature {
key: "image/class/label"
value { int64_list { value: [2] } } }
feature {
key: "image/class/text"
value { bytes_list { value: ["c"] } } } }
"""
    self.assertProtoEquals(expected_proto, example)

  def test_no_attributes(self):
    """Image 'i2' after the optional attribute columns have been removed."""
    label_map, annotations = create_test_data()
    # Drop every optional column so that only the mandatory features remain.
    for optional_column in ('IsDepiction', 'IsGroupOf', 'IsOccluded',
                            'IsTruncated', 'ConfidenceImageLabel'):
      del annotations[optional_column]
    image_rows = annotations[annotations.ImageID == 'i2']
    example = oid_tfrecord_creation.tf_example_from_annotations_data_frame(
        image_rows, label_map, 'encoded_image_test')
    expected_proto = """
features {
feature {
key: "image/encoded"
value { bytes_list { value: "encoded_image_test" } } }
feature {
key: "image/filename"
value { bytes_list { value: "i2.jpg" } } }
feature {
key: "image/object/bbox/ymin"
value { float_list { value: [0.0, 0.0] } } }
feature {
key: "image/object/bbox/xmin"
value { float_list { value: [0.1, 0.1] } } }
feature {
key: "image/object/bbox/ymax"
value { float_list { value: [0.8, 0.8] } } }
feature {
key: "image/object/bbox/xmax"
value { float_list { value: [0.9, 0.9] } } }
feature {
key: "image/object/class/label"
value { int64_list { value: [1, 2] } } }
feature {
key: "image/object/class/text"
value { bytes_list { value: ["b", "c"] } } }
feature {
key: "image/source_id"
value { bytes_list { value: "i2" } } } }
"""
    self.assertProtoEquals(expected_proto, example)

  def test_label_filtering(self):
    """Image 'i1' with a label map that only knows label 'a'."""
    _, annotations = create_test_data()
    restricted_label_map = {'a': 0}
    image_rows = annotations[annotations.ImageID == 'i1']
    example = oid_tfrecord_creation.tf_example_from_annotations_data_frame(
        image_rows, restricted_label_map, 'encoded_image_test')
    expected_proto = """
features {
feature {
key: "image/encoded"
value { bytes_list { value: "encoded_image_test" } } }
feature {
key: "image/filename"
value { bytes_list { value: "i1.jpg" } } }
feature {
key: "image/object/bbox/ymin"
value { float_list { value: [0.3, 0.6] } } }
feature {
key: "image/object/bbox/xmin"
value { float_list { value: [0.1, 0.3] } } }
feature {
key: "image/object/bbox/ymax"
value { float_list { value: [0.3, 0.6] } } }
feature {
key: "image/object/bbox/xmax"
value { float_list { value: [0.2, 0.3] } } }
feature {
key: "image/object/class/label"
value { int64_list { value: [0, 0] } } }
feature {
key: "image/object/class/text"
value { bytes_list { value: ["a", "a"] } } }
feature {
key: "image/source_id"
value { bytes_list { value: "i1" } } }
feature {
key: "image/object/depiction"
value { int64_list { value: [1, 0] } } }
feature {
key: "image/object/group_of"
value { int64_list { value: [0, 0] } } }
feature {
key: "image/object/occluded"
value { int64_list { value: [0, 1] } } }
feature {
key: "image/object/truncated"
value { int64_list { value: [0, 0] } } }
feature {
key: "image/class/label"
value { int64_list { } } }
feature {
key: "image/class/text"
value { bytes_list { } } } }
"""
    self.assertProtoEquals(expected_proto, example)
if __name__ == '__main__':
  # Run the test cases of this module when it is executed as a script.
  tf.test.main()
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Utilities for creating TFRecords of TF examples for the Open Images dataset.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
def open_sharded_output_tfrecords(exit_stack, base_path, num_shards):
  """Opens one TFRecord writer per shard and registers it with an exit stack.

  Shard k is written to '<base_path>-<k>-of-<num_shards>', where both numbers
  are zero-padded to five digits.

  Args:
    exit_stack: Object whose enter_context() is called with every opened
      writer (e.g. a contextlib2.ExitStack), so that the writers are closed
      automatically when the stack is exited.
    base_path: The base path for all shards.
    num_shards: The number of shards.

  Returns:
    The list of opened TFRecord writers. Position k in the list corresponds to
    shard k.
  """
  shard_writers = []
  for shard_index in range(num_shards):
    shard_path = '{}-{:05d}-of-{:05d}'.format(base_path, shard_index,
                                              num_shards)
    shard_writer = tf.python_io.TFRecordWriter(shard_path)
    shard_writers.append(exit_stack.enter_context(shard_writer))
  return shard_writers
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for tf_record_creation_util.py."""
import os
import contextlib2
import tensorflow as tf
from object_detection.dataset_tools import tf_record_creation_util
class OpenOutputTfrecordsTests(tf.test.TestCase):
  """Tests for tf_record_creation_util.open_sharded_output_tfrecords."""

  def test_sharded_tfrecord_writes(self):
    """Writes one record to each of ten shards and reads every shard back."""
    base_path = os.path.join(tf.test.get_temp_dir(), 'test.tfrec')

    with contextlib2.ExitStack() as close_stack:
      shard_writers = tf_record_creation_util.open_sharded_output_tfrecords(
          close_stack, base_path, 10)
      for shard in range(10):
        shard_writers[shard].write('test_{}'.format(shard))

    # The writers were closed when the exit stack was left, so each shard file
    # can now be read back.
    for shard in range(10):
      shard_path = '{}-{:05d}-of-00010'.format(base_path, shard)
      shard_records = list(tf.python_io.tf_record_iterator(shard_path))
      self.assertAllEqual(shard_records, ['test_{}'.format(shard)])
if __name__ == '__main__':
  # Run the test cases of this module when it is executed as a script.
  tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Evaluation executable for detection models.
This executable is used to evaluate DetectionModels. There are two ways of
configuring the eval job.
1) A single pipeline_pb2.TrainEvalPipelineConfig file may be specified.
In this mode, the --eval_training_data flag may be given to force the pipeline
to evaluate on training data instead.
Example usage:
./eval \
--logtostderr \
--checkpoint_dir=path/to/checkpoint_dir \
--eval_dir=path/to/eval_dir \
--pipeline_config_path=pipeline_config.pbtxt
2) Three configuration files may be provided: a model_pb2.DetectionModel
configuration file to define what type of DetectionModel is being evaluated, an
input_reader_pb2.InputReader file to specify what data the model is evaluating
and an eval_pb2.EvalConfig file to configure evaluation parameters.
Example usage:
./eval \
--logtostderr \
--checkpoint_dir=path/to/checkpoint_dir \
--eval_dir=path/to/eval_dir \
--eval_config_path=eval_config.pbtxt \
--model_config_path=model_config.pbtxt \
--input_config_path=eval_input_config.pbtxt
"""
import functools
import os
import tensorflow as tf
from object_detection import evaluator
from object_detection.builders import dataset_builder
from object_detection.builders import graph_rewriter_builder
from object_detection.builders import model_builder
from object_detection.utils import config_util
from object_detection.utils import dataset_util
from object_detection.utils import label_map_util
# Set the TensorFlow logging threshold to INFO.
tf.logging.set_verbosity(tf.logging.INFO)

# Command-line flags of the evaluation binary. Either `pipeline_config_path`
# or the three separate `*_config_path` flags select the configuration; see
# the module docstring for example invocations.
flags = tf.app.flags
flags.DEFINE_boolean('eval_training_data', False,
                     'If training data should be evaluated for this job.')
flags.DEFINE_string('checkpoint_dir', '',
                    'Directory containing checkpoints to evaluate, typically '
                    'set to `train_dir` used in the training job.')
flags.DEFINE_string('eval_dir', '',
                    'Directory to write eval summaries to.')
flags.DEFINE_string('pipeline_config_path', '',
                    'Path to a pipeline_pb2.TrainEvalPipelineConfig config '
                    'file. If provided, other configs are ignored')
flags.DEFINE_string('eval_config_path', '',
                    'Path to an eval_pb2.EvalConfig config file.')
flags.DEFINE_string('input_config_path', '',
                    'Path to an input_reader_pb2.InputReader config file.')
flags.DEFINE_string('model_config_path', '',
                    'Path to a model_pb2.DetectionModel config file.')
flags.DEFINE_boolean('run_once', False, 'Option to only run a single pass of '
                     'evaluation. Overrides the `max_evals` parameter in the '
                     'provided config.')
# Parsed flag values; read inside main().
FLAGS = flags.FLAGS
def main(unused_argv):
  """Runs the evaluation job described by the command-line flags.

  The configuration is loaded either from a single pipeline config file or
  from separate model/eval/input config files. The config files that were used
  are copied into the eval directory before the evaluator is started.

  Args:
    unused_argv: Ignored.
  """
  assert FLAGS.checkpoint_dir, '`checkpoint_dir` is missing.'
  assert FLAGS.eval_dir, '`eval_dir` is missing.'
  tf.gfile.MakeDirs(FLAGS.eval_dir)

  def _copy_into_eval_dir(source_path, target_name):
    """Copies a config file into the eval directory under target_name."""
    tf.gfile.Copy(source_path,
                  os.path.join(FLAGS.eval_dir, target_name),
                  overwrite=True)

  if FLAGS.pipeline_config_path:
    configs = config_util.get_configs_from_pipeline_file(
        FLAGS.pipeline_config_path)
    _copy_into_eval_dir(FLAGS.pipeline_config_path, 'pipeline.config')
  else:
    configs = config_util.get_configs_from_multiple_files(
        model_config_path=FLAGS.model_config_path,
        eval_config_path=FLAGS.eval_config_path,
        eval_input_config_path=FLAGS.input_config_path)
    separate_config_files = [
        ('model.config', FLAGS.model_config_path),
        ('eval.config', FLAGS.eval_config_path),
        ('input.config', FLAGS.input_config_path),
    ]
    for target_name, source_path in separate_config_files:
      _copy_into_eval_dir(source_path, target_name)

  model_config = configs['model']
  eval_config = configs['eval_config']
  input_config = configs['eval_input_config']
  if FLAGS.eval_training_data:
    # Evaluate on the training input instead of the eval input.
    input_config = configs['train_input_config']

  model_fn = functools.partial(
      model_builder.build, model_config=model_config, is_training=False)

  def _next_input_element(reader_config):
    """Builds the dataset for reader_config and returns its next element."""
    dataset = dataset_builder.build(reader_config)
    return dataset_util.make_initializable_iterator(dataset).get_next()

  create_input_dict_fn = functools.partial(_next_input_element, input_config)

  label_map = label_map_util.load_labelmap(input_config.label_map_path)
  max_num_classes = max(item.id for item in label_map.item)
  categories = label_map_util.convert_label_map_to_categories(
      label_map, max_num_classes)

  if FLAGS.run_once:
    eval_config.max_evals = 1

  # The graph rewriter is optional and only built when it is configured.
  graph_rewriter_fn = None
  if 'graph_rewriter_config' in configs:
    graph_rewriter_fn = graph_rewriter_builder.build(
        configs['graph_rewriter_config'], is_training=False)

  evaluator.evaluate(
      create_input_dict_fn,
      model_fn,
      eval_config,
      categories,
      FLAGS.checkpoint_dir,
      FLAGS.eval_dir,
      graph_hook_fn=graph_rewriter_fn)
if __name__ == '__main__':
  # Script entry point: hand control to TensorFlow's app runner.
  tf.app.run()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Common utility functions for evaluation."""
import collections
import logging
import os
import time
import numpy as np
import tensorflow as tf
from object_detection.core import box_list
from object_detection.core import box_list_ops
from object_detection.core import keypoint_ops
from object_detection.core import standard_fields as fields
from object_detection.metrics import coco_evaluation
from object_detection.utils import label_map_util
from object_detection.utils import ops
from object_detection.utils import visualization_utils as vis_utils
slim = tf.contrib.slim
def write_metrics(metrics, global_step, summary_dir):
  """Writes scalar metrics as summaries and logs each of them.

  Metrics are processed in sorted order of their names; every metric is added
  to the summary writer as its own summary and logged.

  Args:
    metrics: A dictionary containing metric names and values.
    global_step: Global step at which the metrics are computed.
    summary_dir: Directory to write tensorflow summaries to.
  """
  logging.info('Writing metrics to tf summary.')
  writer = tf.summary.FileWriterCache.get(summary_dir)
  for metric_name in sorted(metrics):
    metric_value = metrics[metric_name]
    scalar = tf.Summary.Value(tag=metric_name, simple_value=metric_value)
    writer.add_summary(tf.Summary(value=[scalar]), global_step)
    logging.info('%s: %f', metric_name, metric_value)
  logging.info('Metrics written to tf summary.')
# TODO(rathodv): Add tests.
def visualize_detection_results(result_dict,
                                tag,
                                global_step,
                                categories,
                                summary_dir='',
                                export_dir='',
                                agnostic_mode=False,
                                show_groundtruth=False,
                                groundtruth_box_visualization_color='black',
                                min_score_thresh=.5,
                                max_num_predictions=20,
                                skip_scores=False,
                                skip_labels=False,
                                keep_image_id_for_visualization_export=False):
  """Draws detection results on an image and writes it as an image summary.

  The image is annotated with the detected boxes (and, optionally, the
  groundtruth boxes underneath them) and written as an image summary that can
  be viewed on tensorboard. The annotated image can additionally be exported
  as a PNG file. In the case of a missing entry in the label map, the unknown
  class name is shown as "N/A" in the visualization.

  Args:
    result_dict: a dictionary holding groundtruth and detection
      data corresponding to each image being evaluated. The following keys
      are required:
        'original_image': a numpy array representing the image with shape
          [1, height, width, 3] or [1, height, width, 1]
        'detection_boxes': a numpy array of shape [N, 4]
        'detection_scores': a numpy array of shape [N]
        'detection_classes': a numpy array of shape [N]
      The following keys are optional:
        'groundtruth_boxes': a numpy array of shape [N, 4]
        'groundtruth_keypoints': a numpy array of shape [N, num_keypoints, 2]
      Detections are assumed to be provided in decreasing order of score, and
      scores are assumed to be probabilities between 0 and 1.
    tag: tensorboard tag (string) to associate with image.
    global_step: global step at which the visualizations are generated.
    categories: a list of dictionaries representing all possible categories.
      Each dict in this list has the following keys:
          'id': (required) an integer id uniquely identifying this category
          'name': (required) string representing category name
            e.g., 'cat', 'dog', 'pizza'
          'supercategory': (optional) string representing the supercategory
            e.g., 'animal', 'vehicle', 'food', etc
    summary_dir: the output directory to which the image summaries are written.
    export_dir: the output directory to which images are written. If this is
      empty (default), then images are not exported.
    agnostic_mode: boolean (default: False) controlling whether to evaluate in
      class-agnostic mode or not.
    show_groundtruth: boolean (default: False) controlling whether to show
      groundtruth boxes in addition to detected boxes.
    groundtruth_box_visualization_color: box color for visualizing groundtruth
      boxes.
    min_score_thresh: minimum score threshold for a box to be visualized.
    max_num_predictions: maximum number of detections to visualize.
    skip_scores: whether to skip score when drawing a single detection.
    skip_labels: whether to skip label when drawing a single detection.
    keep_image_id_for_visualization_export: whether to keep image identifier in
      filename when exported to export_dir.

  Raises:
    ValueError: if result_dict does not contain the expected keys (i.e.,
      'original_image', 'detection_boxes', 'detection_scores',
      'detection_classes'), or if show_groundtruth is set but result_dict has
      no groundtruth boxes.
  """
  detection_fields = fields.DetectionResultFields
  input_fields = fields.InputDataFields

  required_keys = set([
      input_fields.original_image,
      detection_fields.detection_boxes,
      detection_fields.detection_scores,
      detection_fields.detection_classes,
  ])
  if not required_keys.issubset(set(result_dict.keys())):
    raise ValueError('result_dict does not contain all expected keys.')
  if show_groundtruth and input_fields.groundtruth_boxes not in result_dict:
    raise ValueError('If show_groundtruth is enabled, result_dict must contain '
                     'groundtruth_boxes.')

  logging.info('Creating detection visualizations.')
  category_index = label_map_util.create_category_index(categories)

  # Drop the leading batch dimension of the image.
  canvas = np.squeeze(result_dict[input_fields.original_image], axis=0)
  if canvas.shape[2] == 1:
    # Single-channel image: repeat the channel three times to get RGB.
    canvas = np.tile(canvas, [1, 1, 3])

  boxes = result_dict[detection_fields.detection_boxes]
  scores = result_dict[detection_fields.detection_scores]
  classes = np.int32(result_dict[detection_fields.detection_classes])
  keypoints = result_dict.get(detection_fields.detection_keypoints)
  masks = result_dict.get(detection_fields.detection_masks)
  boundaries = result_dict.get(detection_fields.detection_boundaries)

  # Groundtruth is drawn first so that the detections end up on top of it.
  if show_groundtruth:
    vis_utils.visualize_boxes_and_labels_on_image_array(
        image=canvas,
        boxes=result_dict[input_fields.groundtruth_boxes],
        classes=None,
        scores=None,
        category_index=category_index,
        keypoints=result_dict.get(input_fields.groundtruth_keypoints),
        use_normalized_coordinates=False,
        max_boxes_to_draw=None,
        groundtruth_box_visualization_color=groundtruth_box_visualization_color)

  vis_utils.visualize_boxes_and_labels_on_image_array(
      canvas,
      boxes,
      classes,
      scores,
      category_index,
      instance_masks=masks,
      instance_boundaries=boundaries,
      keypoints=keypoints,
      use_normalized_coordinates=False,
      max_boxes_to_draw=max_num_predictions,
      min_score_thresh=min_score_thresh,
      agnostic_mode=agnostic_mode,
      skip_scores=skip_scores,
      skip_labels=skip_labels)

  if export_dir:
    # The image id only becomes part of the file name when it was requested
    # and the id stored in result_dict is truthy.
    image_id = None
    if keep_image_id_for_visualization_export:
      image_id = result_dict[fields.InputDataFields().key]
    if image_id:
      export_name = 'export-{}-{}.png'.format(tag, image_id)
    else:
      export_name = 'export-{}.png'.format(tag)
    vis_utils.save_image_array_as_png(canvas,
                                      os.path.join(export_dir, export_name))

  encoded_png = vis_utils.encode_image_array_as_png_str(canvas)
  image_summary = tf.Summary(value=[
      tf.Summary.Value(
          tag=tag, image=tf.Summary.Image(encoded_image_string=encoded_png))
  ])
  tf.summary.FileWriterCache.get(summary_dir).add_summary(
      image_summary, global_step)

  logging.info('Detection visualizations written to summary with tag %s.', tag)
def _run_checkpoint_once(tensor_dict,
                         evaluators=None,
                         batch_processor=None,
                         checkpoint_dirs=None,
                         variables_to_restore=None,
                         restore_fn=None,
                         num_batches=1,
                         master='',
                         save_graph=False,
                         save_graph_dir='',
                         losses_dict=None):
  """Evaluates metrics defined in evaluators and returns summaries.

  This function restores a checkpoint (via restore_fn, or the latest
  checkpoint in the first entry of checkpoint_dirs), runs up to num_batches
  batches through the session and feeds every non-empty result to all
  evaluators. The metrics of all evaluators, plus the mean of every loss that
  was collected, are returned.

  Args:
    tensor_dict: a dictionary holding tensors representing a batch of detections
      and corresponding groundtruth annotations.
    evaluators: a list of object of type DetectionEvaluator to be used for
      evaluation. Note that the metric names produced by different evaluators
      must be unique. Although the default is None, the value is iterated
      unconditionally, so a list has to be passed.
    batch_processor: a function that is called as
      batch_processor(tensor_dict, sess, batch_index, counters,
      losses_dict=losses_dict) with:
        1. tensor_dict: the same tensor_dict that is passed in as the first
          argument to this function.
        2. sess: a tensorflow session
        3. batch_index: an integer representing the index of the batch amongst
          all batches
        4. counters: the dictionary with the 'skipped' and 'success' counts
          that are logged at the end of the run
        5. losses_dict: the losses_dict passed to this function
      and that returns a (result_dict, result_losses_dict) tuple.
      By default, batch_processor is None, which defaults to running:
        return sess.run([tensor_dict, losses_dict])
      To skip an image, it suffices to return an empty dictionary in place of
      result_dict.
    checkpoint_dirs: list of directories to load into an EnsembleModel. If it
      has only one directory, EnsembleModel will not be used --
        a DetectionModel
      will be instantiated directly. Not used if restore_fn is set.
    variables_to_restore: None, or a dictionary mapping variable names found in
      a checkpoint to model variables. The dictionary would normally be
      generated by creating a tf.train.ExponentialMovingAverage object and
      calling its variables_to_restore() method. Not used if restore_fn is set.
    restore_fn: None, or a function that takes a tf.Session object and correctly
      restores all necessary variables from the correct checkpoint file. If
      None, attempts to restore from the first directory in checkpoint_dirs.
    num_batches: the number of batches to use for evaluation.
    master: the location of the Tensorflow session.
    save_graph: whether or not the Tensorflow graph is stored as a pbtxt file.
    save_graph_dir: where to store the Tensorflow graph on disk. If save_graph
      is True this must be non-empty.
    losses_dict: optional dictionary of scalar detection losses.

  Returns:
    global_step: the count of global steps.
    all_evaluator_metrics: A dictionary containing metric names and values.
      The mean of every collected loss is stored under 'Losses/<loss name>'.

  Raises:
    ValueError: if restore_fn is None and checkpoint_dirs doesn't have at least
      one element.
    ValueError: if save_graph is True and save_graph_dir is not defined.
    ValueError: if two evaluators produce a metric with the same name.
  """
  if save_graph and not save_graph_dir:
    raise ValueError('`save_graph_dir` must be defined.')
  sess = tf.Session(master, graph=tf.get_default_graph())
  # Initialize everything first; the restore below then overwrites the
  # variables that are found in the checkpoint.
  sess.run(tf.global_variables_initializer())
  sess.run(tf.local_variables_initializer())
  sess.run(tf.tables_initializer())
  if restore_fn:
    restore_fn(sess)
  else:
    if not checkpoint_dirs:
      raise ValueError('`checkpoint_dirs` must have at least one entry.')
    # Only the first checkpoint directory is consulted here.
    checkpoint_file = tf.train.latest_checkpoint(checkpoint_dirs[0])
    saver = tf.train.Saver(variables_to_restore)
    saver.restore(sess, checkpoint_file)

  if save_graph:
    tf.train.write_graph(sess.graph_def, save_graph_dir, 'eval.pbtxt')

  counters = {'skipped': 0, 'success': 0}
  # Maps each loss name to the list of values observed over all batches.
  aggregate_result_losses_dict = collections.defaultdict(list)
  with tf.contrib.slim.queues.QueueRunners(sess):
    try:
      for batch in range(int(num_batches)):
        if (batch + 1) % 100 == 0:
          logging.info('Running eval ops batch %d/%d', batch + 1, num_batches)
        if not batch_processor:
          try:
            if not losses_dict:
              losses_dict = {}
            result_dict, result_losses_dict = sess.run([tensor_dict,
                                                        losses_dict])
            counters['success'] += 1
          except tf.errors.InvalidArgumentError:
            logging.info('Skipping image')
            counters['skipped'] += 1
            # An empty result makes the check below skip this batch.
            result_dict = {}
        else:
          result_dict, result_losses_dict = batch_processor(
              tensor_dict, sess, batch, counters, losses_dict=losses_dict)
        if not result_dict:
          continue
        for key, value in iter(result_losses_dict.items()):
          aggregate_result_losses_dict[key].append(value)
        for evaluator in evaluators:
          # TODO(b/65130867): Use image_id tensor once we fix the input data
          # decoders to return correct image_id.
          # TODO(akuznetsa): result_dict contains batches of images, while
          # add_single_ground_truth_image_info expects a single image. Fix
          evaluator.add_single_ground_truth_image_info(
              image_id=batch, groundtruth_dict=result_dict)
          evaluator.add_single_detected_image_info(
              image_id=batch, detections_dict=result_dict)
      logging.info('Running eval batches done.')
    except tf.errors.OutOfRangeError:
      # The input ran out of data before num_batches batches were processed.
      logging.info('Done evaluating -- epoch limit reached')
    finally:
      # The metrics are computed here so that they are also available when the
      # loop above ended early.
      logging.info('# success: %d', counters['success'])
      logging.info('# skipped: %d', counters['skipped'])
      all_evaluator_metrics = {}
      for evaluator in evaluators:
        metrics = evaluator.evaluate()
        evaluator.clear()
        if any(key in all_evaluator_metrics for key in metrics):
          raise ValueError('Metric names between evaluators must not collide.')
        all_evaluator_metrics.update(metrics)
      global_step = tf.train.global_step(sess, tf.train.get_global_step())

      for key, value in iter(aggregate_result_losses_dict.items()):
        all_evaluator_metrics['Losses/' + key] = np.mean(value)
  # NOTE(review): the session is only closed on this path; if an exception
  # propagates out of the block above, sess.close() is not reached.
  sess.close()
  return (global_step, all_evaluator_metrics)
# TODO(rathodv): Add tests.
def repeated_checkpoint_run(tensor_dict,
                            summary_dir,
                            evaluators,
                            batch_processor=None,
                            checkpoint_dirs=None,
                            variables_to_restore=None,
                            restore_fn=None,
                            num_batches=1,
                            eval_interval_secs=120,
                            max_number_of_evaluations=None,
                            master='',
                            save_graph=False,
                            save_graph_dir='',
                            losses_dict=None):
  """Periodically evaluates desired tensors using checkpoint_dirs or restore_fn.

  This function repeatedly polls `checkpoint_dirs[0]` for the latest
  checkpoint. Whenever a checkpoint that has not been evaluated yet shows up,
  it is evaluated once via `_run_checkpoint_once` and the resulting metrics are
  written to `summary_dir` with `write_metrics`.

  Args:
    tensor_dict: a dictionary holding tensors representing a batch of detections
      and corresponding groundtruth annotations.
    summary_dir: a directory to write metrics summaries.
    evaluators: a list of object of type DetectionEvaluator to be used for
      evaluation. Note that the metric names produced by different evaluators
      must be unique.
    batch_processor: a function that is called as
        batch_processor(tensor_dict, sess, batch_index, counters,
                        losses_dict=losses_dict)
      and returns a tuple (result_dict, result_losses_dict), where
        1. tensor_dict: the same tensor_dict that is passed in as the first
          argument to this function.
        2. sess: a tensorflow session
        3. batch_index: an integer representing the index of the batch amongst
          all batches
        4. counters: a dictionary with 'success' and 'skipped' counts.
      By default, batch_processor is None, which defaults to running:
        return sess.run([tensor_dict, losses_dict])
    checkpoint_dirs: list of directories to load into a DetectionModel or an
      EnsembleModel if restore_fn isn't set. Also used to determine when to run
      next evaluation (only the first entry is polled for new checkpoints).
      Must have at least one element.
    variables_to_restore: None, or a dictionary mapping variable names found in
      a checkpoint to model variables. The dictionary would normally be
      generated by creating a tf.train.ExponentialMovingAverage object and
      calling its variables_to_restore() method. Not used if restore_fn is set.
    restore_fn: a function that takes a tf.Session object and correctly restores
      all necessary variables from the correct checkpoint file.
    num_batches: the number of batches to use for evaluation.
    eval_interval_secs: the number of seconds between each evaluation run.
    max_number_of_evaluations: the max number of iterations of the evaluation
      loop. Every polling pass counts as one iteration, including passes that
      find no checkpoint or only an already evaluated one. If the value is left
      as None (or is 0) the evaluation continues indefinitely.
    master: the location of the Tensorflow session.
    save_graph: whether or not the Tensorflow graph is saved as a pbtxt file.
    save_graph_dir: where to save on disk the Tensorflow graph. If save_graph
      is True this must be non-empty.
    losses_dict: optional dictionary of scalar detection losses.

  Returns:
    metrics: A dictionary containing metric names and values in the latest
      evaluation. The dictionary is empty if the loop finished without ever
      evaluating a checkpoint.

  Raises:
    ValueError: if max_number_of_evaluations is a negative number.
    ValueError: if checkpoint_dirs doesn't have at least one element.
  """
  # A falsy value (None or 0) means "run forever", so only negatives are
  # rejected here.
  if max_number_of_evaluations and max_number_of_evaluations <= 0:
    raise ValueError(
        '`max_number_of_evaluations` must be either None or a positive '
        'number.')

  if not checkpoint_dirs:
    raise ValueError('`checkpoint_dirs` must have at least one entry.')

  last_evaluated_model_path = None
  number_of_evaluations = 0
  # Bound up front: the loop may hit max_number_of_evaluations without a single
  # checkpoint having been evaluated, and `return metrics` must still work.
  metrics = {}
  while True:
    start = time.time()
    logging.info('Starting evaluation at %s',
                 time.strftime('%Y-%m-%d-%H:%M:%S', time.gmtime()))
    model_path = tf.train.latest_checkpoint(checkpoint_dirs[0])
    if not model_path:
      logging.info('No model found in %s. Will try again in %d seconds',
                   checkpoint_dirs[0], eval_interval_secs)
    elif model_path == last_evaluated_model_path:
      logging.info('Found already evaluated checkpoint. Will try again in %d '
                   'seconds', eval_interval_secs)
    else:
      last_evaluated_model_path = model_path
      global_step, metrics = _run_checkpoint_once(tensor_dict, evaluators,
                                                  batch_processor,
                                                  checkpoint_dirs,
                                                  variables_to_restore,
                                                  restore_fn, num_batches,
                                                  master, save_graph,
                                                  save_graph_dir,
                                                  losses_dict=losses_dict)
      write_metrics(metrics, global_step, summary_dir)
    number_of_evaluations += 1

    if (max_number_of_evaluations and
        number_of_evaluations >= max_number_of_evaluations):
      logging.info('Finished evaluation!')
      break
    # Sleep only for whatever is left of the interval after this pass.
    time_to_next_eval = start + eval_interval_secs - time.time()
    if time_to_next_eval > 0:
      time.sleep(time_to_next_eval)

  return metrics
def result_dict_for_single_example(image,
                                   key,
                                   detections,
                                   groundtruth=None,
                                   class_agnostic=False,
                                   scale_to_absolute=False):
  """Merges all detection and groundtruth information for a single example.

  Note that evaluation tools require classes that are 1-indexed, and so this
  function performs the offset. If `class_agnostic` is True, all output classes
  have label 1.

  Only the first element along the batch dimension of every entry in
  `detections` is used. The `groundtruth` dictionary passed by the caller is
  never modified.

  Args:
    image: A single 4D uint8 image tensor of shape [1, H, W, C].
    key: A single string tensor identifying the image.
    detections: A dictionary of detections, returned from
      DetectionModel.postprocess().
    groundtruth: (Optional) Dictionary of groundtruth items, with fields:
      'groundtruth_boxes': [num_boxes, 4] float32 tensor of boxes, in
        normalized coordinates.
      'groundtruth_classes': [num_boxes] int64 tensor of 1-indexed classes.
      'groundtruth_area': [num_boxes] float32 tensor of bbox area. (Optional)
      'groundtruth_is_crowd': [num_boxes] int64 tensor. (Optional)
      'groundtruth_difficult': [num_boxes] int64 tensor. (Optional)
      'groundtruth_group_of': [num_boxes] int64 tensor. (Optional)
      'groundtruth_instance_masks': 3D int64 tensor of instance masks
        (Optional).
    class_agnostic: Boolean indicating whether the detections are class-agnostic
      (i.e. binary). Default False.
    scale_to_absolute: Boolean indicating whether boxes and keypoints should be
      scaled to absolute coordinates. Note that for IoU based evaluations, it
      does not matter whether boxes are expressed in absolute or relative
      coordinates. Default False.

  Returns:
    A dictionary with:
    'original_image': A [1, H, W, C] uint8 image tensor.
    'key': A string tensor with image identifier.
    'detection_boxes': [num_detections, 4] float32 tensor of boxes, in
      normalized or absolute coordinates, depending on the value of
      `scale_to_absolute`.
    'detection_scores': [num_detections] float32 tensor of scores.
    'detection_classes': [num_detections] int64 tensor of 1-indexed classes.
    'detection_masks': [num_detections, H, W] uint8 tensor of binarized
      masks (thresholded at 0.5), reframed to full image masks. (Optional)
    'detection_keypoints': keypoints of the first batch element, scaled to
      absolute coordinates if `scale_to_absolute` is set. They are not
      truncated to num_detections. (Optional)
    'groundtruth_boxes': [num_boxes, 4] float32 tensor of boxes, in
      normalized or absolute coordinates, depending on the value of
      `scale_to_absolute`. (Optional)
    'groundtruth_classes': [num_boxes] int64 tensor of 1-indexed classes.
      (Optional)
    'groundtruth_area': [num_boxes] float32 tensor of bbox area. (Optional)
    'groundtruth_is_crowd': [num_boxes] int64 tensor. (Optional)
    'groundtruth_difficult': [num_boxes] int64 tensor. (Optional)
    'groundtruth_group_of': [num_boxes] int64 tensor. (Optional)
    'groundtruth_instance_masks': 3D uint8 tensor of instance masks
      (Optional).
  """
  label_id_offset = 1  # Applying label id offset (b/63711816)

  input_data_fields = fields.InputDataFields
  output_dict = {
      input_data_fields.original_image: image,
      input_data_fields.key: key,
  }

  detection_fields = fields.DetectionResultFields
  detection_boxes = detections[detection_fields.detection_boxes][0]
  image_shape = tf.shape(image)
  detection_scores = detections[detection_fields.detection_scores][0]

  if class_agnostic:
    detection_classes = tf.ones_like(detection_scores, dtype=tf.int64)
  else:
    detection_classes = (
        tf.to_int64(detections[detection_fields.detection_classes][0]) +
        label_id_offset)

  # Keep only the first `num_detections` entries of every detection tensor.
  num_detections = tf.to_int32(detections[detection_fields.num_detections][0])
  detection_boxes = tf.slice(
      detection_boxes, begin=[0, 0], size=[num_detections, -1])
  detection_classes = tf.slice(
      detection_classes, begin=[0], size=[num_detections])
  detection_scores = tf.slice(
      detection_scores, begin=[0], size=[num_detections])

  if scale_to_absolute:
    absolute_detection_boxlist = box_list_ops.to_absolute_coordinates(
        box_list.BoxList(detection_boxes), image_shape[1], image_shape[2])
    output_dict[detection_fields.detection_boxes] = (
        absolute_detection_boxlist.get())
  else:
    output_dict[detection_fields.detection_boxes] = detection_boxes
  output_dict[detection_fields.detection_classes] = detection_classes
  output_dict[detection_fields.detection_scores] = detection_scores

  if detection_fields.detection_masks in detections:
    detection_masks = detections[detection_fields.detection_masks][0]
    # TODO(rathodv): This should be done in model's postprocess
    # function ideally.
    detection_masks = tf.slice(
        detection_masks, begin=[0, 0, 0], size=[num_detections, -1, -1])
    # The reframing always receives the un-scaled (sliced) detection boxes,
    # regardless of `scale_to_absolute`.
    detection_masks_reframed = ops.reframe_box_masks_to_image_masks(
        detection_masks, detection_boxes, image_shape[1], image_shape[2])
    detection_masks_reframed = tf.cast(
        tf.greater(detection_masks_reframed, 0.5), tf.uint8)
    output_dict[detection_fields.detection_masks] = detection_masks_reframed
  if detection_fields.detection_keypoints in detections:
    detection_keypoints = detections[detection_fields.detection_keypoints][0]
    output_dict[detection_fields.detection_keypoints] = detection_keypoints
    if scale_to_absolute:
      absolute_detection_keypoints = keypoint_ops.scale(
          detection_keypoints, image_shape[1], image_shape[2])
      output_dict[detection_fields.detection_keypoints] = (
          absolute_detection_keypoints)

  if groundtruth:
    # Work on a shallow copy so that the uint8 cast below does not leak back
    # into the dictionary owned by the caller.
    groundtruth = dict(groundtruth)
    if input_data_fields.groundtruth_instance_masks in groundtruth:
      groundtruth[input_data_fields.groundtruth_instance_masks] = tf.cast(
          groundtruth[input_data_fields.groundtruth_instance_masks], tf.uint8)
    output_dict.update(groundtruth)
    if scale_to_absolute:
      groundtruth_boxes = groundtruth[input_data_fields.groundtruth_boxes]
      absolute_gt_boxlist = box_list_ops.to_absolute_coordinates(
          box_list.BoxList(groundtruth_boxes), image_shape[1], image_shape[2])
      output_dict[input_data_fields.groundtruth_boxes] = (
          absolute_gt_boxlist.get())
    # For class-agnostic models, groundtruth classes all become 1.
    if class_agnostic:
      groundtruth_classes = groundtruth[input_data_fields.groundtruth_classes]
      groundtruth_classes = tf.ones_like(groundtruth_classes, dtype=tf.int64)
      output_dict[input_data_fields.groundtruth_classes] = groundtruth_classes

  return output_dict
def get_eval_metric_ops_for_evaluators(evaluation_metrics,
                                       categories,
                                       eval_dict,
                                       include_metrics_per_category=False):
  """Builds the `tf.EstimatorSpec` eval metric ops for the requested metrics.

  Duplicate names in `evaluation_metrics` are collapsed, so each supported
  metric contributes its ops at most once.

  Args:
    evaluation_metrics: List of evaluation metric names. Current options are
      'coco_detection_metrics' and 'coco_mask_metrics'.
    categories: A list of dicts, each of which has the following keys -
      'id': (required) an integer id uniquely identifying this category.
      'name': (required) string representing category name e.g., 'cat', 'dog'.
    eval_dict: An evaluation dictionary, returned from
      result_dict_for_single_example().
    include_metrics_per_category: If True, additionally include per-category
      metrics.

  Returns:
    A dictionary of metric names to tuple of value_op and update_op that can be
    used as eval metric ops in tf.EstimatorSpec.

  Raises:
    ValueError: If any of the metrics in `evaluation_metric` is not
      'coco_detection_metrics' or 'coco_mask_metrics'.
  """
  unique_metric_names = list(set(evaluation_metrics))
  gt_fields = fields.InputDataFields
  det_fields = fields.DetectionResultFields

  collected_ops = {}
  for metric_name in unique_metric_names:
    if metric_name == 'coco_detection_metrics':
      evaluator = coco_evaluation.CocoDetectionEvaluator(
          categories, include_metrics_per_category=include_metrics_per_category)
      # Box metrics: groundtruth and detected boxes with classes and scores.
      evaluator_inputs = dict(
          image_id=eval_dict[gt_fields.key],
          groundtruth_boxes=eval_dict[gt_fields.groundtruth_boxes],
          groundtruth_classes=eval_dict[gt_fields.groundtruth_classes],
          detection_boxes=eval_dict[det_fields.detection_boxes],
          detection_scores=eval_dict[det_fields.detection_scores],
          detection_classes=eval_dict[det_fields.detection_classes],
          groundtruth_is_crowd=eval_dict.get(gt_fields.groundtruth_is_crowd))
    elif metric_name == 'coco_mask_metrics':
      evaluator = coco_evaluation.CocoMaskEvaluator(
          categories, include_metrics_per_category=include_metrics_per_category)
      # Mask metrics: instance masks take the place of the detected boxes.
      evaluator_inputs = dict(
          image_id=eval_dict[gt_fields.key],
          groundtruth_boxes=eval_dict[gt_fields.groundtruth_boxes],
          groundtruth_classes=eval_dict[gt_fields.groundtruth_classes],
          groundtruth_instance_masks=eval_dict[
              gt_fields.groundtruth_instance_masks],
          detection_scores=eval_dict[det_fields.detection_scores],
          detection_classes=eval_dict[det_fields.detection_classes],
          detection_masks=eval_dict[det_fields.detection_masks],
          groundtruth_is_crowd=eval_dict.get(gt_fields.groundtruth_is_crowd))
    else:
      raise ValueError('The only evaluation metrics supported are '
                       '"coco_detection_metrics" and "coco_mask_metrics". '
                       'Found {} in the evaluation metrics'.format(metric_name))
    collected_ops.update(
        evaluator.get_estimator_eval_metric_ops(**evaluator_inputs))

  return collected_ops
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for eval_util."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from object_detection import eval_util
from object_detection.core import standard_fields as fields
class EvalUtilTest(tf.test.TestCase):
  """Tests for eval_util.get_eval_metric_ops_for_evaluators."""

  def _get_categories_list(self):
    """Returns the three categories shared by all tests."""
    return [{'id': 0, 'name': 'person'},
            {'id': 1, 'name': 'dog'},
            {'id': 2, 'name': 'cat'}]

  def _make_evaluation_dict(self):
    """Builds an eval dict with one detection that matches the groundtruth.

    The single detection and the single groundtruth box both cover the whole
    20x20 image and carry all-ones masks. The detection uses class 0 while the
    groundtruth uses class 1, because result_dict_for_single_example() shifts
    detection classes by the label id offset of 1.

    Returns:
      The dictionary produced by eval_util.result_dict_for_single_example().
    """
    input_data_fields = fields.InputDataFields
    detection_fields = fields.DetectionResultFields

    image = tf.zeros(shape=[1, 20, 20, 3], dtype=tf.uint8)
    key = tf.constant('image1')
    detection_boxes = tf.constant([[[0., 0., 1., 1.]]])
    detection_scores = tf.constant([[0.8]])
    detection_classes = tf.constant([[0]])
    detection_masks = tf.ones(shape=[1, 1, 20, 20], dtype=tf.float32)
    num_detections = tf.constant([1])
    groundtruth_boxes = tf.constant([[0., 0., 1., 1.]])
    groundtruth_classes = tf.constant([1])
    groundtruth_instance_masks = tf.ones(shape=[1, 20, 20], dtype=tf.uint8)
    detections = {
        detection_fields.detection_boxes: detection_boxes,
        detection_fields.detection_scores: detection_scores,
        detection_fields.detection_classes: detection_classes,
        detection_fields.detection_masks: detection_masks,
        detection_fields.num_detections: num_detections
    }
    groundtruth = {
        input_data_fields.groundtruth_boxes: groundtruth_boxes,
        input_data_fields.groundtruth_classes: groundtruth_classes,
        input_data_fields.groundtruth_instance_masks: groundtruth_instance_masks
    }
    return eval_util.result_dict_for_single_example(image, key, detections,
                                                    groundtruth)

  def test_get_eval_metric_ops_for_coco_detections(self):
    """Box metrics alone yield a perfect box mAP and no mask metrics."""
    evaluation_metrics = ['coco_detection_metrics']
    categories = self._get_categories_list()
    eval_dict = self._make_evaluation_dict()
    metric_ops = eval_util.get_eval_metric_ops_for_evaluators(
        evaluation_metrics, categories, eval_dict)
    _, update_op = metric_ops['DetectionBoxes_Precision/mAP']

    with self.test_session() as sess:
      metrics = {}
      # dict.items() works on both Python 2 and 3; iteritems() is Python 2 only.
      for key, (value_op, _) in metric_ops.items():
        metrics[key] = value_op
      sess.run(update_op)
      metrics = sess.run(metrics)
      print(metrics)
      self.assertAlmostEqual(1.0, metrics['DetectionBoxes_Precision/mAP'])
      self.assertNotIn('DetectionMasks_Precision/mAP', metrics)

  def test_get_eval_metric_ops_for_coco_detections_and_masks(self):
    """Box and mask metrics together both report a perfect mAP."""
    evaluation_metrics = ['coco_detection_metrics',
                          'coco_mask_metrics']
    categories = self._get_categories_list()
    eval_dict = self._make_evaluation_dict()
    metric_ops = eval_util.get_eval_metric_ops_for_evaluators(
        evaluation_metrics, categories, eval_dict)
    _, update_op_boxes = metric_ops['DetectionBoxes_Precision/mAP']
    _, update_op_masks = metric_ops['DetectionMasks_Precision/mAP']

    with self.test_session() as sess:
      metrics = {}
      # dict.items() works on both Python 2 and 3; iteritems() is Python 2 only.
      for key, (value_op, _) in metric_ops.items():
        metrics[key] = value_op
      sess.run(update_op_boxes)
      sess.run(update_op_masks)
      metrics = sess.run(metrics)
      self.assertAlmostEqual(1.0, metrics['DetectionBoxes_Precision/mAP'])
      self.assertAlmostEqual(1.0, metrics['DetectionMasks_Precision/mAP'])

  def test_get_eval_metric_ops_raises_error_with_unsupported_metric(self):
    """An unknown metric name is rejected with a ValueError."""
    evaluation_metrics = ['unsupported_metrics']
    categories = self._get_categories_list()
    eval_dict = self._make_evaluation_dict()
    with self.assertRaises(ValueError):
      eval_util.get_eval_metric_ops_for_evaluators(
          evaluation_metrics, categories, eval_dict)
# Run the tests through TensorFlow's test runner when executed as a script.
if __name__ == '__main__':
  tf.test.main()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment