Commit 7a9934df authored by Zhichao Lu's avatar Zhichao Lu Committed by lzc5123016
Browse files

Merged commit includes the following changes:

184048729  by Zhichao Lu:

    Modify target_assigner so that it creates regression targets taking keypoints into account.

--
184027183  by Zhichao Lu:

    Resnet V1 FPN based feature extractors for SSD meta architecture in Object Detection V2 API.

--
184004730  by Zhichao Lu:

    Expose a lever to override the configured mask_type.

--
183933113  by Zhichao Lu:

    Weight shared convolutional box predictor as described in https://arxiv.org/abs/1708.02002

--
183929669  by Zhichao Lu:

    Expanding box list operations for future data augmentations.

--
183916792  by Zhichao Lu:

    Fix unrecognized assertion function in tests.

--
183906851  by Zhichao Lu:

    - Change ssd meta architecture to use regression weights to compute loss normalizer.

--
183871003  by Zhichao Lu:

    Fix config_util_test wrong dependency.

--
183782120  by Zhichao Lu:

    Add __init__ file to third_party directories.

--
183779109  by Zhichao Lu:

    Setup regular version s...
parent 7ef602be
package(
default_visibility = ["//visibility:public"],
)
licenses(["notice"])
exports_files([
"pets_examples.record",
])
package(
default_visibility = ["//visibility:public"],
)
licenses(["notice"])
exports_files([
"image1.jpg",
"image2.jpg",
])
......@@ -47,9 +47,10 @@ import os
import tensorflow as tf
from object_detection import trainer
from object_detection.builders import input_reader_builder
from object_detection.builders import dataset_builder
from object_detection.builders import model_builder
from object_detection.utils import config_util
from object_detection.utils import dataset_util
tf.logging.set_verbosity(tf.logging.INFO)
......@@ -114,8 +115,13 @@ def main(_):
model_config=model_config,
is_training=True)
create_input_dict_fn = functools.partial(
input_reader_builder.build, input_config)
def get_next(config):
return dataset_util.make_initializable_iterator(
dataset_builder.build(
config, num_workers=FLAGS.worker_replicas,
worker_index=FLAGS.task)).get_next()
create_input_dict_fn = functools.partial(get_next, input_config)
env = json.loads(os.environ.get('TF_CONFIG', '{}'))
cluster_data = env.get('cluster', None)
......
......@@ -108,6 +108,8 @@ def get_inputs(input_queue, num_classes, merge_multiple_label_boxes=False):
keypoints_list: a list of 3-D float tensors of shape [num_boxes,
num_keypoints, 2] containing keypoints for objects if present in the
input queue. Else returns None.
weights_lists: a list of 1-D float32 tensors of shape [num_boxes]
containing groundtruth weight for each box.
"""
read_data_list = input_queue.dequeue()
label_id_offset = 1
......@@ -132,7 +134,10 @@ def get_inputs(input_queue, num_classes, merge_multiple_label_boxes=False):
if (merge_multiple_label_boxes and (
masks_gt is not None or keypoints_gt is not None)):
raise NotImplementedError('Multi-label support is only for boxes.')
return image, key, location_gt, classes_gt, masks_gt, keypoints_gt
weights_gt = read_data.get(
fields.InputDataFields.groundtruth_weights)
return (image, key, location_gt, classes_gt, masks_gt, keypoints_gt,
weights_gt)
return zip(*map(extract_images_and_targets, read_data_list))
......@@ -147,12 +152,21 @@ def _create_losses(input_queue, create_model_fn, train_config):
"""
detection_model = create_model_fn()
(images, _, groundtruth_boxes_list, groundtruth_classes_list,
groundtruth_masks_list, groundtruth_keypoints_list) = get_inputs(
groundtruth_masks_list, groundtruth_keypoints_list, _) = get_inputs(
input_queue,
detection_model.num_classes,
train_config.merge_multiple_label_boxes)
images = [detection_model.preprocess(image) for image in images]
images = tf.concat(images, 0)
preprocessed_images = []
true_image_shapes = []
for image in images:
resized_image, true_image_shape = detection_model.preprocess(image)
preprocessed_images.append(resized_image)
true_image_shapes.append(true_image_shape)
images = tf.concat(preprocessed_images, 0)
true_image_shapes = tf.concat(true_image_shapes, 0)
if any(mask is None for mask in groundtruth_masks_list):
groundtruth_masks_list = None
if any(keypoints is None for keypoints in groundtruth_keypoints_list):
......@@ -162,16 +176,16 @@ def _create_losses(input_queue, create_model_fn, train_config):
groundtruth_classes_list,
groundtruth_masks_list,
groundtruth_keypoints_list)
prediction_dict = detection_model.predict(images)
prediction_dict = detection_model.predict(images, true_image_shapes)
losses_dict = detection_model.loss(prediction_dict)
losses_dict = detection_model.loss(prediction_dict, true_image_shapes)
for loss_tensor in losses_dict.values():
tf.losses.add_loss(loss_tensor)
def train(create_tensor_dict_fn, create_model_fn, train_config, master, task,
num_clones, worker_replicas, clone_on_cpu, ps_tasks, worker_job_name,
is_chief, train_dir):
is_chief, train_dir, graph_hook_fn=None):
"""Training function for detection models.
Args:
......@@ -188,6 +202,10 @@ def train(create_tensor_dict_fn, create_model_fn, train_config, master, task,
worker_job_name: Name of the worker job.
is_chief: Whether this replica is the chief replica.
train_dir: Directory to write checkpoints and training summaries to.
graph_hook_fn: Optional function that is called after the training graph is
completely built. This is helpful to perform additional changes to the
training graph such as optimizing batchnorm. The function should modify
the default graph.
"""
detection_model = create_model_fn()
......@@ -217,7 +235,7 @@ def train(create_tensor_dict_fn, create_model_fn, train_config, master, task,
train_config.prefetch_queue_capacity, data_augmentation_options)
# Gather initial summaries.
# TODO(rathodv): See if summaries can be added/extracted from global tf
# TODO: See if summaries can be added/extracted from global tf
# collections so that they don't have to be passed around.
summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))
global_summaries = set([])
......@@ -258,8 +276,11 @@ def train(create_tensor_dict_fn, create_model_fn, train_config, master, task,
init_fn = initializer_fn
with tf.device(deploy_config.optimizer_device()):
regularization_losses = (None if train_config.add_regularization_loss
else [])
total_loss, grads_and_vars = model_deploy.optimize_clones(
clones, training_optimizer, regularization_losses=None)
clones, training_optimizer,
regularization_losses=regularization_losses)
total_loss = tf.check_numerics(total_loss, 'LossTensor is inf or nan.')
# Optionally multiply bias gradients by train_config.bias_grad_multiplier.
......@@ -285,11 +306,14 @@ def train(create_tensor_dict_fn, create_model_fn, train_config, master, task,
grad_updates = training_optimizer.apply_gradients(grads_and_vars,
global_step=global_step)
update_ops.append(grad_updates)
update_op = tf.group(*update_ops)
update_op = tf.group(*update_ops, name='update_barrier')
with tf.control_dependencies([update_op]):
train_tensor = tf.identity(total_loss, name='train_op')
if graph_hook_fn:
with tf.device(deploy_config.variables_device()):
graph_hook_fn()
# Add summaries.
for model_var in slim.get_model_variables():
global_summaries.add(tf.summary.histogram(model_var.op.name, model_var))
......
......@@ -51,10 +51,8 @@ class FakeDetectionModel(model.DetectionModel):
def __init__(self):
super(FakeDetectionModel, self).__init__(num_classes=NUMBER_OF_CLASSES)
self._classification_loss = losses.WeightedSigmoidClassificationLoss(
anchorwise_output=True)
self._localization_loss = losses.WeightedSmoothL1LocalizationLoss(
anchorwise_output=True)
self._classification_loss = losses.WeightedSigmoidClassificationLoss()
self._localization_loss = losses.WeightedSmoothL1LocalizationLoss()
def preprocess(self, inputs):
"""Input preprocessing, resizes images to 28x28.
......@@ -65,14 +63,24 @@ class FakeDetectionModel(model.DetectionModel):
Returns:
preprocessed_inputs: a [batch, 28, 28, channels] float32 tensor.
true_image_shapes: int32 tensor of shape [batch, 3] where each row is
of the form [height, width, channels] indicating the shapes
of true images in the resized images, as resized images can be padded
with zeros.
"""
return tf.image.resize_images(inputs, [28, 28])
true_image_shapes = [inputs.shape[:-1].as_list()
for _ in range(inputs.shape[-1])]
return tf.image.resize_images(inputs, [28, 28]), true_image_shapes
def predict(self, preprocessed_inputs):
def predict(self, preprocessed_inputs, true_image_shapes):
"""Prediction tensors from inputs tensor.
Args:
preprocessed_inputs: a [batch, 28, 28, channels] float32 tensor.
true_image_shapes: int32 tensor of shape [batch, 3] where each row is
of the form [height, width, channels] indicating the shapes
of true images in the resized images, as resized images can be padded
with zeros.
Returns:
prediction_dict: a dictionary holding prediction tensors to be
......@@ -89,11 +97,15 @@ class FakeDetectionModel(model.DetectionModel):
'box_encodings': tf.reshape(box_prediction, [-1, 1, 4])
}
def postprocess(self, prediction_dict, **params):
def postprocess(self, prediction_dict, true_image_shapes, **params):
"""Convert predicted output tensors to final detections. Unused.
Args:
prediction_dict: a dictionary holding prediction tensors.
true_image_shapes: int32 tensor of shape [batch, 3] where each row is
of the form [height, width, channels] indicating the shapes
of true images in the resized images, as resized images can be padded
with zeros.
**params: Additional keyword arguments for specific implementations of
DetectionModel.
......@@ -107,7 +119,7 @@ class FakeDetectionModel(model.DetectionModel):
'num_detections': None
}
def loss(self, prediction_dict):
def loss(self, prediction_dict, true_image_shapes):
"""Compute scalar loss tensors with respect to provided groundtruth.
Calling this function requires that groundtruth tensors have been
......@@ -115,6 +127,10 @@ class FakeDetectionModel(model.DetectionModel):
Args:
prediction_dict: a dictionary holding predicted tensors
true_image_shapes: int32 tensor of shape [batch, 3] where each row is
of the form [height, width, channels] indicating the shapes
of true images in the resized images, as resized images can be padded
with zeros.
Returns:
a dictionary mapping strings (loss names) to scalar tensors representing
......
......@@ -8,6 +8,12 @@ licenses(["notice"])
# Apache 2.0
py_library(
name = "test_case",
srcs = ["test_case.py"],
deps = ["//tensorflow"],
)
py_library(
name = "category_util",
srcs = ["category_util.py"],
......@@ -18,12 +24,13 @@ py_library(
name = "config_util",
srcs = ["config_util.py"],
deps = [
"//pyglib/logging",
"//tensorflow",
"//tensorflow_models/object_detection/protos:eval_py_pb2",
"//tensorflow_models/object_detection/protos:input_reader_py_pb2",
"//tensorflow_models/object_detection/protos:model_py_pb2",
"//tensorflow_models/object_detection/protos:pipeline_py_pb2",
"//tensorflow_models/object_detection/protos:train_py_pb2",
"//tensorflow/models/research/object_detection/protos:eval_py_pb2",
"//tensorflow/models/research/object_detection/protos:input_reader_py_pb2",
"//tensorflow/models/research/object_detection/protos:model_py_pb2",
"//tensorflow/models/research/object_detection/protos:pipeline_py_pb2",
"//tensorflow/models/research/object_detection/protos:train_py_pb2",
],
)
......@@ -35,13 +42,28 @@ py_library(
],
)
py_library(
name = "json_utils",
srcs = ["json_utils.py"],
deps = [],
)
py_test(
name = "json_utils_test",
srcs = ["json_utils_test.py"],
deps = [
":json_utils",
"//tensorflow",
],
)
py_library(
name = "label_map_util",
srcs = ["label_map_util.py"],
deps = [
"//third_party/py/google/protobuf",
"//google/protobuf",
"//tensorflow",
"//tensorflow_models/object_detection/protos:string_int_label_map_py_pb2",
"//tensorflow/models/research/object_detection/protos:string_int_label_map_py_pb2",
],
)
......@@ -56,13 +78,22 @@ py_library(
py_library(
name = "metrics",
srcs = ["metrics.py"],
deps = ["//third_party/py/numpy"],
deps = ["//numpy"],
)
py_library(
name = "np_box_list",
srcs = ["np_box_list.py"],
deps = ["//tensorflow"],
deps = ["//numpy"],
)
py_library(
name = "np_box_mask_list",
srcs = ["np_box_mask_list.py"],
deps = [
":np_box_list",
"//numpy",
],
)
py_library(
......@@ -71,7 +102,18 @@ py_library(
deps = [
":np_box_list",
":np_box_ops",
"//tensorflow",
"//numpy",
],
)
py_library(
name = "np_box_mask_list_ops",
srcs = ["np_box_mask_list_ops.py"],
deps = [
":np_box_list_ops",
":np_box_mask_list",
":np_mask_ops",
"//numpy",
],
)
......@@ -81,6 +123,12 @@ py_library(
deps = ["//tensorflow"],
)
py_library(
name = "np_mask_ops",
srcs = ["np_mask_ops.py"],
deps = ["//numpy"],
)
py_library(
name = "object_detection_evaluation",
srcs = ["object_detection_evaluation.py"],
......@@ -89,7 +137,7 @@ py_library(
":metrics",
":per_image_evaluation",
"//tensorflow",
"//tensorflow_models/object_detection/core:standard_fields",
"//tensorflow/models/research/object_detection/core:standard_fields",
],
)
......@@ -97,11 +145,12 @@ py_library(
name = "ops",
srcs = ["ops.py"],
deps = [
":shape_utils",
":static_shape",
"//tensorflow",
"//tensorflow_models/object_detection/core:box_list",
"//tensorflow_models/object_detection/core:box_list_ops",
"//tensorflow_models/object_detection/core:standard_fields",
"//tensorflow/models/research/object_detection/core:box_list",
"//tensorflow/models/research/object_detection/core:box_list_ops",
"//tensorflow/models/research/object_detection/core:standard_fields",
],
)
......@@ -111,6 +160,8 @@ py_library(
deps = [
":np_box_list",
":np_box_list_ops",
":np_box_mask_list",
":np_box_mask_list_ops",
"//tensorflow",
],
)
......@@ -118,7 +169,10 @@ py_library(
py_library(
name = "shape_utils",
srcs = ["shape_utils.py"],
deps = ["//tensorflow"],
deps = [
":static_shape",
"//tensorflow",
],
)
py_library(
......@@ -132,12 +186,12 @@ py_library(
srcs = ["test_utils.py"],
deps = [
"//tensorflow",
"//tensorflow_models/object_detection/core:anchor_generator",
"//tensorflow_models/object_detection/core:box_coder",
"//tensorflow_models/object_detection/core:box_list",
"//tensorflow_models/object_detection/core:box_predictor",
"//tensorflow_models/object_detection/core:matcher",
"//tensorflow_models/object_detection/utils:shape_utils",
"//tensorflow/models/research/object_detection/core:anchor_generator",
"//tensorflow/models/research/object_detection/core:box_coder",
"//tensorflow/models/research/object_detection/core:box_list",
"//tensorflow/models/research/object_detection/core:box_predictor",
"//tensorflow/models/research/object_detection/core:matcher",
"//tensorflow/models/research/object_detection/utils:shape_utils",
],
)
......@@ -153,10 +207,12 @@ py_library(
name = "visualization_utils",
srcs = ["visualization_utils.py"],
deps = [
"//third_party/py/PIL:pil",
"//third_party/py/matplotlib",
"//third_party/py/six",
"//PIL:pil",
"//Tkinter", # buildcleaner: keep
"//matplotlib",
"//six",
"//tensorflow",
"//tensorflow/models/research/object_detection/core:standard_fields",
],
)
......@@ -174,11 +230,11 @@ py_test(
srcs = ["config_util_test.py"],
deps = [
":config_util",
"//tensorflow:tensorflow_google",
"//tensorflow_models/object_detection/protos:input_reader_py_pb2",
"//tensorflow_models/object_detection/protos:model_py_pb2",
"//tensorflow_models/object_detection/protos:pipeline_py_pb2",
"//tensorflow_models/object_detection/protos:train_py_pb2",
"//tensorflow",
"//tensorflow/models/research/object_detection/protos:input_reader_py_pb2",
"//tensorflow/models/research/object_detection/protos:model_py_pb2",
"//tensorflow/models/research/object_detection/protos:pipeline_py_pb2",
"//tensorflow/models/research/object_detection/protos:train_py_pb2",
],
)
......@@ -188,6 +244,7 @@ py_test(
deps = [
":dataset_util",
"//tensorflow",
"//tensorflow/models/research/object_detection/protos:input_reader_py_pb2",
],
)
......@@ -223,6 +280,17 @@ py_test(
srcs = ["np_box_list_test.py"],
deps = [
":np_box_list",
"//numpy",
"//tensorflow",
],
)
py_test(
name = "np_box_mask_list_test",
srcs = ["np_box_mask_list_test.py"],
deps = [
":np_box_mask_list",
"//numpy",
"//tensorflow",
],
)
......@@ -233,6 +301,18 @@ py_test(
deps = [
":np_box_list",
":np_box_list_ops",
"//numpy",
"//tensorflow",
],
)
py_test(
name = "np_box_mask_list_ops_test",
srcs = ["np_box_mask_list_ops_test.py"],
deps = [
":np_box_mask_list",
":np_box_mask_list_ops",
"//numpy",
"//tensorflow",
],
)
......@@ -246,13 +326,22 @@ py_test(
],
)
py_test(
name = "np_mask_ops_test",
srcs = ["np_mask_ops_test.py"],
deps = [
":np_mask_ops",
"//tensorflow",
],
)
py_test(
name = "object_detection_evaluation_test",
srcs = ["object_detection_evaluation_test.py"],
deps = [
":object_detection_evaluation",
"//tensorflow",
"//tensorflow_models/object_detection/core:standard_fields",
"//tensorflow/models/research/object_detection/core:standard_fields",
],
)
......@@ -261,8 +350,9 @@ py_test(
srcs = ["ops_test.py"],
deps = [
":ops",
":test_case",
"//tensorflow",
"//tensorflow_models/object_detection/core:standard_fields",
"//tensorflow/models/research/object_detection/core:standard_fields",
],
)
......@@ -280,6 +370,7 @@ py_test(
srcs = ["shape_utils_test.py"],
deps = [
":shape_utils",
"//numpy",
"//tensorflow",
],
)
......@@ -315,10 +406,11 @@ py_test(
name = "visualization_utils_test",
srcs = ["visualization_utils_test.py"],
data = [
"//tensorflow_models/object_detection/test_images:image1.jpg",
"//tensorflow/models/research/object_detection/test_images:image1.jpg",
],
deps = [
":visualization_utils",
"//third_party/py/PIL:pil",
"//pyglib/flags",
"//PIL:pil",
],
)
......@@ -228,6 +228,9 @@ def merge_external_params_with_configs(configs, hparams=None, **kwargs):
if value:
_update_label_map_path(configs, value)
tf.logging.info("Overwriting label map path: %s", value)
if key == "mask_type":
_update_mask_type(configs, value)
tf.logging.info("Overwritten mask type: %s", value)
return configs
......@@ -450,3 +453,18 @@ def _update_label_map_path(configs, label_map_path):
"""
configs["train_input_config"].label_map_path = label_map_path
configs["eval_input_config"].label_map_path = label_map_path
def _update_mask_type(configs, mask_type):
"""Updates the mask type for both train and eval input readers.
The configs dictionary is updated in place, and hence not returned.
Args:
configs: Dictionary of configuration objects. See outputs from
get_configs_from_pipeline_file() or get_configs_from_multiple_files().
mask_type: A string name representing a value of
input_reader_pb2.InstanceMaskType
"""
configs["train_input_config"].mask_type = mask_type
configs["eval_input_config"].mask_type = mask_type
......@@ -16,8 +16,7 @@
import os
import google3
import tensorflow.google as tf
import tensorflow as tf
from google.protobuf import text_format
......@@ -154,7 +153,7 @@ class ConfigUtilTest(tf.test.TestCase):
"""Asserts successful updating of all learning rate schemes."""
original_learning_rate = 0.7
learning_rate_scaling = 0.1
hparams = tf.HParams(learning_rate=0.15)
hparams = tf.contrib.training.HParams(learning_rate=0.15)
pipeline_config_path = os.path.join(self.get_temp_dir(), "pipeline.config")
# Constant learning rate.
......@@ -216,7 +215,7 @@ class ConfigUtilTest(tf.test.TestCase):
def testNewBatchSize(self):
"""Tests that batch size is updated appropriately."""
original_batch_size = 2
hparams = tf.HParams(batch_size=16)
hparams = tf.contrib.training.HParams(batch_size=16)
pipeline_config_path = os.path.join(self.get_temp_dir(), "pipeline.config")
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
......@@ -231,7 +230,7 @@ class ConfigUtilTest(tf.test.TestCase):
def testNewBatchSizeWithClipping(self):
"""Tests that batch size is clipped to 1 from below."""
original_batch_size = 2
hparams = tf.HParams(batch_size=0.5)
hparams = tf.contrib.training.HParams(batch_size=0.5)
pipeline_config_path = os.path.join(self.get_temp_dir(), "pipeline.config")
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
......@@ -246,7 +245,7 @@ class ConfigUtilTest(tf.test.TestCase):
def testNewMomentumOptimizerValue(self):
"""Tests that new momentum value is updated appropriately."""
original_momentum_value = 0.4
hparams = tf.HParams(momentum_optimizer_value=1.1)
hparams = tf.contrib.training.HParams(momentum_optimizer_value=1.1)
pipeline_config_path = os.path.join(self.get_temp_dir(), "pipeline.config")
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
......@@ -265,7 +264,7 @@ class ConfigUtilTest(tf.test.TestCase):
original_localization_weight = 0.1
original_classification_weight = 0.2
new_weight_ratio = 5.0
hparams = tf.HParams(
hparams = tf.contrib.training.HParams(
classification_localization_weight_ratio=new_weight_ratio)
pipeline_config_path = os.path.join(self.get_temp_dir(), "pipeline.config")
......@@ -288,7 +287,8 @@ class ConfigUtilTest(tf.test.TestCase):
original_gamma = 1.0
new_alpha = 0.3
new_gamma = 2.0
hparams = tf.HParams(focal_loss_alpha=new_alpha, focal_loss_gamma=new_gamma)
hparams = tf.contrib.training.HParams(
focal_loss_alpha=new_alpha, focal_loss_gamma=new_gamma)
pipeline_config_path = os.path.join(self.get_temp_dir(), "pipeline.config")
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
......@@ -396,6 +396,25 @@ class ConfigUtilTest(tf.test.TestCase):
self.assertEqual(new_label_map_path,
configs["eval_input_config"].label_map_path)
def testNewMaskType(self):
"""Tests that mask type can be overwritten in input readers."""
original_mask_type = input_reader_pb2.NUMERICAL_MASKS
new_mask_type = input_reader_pb2.PNG_MASKS
pipeline_config_path = os.path.join(self.get_temp_dir(), "pipeline.config")
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
train_input_reader = pipeline_config.train_input_reader
train_input_reader.mask_type = original_mask_type
eval_input_reader = pipeline_config.eval_input_reader
eval_input_reader.mask_type = original_mask_type
_write_config(pipeline_config, pipeline_config_path)
configs = config_util.get_configs_from_pipeline_file(pipeline_config_path)
configs = config_util.merge_external_params_with_configs(
configs, mask_type=new_mask_type)
self.assertEqual(new_mask_type, configs["train_input_config"].mask_type)
self.assertEqual(new_mask_type, configs["eval_input_config"].mask_type)
if __name__ == "__main__":
tf.test.main()
......@@ -84,3 +84,64 @@ def recursive_parse_xml_to_dict(xml):
result[child.tag] = []
result[child.tag].append(child_result[child.tag])
return {xml.tag: result}
def make_initializable_iterator(dataset):
"""Creates an iterator, and initializes tables.
This is useful in cases where make_one_shot_iterator wouldn't work because
the graph contains a hash table that needs to be initialized.
Args:
dataset: A `tf.data.Dataset` object.
Returns:
A `tf.data.Iterator`.
"""
iterator = dataset.make_initializable_iterator()
tf.add_to_collection(tf.GraphKeys.TABLE_INITIALIZERS, iterator.initializer)
return iterator
def read_dataset(
file_read_func, decode_func, input_files, config, num_workers=1,
worker_index=0):
"""Reads a dataset, and handles repetition and shuffling.
Args:
file_read_func: Function to use in tf.data.Dataset.interleave, to read
every individual file into a tf.data.Dataset.
decode_func: Function to apply to all records.
input_files: A list of file paths to read.
config: A input_reader_builder.InputReader object.
num_workers: Number of workers / shards.
worker_index: Id for the current worker.
Returns:
A tf.data.Dataset based on config.
"""
# Shard, shuffle, and read files.
filenames = tf.concat([tf.matching_files(pattern) for pattern in input_files],
0)
dataset = tf.data.Dataset.from_tensor_slices(filenames)
dataset = dataset.shard(num_workers, worker_index)
dataset = dataset.repeat(config.num_epochs or None)
if config.shuffle:
dataset = dataset.shuffle(config.filenames_shuffle_buffer_size,
reshuffle_each_iteration=True)
# Read file records and shuffle them.
# If cycle_length is larger than the number of files, more than one reader
# will be assigned to the same file, leading to repetition.
cycle_length = tf.cast(
tf.minimum(config.num_readers, tf.size(filenames)), tf.int64)
# TODO: find the optimal block_length.
dataset = dataset.interleave(
file_read_func, cycle_length=cycle_length, block_length=1)
if config.shuffle:
dataset = dataset.shuffle(config.shuffle_buffer_size,
reshuffle_each_iteration=True)
dataset = dataset.map(decode_func, num_parallel_calls=config.num_readers)
return dataset.prefetch(config.prefetch_buffer_size)
......@@ -18,11 +18,29 @@
import os
import tensorflow as tf
from object_detection.protos import input_reader_pb2
from object_detection.utils import dataset_util
class DatasetUtilTest(tf.test.TestCase):
def setUp(self):
self._path_template = os.path.join(self.get_temp_dir(), 'examples_%s.txt')
for i in range(5):
path = self._path_template % i
with tf.gfile.Open(path, 'wb') as f:
f.write('\n'.join([str(i + 1), str((i + 1) * 10)]))
def _get_dataset_next(self, files, config, batch_size):
def decode_func(value):
return [tf.string_to_number(value, out_type=tf.int32)]
dataset = dataset_util.read_dataset(
tf.data.TextLineDataset, decode_func, files, config)
dataset = dataset.batch(batch_size)
return dataset.make_one_shot_iterator().get_next()
def test_read_examples_list(self):
example_list_data = """example1 1\nexample2 2"""
example_list_path = os.path.join(self.get_temp_dir(), 'examples.txt')
......@@ -32,6 +50,47 @@ class DatasetUtilTest(tf.test.TestCase):
examples = dataset_util.read_examples_list(example_list_path)
self.assertListEqual(['example1', 'example2'], examples)
def test_make_initializable_iterator_with_hashTable(self):
keys = [1, 0, -1]
dataset = tf.data.Dataset.from_tensor_slices([[1, 2, -1, 5]])
table = tf.contrib.lookup.HashTable(
initializer=tf.contrib.lookup.KeyValueTensorInitializer(
keys=keys,
values=list(reversed(keys))),
default_value=100)
dataset = dataset.map(table.lookup)
data = dataset_util.make_initializable_iterator(dataset).get_next()
init = tf.tables_initializer()
with self.test_session() as sess:
sess.run(init)
self.assertAllEqual(sess.run(data), [-1, 100, 1, 100])
def test_read_dataset(self):
config = input_reader_pb2.InputReader()
config.num_readers = 1
config.shuffle = False
data = self._get_dataset_next([self._path_template % '*'], config,
batch_size=20)
with self.test_session() as sess:
self.assertAllEqual(sess.run(data),
[[1, 10, 2, 20, 3, 30, 4, 40, 5, 50, 1, 10, 2, 20, 3,
30, 4, 40, 5, 50]])
def test_read_dataset_single_epoch(self):
config = input_reader_pb2.InputReader()
config.num_epochs = 1
config.num_readers = 1
config.shuffle = False
data = self._get_dataset_next([self._path_template % '0'], config,
batch_size=30)
with self.test_session() as sess:
# First batch will retrieve as much as it can, second batch will fail.
self.assertAllEqual(sess.run(data), [[1, 10]])
self.assertRaises(tf.errors.OutOfRangeError, sess.run, data)
if __name__ == '__main__':
tf.test.main()
"""Utilities for dealing with writing json strings.
json_utils wraps json.dump and json.dumps so that they can be used to safely
control the precision of floats when writing to json strings or files.
"""
import json
from json import encoder
def Dump(obj, fid, float_digits=-1, **params):
"""Wrapper of json.dump that allows specifying the float precision used.
Args:
obj: The object to dump.
fid: The file id to write to.
float_digits: The number of digits of precision when writing floats out.
**params: Additional parameters to pass to json.dumps.
"""
original_encoder = encoder.FLOAT_REPR
if float_digits >= 0:
encoder.FLOAT_REPR = lambda o: format(o, '.%df' % float_digits)
try:
json.dump(obj, fid, **params)
finally:
encoder.FLOAT_REPR = original_encoder
def Dumps(obj, float_digits=-1, **params):
"""Wrapper of json.dumps that allows specifying the float precision used.
Args:
obj: The object to dump.
float_digits: The number of digits of precision when writing floats out.
**params: Additional parameters to pass to json.dumps.
Returns:
output: JSON string representation of obj.
"""
original_encoder = encoder.FLOAT_REPR
original_c_make_encoder = encoder.c_make_encoder
if float_digits >= 0:
encoder.FLOAT_REPR = lambda o: format(o, '.%df' % float_digits)
encoder.c_make_encoder = None
try:
output = json.dumps(obj, **params)
finally:
encoder.FLOAT_REPR = original_encoder
encoder.c_make_encoder = original_c_make_encoder
return output
def PrettyParams(**params):
"""Returns parameters for use with Dump and Dumps to output pretty json.
Example usage:
```json_str = json_utils.Dumps(obj, **json_utils.PrettyParams())```
```json_str = json_utils.Dumps(
obj, **json_utils.PrettyParams(allow_nans=False))```
Args:
**params: Additional params to pass to json.dump or json.dumps.
Returns:
params: Parameters that are compatible with json_utils.Dump and
json_utils.Dumps.
"""
params['float_digits'] = 4
params['sort_keys'] = True
params['indent'] = 2
params['separators'] = (',', ': ')
return params
"""Tests for google3.image.understanding.object_detection.utils.json_utils."""
import os
import tensorflow as tf
from object_detection.utils import json_utils
class JsonUtilsTest(tf.test.TestCase):
def testDumpReasonablePrecision(self):
output_path = os.path.join(tf.test.get_temp_dir(), 'test.json')
with tf.gfile.GFile(output_path, 'w') as f:
json_utils.Dump(1.0, f, float_digits=2)
with tf.gfile.GFile(output_path, 'r') as f:
self.assertEqual(f.read(), '1.00')
def testDumpPassExtraParams(self):
output_path = os.path.join(tf.test.get_temp_dir(), 'test.json')
with tf.gfile.GFile(output_path, 'w') as f:
json_utils.Dump([1.0], f, float_digits=2, indent=3)
with tf.gfile.GFile(output_path, 'r') as f:
self.assertEqual(f.read(), '[\n 1.00\n]')
def testDumpZeroPrecision(self):
output_path = os.path.join(tf.test.get_temp_dir(), 'test.json')
with tf.gfile.GFile(output_path, 'w') as f:
json_utils.Dump(1.0, f, float_digits=0, indent=3)
with tf.gfile.GFile(output_path, 'r') as f:
self.assertEqual(f.read(), '1')
def testDumpUnspecifiedPrecision(self):
output_path = os.path.join(tf.test.get_temp_dir(), 'test.json')
with tf.gfile.GFile(output_path, 'w') as f:
json_utils.Dump(1.012345, f)
with tf.gfile.GFile(output_path, 'r') as f:
self.assertEqual(f.read(), '1.012345')
def testDumpsReasonablePrecision(self):
s = json_utils.Dumps(1.0, float_digits=2)
self.assertEqual(s, '1.00')
def testDumpsPassExtraParams(self):
s = json_utils.Dumps([1.0], float_digits=2, indent=3)
self.assertEqual(s, '[\n 1.00\n]')
def testDumpsZeroPrecision(self):
s = json_utils.Dumps(1.0, float_digits=0)
self.assertEqual(s, '1')
def testDumpsUnspecifiedPrecision(self):
s = json_utils.Dumps(1.012345)
self.assertEqual(s, '1.012345')
def testPrettyParams(self):
s = json_utils.Dumps({'v': 1.012345, 'n': 2}, **json_utils.PrettyParams())
self.assertEqual(s, '{\n "n": 2,\n "v": 1.0123\n}')
def testPrettyParamsExtraParamsInside(self):
s = json_utils.Dumps(
{'v': 1.012345,
'n': float('nan')}, **json_utils.PrettyParams(allow_nan=True))
self.assertEqual(s, '{\n "n": NaN,\n "v": 1.0123\n}')
with self.assertRaises(ValueError):
s = json_utils.Dumps(
{'v': 1.012345,
'n': float('nan')}, **json_utils.PrettyParams(allow_nan=False))
def testPrettyParamsExtraParamsOutside(self):
s = json_utils.Dumps(
{'v': 1.012345,
'n': float('nan')}, allow_nan=True, **json_utils.PrettyParams())
self.assertEqual(s, '{\n "n": NaN,\n "v": 1.0123\n}')
with self.assertRaises(ValueError):
s = json_utils.Dumps(
{'v': 1.012345,
'n': float('nan')}, allow_nan=False, **json_utils.PrettyParams())
if __name__ == '__main__':
tf.test.main()
......@@ -55,6 +55,18 @@ def create_category_index(categories):
return category_index
def get_max_label_map_index(label_map):
"""Get maximum index in label map.
Args:
label_map: a StringIntLabelMapProto
Returns:
an integer
"""
return max([item.id for item in label_map.item])
def convert_label_map_to_categories(label_map,
max_num_classes,
use_display_name=True):
......
......@@ -170,6 +170,12 @@ class LabelMapUtilTest(tf.test.TestCase):
}]
self.assertListEqual(expected_categories_list, cat_no_offset)
def test_get_max_label_map_index(self):
num_classes = 4
label_map_proto = self._generate_label_map(num_classes=num_classes)
max_index = label_map_util.get_max_label_map_index(label_map_proto)
self.assertEqual(num_classes, max_index)
def test_create_category_index(self):
categories = [{'name': u'1', 'id': 1}, {'name': u'2', 'id': 2}]
category_index = label_map_util.create_category_index(categories)
......
......@@ -21,7 +21,6 @@ Example box operations that are supported:
"""
import numpy as np
from six.moves import xrange
from object_detection.utils import np_box_list
from object_detection.utils import np_box_ops
......@@ -97,7 +96,7 @@ def ioa(boxlist1, boxlist2):
def gather(boxlist, indices, fields=None):
"""Gather boxes from BoxList according to indices and return new BoxList.
By default, Gather returns boxes corresponding to the input index list, as
By default, gather returns boxes corresponding to the input index list, as
well as all additional fields stored in the boxlist (indexing into the
first dimension). However one can optionally only gather from a
subset of fields.
......
......@@ -100,16 +100,16 @@ class AddExtraFieldTest(tf.test.TestCase):
def test_get_extra_fields(self):
boxlist = self.boxlist
self.assertSameElements(boxlist.get_extra_fields(), [])
self.assertItemsEqual(boxlist.get_extra_fields(), [])
scores = np.array([0.5, 0.7, 0.9], dtype=float)
boxlist.add_field('scores', scores)
self.assertSameElements(boxlist.get_extra_fields(), ['scores'])
self.assertItemsEqual(boxlist.get_extra_fields(), ['scores'])
labels = np.array([[0, 0, 0, 1, 0], [0, 1, 0, 0, 0], [0, 0, 0, 0, 1]],
dtype=int)
boxlist.add_field('labels', labels)
self.assertSameElements(boxlist.get_extra_fields(), ['scores', 'labels'])
self.assertItemsEqual(boxlist.get_extra_fields(), ['scores', 'labels'])
def test_get_coordinates(self):
y_min, x_min, y_max, x_max = self.boxlist.get_coordinates()
......
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Numpy BoxMaskList classes and functions."""
import numpy as np
from object_detection.utils import np_box_list
class BoxMaskList(np_box_list.BoxList):
"""Convenience wrapper for BoxList with masks.
BoxMaskList extends the np_box_list.BoxList to contain masks as well.
In particular, its constructor receives both boxes and masks. Note that the
masks correspond to the full image.
"""
def __init__(self, box_data, mask_data):
"""Constructs box collection.
Args:
box_data: a numpy array of shape [N, 4] representing box coordinates
mask_data: a numpy array of shape [N, height, width] representing masks
with values are in {0,1}. The masks correspond to the full
image. The height and the width will be equal to image height and width.
Raises:
ValueError: if bbox data is not a numpy array
ValueError: if invalid dimensions for bbox data
ValueError: if mask data is not a numpy array
ValueError: if invalid dimension for mask data
"""
super(BoxMaskList, self).__init__(box_data)
if not isinstance(mask_data, np.ndarray):
raise ValueError('Mask data must be a numpy array.')
if len(mask_data.shape) != 3:
raise ValueError('Invalid dimensions for mask data.')
if mask_data.dtype != np.uint8:
raise ValueError('Invalid data type for mask data: uint8 is required.')
if mask_data.shape[0] != box_data.shape[0]:
raise ValueError('There should be the same number of boxes and masks.')
self.data['masks'] = mask_data
def get_masks(self):
"""Convenience function for accessing masks.
Returns:
a numpy array of shape [N, height, width] representing masks
"""
return self.get_field('masks')
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Operations for np_box_mask_list.BoxMaskList.
Example box operations that are supported:
* Areas: compute bounding box areas
* IOU: pairwise intersection-over-union scores
"""
import numpy as np
from object_detection.utils import np_box_list_ops
from object_detection.utils import np_box_mask_list
from object_detection.utils import np_mask_ops
def box_list_to_box_mask_list(boxlist):
"""Converts a BoxList containing 'masks' into a BoxMaskList.
Args:
boxlist: An np_box_list.BoxList object.
Returns:
An np_box_mask_list.BoxMaskList object.
Raises:
ValueError: If boxlist does not contain `masks` as a field.
"""
if not boxlist.has_field('masks'):
raise ValueError('boxlist does not contain mask field.')
box_mask_list = np_box_mask_list.BoxMaskList(
box_data=boxlist.get(),
mask_data=boxlist.get_field('masks'))
extra_fields = boxlist.get_extra_fields()
for key in extra_fields:
if key != 'masks':
box_mask_list.data[key] = boxlist.get_field(key)
return box_mask_list
def area(box_mask_list):
"""Computes area of masks.
Args:
box_mask_list: np_box_mask_list.BoxMaskList holding N boxes and masks
Returns:
a numpy array with shape [N*1] representing mask areas
"""
return np_mask_ops.area(box_mask_list.get_masks())
def intersection(box_mask_list1, box_mask_list2):
"""Compute pairwise intersection areas between masks.
Args:
box_mask_list1: BoxMaskList holding N boxes and masks
box_mask_list2: BoxMaskList holding M boxes and masks
Returns:
a numpy array with shape [N*M] representing pairwise intersection area
"""
return np_mask_ops.intersection(box_mask_list1.get_masks(),
box_mask_list2.get_masks())
def iou(box_mask_list1, box_mask_list2):
"""Computes pairwise intersection-over-union between box and mask collections.
Args:
box_mask_list1: BoxMaskList holding N boxes and masks
box_mask_list2: BoxMaskList holding M boxes and masks
Returns:
a numpy array with shape [N, M] representing pairwise iou scores.
"""
return np_mask_ops.iou(box_mask_list1.get_masks(),
box_mask_list2.get_masks())
def ioa(box_mask_list1, box_mask_list2):
"""Computes pairwise intersection-over-area between box and mask collections.
Intersection-over-area (ioa) between two masks mask1 and mask2 is defined as
their intersection area over mask2's area. Note that ioa is not symmetric,
that is, IOA(mask1, mask2) != IOA(mask2, mask1).
Args:
box_mask_list1: np_box_mask_list.BoxMaskList holding N boxes and masks
box_mask_list2: np_box_mask_list.BoxMaskList holding M boxes and masks
Returns:
a numpy array with shape [N, M] representing pairwise ioa scores.
"""
return np_mask_ops.ioa(box_mask_list1.get_masks(), box_mask_list2.get_masks())
def gather(box_mask_list, indices, fields=None):
"""Gather boxes from np_box_mask_list.BoxMaskList according to indices.
By default, gather returns boxes corresponding to the input index list, as
well as all additional fields stored in the box_mask_list (indexing into the
first dimension). However one can optionally only gather from a
subset of fields.
Args:
box_mask_list: np_box_mask_list.BoxMaskList holding N boxes
indices: a 1-d numpy array of type int_
fields: (optional) list of fields to also gather from. If None (default),
all fields are gathered from. Pass an empty fields list to only gather
the box coordinates.
Returns:
subbox_mask_list: a np_box_mask_list.BoxMaskList corresponding to the subset
of the input box_mask_list specified by indices
Raises:
ValueError: if specified field is not contained in box_mask_list or if the
indices are not of type int_
"""
if fields is not None:
if 'masks' not in fields:
fields.append('masks')
return box_list_to_box_mask_list(
np_box_list_ops.gather(
boxlist=box_mask_list, indices=indices, fields=fields))
def sort_by_field(box_mask_list, field,
order=np_box_list_ops.SortOrder.DESCEND):
"""Sort boxes and associated fields according to a scalar field.
A common use case is reordering the boxes according to descending scores.
Args:
box_mask_list: BoxMaskList holding N boxes.
field: A BoxMaskList field for sorting and reordering the BoxMaskList.
order: (Optional) 'descend' or 'ascend'. Default is descend.
Returns:
sorted_box_mask_list: A sorted BoxMaskList with the field in the specified
order.
"""
return box_list_to_box_mask_list(
np_box_list_ops.sort_by_field(
boxlist=box_mask_list, field=field, order=order))
def non_max_suppression(box_mask_list,
max_output_size=10000,
iou_threshold=1.0,
score_threshold=-10.0):
"""Non maximum suppression.
This op greedily selects a subset of detection bounding boxes, pruning
away boxes that have high IOU (intersection over union) overlap (> thresh)
with already selected boxes. In each iteration, the detected bounding box with
highest score in the available pool is selected.
Args:
box_mask_list: np_box_mask_list.BoxMaskList holding N boxes. Must contain
a 'scores' field representing detection scores. All scores belong to the
same class.
max_output_size: maximum number of retained boxes
iou_threshold: intersection over union threshold.
score_threshold: minimum score threshold. Remove the boxes with scores
less than this value. Default value is set to -10. A very
low threshold to pass pretty much all the boxes, unless
the user sets a different score threshold.
Returns:
an np_box_mask_list.BoxMaskList holding M boxes where M <= max_output_size
Raises:
ValueError: if 'scores' field does not exist
ValueError: if threshold is not in [0, 1]
ValueError: if max_output_size < 0
"""
if not box_mask_list.has_field('scores'):
raise ValueError('Field scores does not exist')
if iou_threshold < 0. or iou_threshold > 1.0:
raise ValueError('IOU threshold must be in [0, 1]')
if max_output_size < 0:
raise ValueError('max_output_size must be bigger than 0.')
box_mask_list = filter_scores_greater_than(box_mask_list, score_threshold)
if box_mask_list.num_boxes() == 0:
return box_mask_list
box_mask_list = sort_by_field(box_mask_list, 'scores')
# Prevent further computation if NMS is disabled.
if iou_threshold == 1.0:
if box_mask_list.num_boxes() > max_output_size:
selected_indices = np.arange(max_output_size)
return gather(box_mask_list, selected_indices)
else:
return box_mask_list
masks = box_mask_list.get_masks()
num_masks = box_mask_list.num_boxes()
# is_index_valid is True only for all remaining valid boxes,
is_index_valid = np.full(num_masks, 1, dtype=bool)
selected_indices = []
num_output = 0
for i in xrange(num_masks):
if num_output < max_output_size:
if is_index_valid[i]:
num_output += 1
selected_indices.append(i)
is_index_valid[i] = False
valid_indices = np.where(is_index_valid)[0]
if valid_indices.size == 0:
break
intersect_over_union = np_mask_ops.iou(
np.expand_dims(masks[i], axis=0), masks[valid_indices])
intersect_over_union = np.squeeze(intersect_over_union, axis=0)
is_index_valid[valid_indices] = np.logical_and(
is_index_valid[valid_indices],
intersect_over_union <= iou_threshold)
return gather(box_mask_list, np.array(selected_indices))
def multi_class_non_max_suppression(box_mask_list, score_thresh, iou_thresh,
max_output_size):
"""Multi-class version of non maximum suppression.
This op greedily selects a subset of detection bounding boxes, pruning
away boxes that have high IOU (intersection over union) overlap (> thresh)
with already selected boxes. It operates independently for each class for
which scores are provided (via the scores field of the input box_list),
pruning boxes with score less than a provided threshold prior to
applying NMS.
Args:
box_mask_list: np_box_mask_list.BoxMaskList holding N boxes. Must contain a
'scores' field representing detection scores. This scores field is a
tensor that can be 1 dimensional (in the case of a single class) or
2-dimensional, in which case we assume that it takes the
shape [num_boxes, num_classes]. We further assume that this rank is known
statically and that scores.shape[1] is also known (i.e., the number of
classes is fixed and known at graph construction time).
score_thresh: scalar threshold for score (low scoring boxes are removed).
iou_thresh: scalar threshold for IOU (boxes that that high IOU overlap
with previously selected boxes are removed).
max_output_size: maximum number of retained boxes per class.
Returns:
a box_mask_list holding M boxes with a rank-1 scores field representing
corresponding scores for each box with scores sorted in decreasing order
and a rank-1 classes field representing a class label for each box.
Raises:
ValueError: if iou_thresh is not in [0, 1] or if input box_mask_list does
not have a valid scores field.
"""
if not 0 <= iou_thresh <= 1.0:
raise ValueError('thresh must be between 0 and 1')
if not isinstance(box_mask_list, np_box_mask_list.BoxMaskList):
raise ValueError('box_mask_list must be a box_mask_list')
if not box_mask_list.has_field('scores'):
raise ValueError('input box_mask_list must have \'scores\' field')
scores = box_mask_list.get_field('scores')
if len(scores.shape) == 1:
scores = np.reshape(scores, [-1, 1])
elif len(scores.shape) == 2:
if scores.shape[1] is None:
raise ValueError('scores field must have statically defined second '
'dimension')
else:
raise ValueError('scores field must be of rank 1 or 2')
num_boxes = box_mask_list.num_boxes()
num_scores = scores.shape[0]
num_classes = scores.shape[1]
if num_boxes != num_scores:
raise ValueError('Incorrect scores field length: actual vs expected.')
selected_boxes_list = []
for class_idx in range(num_classes):
box_mask_list_and_class_scores = np_box_mask_list.BoxMaskList(
box_data=box_mask_list.get(),
mask_data=box_mask_list.get_masks())
class_scores = np.reshape(scores[0:num_scores, class_idx], [-1])
box_mask_list_and_class_scores.add_field('scores', class_scores)
box_mask_list_filt = filter_scores_greater_than(
box_mask_list_and_class_scores, score_thresh)
nms_result = non_max_suppression(
box_mask_list_filt,
max_output_size=max_output_size,
iou_threshold=iou_thresh,
score_threshold=score_thresh)
nms_result.add_field(
'classes',
np.zeros_like(nms_result.get_field('scores')) + class_idx)
selected_boxes_list.append(nms_result)
selected_boxes = np_box_list_ops.concatenate(selected_boxes_list)
sorted_boxes = np_box_list_ops.sort_by_field(selected_boxes, 'scores')
return box_list_to_box_mask_list(boxlist=sorted_boxes)
def prune_non_overlapping_masks(box_mask_list1, box_mask_list2, minoverlap=0.0):
"""Prunes the boxes in list1 that overlap less than thresh with list2.
For each mask in box_mask_list1, we want its IOA to be more than minoverlap
with at least one of the masks in box_mask_list2. If it does not, we remove
it. If the masks are not full size image, we do the pruning based on boxes.
Args:
box_mask_list1: np_box_mask_list.BoxMaskList holding N boxes and masks.
box_mask_list2: np_box_mask_list.BoxMaskList holding M boxes and masks.
minoverlap: Minimum required overlap between boxes, to count them as
overlapping.
Returns:
A pruned box_mask_list with size [N', 4].
"""
intersection_over_area = ioa(box_mask_list2, box_mask_list1) # [M, N] tensor
intersection_over_area = np.amax(intersection_over_area, axis=0) # [N] tensor
keep_bool = np.greater_equal(intersection_over_area, np.array(minoverlap))
keep_inds = np.nonzero(keep_bool)[0]
new_box_mask_list1 = gather(box_mask_list1, keep_inds)
return new_box_mask_list1
def concatenate(box_mask_lists, fields=None):
"""Concatenate list of box_mask_lists.
This op concatenates a list of input box_mask_lists into a larger
box_mask_list. It also
handles concatenation of box_mask_list fields as long as the field tensor
shapes are equal except for the first dimension.
Args:
box_mask_lists: list of np_box_mask_list.BoxMaskList objects
fields: optional list of fields to also concatenate. By default, all
fields from the first BoxMaskList in the list are included in the
concatenation.
Returns:
a box_mask_list with number of boxes equal to
sum([box_mask_list.num_boxes() for box_mask_list in box_mask_list])
Raises:
ValueError: if box_mask_lists is invalid (i.e., is not a list, is empty, or
contains non box_mask_list objects), or if requested fields are not
contained in all box_mask_lists
"""
if fields is not None:
if 'masks' not in fields:
fields.append('masks')
return box_list_to_box_mask_list(
np_box_list_ops.concatenate(boxlists=box_mask_lists, fields=fields))
def filter_scores_greater_than(box_mask_list, thresh):
"""Filter to keep only boxes and masks with score exceeding a given threshold.
This op keeps the collection of boxes and masks whose corresponding scores are
greater than the input threshold.
Args:
box_mask_list: BoxMaskList holding N boxes and masks. Must contain a
'scores' field representing detection scores.
thresh: scalar threshold
Returns:
a BoxMaskList holding M boxes and masks where M <= N
Raises:
ValueError: if box_mask_list not a np_box_mask_list.BoxMaskList object or
if it does not have a scores field
"""
if not isinstance(box_mask_list, np_box_mask_list.BoxMaskList):
raise ValueError('box_mask_list must be a BoxMaskList')
if not box_mask_list.has_field('scores'):
raise ValueError('input box_mask_list must have \'scores\' field')
scores = box_mask_list.get_field('scores')
if len(scores.shape) > 2:
raise ValueError('Scores should have rank 1 or 2')
if len(scores.shape) == 2 and scores.shape[1] != 1:
raise ValueError('Scores should have rank 1 or have shape '
'consistent with [None, 1]')
high_score_indices = np.reshape(np.where(np.greater(scores, thresh)),
[-1]).astype(np.int32)
return gather(box_mask_list, high_score_indices)
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.utils.np_box_mask_list_ops."""
import numpy as np
import tensorflow as tf
from object_detection.utils import np_box_mask_list
from object_detection.utils import np_box_mask_list_ops
class AreaRelatedTest(tf.test.TestCase):
def setUp(self):
boxes1 = np.array([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]],
dtype=float)
masks1_0 = np.array([[0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0],
[1, 1, 1, 1, 0, 0, 0, 0],
[1, 1, 1, 1, 0, 0, 0, 0]],
dtype=np.uint8)
masks1_1 = np.array([[1, 1, 1, 1, 1, 1, 1, 1],
[1, 1, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0]],
dtype=np.uint8)
masks1 = np.stack([masks1_0, masks1_1])
boxes2 = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
[0.0, 0.0, 20.0, 20.0]],
dtype=float)
masks2_0 = np.array([[0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0],
[1, 1, 1, 1, 0, 0, 0, 0],
[1, 1, 1, 1, 0, 0, 0, 0]],
dtype=np.uint8)
masks2_1 = np.array([[1, 1, 1, 1, 1, 1, 1, 0],
[1, 1, 1, 1, 1, 0, 0, 0],
[1, 1, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0]],
dtype=np.uint8)
masks2_2 = np.array([[1, 1, 1, 1, 1, 0, 0, 0],
[1, 1, 1, 1, 1, 0, 0, 0],
[1, 1, 1, 1, 1, 0, 0, 0],
[1, 1, 1, 1, 1, 0, 0, 0],
[1, 1, 1, 1, 1, 0, 0, 0]],
dtype=np.uint8)
masks2 = np.stack([masks2_0, masks2_1, masks2_2])
self.box_mask_list1 = np_box_mask_list.BoxMaskList(
box_data=boxes1, mask_data=masks1)
self.box_mask_list2 = np_box_mask_list.BoxMaskList(
box_data=boxes2, mask_data=masks2)
def test_area(self):
areas = np_box_mask_list_ops.area(self.box_mask_list1)
expected_areas = np.array([8.0, 10.0], dtype=float)
self.assertAllClose(expected_areas, areas)
def test_intersection(self):
intersection = np_box_mask_list_ops.intersection(self.box_mask_list1,
self.box_mask_list2)
expected_intersection = np.array([[8.0, 0.0, 8.0], [0.0, 9.0, 7.0]],
dtype=float)
self.assertAllClose(intersection, expected_intersection)
def test_iou(self):
iou = np_box_mask_list_ops.iou(self.box_mask_list1, self.box_mask_list2)
expected_iou = np.array(
[[1.0, 0.0, 8.0 / 25.0], [0.0, 9.0 / 16.0, 7.0 / 28.0]], dtype=float)
self.assertAllClose(iou, expected_iou)
def test_ioa(self):
ioa21 = np_box_mask_list_ops.ioa(self.box_mask_list1, self.box_mask_list2)
expected_ioa21 = np.array([[1.0, 0.0, 8.0/25.0],
[0.0, 9.0/15.0, 7.0/25.0]],
dtype=np.float32)
self.assertAllClose(ioa21, expected_ioa21)
class NonMaximumSuppressionTest(tf.test.TestCase):
def setUp(self):
boxes1 = np.array(
[[4.0, 3.0, 7.0, 6.0], [5.0, 6.0, 10.0, 10.0]], dtype=float)
boxes2 = np.array(
[[3.0, 4.0, 6.0, 8.0], [5.0, 6.0, 10.0, 10.0], [1.0, 1.0, 10.0, 10.0]],
dtype=float)
masks1 = np.array(
[[[0, 1, 0], [1, 1, 0], [0, 0, 0]], [[0, 1, 1], [0, 1, 1], [0, 1, 1]]],
dtype=np.uint8)
masks2 = np.array(
[[[0, 1, 0], [1, 1, 1], [0, 0, 0]], [[0, 1, 0], [0, 0, 1], [0, 1, 1]],
[[0, 1, 1], [0, 1, 1], [0, 1, 1]]],
dtype=np.uint8)
self.boxes1 = boxes1
self.boxes2 = boxes2
self.masks1 = masks1
self.masks2 = masks2
def test_with_no_scores_field(self):
box_mask_list = np_box_mask_list.BoxMaskList(
box_data=self.boxes1, mask_data=self.masks1)
max_output_size = 3
iou_threshold = 0.5
with self.assertRaises(ValueError):
np_box_mask_list_ops.non_max_suppression(
box_mask_list, max_output_size, iou_threshold)
def test_nms_disabled_max_output_size_equals_one(self):
box_mask_list = np_box_mask_list.BoxMaskList(
box_data=self.boxes2, mask_data=self.masks2)
box_mask_list.add_field('scores',
np.array([.9, .75, .6], dtype=float))
max_output_size = 1
iou_threshold = 1. # No NMS
expected_boxes = np.array([[3.0, 4.0, 6.0, 8.0]], dtype=float)
expected_masks = np.array(
[[[0, 1, 0], [1, 1, 1], [0, 0, 0]]], dtype=np.uint8)
nms_box_mask_list = np_box_mask_list_ops.non_max_suppression(
box_mask_list, max_output_size, iou_threshold)
self.assertAllClose(nms_box_mask_list.get(), expected_boxes)
self.assertAllClose(nms_box_mask_list.get_masks(), expected_masks)
def test_multiclass_nms(self):
boxes = np.array(
[[0.2, 0.4, 0.8, 0.8], [0.4, 0.2, 0.8, 0.8], [0.6, 0.0, 1.0, 1.0]],
dtype=np.float32)
mask0 = np.array([[0, 0, 0, 0, 0],
[0, 0, 1, 1, 0],
[0, 0, 1, 1, 0],
[0, 0, 1, 1, 0],
[0, 0, 0, 0, 0]],
dtype=np.uint8)
mask1 = np.array([[0, 0, 0, 0, 0],
[0, 0, 0, 0, 0],
[0, 1, 1, 1, 0],
[0, 1, 1, 1, 0],
[0, 0, 0, 0, 0]],
dtype=np.uint8)
mask2 = np.array([[0, 0, 0, 0, 0],
[0, 0, 0, 0, 0],
[0, 0, 0, 0, 0],
[1, 1, 1, 1, 1],
[1, 1, 1, 1, 1]],
dtype=np.uint8)
masks = np.stack([mask0, mask1, mask2])
box_mask_list = np_box_mask_list.BoxMaskList(
box_data=boxes, mask_data=masks)
scores = np.array([[-0.2, 0.1, 0.5, -0.4, 0.3],
[0.7, -0.7, 0.6, 0.2, -0.9],
[0.4, 0.34, -0.9, 0.2, 0.31]],
dtype=np.float32)
box_mask_list.add_field('scores', scores)
box_mask_list_clean = np_box_mask_list_ops.multi_class_non_max_suppression(
box_mask_list, score_thresh=0.25, iou_thresh=0.1, max_output_size=3)
scores_clean = box_mask_list_clean.get_field('scores')
classes_clean = box_mask_list_clean.get_field('classes')
boxes = box_mask_list_clean.get()
masks = box_mask_list_clean.get_masks()
expected_scores = np.array([0.7, 0.6, 0.34, 0.31])
expected_classes = np.array([0, 2, 1, 4])
expected_boxes = np.array([[0.4, 0.2, 0.8, 0.8],
[0.4, 0.2, 0.8, 0.8],
[0.6, 0.0, 1.0, 1.0],
[0.6, 0.0, 1.0, 1.0]],
dtype=np.float32)
self.assertAllClose(scores_clean, expected_scores)
self.assertAllClose(classes_clean, expected_classes)
self.assertAllClose(boxes, expected_boxes)
if __name__ == '__main__':
tf.test.main()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment