"vscode:/vscode.git/clone" did not exist on "c032280b17840c1c03994fa5ac498029ecb4c3f6"
Commit dc78c085 authored by Vivek Rathod's avatar Vivek Rathod
Browse files

update changes in the utils directory.

parent edcd29f2
@@ -14,6 +14,20 @@ py_library(
    deps = ["//tensorflow"],
)
py_library(
    name = "config_util",
    srcs = ["config_util.py"],
    deps = [
        "//pyglib/logging",
        "//tensorflow",
        "//tensorflow_models/object_detection/protos:eval_py_pb2",
        "//tensorflow_models/object_detection/protos:input_reader_py_pb2",
        "//tensorflow_models/object_detection/protos:model_py_pb2",
        "//tensorflow_models/object_detection/protos:pipeline_py_pb2",
        "//tensorflow_models/object_detection/protos:train_py_pb2",
    ],
)
py_library(
    name = "dataset_util",
    srcs = ["dataset_util.py"],
@@ -35,7 +49,9 @@ py_library(
py_library(
    name = "learning_schedules",
    srcs = ["learning_schedules.py"],
    deps = [
        "//tensorflow",
    ],
)
py_library(
@@ -70,9 +86,11 @@ py_library(
    name = "object_detection_evaluation",
    srcs = ["object_detection_evaluation.py"],
    deps = [
        ":label_map_util",
        ":metrics",
        ":per_image_evaluation",
        "//tensorflow",
        "//tensorflow_models/object_detection/core:standard_fields",
    ],
)
@@ -120,7 +138,7 @@ py_library(
        "//tensorflow_models/object_detection/core:box_list",
        "//tensorflow_models/object_detection/core:box_predictor",
        "//tensorflow_models/object_detection/core:matcher",
        "//tensorflow_models/object_detection/utils:shape_utils",
    ],
)
@@ -137,6 +155,8 @@ py_library(
    srcs = ["visualization_utils.py"],
    deps = [
        "//third_party/py/PIL:pil",
        "//third_party/py/matplotlib",
        "//third_party/py/six",
        "//tensorflow",
    ],
)
@@ -150,6 +170,19 @@ py_test(
    ],
)
py_test(
    name = "config_util_test",
    srcs = ["config_util_test.py"],
    deps = [
        ":config_util",
        "//tensorflow:tensorflow_google",
        "//tensorflow_models/object_detection/protos:input_reader_py_pb2",
        "//tensorflow_models/object_detection/protos:model_py_pb2",
        "//tensorflow_models/object_detection/protos:pipeline_py_pb2",
        "//tensorflow_models/object_detection/protos:train_py_pb2",
    ],
)
py_test(
    name = "dataset_util_test",
    srcs = ["dataset_util_test.py"],
@@ -220,6 +253,7 @@ py_test(
    deps = [
        ":object_detection_evaluation",
        "//tensorflow",
        "//tensorflow_models/object_detection/core:standard_fields",
    ],
)
@@ -281,8 +315,12 @@ py_test(
py_test(
    name = "visualization_utils_test",
    srcs = ["visualization_utils_test.py"],
    data = [
        "//tensorflow_models/object_detection/test_images:image1.jpg",
    ],
    deps = [
        ":visualization_utils",
        "//pyglib/flags",
        "//third_party/py/PIL:pil",
    ],
)
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Functions for reading and updating configuration files."""
import tensorflow as tf
from google.protobuf import text_format
from object_detection.protos import eval_pb2
from object_detection.protos import input_reader_pb2
from object_detection.protos import model_pb2
from object_detection.protos import pipeline_pb2
from object_detection.protos import train_pb2
def get_configs_from_pipeline_file(pipeline_config_path):
"""Reads configuration from a pipeline_pb2.TrainEvalPipelineConfig.
Args:
pipeline_config_path: Path to pipeline_pb2.TrainEvalPipelineConfig text
proto.
Returns:
Dictionary of configuration objects. Keys are `model`, `train_config`,
`train_input_config`, `eval_config`, `eval_input_config`. Value are the
corresponding config objects.
"""
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
with tf.gfile.GFile(pipeline_config_path, "r") as f:
proto_str = f.read()
text_format.Merge(proto_str, pipeline_config)
configs = {}
configs["model"] = pipeline_config.model
configs["train_config"] = pipeline_config.train_config
configs["train_input_config"] = pipeline_config.train_input_reader
configs["eval_config"] = pipeline_config.eval_config
configs["eval_input_config"] = pipeline_config.eval_input_reader
return configs
def create_pipeline_proto_from_configs(configs):
"""Creates a pipeline_pb2.TrainEvalPipelineConfig from configs dictionary.
This function nearly performs the inverse operation of
get_configs_from_pipeline_file(). Instead of returning a file path, it returns
a `TrainEvalPipelineConfig` object.
Args:
configs: Dictionary of configs. See get_configs_from_pipeline_file().
Returns:
A fully populated pipeline_pb2.TrainEvalPipelineConfig.
"""
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
pipeline_config.model.CopyFrom(configs["model"])
pipeline_config.train_config.CopyFrom(configs["train_config"])
pipeline_config.train_input_reader.CopyFrom(configs["train_input_config"])
pipeline_config.eval_config.CopyFrom(configs["eval_config"])
pipeline_config.eval_input_reader.CopyFrom(configs["eval_input_config"])
return pipeline_config
def get_configs_from_multiple_files(model_config_path="",
train_config_path="",
train_input_config_path="",
eval_config_path="",
eval_input_config_path=""):
"""Reads training configuration from multiple config files.
Args:
model_config_path: Path to model_pb2.DetectionModel.
train_config_path: Path to train_pb2.TrainConfig.
train_input_config_path: Path to input_reader_pb2.InputReader.
eval_config_path: Path to eval_pb2.EvalConfig.
eval_input_config_path: Path to input_reader_pb2.InputReader.
Returns:
Dictionary of configuration objects. Keys are `model`, `train_config`,
`train_input_config`, `eval_config`, `eval_input_config`. Key/Values are
returned only for valid (non-empty) strings.
"""
configs = {}
if model_config_path:
model_config = model_pb2.DetectionModel()
with tf.gfile.GFile(model_config_path, "r") as f:
text_format.Merge(f.read(), model_config)
configs["model"] = model_config
if train_config_path:
train_config = train_pb2.TrainConfig()
with tf.gfile.GFile(train_config_path, "r") as f:
text_format.Merge(f.read(), train_config)
configs["train_config"] = train_config
if train_input_config_path:
train_input_config = input_reader_pb2.InputReader()
with tf.gfile.GFile(train_input_config_path, "r") as f:
text_format.Merge(f.read(), train_input_config)
configs["train_input_config"] = train_input_config
if eval_config_path:
eval_config = eval_pb2.EvalConfig()
with tf.gfile.GFile(eval_config_path, "r") as f:
text_format.Merge(f.read(), eval_config)
configs["eval_config"] = eval_config
if eval_input_config_path:
eval_input_config = input_reader_pb2.InputReader()
with tf.gfile.GFile(eval_input_config_path, "r") as f:
text_format.Merge(f.read(), eval_input_config)
configs["eval_input_config"] = eval_input_config
return configs
def get_number_of_classes(model_config):
"""Returns the number of classes for a detection model.
Args:
model_config: A model_pb2.DetectionModel.
Returns:
Number of classes.
Raises:
ValueError: If the model type is not recognized.
"""
meta_architecture = model_config.WhichOneof("model")
if meta_architecture == "faster_rcnn":
return model_config.faster_rcnn.num_classes
if meta_architecture == "ssd":
return model_config.ssd.num_classes
raise ValueError("Expected the model to be one of 'faster_rcnn' or 'ssd'.")
def get_optimizer_type(train_config):
"""Returns the optimizer type for training.
Args:
train_config: A train_pb2.TrainConfig.
Returns:
The type of the optimizer
"""
return train_config.optimizer.WhichOneof("optimizer")
def get_learning_rate_type(optimizer_config):
"""Returns the learning rate type for training.
Args:
optimizer_config: An optimizer_pb2.Optimizer.
Returns:
The type of the learning rate.
"""
return optimizer_config.learning_rate.WhichOneof("learning_rate")
def merge_external_params_with_configs(configs, hparams=None, **kwargs):
"""Updates `configs` dictionary based on supplied parameters.
This utility is for modifying specific fields in the object detection configs.
Say that one would like to experiment with different learning rates, momentum
values, or batch sizes. Rather than creating a new config text file for each
experiment, one can use a single base config file, and update particular
values.
Args:
configs: Dictionary of configuration objects. See outputs from
get_configs_from_pipeline_file() or get_configs_from_multiple_files().
hparams: A `HParams`.
**kwargs: Extra keyword arguments that are treated the same way as
attribute/value pairs in `hparams`. Note that hyperparameters with the
same names will override keyword arguments.
Returns:
`configs` dictionary.
"""
if hparams:
kwargs.update(hparams.values())
for key, value in kwargs.iteritems():
if key == "learning_rate":
_update_initial_learning_rate(configs, value)
tf.logging.info("Overwriting learning rate: %f", value)
if key == "batch_size":
_update_batch_size(configs, value)
tf.logging.info("Overwriting batch size: %d", value)
if key == "momentum_optimizer_value":
_update_momentum_optimizer_value(configs, value)
tf.logging.info("Overwriting momentum optimizer value: %f", value)
if key == "classification_localization_weight_ratio":
# Localization weight is fixed to 1.0.
_update_classification_localization_weight_ratio(configs, value)
if key == "focal_loss_gamma":
_update_focal_loss_gamma(configs, value)
if key == "focal_loss_alpha":
_update_focal_loss_alpha(configs, value)
if key == "train_steps":
_update_train_steps(configs, value)
tf.logging.info("Overwriting train steps: %d", value)
if key == "eval_steps":
_update_eval_steps(configs, value)
tf.logging.info("Overwriting eval steps: %d", value)
if key == "train_input_path":
_update_input_path(configs["train_input_config"], value)
tf.logging.info("Overwriting train input path: %s", value)
if key == "eval_input_path":
_update_input_path(configs["eval_input_config"], value)
tf.logging.info("Overwriting eval input path: %s", value)
if key == "label_map_path":
if value:
_update_label_map_path(configs, value)
tf.logging.info("Overwriting label map path: %s", value)
return configs
def _update_initial_learning_rate(configs, learning_rate):
"""Updates `configs` to reflect the new initial learning rate.
The configs dictionary is updated in place, and hence not returned.
Args:
configs: Dictionary of configuration objects. See outputs from
get_configs_from_pipeline_file() or get_configs_from_multiple_files().
learning_rate: Initial learning rate for optimizer.
Raises:
TypeError: if optimizer type is not supported, or if learning rate type is
not supported.
"""
optimizer_type = get_optimizer_type(configs["train_config"])
if optimizer_type == "rms_prop_optimizer":
optimizer_config = configs["train_config"].optimizer.rms_prop_optimizer
elif optimizer_type == "momentum_optimizer":
optimizer_config = configs["train_config"].optimizer.momentum_optimizer
elif optimizer_type == "adam_optimizer":
optimizer_config = configs["train_config"].optimizer.adam_optimizer
else:
raise TypeError("Optimizer %s is not supported." % optimizer_type)
learning_rate_type = get_learning_rate_type(optimizer_config)
if learning_rate_type == "constant_learning_rate":
constant_lr = optimizer_config.learning_rate.constant_learning_rate
constant_lr.learning_rate = learning_rate
elif learning_rate_type == "exponential_decay_learning_rate":
exponential_lr = (
optimizer_config.learning_rate.exponential_decay_learning_rate)
exponential_lr.initial_learning_rate = learning_rate
elif learning_rate_type == "manual_step_learning_rate":
manual_lr = optimizer_config.learning_rate.manual_step_learning_rate
original_learning_rate = manual_lr.initial_learning_rate
learning_rate_scaling = float(learning_rate) / original_learning_rate
manual_lr.initial_learning_rate = learning_rate
for schedule in manual_lr.schedule:
schedule.learning_rate *= learning_rate_scaling
else:
raise TypeError("Learning rate %s is not supported." % learning_rate_type)
def _update_batch_size(configs, batch_size):
"""Updates `configs` to reflect the new training batch size.
The configs dictionary is updated in place, and hence not returned.
Args:
configs: Dictionary of configuration objects. See outputs from
get_configs_from_pipeline_file() or get_configs_from_multiple_files().
batch_size: Batch size to use for training (Ideally a power of 2). Inputs
are rounded, and capped to be 1 or greater.
"""
configs["train_config"].batch_size = max(1, int(round(batch_size)))
def _update_momentum_optimizer_value(configs, momentum):
"""Updates `configs` to reflect the new momentum value.
Momentum is only supported for RMSPropOptimizer and MomentumOptimizer. For any
other optimizer, no changes take place. The configs dictionary is updated in
place, and hence not returned.
Args:
configs: Dictionary of configuration objects. See outputs from
get_configs_from_pipeline_file() or get_configs_from_multiple_files().
momentum: New momentum value. Values are clipped at 0.0 and 1.0.
Raises:
TypeError: If the optimizer type is not `rms_prop_optimizer` or
`momentum_optimizer`.
"""
optimizer_type = get_optimizer_type(configs["train_config"])
if optimizer_type == "rms_prop_optimizer":
optimizer_config = configs["train_config"].optimizer.rms_prop_optimizer
elif optimizer_type == "momentum_optimizer":
optimizer_config = configs["train_config"].optimizer.momentum_optimizer
else:
raise TypeError("Optimizer type must be one of `rms_prop_optimizer` or "
"`momentum_optimizer`.")
optimizer_config.momentum_optimizer_value = min(max(0.0, momentum), 1.0)
def _update_classification_localization_weight_ratio(configs, ratio):
"""Updates the classification/localization weight loss ratio.
Detection models usually define a loss weight for both classification and
objectness. This function updates the weights such that the ratio between
classification weight to localization weight is the ratio provided.
Arbitrarily, localization weight is set to 1.0.
Note that in the case of Faster R-CNN, this same ratio is applied to the first
stage objectness loss weight relative to localization loss weight.
The configs dictionary is updated in place, and hence not returned.
Args:
configs: Dictionary of configuration objects. See outputs from
get_configs_from_pipeline_file() or get_configs_from_multiple_files().
ratio: Desired ratio of classification (and/or objectness) loss weight to
localization loss weight.
"""
meta_architecture = configs["model"].WhichOneof("model")
if meta_architecture == "faster_rcnn":
model = configs["model"].faster_rcnn
model.first_stage_localization_loss_weight = 1.0
model.first_stage_objectness_loss_weight = ratio
model.second_stage_localization_loss_weight = 1.0
model.second_stage_classification_loss_weight = ratio
if meta_architecture == "ssd":
model = configs["model"].ssd
model.loss.localization_weight = 1.0
model.loss.classification_weight = ratio
def _get_classification_loss(model_config):
"""Returns the classification loss for a model."""
meta_architecture = model_config.WhichOneof("model")
if meta_architecture == "faster_rcnn":
model = model_config.faster_rcnn
classification_loss = model.second_stage_classification_loss
elif meta_architecture == "ssd":
model = model_config.ssd
classification_loss = model.loss.classification_loss
else:
raise TypeError("Did not recognize the model architecture.")
return classification_loss
def _update_focal_loss_gamma(configs, gamma):
"""Updates the gamma value for a sigmoid focal loss.
The configs dictionary is updated in place, and hence not returned.
Args:
configs: Dictionary of configuration objects. See outputs from
get_configs_from_pipeline_file() or get_configs_from_multiple_files().
gamma: Exponent term in focal loss.
Raises:
TypeError: If the classification loss is not `weighted_sigmoid_focal`.
"""
classification_loss = _get_classification_loss(configs["model"])
classification_loss_type = classification_loss.WhichOneof(
"classification_loss")
if classification_loss_type != "weighted_sigmoid_focal":
raise TypeError("Classification loss must be `weighted_sigmoid_focal`.")
classification_loss.weighted_sigmoid_focal.gamma = gamma
def _update_focal_loss_alpha(configs, alpha):
"""Updates the alpha value for a sigmoid focal loss.
The configs dictionary is updated in place, and hence not returned.
Args:
configs: Dictionary of configuration objects. See outputs from
get_configs_from_pipeline_file() or get_configs_from_multiple_files().
alpha: Class weight multiplier for sigmoid loss.
Raises:
TypeError: If the classification loss is not `weighted_sigmoid_focal`.
"""
classification_loss = _get_classification_loss(configs["model"])
classification_loss_type = classification_loss.WhichOneof(
"classification_loss")
if classification_loss_type != "weighted_sigmoid_focal":
raise TypeError("Classification loss must be `weighted_sigmoid_focal`.")
classification_loss.weighted_sigmoid_focal.alpha = alpha
def _update_train_steps(configs, train_steps):
"""Updates `configs` to reflect new number of training steps."""
configs["train_config"].num_steps = int(train_steps)
def _update_eval_steps(configs, eval_steps):
"""Updates `configs` to reflect new number of eval steps per evaluation."""
configs["eval_config"].num_examples = int(eval_steps)
def _update_input_path(input_config, input_path):
"""Updates input configuration to reflect a new input path.
The input_config object is updated in place, and hence not returned.
Args:
input_config: A input_reader_pb2.InputReader.
input_path: A path to data or list of paths.
Raises:
TypeError: if input reader type is not `tf_record_input_reader`.
"""
input_reader_type = input_config.WhichOneof("input_reader")
if input_reader_type == "tf_record_input_reader":
input_config.tf_record_input_reader.ClearField("input_path")
if isinstance(input_path, list):
input_config.tf_record_input_reader.input_path.extend(input_path)
else:
input_config.tf_record_input_reader.input_path.append(input_path)
else:
raise TypeError("Input reader type must be `tf_record_input_reader`.")
def _update_label_map_path(configs, label_map_path):
"""Updates the label map path for both train and eval input readers.
The configs dictionary is updated in place, and hence not returned.
Args:
configs: Dictionary of configuration objects. See outputs from
get_configs_from_pipeline_file() or get_configs_from_multiple_files().
label_map_path: New path to `StringIntLabelMap` pbtxt file.
"""
configs["train_input_config"].label_map_path = label_map_path
configs["eval_input_config"].label_map_path = label_map_path
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.utils.config_util."""
import os
import google3
import tensorflow.google as tf
from google.protobuf import text_format
from object_detection.protos import eval_pb2
from object_detection.protos import input_reader_pb2
from object_detection.protos import model_pb2
from object_detection.protos import pipeline_pb2
from object_detection.protos import train_pb2
from object_detection.utils import config_util
def _write_config(config, config_path):
"""Writes a config object to disk."""
config_text = text_format.MessageToString(config)
with tf.gfile.Open(config_path, "wb") as f:
f.write(config_text)
def _update_optimizer_with_constant_learning_rate(optimizer, learning_rate):
"""Adds a new constant learning rate."""
constant_lr = optimizer.learning_rate.constant_learning_rate
constant_lr.learning_rate = learning_rate
def _update_optimizer_with_exponential_decay_learning_rate(
optimizer, learning_rate):
"""Adds a new exponential decay learning rate."""
exponential_lr = optimizer.learning_rate.exponential_decay_learning_rate
exponential_lr.initial_learning_rate = learning_rate
def _update_optimizer_with_manual_step_learning_rate(
optimizer, initial_learning_rate, learning_rate_scaling):
"""Adds a learning rate schedule."""
manual_lr = optimizer.learning_rate.manual_step_learning_rate
manual_lr.initial_learning_rate = initial_learning_rate
for i in range(3):
schedule = manual_lr.schedule.add()
schedule.learning_rate = initial_learning_rate * learning_rate_scaling**i
class ConfigUtilTest(tf.test.TestCase):
def test_get_configs_from_pipeline_file(self):
"""Test that proto configs can be read from pipeline config file."""
pipeline_config_path = os.path.join(self.get_temp_dir(), "pipeline.config")
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
pipeline_config.model.faster_rcnn.num_classes = 10
pipeline_config.train_config.batch_size = 32
pipeline_config.train_input_reader.label_map_path = "path/to/label_map"
pipeline_config.eval_config.num_examples = 20
pipeline_config.eval_input_reader.queue_capacity = 100
_write_config(pipeline_config, pipeline_config_path)
configs = config_util.get_configs_from_pipeline_file(pipeline_config_path)
self.assertProtoEquals(pipeline_config.model, configs["model"])
self.assertProtoEquals(pipeline_config.train_config,
configs["train_config"])
self.assertProtoEquals(pipeline_config.train_input_reader,
configs["train_input_config"])
self.assertProtoEquals(pipeline_config.eval_config,
configs["eval_config"])
self.assertProtoEquals(pipeline_config.eval_input_reader,
configs["eval_input_config"])
def test_create_pipeline_proto_from_configs(self):
"""Tests that proto can be reconstructed from configs dictionary."""
pipeline_config_path = os.path.join(self.get_temp_dir(), "pipeline.config")
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
pipeline_config.model.faster_rcnn.num_classes = 10
pipeline_config.train_config.batch_size = 32
pipeline_config.train_input_reader.label_map_path = "path/to/label_map"
pipeline_config.eval_config.num_examples = 20
pipeline_config.eval_input_reader.queue_capacity = 100
_write_config(pipeline_config, pipeline_config_path)
configs = config_util.get_configs_from_pipeline_file(pipeline_config_path)
pipeline_config_reconstructed = (
config_util.create_pipeline_proto_from_configs(configs))
self.assertEqual(pipeline_config, pipeline_config_reconstructed)
def test_get_configs_from_multiple_files(self):
"""Tests that proto configs can be read from multiple files."""
temp_dir = self.get_temp_dir()
# Write model config file.
model_config_path = os.path.join(temp_dir, "model.config")
model = model_pb2.DetectionModel()
model.faster_rcnn.num_classes = 10
_write_config(model, model_config_path)
# Write train config file.
train_config_path = os.path.join(temp_dir, "train.config")
train_config = train_pb2.TrainConfig()
train_config.batch_size = 32
_write_config(train_config, train_config_path)
# Write train input config file.
train_input_config_path = os.path.join(temp_dir, "train_input.config")
train_input_config = input_reader_pb2.InputReader()
train_input_config.label_map_path = "path/to/label_map"
_write_config(train_input_config, train_input_config_path)
# Write eval config file.
eval_config_path = os.path.join(temp_dir, "eval.config")
eval_config = eval_pb2.EvalConfig()
eval_config.num_examples = 20
_write_config(eval_config, eval_config_path)
# Write eval input config file.
eval_input_config_path = os.path.join(temp_dir, "eval_input.config")
eval_input_config = input_reader_pb2.InputReader()
eval_input_config.label_map_path = "path/to/another/label_map"
_write_config(eval_input_config, eval_input_config_path)
configs = config_util.get_configs_from_multiple_files(
model_config_path=model_config_path,
train_config_path=train_config_path,
train_input_config_path=train_input_config_path,
eval_config_path=eval_config_path,
eval_input_config_path=eval_input_config_path)
self.assertProtoEquals(model, configs["model"])
self.assertProtoEquals(train_config, configs["train_config"])
self.assertProtoEquals(train_input_config,
configs["train_input_config"])
self.assertProtoEquals(eval_config, configs["eval_config"])
self.assertProtoEquals(eval_input_config,
configs["eval_input_config"])
def _assertOptimizerWithNewLearningRate(self, optimizer_name):
"""Asserts successful updating of all learning rate schemes."""
original_learning_rate = 0.7
learning_rate_scaling = 0.1
hparams = tf.HParams(learning_rate=0.15)
pipeline_config_path = os.path.join(self.get_temp_dir(), "pipeline.config")
# Constant learning rate.
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
optimizer = getattr(pipeline_config.train_config.optimizer, optimizer_name)
_update_optimizer_with_constant_learning_rate(optimizer,
original_learning_rate)
_write_config(pipeline_config, pipeline_config_path)
configs = config_util.get_configs_from_pipeline_file(pipeline_config_path)
configs = config_util.merge_external_params_with_configs(configs, hparams)
optimizer = getattr(configs["train_config"].optimizer, optimizer_name)
constant_lr = optimizer.learning_rate.constant_learning_rate
self.assertAlmostEqual(hparams.learning_rate, constant_lr.learning_rate)
# Exponential decay learning rate.
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
optimizer = getattr(pipeline_config.train_config.optimizer, optimizer_name)
_update_optimizer_with_exponential_decay_learning_rate(
optimizer, original_learning_rate)
_write_config(pipeline_config, pipeline_config_path)
configs = config_util.get_configs_from_pipeline_file(pipeline_config_path)
configs = config_util.merge_external_params_with_configs(configs, hparams)
optimizer = getattr(configs["train_config"].optimizer, optimizer_name)
exponential_lr = optimizer.learning_rate.exponential_decay_learning_rate
self.assertAlmostEqual(hparams.learning_rate,
exponential_lr.initial_learning_rate)
# Manual step learning rate.
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
optimizer = getattr(pipeline_config.train_config.optimizer, optimizer_name)
_update_optimizer_with_manual_step_learning_rate(
optimizer, original_learning_rate, learning_rate_scaling)
_write_config(pipeline_config, pipeline_config_path)
configs = config_util.get_configs_from_pipeline_file(pipeline_config_path)
configs = config_util.merge_external_params_with_configs(configs, hparams)
optimizer = getattr(configs["train_config"].optimizer, optimizer_name)
manual_lr = optimizer.learning_rate.manual_step_learning_rate
self.assertAlmostEqual(hparams.learning_rate,
manual_lr.initial_learning_rate)
for i, schedule in enumerate(manual_lr.schedule):
self.assertAlmostEqual(hparams.learning_rate * learning_rate_scaling**i,
schedule.learning_rate)
def testRMSPropWithNewLearningRate(self):
"""Tests new learning rates for RMSProp Optimizer."""
self._assertOptimizerWithNewLearningRate("rms_prop_optimizer")
def testMomentumOptimizerWithNewLearningRate(self):
"""Tests new learning rates for Momentum Optimizer."""
self._assertOptimizerWithNewLearningRate("momentum_optimizer")
def testAdamOptimizerWithNewLearningRate(self):
"""Tests new learning rates for Adam Optimizer."""
self._assertOptimizerWithNewLearningRate("adam_optimizer")
def testNewBatchSize(self):
"""Tests that batch size is updated appropriately."""
original_batch_size = 2
hparams = tf.HParams(batch_size=16)
pipeline_config_path = os.path.join(self.get_temp_dir(), "pipeline.config")
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
pipeline_config.train_config.batch_size = original_batch_size
_write_config(pipeline_config, pipeline_config_path)
configs = config_util.get_configs_from_pipeline_file(pipeline_config_path)
configs = config_util.merge_external_params_with_configs(configs, hparams)
new_batch_size = configs["train_config"].batch_size
self.assertEqual(16, new_batch_size)
def testNewBatchSizeWithClipping(self):
"""Tests that batch size is clipped to 1 from below."""
original_batch_size = 2
hparams = tf.HParams(batch_size=0.5)
pipeline_config_path = os.path.join(self.get_temp_dir(), "pipeline.config")
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
pipeline_config.train_config.batch_size = original_batch_size
_write_config(pipeline_config, pipeline_config_path)
configs = config_util.get_configs_from_pipeline_file(pipeline_config_path)
configs = config_util.merge_external_params_with_configs(configs, hparams)
new_batch_size = configs["train_config"].batch_size
self.assertEqual(1, new_batch_size) # Clipped to 1.0.
def testNewMomentumOptimizerValue(self):
"""Tests that new momentum value is updated appropriately."""
original_momentum_value = 0.4
hparams = tf.HParams(momentum_optimizer_value=1.1)
pipeline_config_path = os.path.join(self.get_temp_dir(), "pipeline.config")
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
optimizer_config = pipeline_config.train_config.optimizer.rms_prop_optimizer
optimizer_config.momentum_optimizer_value = original_momentum_value
_write_config(pipeline_config, pipeline_config_path)
configs = config_util.get_configs_from_pipeline_file(pipeline_config_path)
configs = config_util.merge_external_params_with_configs(configs, hparams)
optimizer_config = configs["train_config"].optimizer.rms_prop_optimizer
new_momentum_value = optimizer_config.momentum_optimizer_value
self.assertAlmostEqual(1.0, new_momentum_value) # Clipped to 1.0.
def testNewClassificationLocalizationWeightRatio(self):
"""Tests that the loss weight ratio is updated appropriately."""
original_localization_weight = 0.1
original_classification_weight = 0.2
new_weight_ratio = 5.0
hparams = tf.HParams(
classification_localization_weight_ratio=new_weight_ratio)
pipeline_config_path = os.path.join(self.get_temp_dir(), "pipeline.config")
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
pipeline_config.model.ssd.loss.localization_weight = (
original_localization_weight)
pipeline_config.model.ssd.loss.classification_weight = (
original_classification_weight)
_write_config(pipeline_config, pipeline_config_path)
configs = config_util.get_configs_from_pipeline_file(pipeline_config_path)
configs = config_util.merge_external_params_with_configs(configs, hparams)
loss = configs["model"].ssd.loss
self.assertAlmostEqual(1.0, loss.localization_weight)
self.assertAlmostEqual(new_weight_ratio, loss.classification_weight)
def testNewFocalLossParameters(self):
"""Tests that the loss weight ratio is updated appropriately."""
original_alpha = 1.0
original_gamma = 1.0
new_alpha = 0.3
new_gamma = 2.0
hparams = tf.HParams(focal_loss_alpha=new_alpha, focal_loss_gamma=new_gamma)
pipeline_config_path = os.path.join(self.get_temp_dir(), "pipeline.config")
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
classification_loss = pipeline_config.model.ssd.loss.classification_loss
classification_loss.weighted_sigmoid_focal.alpha = original_alpha
classification_loss.weighted_sigmoid_focal.gamma = original_gamma
_write_config(pipeline_config, pipeline_config_path)
configs = config_util.get_configs_from_pipeline_file(pipeline_config_path)
configs = config_util.merge_external_params_with_configs(configs, hparams)
classification_loss = configs["model"].ssd.loss.classification_loss
self.assertAlmostEqual(new_alpha,
classification_loss.weighted_sigmoid_focal.alpha)
self.assertAlmostEqual(new_gamma,
classification_loss.weighted_sigmoid_focal.gamma)
def testMergingKeywordArguments(self):
"""Tests that keyword arguments get merged as do hyperparameters."""
original_num_train_steps = 100
original_num_eval_steps = 5
desired_num_train_steps = 10
desired_num_eval_steps = 1
pipeline_config_path = os.path.join(self.get_temp_dir(), "pipeline.config")
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
pipeline_config.train_config.num_steps = original_num_train_steps
pipeline_config.eval_config.num_examples = original_num_eval_steps
_write_config(pipeline_config, pipeline_config_path)
configs = config_util.get_configs_from_pipeline_file(pipeline_config_path)
configs = config_util.merge_external_params_with_configs(
configs,
train_steps=desired_num_train_steps,
eval_steps=desired_num_eval_steps)
train_steps = configs["train_config"].num_steps
eval_steps = configs["eval_config"].num_examples
self.assertEqual(desired_num_train_steps, train_steps)
self.assertEqual(desired_num_eval_steps, eval_steps)
def testGetNumberOfClasses(self):
"""Tests that number of classes can be retrieved."""
pipeline_config_path = os.path.join(self.get_temp_dir(), "pipeline.config")
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
pipeline_config.model.faster_rcnn.num_classes = 20
_write_config(pipeline_config, pipeline_config_path)
configs = config_util.get_configs_from_pipeline_file(pipeline_config_path)
number_of_classes = config_util.get_number_of_classes(configs["model"])
self.assertEqual(20, number_of_classes)
def testNewTrainInputPath(self):
"""Tests that train input path can be overwritten with single file."""
original_train_path = ["path/to/data"]
new_train_path = "another/path/to/data"
pipeline_config_path = os.path.join(self.get_temp_dir(), "pipeline.config")
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
reader_config = pipeline_config.train_input_reader.tf_record_input_reader
reader_config.input_path.extend(original_train_path)
_write_config(pipeline_config, pipeline_config_path)
configs = config_util.get_configs_from_pipeline_file(pipeline_config_path)
configs = config_util.merge_external_params_with_configs(
configs, train_input_path=new_train_path)
reader_config = configs["train_input_config"].tf_record_input_reader
final_path = reader_config.input_path
self.assertEqual([new_train_path], final_path)
def testNewTrainInputPathList(self):
"""Tests that train input path can be overwritten with multiple files."""
original_train_path = ["path/to/data"]
new_train_path = ["another/path/to/data", "yet/another/path/to/data"]
pipeline_config_path = os.path.join(self.get_temp_dir(), "pipeline.config")
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
reader_config = pipeline_config.train_input_reader.tf_record_input_reader
reader_config.input_path.extend(original_train_path)
_write_config(pipeline_config, pipeline_config_path)
configs = config_util.get_configs_from_pipeline_file(pipeline_config_path)
configs = config_util.merge_external_params_with_configs(
configs, train_input_path=new_train_path)
reader_config = configs["train_input_config"].tf_record_input_reader
final_path = reader_config.input_path
self.assertEqual(new_train_path, final_path)
def testNewLabelMapPath(self):
"""Tests that label map path can be overwritten in input readers."""
original_label_map_path = "path/to/original/label_map"
new_label_map_path = "path/to/new/label_map"
pipeline_config_path = os.path.join(self.get_temp_dir(), "pipeline.config")
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
train_input_reader = pipeline_config.train_input_reader
train_input_reader.label_map_path = original_label_map_path
eval_input_reader = pipeline_config.eval_input_reader
eval_input_reader.label_map_path = original_label_map_path
_write_config(pipeline_config, pipeline_config_path)
configs = config_util.get_configs_from_pipeline_file(pipeline_config_path)
configs = config_util.merge_external_params_with_configs(
configs, label_map_path=new_label_map_path)
self.assertEqual(new_label_map_path,
configs["train_input_config"].label_map_path)
self.assertEqual(new_label_map_path,
configs["eval_input_config"].label_map_path)
if __name__ == "__main__":
tf.test.main()
@@ -124,11 +124,12 @@ def load_labelmap(path):
return label_map
def get_label_map_dict(label_map_path, use_display_name=False):
"""Reads a label map and returns a dictionary of label names to id.
Args:
label_map_path: path to label_map.
use_display_name: whether to use the label map items' display names as keys.
Returns:
A dictionary mapping label names to id.
@@ -136,5 +137,30 @@ def get_label_map_dict(label_map_path):
label_map = load_labelmap(label_map_path)
label_map_dict = {}
for item in label_map.item:
if use_display_name:
label_map_dict[item.display_name] = item.id
else:
label_map_dict[item.name] = item.id
return label_map_dict
def create_category_index_from_labelmap(label_map_path):
"""Reads a label map and returns a category index.
Args:
label_map_path: Path to `StringIntLabelMap` proto text file.
Returns:
A category index, which is a dictionary that maps integer ids to dicts
containing categories, e.g.
{1: {'id': 1, 'name': 'dog'}, 2: {'id': 2, 'name': 'cat'}, ...}
"""
label_map = load_labelmap(label_map_path)
max_num_classes = max(item.id for item in label_map.item)
categories = convert_label_map_to_categories(label_map, max_num_classes)
return create_category_index(categories)
def create_class_agnostic_category_index():
"""Creates a category index with a single `object` class."""
return {1: {'id': 1, 'name': 'object'}}
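A short, hedged usage sketch of the two helpers added above; the label map path is a hypothetical placeholder and the printed names depend on the file's contents:

from object_detection.utils import label_map_util

# Hypothetical label map file containing items for 'dog' (id 1) and 'cat' (id 2).
category_index = label_map_util.create_category_index_from_labelmap(
    "path/to/label_map.pbtxt")
print(category_index[1]['name'])  # e.g. 'dog'
print(label_map_util.create_class_agnostic_category_index())
# {1: {'id': 1, 'name': 'object'}}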
@@ -53,6 +53,26 @@ class LabelMapUtilTest(tf.test.TestCase):
self.assertEqual(label_map_dict['dog'], 1)
self.assertEqual(label_map_dict['cat'], 2)
def test_get_label_map_dict_display(self):
label_map_string = """
item {
id:2
display_name:'cat'
}
item {
id:1
display_name:'dog'
}
"""
label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt')
with tf.gfile.Open(label_map_path, 'wb') as f:
f.write(label_map_string)
label_map_dict = label_map_util.get_label_map_dict(
label_map_path, use_display_name=True)
self.assertEqual(label_map_dict['dog'], 1)
self.assertEqual(label_map_dict['cat'], 2)
def test_load_bad_label_map(self):
label_map_string = """
item {
@@ -164,6 +184,34 @@ class LabelMapUtilTest(tf.test.TestCase):
}
}, category_index)
def test_create_category_index_from_labelmap(self):
label_map_string = """
item {
id:2
name:'cat'
}
item {
id:1
name:'dog'
}
"""
label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt')
with tf.gfile.Open(label_map_path, 'wb') as f:
f.write(label_map_string)
category_index = label_map_util.create_category_index_from_labelmap(
label_map_path)
self.assertDictEqual({
1: {
'name': u'dog',
'id': 1
},
2: {
'name': u'cat',
'id': 2
}
}, category_index)
if __name__ == '__main__':
tf.test.main()
@@ -12,9 +12,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Library of common learning rate schedules."""
import numpy as np
import tensorflow as tf
@@ -59,6 +59,54 @@ def exponential_decay_with_burnin(global_step,
lambda: post_burnin_learning_rate)
def cosine_decay_with_warmup(global_step,
learning_rate_base,
total_steps,
warmup_learning_rate=0.0,
warmup_steps=0):
"""Cosine decay schedule with warm up period.
Cosine annealing learning rate as described in:
Loshchilov and Hutter, SGDR: Stochastic Gradient Descent with Warm Restarts.
ICLR 2017. https://arxiv.org/abs/1608.03983
In this schedule, the learning rate grows linearly from warmup_learning_rate
to learning_rate_base for warmup_steps, then transitions to a cosine decay
schedule.
Args:
global_step: int64 (scalar) tensor representing global step.
learning_rate_base: base learning rate.
total_steps: total number of training steps.
warmup_learning_rate: initial learning rate for warm up.
warmup_steps: number of warmup steps.
Returns:
a (scalar) float tensor representing learning rate.
Raises:
ValueError: if warmup_learning_rate is larger than learning_rate_base,
or if warmup_steps is larger than total_steps.
"""
if learning_rate_base < warmup_learning_rate:
raise ValueError('learning_rate_base must be larger '
'or equal to warmup_learning_rate.')
if total_steps < warmup_steps:
raise ValueError('total_steps must be larger or equal to '
'warmup_steps.')
learning_rate = 0.5 * learning_rate_base * (
1 + tf.cos(np.pi * tf.cast(
global_step - warmup_steps, tf.float32
) / float(total_steps - warmup_steps)))
if warmup_steps > 0:
slope = (learning_rate_base - warmup_learning_rate) / warmup_steps
pre_cosine_learning_rate = slope * tf.cast(
global_step, tf.float32) + warmup_learning_rate
learning_rate = tf.cond(
tf.less(global_step, warmup_steps), lambda: pre_cosine_learning_rate,
lambda: learning_rate)
return learning_rate
def manual_stepping(global_step, boundaries, rates):
"""Manually stepped learning rate schedule.
@@ -96,8 +144,8 @@ def manual_stepping(global_step, boundaries, rates):
'number of boundary points by exactly 1.')
step_boundaries = tf.constant(boundaries, tf.int64)
learning_rates = tf.constant(rates, tf.float32)
unreached_boundaries = tf.reshape(
tf.where(tf.greater(step_boundaries, global_step)), [-1])
unreached_boundaries = tf.concat([unreached_boundaries, [len(boundaries)]], 0)
index = tf.reshape(tf.reduce_min(unreached_boundaries), [1])
return tf.reshape(tf.slice(learning_rates, index, [1]), [])
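For intuition, a NumPy-only sketch of the warmup-plus-cosine schedule added above (not the graph-mode TF version); the step values mirror the expectations in the test that follows:

import numpy as np

def cosine_decay_with_warmup_np(step, base_lr=1.0, total_steps=100,
                                warmup_lr=0.1, warmup_steps=9):
  # Linear ramp from warmup_lr to base_lr during warmup, cosine decay after.
  if step < warmup_steps:
    slope = (base_lr - warmup_lr) / warmup_steps
    return slope * step + warmup_lr
  return 0.5 * base_lr * (
      1 + np.cos(np.pi * (step - warmup_steps) / (total_steps - warmup_steps)))

# Prints roughly [0.1, 0.5, 0.9, 1.0, 0.0], matching exp_rates in the test below.
print([round(cosine_decay_with_warmup_np(s), 4) for s in [0, 4, 8, 9, 100]])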
@@ -40,6 +40,25 @@ class LearningSchedulesTest(tf.test.TestCase):
output_rates.append(output_rate)
self.assertAllClose(output_rates, exp_rates)
def testCosineDecayWithWarmup(self):
global_step = tf.placeholder(tf.int32, [])
learning_rate_base = 1.0
total_steps = 100
warmup_learning_rate = 0.1
warmup_steps = 9
input_global_steps = [0, 4, 8, 9, 100]
exp_rates = [0.1, 0.5, 0.9, 1.0, 0]
learning_rate = learning_schedules.cosine_decay_with_warmup(
global_step, learning_rate_base, total_steps,
warmup_learning_rate, warmup_steps)
with self.test_session() as sess:
output_rates = []
for input_global_step in input_global_steps:
output_rate = sess.run(learning_rate,
feed_dict={global_step: input_global_step})
output_rates.append(output_rate)
self.assertAllClose(output_rates, exp_rates)
def testManualStepping(self):
global_step = tf.placeholder(tf.int64, [])
boundaries = [2, 3, 7]
...
@@ -17,7 +17,6 @@
from __future__ import division
import numpy as np
def compute_precision_recall(scores, labels, num_gt):
@@ -104,7 +103,7 @@ def compute_average_precision(precision, recall):
raise ValueError("Precision must be in the range of [0, 1].")
if np.amin(recall) < 0 or np.amax(recall) > 1:
raise ValueError("recall must be in the range of [0, 1].")
if not all(recall[i] <= recall[i + 1] for i in range(len(recall) - 1)):
raise ValueError("recall must be a non-decreasing array")
recall = np.concatenate([[0], recall, [1]])
...
@@ -16,7 +16,6 @@
"""Numpy BoxList classes and functions."""
import numpy as np
class BoxList(object):
@@ -128,7 +127,7 @@ class BoxList(object):
ymin, and all xmax of boxes are equal or greater than xmin.
"""
if data.shape[0] > 0:
for i in range(data.shape[0]):
if data[i, 0] > data[i, 2] or data[i, 1] > data[i, 3]:
return False
return True
@@ -18,9 +18,309 @@
import numpy as np
import tensorflow as tf
from object_detection.core import standard_fields
from object_detection.utils import object_detection_evaluation
class OpenImagesV2EvaluationTest(tf.test.TestCase):
def test_returns_correct_metric_values(self):
categories = [{
'id': 1,
'name': 'cat'
}, {
'id': 2,
'name': 'dog'
}, {
'id': 3,
'name': 'elephant'
}]
oiv2_evaluator = object_detection_evaluation.OpenImagesDetectionEvaluator(
categories)
image_key1 = 'img1'
groundtruth_boxes1 = np.array(
[[0, 0, 1, 1], [0, 0, 2, 2], [0, 0, 3, 3]], dtype=float)
groundtruth_class_labels1 = np.array([1, 3, 1], dtype=int)
oiv2_evaluator.add_single_ground_truth_image_info(image_key1, {
standard_fields.InputDataFields.groundtruth_boxes:
groundtruth_boxes1,
standard_fields.InputDataFields.groundtruth_classes:
groundtruth_class_labels1
})
image_key2 = 'img2'
groundtruth_boxes2 = np.array(
[[10, 10, 11, 11], [500, 500, 510, 510], [10, 10, 12, 12]], dtype=float)
groundtruth_class_labels2 = np.array([1, 1, 3], dtype=int)
groundtruth_is_group_of_list2 = np.array([False, True, False], dtype=bool)
oiv2_evaluator.add_single_ground_truth_image_info(image_key2, {
standard_fields.InputDataFields.groundtruth_boxes:
groundtruth_boxes2,
standard_fields.InputDataFields.groundtruth_classes:
groundtruth_class_labels2,
standard_fields.InputDataFields.groundtruth_group_of:
groundtruth_is_group_of_list2
})
image_key3 = 'img3'
groundtruth_boxes3 = np.array([[0, 0, 1, 1]], dtype=float)
groundtruth_class_labels3 = np.array([2], dtype=int)
oiv2_evaluator.add_single_ground_truth_image_info(image_key3, {
standard_fields.InputDataFields.groundtruth_boxes:
groundtruth_boxes3,
standard_fields.InputDataFields.groundtruth_classes:
groundtruth_class_labels3
})
# Add detections
image_key = 'img2'
detected_boxes = np.array(
[[10, 10, 11, 11], [100, 100, 120, 120], [100, 100, 220, 220]],
dtype=float)
detected_class_labels = np.array([1, 1, 3], dtype=int)
detected_scores = np.array([0.7, 0.8, 0.9], dtype=float)
oiv2_evaluator.add_single_detected_image_info(image_key, {
standard_fields.DetectionResultFields.detection_boxes:
detected_boxes,
standard_fields.DetectionResultFields.detection_scores:
detected_scores,
standard_fields.DetectionResultFields.detection_classes:
detected_class_labels
})
metrics = oiv2_evaluator.evaluate()
self.assertAlmostEqual(
metrics['OpenImagesV2/PerformanceByCategory/AP@0.5IOU/dog'], 0.0)
self.assertAlmostEqual(
metrics['OpenImagesV2/PerformanceByCategory/AP@0.5IOU/elephant'], 0.0)
self.assertAlmostEqual(
metrics['OpenImagesV2/PerformanceByCategory/AP@0.5IOU/cat'], 0.16666666)
self.assertAlmostEqual(metrics['OpenImagesV2/Precision/mAP@0.5IOU'],
0.05555555)
oiv2_evaluator.clear()
self.assertFalse(oiv2_evaluator._image_ids)
class PascalEvaluationTest(tf.test.TestCase):
def test_returns_correct_metric_values(self):
categories = [{'id': 1, 'name': 'cat'},
{'id': 2, 'name': 'dog'},
{'id': 3, 'name': 'elephant'}]
# Add groundtruth
pascal_evaluator = object_detection_evaluation.PascalDetectionEvaluator(
categories)
image_key1 = 'img1'
groundtruth_boxes1 = np.array([[0, 0, 1, 1], [0, 0, 2, 2], [0, 0, 3, 3]],
dtype=float)
groundtruth_class_labels1 = np.array([1, 3, 1], dtype=int)
pascal_evaluator.add_single_ground_truth_image_info(
image_key1,
{standard_fields.InputDataFields.groundtruth_boxes: groundtruth_boxes1,
standard_fields.InputDataFields.groundtruth_classes:
groundtruth_class_labels1})
image_key2 = 'img2'
groundtruth_boxes2 = np.array([[10, 10, 11, 11], [500, 500, 510, 510],
[10, 10, 12, 12]], dtype=float)
groundtruth_class_labels2 = np.array([1, 1, 3], dtype=int)
groundtruth_is_difficult_list2 = np.array([False, True, False], dtype=bool)
pascal_evaluator.add_single_ground_truth_image_info(
image_key2,
{standard_fields.InputDataFields.groundtruth_boxes: groundtruth_boxes2,
standard_fields.InputDataFields.groundtruth_classes:
groundtruth_class_labels2,
standard_fields.InputDataFields.groundtruth_difficult:
groundtruth_is_difficult_list2})
image_key3 = 'img3'
groundtruth_boxes3 = np.array([[0, 0, 1, 1]], dtype=float)
groundtruth_class_labels3 = np.array([2], dtype=int)
pascal_evaluator.add_single_ground_truth_image_info(
image_key3,
{standard_fields.InputDataFields.groundtruth_boxes: groundtruth_boxes3,
standard_fields.InputDataFields.groundtruth_classes:
groundtruth_class_labels3})
# Add detections
image_key = 'img2'
detected_boxes = np.array(
[[10, 10, 11, 11], [100, 100, 120, 120], [100, 100, 220, 220]],
dtype=float)
detected_class_labels = np.array([1, 1, 3], dtype=int)
detected_scores = np.array([0.7, 0.8, 0.9], dtype=float)
pascal_evaluator.add_single_detected_image_info(
image_key,
{standard_fields.DetectionResultFields.detection_boxes: detected_boxes,
standard_fields.DetectionResultFields.detection_scores:
detected_scores,
standard_fields.DetectionResultFields.detection_classes:
detected_class_labels})
metrics = pascal_evaluator.evaluate()
self.assertAlmostEqual(
metrics['PASCAL/PerformanceByCategory/AP@0.5IOU/dog'], 0.0)
self.assertAlmostEqual(
metrics['PASCAL/PerformanceByCategory/AP@0.5IOU/elephant'], 0.0)
self.assertAlmostEqual(
metrics['PASCAL/PerformanceByCategory/AP@0.5IOU/cat'], 0.16666666)
self.assertAlmostEqual(metrics['PASCAL/Precision/mAP@0.5IOU'], 0.05555555)
pascal_evaluator.clear()
self.assertFalse(pascal_evaluator._image_ids)
def test_value_error_on_duplicate_images(self):
categories = [{'id': 1, 'name': 'cat'},
{'id': 2, 'name': 'dog'},
{'id': 3, 'name': 'elephant'}]
# Add groundtruth
pascal_evaluator = object_detection_evaluation.PascalDetectionEvaluator(
categories)
image_key1 = 'img1'
groundtruth_boxes1 = np.array([[0, 0, 1, 1], [0, 0, 2, 2], [0, 0, 3, 3]],
dtype=float)
groundtruth_class_labels1 = np.array([1, 3, 1], dtype=int)
pascal_evaluator.add_single_ground_truth_image_info(
image_key1,
{standard_fields.InputDataFields.groundtruth_boxes: groundtruth_boxes1,
standard_fields.InputDataFields.groundtruth_classes:
groundtruth_class_labels1})
with self.assertRaises(ValueError):
pascal_evaluator.add_single_ground_truth_image_info(
image_key1,
{standard_fields.InputDataFields.groundtruth_boxes:
groundtruth_boxes1,
standard_fields.InputDataFields.groundtruth_classes:
groundtruth_class_labels1})
class WeightedPascalEvaluationTest(tf.test.TestCase):
def setUp(self):
self.categories = [{'id': 1, 'name': 'cat'},
{'id': 2, 'name': 'dog'},
{'id': 3, 'name': 'elephant'}]
def create_and_add_common_ground_truth(self):
# Add groundtruth
self.wp_eval = (
object_detection_evaluation.WeightedPascalDetectionEvaluator(
self.categories))
image_key1 = 'img1'
groundtruth_boxes1 = np.array([[0, 0, 1, 1], [0, 0, 2, 2], [0, 0, 3, 3]],
dtype=float)
groundtruth_class_labels1 = np.array([1, 3, 1], dtype=int)
self.wp_eval.add_single_ground_truth_image_info(
image_key1,
{standard_fields.InputDataFields.groundtruth_boxes: groundtruth_boxes1,
standard_fields.InputDataFields.groundtruth_classes:
groundtruth_class_labels1})
# add 'img2' separately
image_key3 = 'img3'
groundtruth_boxes3 = np.array([[0, 0, 1, 1]], dtype=float)
groundtruth_class_labels3 = np.array([2], dtype=int)
self.wp_eval.add_single_ground_truth_image_info(
image_key3,
{standard_fields.InputDataFields.groundtruth_boxes: groundtruth_boxes3,
standard_fields.InputDataFields.groundtruth_classes:
groundtruth_class_labels3})
def add_common_detected(self):
image_key = 'img2'
detected_boxes = np.array(
[[10, 10, 11, 11], [100, 100, 120, 120], [100, 100, 220, 220]],
dtype=float)
detected_class_labels = np.array([1, 1, 3], dtype=int)
detected_scores = np.array([0.7, 0.8, 0.9], dtype=float)
self.wp_eval.add_single_detected_image_info(
image_key,
{standard_fields.DetectionResultFields.detection_boxes: detected_boxes,
standard_fields.DetectionResultFields.detection_scores:
detected_scores,
standard_fields.DetectionResultFields.detection_classes:
detected_class_labels})
def test_returns_correct_metric_values(self):
self.create_and_add_common_ground_truth()
image_key2 = 'img2'
groundtruth_boxes2 = np.array([[10, 10, 11, 11], [500, 500, 510, 510],
[10, 10, 12, 12]], dtype=float)
groundtruth_class_labels2 = np.array([1, 1, 3], dtype=int)
self.wp_eval.add_single_ground_truth_image_info(
image_key2,
{standard_fields.InputDataFields.groundtruth_boxes: groundtruth_boxes2,
standard_fields.InputDataFields.groundtruth_classes:
groundtruth_class_labels2
})
self.add_common_detected()
metrics = self.wp_eval.evaluate()
self.assertAlmostEqual(
metrics[self.wp_eval._metric_prefix +
'PerformanceByCategory/AP@0.5IOU/dog'], 0.0)
self.assertAlmostEqual(
metrics[self.wp_eval._metric_prefix +
'PerformanceByCategory/AP@0.5IOU/elephant'], 0.0)
self.assertAlmostEqual(
metrics[self.wp_eval._metric_prefix +
'PerformanceByCategory/AP@0.5IOU/cat'], 0.5 / 4)
self.assertAlmostEqual(metrics[self.wp_eval._metric_prefix +
'Precision/mAP@0.5IOU'],
1. / (4 + 1 + 2) / 3)
self.wp_eval.clear()
self.assertFalse(self.wp_eval._image_ids)
def test_returns_correct_metric_values_with_difficult_list(self):
self.create_and_add_common_ground_truth()
image_key2 = 'img2'
groundtruth_boxes2 = np.array([[10, 10, 11, 11], [500, 500, 510, 510],
[10, 10, 12, 12]], dtype=float)
groundtruth_class_labels2 = np.array([1, 1, 3], dtype=int)
groundtruth_is_difficult_list2 = np.array([False, True, False], dtype=bool)
self.wp_eval.add_single_ground_truth_image_info(
image_key2,
{standard_fields.InputDataFields.groundtruth_boxes: groundtruth_boxes2,
standard_fields.InputDataFields.groundtruth_classes:
groundtruth_class_labels2,
standard_fields.InputDataFields.groundtruth_difficult:
groundtruth_is_difficult_list2
})
self.add_common_detected()
metrics = self.wp_eval.evaluate()
self.assertAlmostEqual(
metrics[self.wp_eval._metric_prefix +
'PerformanceByCategory/AP@0.5IOU/dog'], 0.0)
self.assertAlmostEqual(
metrics[self.wp_eval._metric_prefix +
'PerformanceByCategory/AP@0.5IOU/elephant'], 0.0)
self.assertAlmostEqual(
metrics[self.wp_eval._metric_prefix +
'PerformanceByCategory/AP@0.5IOU/cat'], 0.5 / 3)
self.assertAlmostEqual(metrics[self.wp_eval._metric_prefix +
'Precision/mAP@0.5IOU'],
1. / (3 + 1 + 2) / 3)
self.wp_eval.clear()
self.assertFalse(self.wp_eval._image_ids)
def test_value_error_on_duplicate_images(self):
# Add groundtruth
self.wp_eval = (
object_detection_evaluation.WeightedPascalDetectionEvaluator(
self.categories))
image_key1 = 'img1'
groundtruth_boxes1 = np.array([[0, 0, 1, 1], [0, 0, 2, 2], [0, 0, 3, 3]],
dtype=float)
groundtruth_class_labels1 = np.array([1, 3, 1], dtype=int)
self.wp_eval.add_single_ground_truth_image_info(
image_key1,
{standard_fields.InputDataFields.groundtruth_boxes: groundtruth_boxes1,
standard_fields.InputDataFields.groundtruth_classes:
groundtruth_class_labels1})
with self.assertRaises(ValueError):
self.wp_eval.add_single_ground_truth_image_info(
image_key1,
{standard_fields.InputDataFields.groundtruth_boxes:
groundtruth_boxes1,
standard_fields.InputDataFields.groundtruth_classes:
groundtruth_class_labels1})
class ObjectDetectionEvaluationTest(tf.test.TestCase):

  def setUp(self):
...@@ -28,27 +328,29 @@ class ObjectDetectionEvaluationTest(tf.test.TestCase):
    self.od_eval = object_detection_evaluation.ObjectDetectionEvaluation(
        num_groundtruth_classes)
    image_key1 = 'img1'
    groundtruth_boxes1 = np.array([[0, 0, 1, 1], [0, 0, 2, 2], [0, 0, 3, 3]],
                                  dtype=float)
    groundtruth_class_labels1 = np.array([0, 2, 0], dtype=int)
    self.od_eval.add_single_ground_truth_image_info(
        image_key1, groundtruth_boxes1, groundtruth_class_labels1)
    image_key2 = 'img2'
    groundtruth_boxes2 = np.array([[10, 10, 11, 11], [500, 500, 510, 510],
                                   [10, 10, 12, 12]], dtype=float)
    groundtruth_class_labels2 = np.array([0, 0, 2], dtype=int)
    groundtruth_is_difficult_list2 = np.array([False, True, False], dtype=bool)
    groundtruth_is_group_of_list2 = np.array([False, False, True], dtype=bool)
    self.od_eval.add_single_ground_truth_image_info(
        image_key2, groundtruth_boxes2, groundtruth_class_labels2,
        groundtruth_is_difficult_list2, groundtruth_is_group_of_list2)

    image_key3 = 'img3'
    groundtruth_boxes3 = np.array([[0, 0, 1, 1]], dtype=float)
    groundtruth_class_labels3 = np.array([1], dtype=int)
    self.od_eval.add_single_ground_truth_image_info(
        image_key3, groundtruth_boxes3, groundtruth_class_labels3)
    image_key = 'img2'
    detected_boxes = np.array(
        [[10, 10, 11, 11], [100, 100, 120, 120], [100, 100, 220, 220]],
        dtype=float)
...@@ -58,7 +360,7 @@ class ObjectDetectionEvaluationTest(tf.test.TestCase):
        image_key, detected_boxes, detected_scores, detected_class_labels)

  def test_add_single_ground_truth_image_info(self):
    expected_num_gt_instances_per_class = np.array([3, 1, 1], dtype=int)
    expected_num_gt_imgs_per_class = np.array([2, 1, 2], dtype=int)
    self.assertTrue(np.array_equal(expected_num_gt_instances_per_class,
                                   self.od_eval.num_gt_instances_per_class))
...@@ -66,15 +368,20 @@ class ObjectDetectionEvaluationTest(tf.test.TestCase):
                                   self.od_eval.num_gt_imgs_per_class))
    groundtruth_boxes2 = np.array([[10, 10, 11, 11], [500, 500, 510, 510],
                                   [10, 10, 12, 12]], dtype=float)
    self.assertTrue(np.allclose(self.od_eval.groundtruth_boxes['img2'],
                                groundtruth_boxes2))
    groundtruth_is_difficult_list2 = np.array([False, True, False], dtype=bool)
    self.assertTrue(np.allclose(
        self.od_eval.groundtruth_is_difficult_list['img2'],
        groundtruth_is_difficult_list2))
    groundtruth_is_group_of_list2 = np.array([False, False, True], dtype=bool)
    self.assertTrue(
        np.allclose(self.od_eval.groundtruth_is_group_of_list['img2'],
                    groundtruth_is_group_of_list2))
    groundtruth_class_labels1 = np.array([0, 2, 0], dtype=int)
    self.assertTrue(np.array_equal(self.od_eval.groundtruth_class_labels[
        'img1'], groundtruth_class_labels1))

  def test_add_single_detected_image_info(self):
    expected_scores_per_class = [[np.array([0.8, 0.7], dtype=float)], [],
...@@ -121,5 +428,5 @@ class ObjectDetectionEvaluationTest(tf.test.TestCase):
    self.assertAlmostEqual(expected_mean_corloc, mean_corloc)


if __name__ == '__main__':
  tf.test.main()
...@@ -15,6 +15,7 @@
"""A module for helper tensorflow ops."""
import math
import numpy as np
import six
import tensorflow as tf
...@@ -156,6 +157,10 @@ def pad_to_multiple(tensor, multiple):
  padded_tensor_width = int(
      math.ceil(float(tensor_width) / multiple) * multiple)
if (padded_tensor_height == tensor_height and
padded_tensor_width == tensor_width):
return tensor
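  # --- Illustrative aside (not part of this change): the ceil-to-multiple
  # arithmetic used above, assuming a height of 5 and a multiple of 4.
  #   int(math.ceil(float(5) / 4) * 4)  ->  8   (padding needed)
  #   int(math.ceil(float(8) / 4) * 4)  ->  8   (already a multiple, so the
  #                                              early return above fires)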
  if tensor_depth is None:
    tensor_depth = tf.shape(tensor)[3]
...@@ -285,6 +290,7 @@ def retain_groundtruth(tensor_dict, valid_indices):
  Args:
    tensor_dict: a dictionary of following groundtruth tensors -
      fields.InputDataFields.groundtruth_boxes
      fields.InputDataFields.groundtruth_instance_masks
      fields.InputDataFields.groundtruth_classes
      fields.InputDataFields.groundtruth_is_crowd
      fields.InputDataFields.groundtruth_area
...@@ -312,7 +318,8 @@ def retain_groundtruth(tensor_dict, valid_indices):
          tensor_dict[fields.InputDataFields.groundtruth_boxes])[0], 1)
  for key in tensor_dict:
    if key in [fields.InputDataFields.groundtruth_boxes,
               fields.InputDataFields.groundtruth_classes,
               fields.InputDataFields.groundtruth_instance_masks]:
      valid_dict[key] = tf.gather(tensor_dict[key], valid_indices)
    # Input decoder returns empty tensor when these fields are not provided.
    # Needs to reshape into [num_boxes, -1] for tf.gather() to work.
...@@ -358,12 +365,49 @@ def retain_groundtruth_with_positive_classes(tensor_dict):
  return retain_groundtruth(tensor_dict, keep_indices)
def replace_nan_groundtruth_label_scores_with_ones(label_scores):
"""Replaces nan label scores with 1.0.
Args:
    label_scores: a tensor containing object annotation label scores.
Returns:
a tensor where NaN label scores have been replaced by ones.
"""
return tf.where(
tf.is_nan(label_scores), tf.ones(tf.shape(label_scores)), label_scores)
def filter_groundtruth_with_crowd_boxes(tensor_dict):
"""Filters out groundtruth with boxes corresponding to crowd.
Args:
tensor_dict: a dictionary of following groundtruth tensors -
fields.InputDataFields.groundtruth_boxes
fields.InputDataFields.groundtruth_classes
fields.InputDataFields.groundtruth_is_crowd
fields.InputDataFields.groundtruth_area
fields.InputDataFields.groundtruth_label_types
Returns:
    a dictionary of tensors containing only the groundtruth that is not
    annotated as crowd.
"""
if fields.InputDataFields.groundtruth_is_crowd in tensor_dict:
is_crowd = tensor_dict[fields.InputDataFields.groundtruth_is_crowd]
is_not_crowd = tf.logical_not(is_crowd)
is_not_crowd_indices = tf.where(is_not_crowd)
tensor_dict = retain_groundtruth(tensor_dict, is_not_crowd_indices)
return tensor_dict
def filter_groundtruth_with_nan_box_coordinates(tensor_dict):
  """Filters out groundtruth with no bounding boxes.

  Args:
    tensor_dict: a dictionary of following groundtruth tensors -
      fields.InputDataFields.groundtruth_boxes
      fields.InputDataFields.groundtruth_instance_masks
      fields.InputDataFields.groundtruth_classes
      fields.InputDataFields.groundtruth_is_crowd
      fields.InputDataFields.groundtruth_area
...@@ -649,3 +693,49 @@ def reframe_box_masks_to_image_masks(box_masks, boxes, image_height,
      crop_size=[image_height, image_width],
      extrapolation_value=0.0)
  return tf.squeeze(image_masks, axis=3)
def merge_boxes_with_multiple_labels(boxes, classes, num_classes):
"""Merges boxes with same coordinates and returns K-hot encoded classes.
Args:
boxes: A tf.float32 tensor with shape [N, 4] holding N boxes.
classes: A tf.int32 tensor with shape [N] holding class indices.
The class index starts at 0.
num_classes: total number of classes to use for K-hot encoding.
Returns:
merged_boxes: A tf.float32 tensor with shape [N', 4] holding boxes,
where N' <= N.
class_encodings: A tf.int32 tensor with shape [N', num_classes] holding
k-hot encodings for the merged boxes.
merged_box_indices: A tf.int32 tensor with shape [N'] holding original
indices of the boxes.
"""
def merge_numpy_boxes(boxes, classes, num_classes):
"""Python function to merge numpy boxes."""
if boxes.size < 1:
return (np.zeros([0, 4], dtype=np.float32),
np.zeros([0, num_classes], dtype=np.int32),
np.zeros([0], dtype=np.int32))
box_to_class_indices = {}
for box_index in range(boxes.shape[0]):
box = tuple(boxes[box_index, :].tolist())
class_index = classes[box_index]
if box not in box_to_class_indices:
box_to_class_indices[box] = [box_index, np.zeros([num_classes])]
box_to_class_indices[box][1][class_index] = 1
merged_boxes = np.vstack(box_to_class_indices.keys()).astype(np.float32)
class_encodings = [item[1] for item in box_to_class_indices.values()]
class_encodings = np.vstack(class_encodings).astype(np.int32)
merged_box_indices = [item[0] for item in box_to_class_indices.values()]
merged_box_indices = np.array(merged_box_indices).astype(np.int32)
return merged_boxes, class_encodings, merged_box_indices
merged_boxes, class_encodings, merged_box_indices = tf.py_func(
merge_numpy_boxes, [boxes, classes, num_classes],
[tf.float32, tf.int32, tf.int32])
merged_boxes = tf.reshape(merged_boxes, [-1, 4])
class_encodings = tf.reshape(class_encodings, [-1, num_classes])
merged_box_indices = tf.reshape(merged_box_indices, [-1])
return merged_boxes, class_encodings, merged_box_indices
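As a rough illustration of the merge above (not part of the change), duplicate box coordinates collapse into a single row whose K-hot class vector has a one for every class seen for that box; the names below are local to this sketch only.

# Sketch of the k-hot merge in plain numpy (assumes num_classes = 5).
import numpy as np

boxes = np.array([[0.25, 0.25, 0.75, 0.75],
                  [0.0, 0.0, 0.5, 0.75],
                  [0.25, 0.25, 0.75, 0.75]], dtype=np.float32)
classes = np.array([0, 4, 2], dtype=np.int32)
num_classes = 5

box_to_encoding = {}
for box, class_index in zip(map(tuple, boxes.tolist()), classes):
  # The first occurrence of a box creates its encoding; later duplicates only
  # flip additional class bits.
  encoding = box_to_encoding.setdefault(box, np.zeros(num_classes, np.int32))
  encoding[class_index] = 1
# Two unique boxes remain; the repeated box carries classes 0 and 2.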
...@@ -602,6 +602,64 @@ class RetainGroundTruthWithPositiveClasses(tf.test.TestCase):
        self.assertAllEqual(expected_tensors[key], output_tensors[key])
class ReplaceNaNGroundtruthLabelScoresWithOnes(tf.test.TestCase):
def test_replace_nan_groundtruth_label_scores_with_ones(self):
label_scores = tf.constant([np.nan, 1.0, np.nan])
output_tensor = ops.replace_nan_groundtruth_label_scores_with_ones(
label_scores)
expected_tensor = [1.0, 1.0, 1.0]
with self.test_session():
output_tensor = output_tensor.eval()
self.assertAllClose(expected_tensor, output_tensor)
def test_input_equals_output_when_no_nans(self):
input_label_scores = [0.5, 1.0, 1.0]
label_scores_tensor = tf.constant(input_label_scores)
output_label_scores = ops.replace_nan_groundtruth_label_scores_with_ones(
label_scores_tensor)
with self.test_session():
output_label_scores = output_label_scores.eval()
self.assertAllClose(input_label_scores, output_label_scores)
class GroundtruthFilterWithCrowdBoxesTest(tf.test.TestCase):
def test_filter_groundtruth_with_crowd_boxes(self):
input_tensors = {
fields.InputDataFields.groundtruth_boxes:
[[0.1, 0.2, 0.6, 0.8], [0.2, 0.4, 0.1, 0.8]],
fields.InputDataFields.groundtruth_classes:
[1, 2],
fields.InputDataFields.groundtruth_is_crowd:
[True, False],
fields.InputDataFields.groundtruth_area:
[100.0, 238.7]
}
expected_tensors = {
fields.InputDataFields.groundtruth_boxes:
[[0.2, 0.4, 0.1, 0.8]],
fields.InputDataFields.groundtruth_classes:
[2],
fields.InputDataFields.groundtruth_is_crowd:
[False],
fields.InputDataFields.groundtruth_area:
[238.7]
}
output_tensors = ops.filter_groundtruth_with_crowd_boxes(
input_tensors)
with self.test_session() as sess:
output_tensors = sess.run(output_tensors)
for key in [fields.InputDataFields.groundtruth_boxes,
fields.InputDataFields.groundtruth_area]:
self.assertAllClose(expected_tensors[key], output_tensors[key])
for key in [fields.InputDataFields.groundtruth_classes,
fields.InputDataFields.groundtruth_is_crowd]:
self.assertAllEqual(expected_tensors[key], output_tensors[key])
class GroundtruthFilterWithNanBoxTest(tf.test.TestCase):

  def test_filter_groundtruth_with_nan_box_coordinates(self):
...@@ -1029,5 +1087,46 @@ class ReframeBoxMasksToImageMasksTest(tf.test.TestCase):
    self.assertAllClose(np_image_masks, np_expected_image_masks)
class MergeBoxesWithMultipleLabelsTest(tf.test.TestCase):
def testMergeBoxesWithMultipleLabels(self):
boxes = tf.constant(
[[0.25, 0.25, 0.75, 0.75], [0.0, 0.0, 0.5, 0.75],
[0.25, 0.25, 0.75, 0.75]],
dtype=tf.float32)
class_indices = tf.constant([0, 4, 2], dtype=tf.int32)
num_classes = 5
merged_boxes, merged_classes, merged_box_indices = (
ops.merge_boxes_with_multiple_labels(boxes, class_indices, num_classes))
expected_merged_boxes = np.array(
[[0.25, 0.25, 0.75, 0.75], [0.0, 0.0, 0.5, 0.75]], dtype=np.float32)
expected_merged_classes = np.array(
[[1, 0, 1, 0, 0], [0, 0, 0, 0, 1]], dtype=np.int32)
expected_merged_box_indices = np.array([0, 1], dtype=np.int32)
with self.test_session() as sess:
np_merged_boxes, np_merged_classes, np_merged_box_indices = sess.run(
[merged_boxes, merged_classes, merged_box_indices])
if np_merged_classes[0, 0] != 1:
expected_merged_boxes = expected_merged_boxes[::-1, :]
expected_merged_classes = expected_merged_classes[::-1, :]
        expected_merged_box_indices = expected_merged_box_indices[::-1]
self.assertAllClose(np_merged_boxes, expected_merged_boxes)
self.assertAllClose(np_merged_classes, expected_merged_classes)
self.assertAllClose(np_merged_box_indices, expected_merged_box_indices)
def testMergeBoxesWithEmptyInputs(self):
boxes = tf.constant([[]])
class_indices = tf.constant([])
num_classes = 5
merged_boxes, merged_classes, merged_box_indices = (
ops.merge_boxes_with_multiple_labels(boxes, class_indices, num_classes))
with self.test_session() as sess:
np_merged_boxes, np_merged_classes, np_merged_box_indices = sess.run(
[merged_boxes, merged_classes, merged_box_indices])
self.assertAllEqual(np_merged_boxes.shape, [0, 4])
self.assertAllEqual(np_merged_classes.shape, [0, 5])
self.assertAllEqual(np_merged_box_indices.shape, [0])
if __name__ == '__main__':
  tf.test.main()
...@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Evaluate Object Detection result on a single image.

Annotate each detected result as true positives or false positive according to
...@@ -47,11 +46,17 @@ class PerImageEvaluation(object):
    self.nms_max_output_boxes = nms_max_output_boxes
    self.num_groundtruth_classes = num_groundtruth_classes
  def compute_object_detection_metrics(
      self, detected_boxes, detected_scores, detected_class_labels,
      groundtruth_boxes, groundtruth_class_labels,
      groundtruth_is_difficult_lists, groundtruth_is_group_of_list):
    """Evaluates detections as being tp, fp or ignored from a single image.

The evaluation is done in two stages:
1. All detections are matched to non group-of boxes; true positives are
determined and detections matched to difficult boxes are ignored.
2. Detections that are determined as false positives are matched against
group-of boxes and ignored if matched.

    Args:
      detected_boxes: A float numpy array of shape [N, 4], representing N
...@@ -67,6 +72,8 @@ class PerImageEvaluation(object):
        representing M class labels of object instances in ground truth
      groundtruth_is_difficult_lists: A boolean numpy array of length M denoting
        whether a ground truth box is a difficult instance or not
groundtruth_is_group_of_list: A boolean numpy array of length M denoting
whether a ground truth box has group-of tag

    Returns:
      scores: A list of C float numpy arrays. Each numpy array is of
...@@ -85,7 +92,8 @@ class PerImageEvaluation(object):
    scores, tp_fp_labels = self._compute_tp_fp(
        detected_boxes, detected_scores, detected_class_labels,
        groundtruth_boxes, groundtruth_class_labels,
        groundtruth_is_difficult_lists, groundtruth_is_group_of_list)

    is_class_correctly_detected_in_image = self._compute_cor_loc(
        detected_boxes, detected_scores, detected_class_labels,
        groundtruth_boxes, groundtruth_class_labels)
...@@ -116,10 +124,10 @@ class PerImageEvaluation(object):
    is_class_correctly_detected_in_image = np.zeros(
        self.num_groundtruth_classes, dtype=int)
    for i in range(self.num_groundtruth_classes):
      gt_boxes_at_ith_class = groundtruth_boxes[groundtruth_class_labels ==
                                                i, :]
      detected_boxes_at_ith_class = detected_boxes[detected_class_labels ==
                                                   i, :]
      detected_scores_at_ith_class = detected_scores[detected_class_labels == i]
      is_class_correctly_detected_in_image[i] = (
          self._compute_is_aclass_correctly_detected_in_image(
...@@ -157,7 +165,8 @@ class PerImageEvaluation(object):
  def _compute_tp_fp(self, detected_boxes, detected_scores,
                     detected_class_labels, groundtruth_boxes,
                     groundtruth_class_labels, groundtruth_is_difficult_lists,
groundtruth_is_group_of_list):
"""Labels true/false positives of detections of an image across all classes. """Labels true/false positives of detections of an image across all classes.
Args: Args:
...@@ -174,6 +183,8 @@ class PerImageEvaluation(object): ...@@ -174,6 +183,8 @@ class PerImageEvaluation(object):
representing M class labels of object instances in ground truth representing M class labels of object instances in ground truth
groundtruth_is_difficult_lists: A boolean numpy array of length M denoting groundtruth_is_difficult_lists: A boolean numpy array of length M denoting
whether a ground truth box is a difficult instance or not whether a ground truth box is a difficult instance or not
groundtruth_is_group_of_list: A boolean numpy array of length M denoting
whether a ground truth box has group-of tag

    Returns:
      result_scores: A list of float numpy arrays. Each numpy array is of
...@@ -190,12 +201,15 @@ class PerImageEvaluation(object):
                                                ), :]
      groundtruth_is_difficult_list_at_ith_class = (
          groundtruth_is_difficult_lists[groundtruth_class_labels == i])
groundtruth_is_group_of_list_at_ith_class = (
groundtruth_is_group_of_list[groundtruth_class_labels == i])
      detected_boxes_at_ith_class = detected_boxes[(detected_class_labels == i
                                                   ), :]
      detected_scores_at_ith_class = detected_scores[detected_class_labels == i]
      scores, tp_fp_labels = self._compute_tp_fp_for_single_class(
          detected_boxes_at_ith_class, detected_scores_at_ith_class,
          gt_boxes_at_ith_class, groundtruth_is_difficult_list_at_ith_class,
groundtruth_is_group_of_list_at_ith_class)
      result_scores.append(scores)
      result_tp_fp_labels.append(tp_fp_labels)
    return result_scores, result_tp_fp_labels
...@@ -207,9 +221,9 @@ class PerImageEvaluation(object):
    return (detected_boxes[valid_indices, :], detected_scores[valid_indices],
            detected_class_labels[valid_indices])

  def _compute_tp_fp_for_single_class(
      self, detected_boxes, detected_scores, groundtruth_boxes,
      groundtruth_is_difficult_list, groundtruth_is_group_of_list):
    """Labels boxes detected with the same class from the same image as tp/fp.

    Args:
...@@ -220,10 +234,19 @@ class PerImageEvaluation(object):
      groundtruth_boxes: A numpy array of shape [M, 4] representing ground truth
        box coordinates
      groundtruth_is_difficult_list: A boolean numpy array of length M denoting
        whether a ground truth box is a difficult instance or not. If a
groundtruth box is difficult, every detection matching this box
is ignored.
groundtruth_is_group_of_list: A boolean numpy array of length M denoting
whether a ground truth box has group-of tag. If a groundtruth box
is group-of box, every detection matching this box is ignored.

    Returns:
      Two arrays of the same size, containing all boxes that were evaluated as
being true positives or false positives; if a box matched to a difficult
box or to a group-of box, it is ignored.
scores: A numpy array representing the detection scores.
      tp_fp_labels: a boolean numpy array indicating whether a detection is a
        true positive.
...@@ -239,22 +262,51 @@ class PerImageEvaluation(object):
    if groundtruth_boxes.size == 0:
      return scores, np.zeros(detected_boxlist.num_boxes(), dtype=bool)

    tp_fp_labels = np.zeros(detected_boxlist.num_boxes(), dtype=bool)
    is_matched_to_difficult_box = np.zeros(
        detected_boxlist.num_boxes(), dtype=bool)
is_matched_to_group_of_box = np.zeros(
detected_boxlist.num_boxes(), dtype=bool)
# The evaluation is done in two stages:
# 1. All detections are matched to non group-of boxes; true positives are
# determined and detections matched to difficult boxes are ignored.
# 2. Detections that are determined as false positives are matched against
# group-of boxes and ignored if matched.
# Tp-fp evaluation for non-group of boxes (if any).
gt_non_group_of_boxlist = np_box_list.BoxList(
groundtruth_boxes[~groundtruth_is_group_of_list, :])
if gt_non_group_of_boxlist.num_boxes() > 0:
groundtruth_nongroup_of_is_difficult_list = groundtruth_is_difficult_list[
~groundtruth_is_group_of_list]
iou = np_box_list_ops.iou(detected_boxlist, gt_non_group_of_boxlist)
max_overlap_gt_ids = np.argmax(iou, axis=1)
is_gt_box_detected = np.zeros(
gt_non_group_of_boxlist.num_boxes(), dtype=bool)
      for i in range(detected_boxlist.num_boxes()):
        gt_id = max_overlap_gt_ids[i]
        if iou[i, gt_id] >= self.matching_iou_threshold:
          if not groundtruth_nongroup_of_is_difficult_list[gt_id]:
            if not is_gt_box_detected[gt_id]:
              tp_fp_labels[i] = True
              is_gt_box_detected[gt_id] = True
          else:
            is_matched_to_difficult_box[i] = True

    # Tp-fp evaluation for group of boxes.
gt_group_of_boxlist = np_box_list.BoxList(
groundtruth_boxes[groundtruth_is_group_of_list, :])
if gt_group_of_boxlist.num_boxes() > 0:
ioa = np_box_list_ops.ioa(gt_group_of_boxlist, detected_boxlist)
max_overlap_group_of_gt = np.max(ioa, axis=0)
for i in range(detected_boxlist.num_boxes()):
if (not tp_fp_labels[i] and not is_matched_to_difficult_box[i] and
max_overlap_group_of_gt[i] >= self.matching_iou_threshold):
is_matched_to_group_of_box[i] = True
return scores[~is_matched_to_difficult_box
& ~is_matched_to_group_of_box], tp_fp_labels[
~is_matched_to_difficult_box
& ~is_matched_to_group_of_box]
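A minimal sketch (not part of the change) of the final filtering step above: detections flagged as matching a difficult box or a group-of box are dropped from both returned arrays, so they count as neither true nor false positives.

import numpy as np

scores = np.array([0.9, 0.8, 0.7])
tp_fp_labels = np.array([True, False, False])
is_matched_to_difficult_box = np.array([False, True, False])
is_matched_to_group_of_box = np.array([False, False, True])

# Keep only detections matched to neither kind of ignored groundtruth.
keep = ~is_matched_to_difficult_box & ~is_matched_to_group_of_box
# scores[keep] -> [0.9], tp_fp_labels[keep] -> [True]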
...@@ -41,9 +41,12 @@ class SingleClassTpFpWithDifficultBoxesTest(tf.test.TestCase):
  def test_match_to_not_difficult_box(self):
    groundtruth_groundtruth_is_difficult_list = np.array([False, True],
                                                         dtype=bool)
    groundtruth_groundtruth_is_group_of_list = np.array(
        [False, False], dtype=bool)
    scores, tp_fp_labels = self.eval._compute_tp_fp_for_single_class(
        self.detected_boxes, self.detected_scores, self.groundtruth_boxes,
        groundtruth_groundtruth_is_difficult_list,
        groundtruth_groundtruth_is_group_of_list)
    expected_scores = np.array([0.8, 0.6, 0.5], dtype=float)
    expected_tp_fp_labels = np.array([False, True, False], dtype=bool)
    self.assertTrue(np.allclose(expected_scores, scores))
...@@ -52,15 +55,64 @@ class SingleClassTpFpWithDifficultBoxesTest(tf.test.TestCase):
  def test_match_to_difficult_box(self):
    groundtruth_groundtruth_is_difficult_list = np.array([True, False],
                                                         dtype=bool)
    groundtruth_groundtruth_is_group_of_list = np.array(
        [False, False], dtype=bool)
    scores, tp_fp_labels = self.eval._compute_tp_fp_for_single_class(
        self.detected_boxes, self.detected_scores, self.groundtruth_boxes,
        groundtruth_groundtruth_is_difficult_list,
        groundtruth_groundtruth_is_group_of_list)
    expected_scores = np.array([0.8, 0.5], dtype=float)
    expected_tp_fp_labels = np.array([False, False], dtype=bool)
    self.assertTrue(np.allclose(expected_scores, scores))
    self.assertTrue(np.allclose(expected_tp_fp_labels, tp_fp_labels))
class SingleClassTpFpWithGroupOfBoxesTest(tf.test.TestCase):
def setUp(self):
num_groundtruth_classes = 1
matching_iou_threshold = 0.5
nms_iou_threshold = 1.0
nms_max_output_boxes = 10000
self.eval = per_image_evaluation.PerImageEvaluation(
num_groundtruth_classes, matching_iou_threshold, nms_iou_threshold,
nms_max_output_boxes)
self.detected_boxes = np.array(
[[0, 0, 1, 1], [0, 0, 2, 1], [0, 0, 3, 1]], dtype=float)
self.detected_scores = np.array([0.8, 0.6, 0.5], dtype=float)
self.groundtruth_boxes = np.array(
[[0, 0, 1, 1], [0, 0, 5, 5], [10, 10, 20, 20]], dtype=float)
def test_match_to_non_group_of_and_group_of_box(self):
groundtruth_groundtruth_is_difficult_list = np.array(
[False, False, False], dtype=bool)
groundtruth_groundtruth_is_group_of_list = np.array(
[False, True, True], dtype=bool)
expected_scores = np.array([0.8], dtype=float)
expected_tp_fp_labels = np.array([True], dtype=bool)
scores, tp_fp_labels = self.eval._compute_tp_fp_for_single_class(
self.detected_boxes, self.detected_scores, self.groundtruth_boxes,
groundtruth_groundtruth_is_difficult_list,
groundtruth_groundtruth_is_group_of_list)
self.assertTrue(np.allclose(expected_scores, scores))
self.assertTrue(np.allclose(expected_tp_fp_labels, tp_fp_labels))
def test_match_two_to_group_of_box(self):
groundtruth_groundtruth_is_difficult_list = np.array(
[False, False, False], dtype=bool)
groundtruth_groundtruth_is_group_of_list = np.array(
[True, False, True], dtype=bool)
expected_scores = np.array([0.5], dtype=float)
expected_tp_fp_labels = np.array([False], dtype=bool)
scores, tp_fp_labels = self.eval._compute_tp_fp_for_single_class(
self.detected_boxes, self.detected_scores, self.groundtruth_boxes,
groundtruth_groundtruth_is_difficult_list,
groundtruth_groundtruth_is_group_of_list)
self.assertTrue(np.allclose(expected_scores, scores))
self.assertTrue(np.allclose(expected_tp_fp_labels, tp_fp_labels))
class SingleClassTpFpNoDifficultBoxesTest(tf.test.TestCase):

  def setUp(self):
...@@ -84,9 +136,11 @@ class SingleClassTpFpNoDifficultBoxesTest(tf.test.TestCase):
  def test_no_true_positives(self):
    groundtruth_boxes = np.array([[100, 100, 105, 105]], dtype=float)
    groundtruth_groundtruth_is_difficult_list = np.zeros(1, dtype=bool)
    groundtruth_groundtruth_is_group_of_list = np.array([False], dtype=bool)
    scores, tp_fp_labels = self.eval1._compute_tp_fp_for_single_class(
        self.detected_boxes, self.detected_scores, groundtruth_boxes,
        groundtruth_groundtruth_is_difficult_list,
        groundtruth_groundtruth_is_group_of_list)
    expected_scores = np.array([0.8, 0.6, 0.5], dtype=float)
    expected_tp_fp_labels = np.array([False, False, False], dtype=bool)
    self.assertTrue(np.allclose(expected_scores, scores))
...@@ -95,9 +149,11 @@ class SingleClassTpFpNoDifficultBoxesTest(tf.test.TestCase):
  def test_one_true_positives_with_large_iou_threshold(self):
    groundtruth_boxes = np.array([[0, 0, 1, 1]], dtype=float)
    groundtruth_groundtruth_is_difficult_list = np.zeros(1, dtype=bool)
    groundtruth_groundtruth_is_group_of_list = np.array([False], dtype=bool)
    scores, tp_fp_labels = self.eval1._compute_tp_fp_for_single_class(
        self.detected_boxes, self.detected_scores, groundtruth_boxes,
        groundtruth_groundtruth_is_difficult_list,
        groundtruth_groundtruth_is_group_of_list)
    expected_scores = np.array([0.8, 0.6, 0.5], dtype=float)
    expected_tp_fp_labels = np.array([False, True, False], dtype=bool)
    self.assertTrue(np.allclose(expected_scores, scores))
...@@ -106,9 +162,11 @@ class SingleClassTpFpNoDifficultBoxesTest(tf.test.TestCase):
  def test_one_true_positives_with_very_small_iou_threshold(self):
    groundtruth_boxes = np.array([[0, 0, 1, 1]], dtype=float)
    groundtruth_groundtruth_is_difficult_list = np.zeros(1, dtype=bool)
    groundtruth_groundtruth_is_group_of_list = np.array([False], dtype=bool)
    scores, tp_fp_labels = self.eval2._compute_tp_fp_for_single_class(
        self.detected_boxes, self.detected_scores, groundtruth_boxes,
        groundtruth_groundtruth_is_difficult_list,
        groundtruth_groundtruth_is_group_of_list)
    expected_scores = np.array([0.8, 0.6, 0.5], dtype=float)
    expected_tp_fp_labels = np.array([True, False, False], dtype=bool)
    self.assertTrue(np.allclose(expected_scores, scores))
...@@ -117,9 +175,12 @@ class SingleClassTpFpNoDifficultBoxesTest(tf.test.TestCase):
  def test_two_true_positives_with_large_iou_threshold(self):
    groundtruth_boxes = np.array([[0, 0, 1, 1], [0, 0, 3.5, 3.5]], dtype=float)
    groundtruth_groundtruth_is_difficult_list = np.zeros(2, dtype=bool)
    groundtruth_groundtruth_is_group_of_list = np.array(
        [False, False], dtype=bool)
    scores, tp_fp_labels = self.eval1._compute_tp_fp_for_single_class(
        self.detected_boxes, self.detected_scores, groundtruth_boxes,
        groundtruth_groundtruth_is_difficult_list,
        groundtruth_groundtruth_is_group_of_list)
    expected_scores = np.array([0.8, 0.6, 0.5], dtype=float)
    expected_tp_fp_labels = np.array([False, True, True], dtype=bool)
    self.assertTrue(np.allclose(expected_scores, scores))
...@@ -145,10 +206,13 @@ class MultiClassesTpFpTest(tf.test.TestCase):
    groundtruth_boxes = np.array([[0, 0, 1, 1], [0, 0, 3.5, 3.5]], dtype=float)
    groundtruth_class_labels = np.array([0, 2], dtype=int)
    groundtruth_groundtruth_is_difficult_list = np.zeros(2, dtype=float)
    groundtruth_groundtruth_is_group_of_list = np.array(
        [False, False], dtype=bool)
    scores, tp_fp_labels, _ = eval1.compute_object_detection_metrics(
        detected_boxes, detected_scores, detected_class_labels,
        groundtruth_boxes, groundtruth_class_labels,
        groundtruth_groundtruth_is_difficult_list,
        groundtruth_groundtruth_is_group_of_list)
    expected_scores = [np.array([0.8], dtype=float)] * 3
    expected_tp_fp_labels = [np.array([True]), np.array([False]), np.array([True
                                                                           ])]
...
...@@ -20,6 +20,8 @@ The functions do not return a value, instead they modify the image itself.
"""
import collections
import functools
import matplotlib.pyplot as plt
import numpy as np
import PIL.Image as Image
import PIL.ImageColor as ImageColor
...@@ -132,6 +134,8 @@ def draw_bounding_box_on_image(image,
  Each string in display_str_list is displayed on a separate line above the
  bounding box in black text on a rectangle filled with the input 'color'.
  If the top of the bounding box extends to the edge of the image, the strings
  are displayed below the bounding box.

  Args:
    image: a PIL.Image object.
...@@ -161,7 +165,17 @@ def draw_bounding_box_on_image(image,
  except IOError:
    font = ImageFont.load_default()

  # If the total height of the display strings added to the top of the bounding
  # box exceeds the top of the image, stack the strings below the bounding box
  # instead of above.
  display_str_heights = [font.getsize(ds)[1] for ds in display_str_list]
  # Each display_str has a top and bottom margin of 0.05x.
  total_display_str_height = (1 + 2 * 0.05) * sum(display_str_heights)

  if top > total_display_str_height:
    text_bottom = top
  else:
    text_bottom = bottom + total_display_str_height
  # Reverse list and print from bottom to top.
  for display_str in display_str_list[::-1]:
    text_width, text_height = font.getsize(display_str)
...@@ -241,6 +255,53 @@ def draw_bounding_boxes_on_image(image,
                       boxes[i, 3], color, thickness, display_str_list)
def draw_bounding_boxes_on_image_tensors(images,
boxes,
classes,
scores,
category_index,
max_boxes_to_draw=20,
min_score_thresh=0.2):
"""Draws bounding boxes on batch of image tensors.
Args:
images: A 4D uint8 image tensor of shape [N, H, W, C].
boxes: [N, max_detections, 4] float32 tensor of detection boxes.
classes: [N, max_detections] int tensor of detection classes. Note that
classes are 1-indexed.
scores: [N, max_detections] float32 tensor of detection scores.
category_index: a dict that maps integer ids to category dicts. e.g.
{1: {1: 'dog'}, 2: {2: 'cat'}, ...}
max_boxes_to_draw: Maximum number of boxes to draw on an image. Default 20.
min_score_thresh: Minimum score threshold for visualization. Default 0.2.
Returns:
4D image tensor of type uint8, with boxes drawn on top.
"""
visualize_boxes_fn = functools.partial(
visualize_boxes_and_labels_on_image_array,
category_index=category_index,
instance_masks=None,
keypoints=None,
use_normalized_coordinates=True,
max_boxes_to_draw=max_boxes_to_draw,
min_score_thresh=min_score_thresh,
agnostic_mode=False,
line_thickness=4)
def draw_boxes((image, boxes, classes, scores)):
"""Draws boxes on image."""
image_with_boxes = tf.py_func(visualize_boxes_fn,
[image, boxes, classes, scores], tf.uint8)
return image_with_boxes
images = tf.map_fn(
draw_boxes, (images, boxes, classes, scores),
dtype=tf.uint8,
back_prop=False)
return images
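A hedged usage sketch (not part of this change): the returned uint8 batch can be written straight into a TF 1.x image summary. The tensors `eval_images`, `det_boxes`, `det_classes`, `det_scores` and the `category_index` dict below are hypothetical inputs supplied by the caller.

# Hypothetical wiring into an image summary; all input names are placeholders.
images_with_boxes = draw_bounding_boxes_on_image_tensors(
    eval_images, det_boxes, det_classes, det_scores, category_index)
tf.summary.image('detections_with_boxes', images_with_boxes, max_outputs=4)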
def draw_keypoints_on_image_array(image,
                                  keypoints,
                                  color='red',
...@@ -295,8 +356,8 @@ def draw_mask_on_image_array(image, mask, color='red', alpha=0.7):
  Args:
    image: uint8 numpy array with shape (img_height, img_height, 3)
    mask: a uint8 numpy array of shape (img_height, img_height) with
      values between either 0 or 1.
    color: color to draw the keypoints with. Default is red.
    alpha: transparency value between 0 and 1. (default: 0.7)
...@@ -305,9 +366,9 @@ def draw_mask_on_image_array(image, mask, color='red', alpha=0.7):
  """
  if image.dtype != np.uint8:
    raise ValueError('`image` not of type np.uint8')
  if mask.dtype != np.uint8:
    raise ValueError('`mask` not of type np.uint8')
  if np.any(np.logical_and(mask != 1, mask != 0)):
    raise ValueError('`mask` elements should be in [0, 1]')
  rgb = ImageColor.getrgb(color)
  pil_image = Image.fromarray(image)
...@@ -336,13 +397,14 @@ def visualize_boxes_and_labels_on_image_array(image,
  This function groups boxes that correspond to the same location
  and creates a display string for each detection and overlays these
  on the image. Note that this function modifies the image in place, and returns
  that same image.

  Args:
    image: uint8 numpy array with shape (img_height, img_width, 3)
    boxes: a numpy array of shape [N, 4]
    classes: a numpy array of shape [N]. Note that class indices are 1-based,
      and match the keys in the label map.
    scores: a numpy array of shape [N] or None. If scores=None, then
      this function assumes that the boxes to be plotted are groundtruth
      boxes and plot all boxes as black with no classes or scores.
...@@ -361,6 +423,9 @@ def visualize_boxes_and_labels_on_image_array(image,
      class-agnostic mode or not. This mode will display scores but ignore
      classes.
    line_thickness: integer (default: 4) controlling line width of the boxes.

  Returns:
    uint8 numpy array with shape (img_height, img_width, 3) with overlaid boxes.
  """
  # Create a display string (and color) for every box location, group any boxes
  # that correspond to the same location.
...@@ -423,3 +488,36 @@ def visualize_boxes_and_labels_on_image_array(image,
          color=color,
          radius=line_thickness / 2,
          use_normalized_coordinates=use_normalized_coordinates)
return image
def add_cdf_image_summary(values, name):
"""Adds a tf.summary.image for a CDF plot of the values.
Normalizes `values` such that they sum to 1, plots the cumulative distribution
function and creates a tf image summary.
Args:
values: a 1-D float32 tensor containing the values.
name: name for the image summary.
"""
def cdf_plot(values):
"""Numpy function to plot CDF."""
normalized_values = values / np.sum(values)
sorted_values = np.sort(normalized_values)
cumulative_values = np.cumsum(sorted_values)
fraction_of_examples = (np.arange(cumulative_values.size, dtype=np.float32)
/ cumulative_values.size)
fig = plt.figure(frameon=False)
ax = fig.add_subplot('111')
ax.plot(fraction_of_examples, cumulative_values)
ax.set_ylabel('cumulative normalized values')
ax.set_xlabel('fraction of examples')
fig.canvas.draw()
width, height = fig.get_size_inches() * fig.get_dpi()
image = np.fromstring(fig.canvas.tostring_rgb(), dtype='uint8').reshape(
1, height, width, 3)
return image
cdf_plot = tf.py_func(cdf_plot, [values], tf.uint8)
tf.summary.image(name, cdf_plot)
...@@ -20,6 +20,7 @@ https://drive.google.com/a/google.com/file/d/0B5HnKS_hMsNARERpU3MtU3I5RFE/view?u
"""
import os
import numpy as np
import PIL.Image as Image
...@@ -27,6 +28,8 @@ import tensorflow as tf
from object_detection.utils import visualization_utils

_TESTDATA_PATH = 'object_detection/test_images'


class VisualizationUtilsTest(tf.test.TestCase):
...@@ -110,6 +113,42 @@ class VisualizationUtilsTest(tf.test.TestCase):
    self.assertEqual(width_original, width_final)
    self.assertEqual(height_original, height_final)
def test_draw_bounding_boxes_on_image_tensors(self):
"""Tests that bounding box utility produces reasonable results."""
category_index = {1: {'id': 1, 'name': 'dog'}, 2: {'id': 2, 'name': 'cat'}}
fname = os.path.join(_TESTDATA_PATH, 'image1.jpg')
image_np = np.array(Image.open(fname))
images_np = np.stack((image_np, image_np), axis=0)
with tf.Graph().as_default():
images_tensor = tf.constant(value=images_np, dtype=tf.uint8)
boxes = tf.constant([[[0.4, 0.25, 0.75, 0.75], [0.5, 0.3, 0.6, 0.9]],
[[0.25, 0.25, 0.75, 0.75], [0.1, 0.3, 0.6, 1.0]]])
classes = tf.constant([[1, 1], [1, 2]], dtype=tf.int64)
scores = tf.constant([[0.8, 0.1], [0.6, 0.5]])
images_with_boxes = (
visualization_utils.draw_bounding_boxes_on_image_tensors(
images_tensor,
boxes,
classes,
scores,
category_index,
min_score_thresh=0.2))
with self.test_session() as sess:
sess.run(tf.global_variables_initializer())
# Write output images for visualization.
images_with_boxes_np = sess.run(images_with_boxes)
self.assertEqual(images_np.shape, images_with_boxes_np.shape)
for i in range(images_with_boxes_np.shape[0]):
img_name = 'image_' + str(i) + '.png'
output_file = os.path.join(self.get_temp_dir(), img_name)
print 'Writing output image %d to %s' % (i, output_file)
image_pil = Image.fromarray(images_with_boxes_np[i, ...])
image_pil.save(output_file)
  def test_draw_keypoints_on_image(self):
    test_image = self.create_colorful_test_image()
    test_image = Image.fromarray(test_image)
...@@ -138,14 +177,21 @@ class VisualizationUtilsTest(tf.test.TestCase):
  def test_draw_mask_on_image_array(self):
    test_image = np.asarray([[[0, 0, 0], [0, 0, 0]],
                             [[0, 0, 0], [0, 0, 0]]], dtype=np.uint8)
    mask = np.asarray([[0, 1],
                       [1, 1]], dtype=np.uint8)
    expected_result = np.asarray([[[0, 0, 0], [0, 0, 127]],
                                  [[0, 0, 127], [0, 0, 127]]], dtype=np.uint8)
    visualization_utils.draw_mask_on_image_array(test_image, mask,
                                                 color='Blue', alpha=.5)
    self.assertAllEqual(test_image, expected_result)
def test_add_cdf_image_summary(self):
values = [0.1, 0.2, 0.3, 0.4, 0.42, 0.44, 0.46, 0.48, 0.50]
visualization_utils.add_cdf_image_summary(values, 'PositiveAnchorLoss')
cdf_image_summary = tf.get_collection(key=tf.GraphKeys.SUMMARIES)[0]
with self.test_session():
cdf_image_summary.eval()
if __name__ == '__main__':
  tf.test.main()