Commit a4944a57 authored by derekjchow's avatar derekjchow Committed by Sergio Guadarrama
Browse files

Add Tensorflow Object Detection API. (#1561)

For details see our paper:
"Speed/accuracy trade-offs for modern convolutional object detectors."
Huang J, Rathod V, Sun C, Zhu M, Korattikara A, Fathi A, Fischer I,
Wojna Z, Song Y, Guadarrama S, Murphy K, CVPR 2017
https://arxiv.org/abs/1611.10012
parent 60c3ed2e
# SSD with Inception v2 configured for Oxford-IIT Pets Dataset.
# Users should configure the fine_tune_checkpoint field in the train config as
# well as the label_map_path and input_path fields in the train_input_reader and
# eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that
# should be configured.
model {
ssd {
num_classes: 37
box_coder {
faster_rcnn_box_coder {
y_scale: 10.0
x_scale: 10.0
height_scale: 5.0
width_scale: 5.0
}
}
matcher {
argmax_matcher {
matched_threshold: 0.5
unmatched_threshold: 0.5
ignore_thresholds: false
negatives_lower_than_unmatched: true
force_match_for_each_row: true
}
}
similarity_calculator {
iou_similarity {
}
}
anchor_generator {
ssd_anchor_generator {
num_layers: 6
min_scale: 0.2
max_scale: 0.95
aspect_ratios: 1.0
aspect_ratios: 2.0
aspect_ratios: 0.5
aspect_ratios: 3.0
aspect_ratios: 0.3333
reduce_boxes_in_lowest_layer: true
}
}
image_resizer {
fixed_shape_resizer {
height: 300
width: 300
}
}
box_predictor {
convolutional_box_predictor {
min_depth: 0
max_depth: 0
num_layers_before_predictor: 0
use_dropout: false
dropout_keep_probability: 0.8
kernel_size: 3
box_code_size: 4
apply_sigmoid_to_scores: false
conv_hyperparams {
activation: RELU_6,
regularizer {
l2_regularizer {
weight: 0.00004
}
}
initializer {
truncated_normal_initializer {
stddev: 0.03
mean: 0.0
}
}
}
}
}
feature_extractor {
type: 'ssd_inception_v2'
min_depth: 16
depth_multiplier: 1.0
conv_hyperparams {
activation: RELU_6,
regularizer {
l2_regularizer {
weight: 0.00004
}
}
initializer {
truncated_normal_initializer {
stddev: 0.03
mean: 0.0
}
}
batch_norm {
train: true,
scale: true,
center: true,
decay: 0.9997,
epsilon: 0.001,
}
}
}
loss {
classification_loss {
weighted_sigmoid {
anchorwise_output: true
}
}
localization_loss {
weighted_smooth_l1 {
anchorwise_output: true
}
}
hard_example_miner {
num_hard_examples: 3000
iou_threshold: 0.99
loss_type: CLASSIFICATION
max_negatives_per_positive: 3
min_negatives_per_image: 0
}
classification_weight: 1.0
localization_weight: 1.0
}
normalize_loss_by_num_matches: true
post_processing {
batch_non_max_suppression {
score_threshold: 1e-8
iou_threshold: 0.6
max_detections_per_class: 100
max_total_detections: 100
}
score_converter: SIGMOID
}
}
}
train_config: {
batch_size: 32
optimizer {
rms_prop_optimizer: {
learning_rate: {
exponential_decay_learning_rate {
initial_learning_rate: 0.004
decay_steps: 800720
decay_factor: 0.95
}
}
momentum_optimizer_value: 0.9
decay: 0.9
epsilon: 1.0
}
}
fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/model.ckpt"
from_detection_checkpoint: true
data_augmentation_options {
random_horizontal_flip {
}
}
data_augmentation_options {
ssd_random_crop {
}
}
}
train_input_reader: {
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/pet_train.record"
}
label_map_path: "PATH_TO_BE_CONFIGURED/pet_label_map.pbtxt"
}
eval_config: {
num_examples: 2000
}
eval_input_reader: {
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/pet_val.record"
}
label_map_path: "PATH_TO_BE_CONFIGURED/pet_label_map.pbtxt"
}
# SSD with Mobilenet v1, configured for Oxford-IIT Pets Dataset.
# Users should configure the fine_tune_checkpoint field in the train config as
# well as the label_map_path and input_path fields in the train_input_reader and
# eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that
# should be configured.
model {
ssd {
num_classes: 37
box_coder {
faster_rcnn_box_coder {
y_scale: 10.0
x_scale: 10.0
height_scale: 5.0
width_scale: 5.0
}
}
matcher {
argmax_matcher {
matched_threshold: 0.5
unmatched_threshold: 0.5
ignore_thresholds: false
negatives_lower_than_unmatched: true
force_match_for_each_row: true
}
}
similarity_calculator {
iou_similarity {
}
}
anchor_generator {
ssd_anchor_generator {
num_layers: 6
min_scale: 0.2
max_scale: 0.95
aspect_ratios: 1.0
aspect_ratios: 2.0
aspect_ratios: 0.5
aspect_ratios: 3.0
aspect_ratios: 0.3333
}
}
image_resizer {
fixed_shape_resizer {
height: 300
width: 300
}
}
box_predictor {
convolutional_box_predictor {
min_depth: 0
max_depth: 0
num_layers_before_predictor: 0
use_dropout: false
dropout_keep_probability: 0.8
kernel_size: 1
box_code_size: 4
apply_sigmoid_to_scores: false
conv_hyperparams {
activation: RELU_6,
regularizer {
l2_regularizer {
weight: 0.00004
}
}
initializer {
truncated_normal_initializer {
stddev: 0.03
mean: 0.0
}
}
batch_norm {
train: true,
scale: true,
center: true,
decay: 0.9997,
epsilon: 0.001,
}
}
}
}
feature_extractor {
type: 'ssd_mobilenet_v1'
min_depth: 16
depth_multiplier: 1.0
conv_hyperparams {
activation: RELU_6,
regularizer {
l2_regularizer {
weight: 0.00004
}
}
initializer {
truncated_normal_initializer {
stddev: 0.03
mean: 0.0
}
}
batch_norm {
train: true,
scale: true,
center: true,
decay: 0.9997,
epsilon: 0.001,
}
}
}
loss {
classification_loss {
weighted_sigmoid {
anchorwise_output: true
}
}
localization_loss {
weighted_smooth_l1 {
anchorwise_output: true
}
}
hard_example_miner {
num_hard_examples: 3000
iou_threshold: 0.99
loss_type: CLASSIFICATION
max_negatives_per_positive: 3
min_negatives_per_image: 0
}
classification_weight: 1.0
localization_weight: 1.0
}
normalize_loss_by_num_matches: true
post_processing {
batch_non_max_suppression {
score_threshold: 1e-8
iou_threshold: 0.6
max_detections_per_class: 100
max_total_detections: 100
}
score_converter: SIGMOID
}
}
}
train_config: {
batch_size: 32
optimizer {
rms_prop_optimizer: {
learning_rate: {
exponential_decay_learning_rate {
initial_learning_rate: 0.004
decay_steps: 800720
decay_factor: 0.95
}
}
momentum_optimizer_value: 0.9
decay: 0.9
epsilon: 1.0
}
}
fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/model.ckpt"
from_detection_checkpoint: true
data_augmentation_options {
random_horizontal_flip {
}
}
data_augmentation_options {
ssd_random_crop {
}
}
}
train_input_reader: {
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/pet_train.record"
}
label_map_path: "PATH_TO_BE_CONFIGURED/pet_label_map.pbtxt"
}
eval_config: {
num_examples: 2000
}
eval_input_reader: {
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/pet_val.record"
}
label_map_path: "PATH_TO_BE_CONFIGURED/pet_label_map.pbtxt"
}
Image provenance:
image1.jpg: https://commons.wikimedia.org/wiki/File:Baegle_dwa.jpg
image2.jpg: Michael Miley,
https://www.flickr.com/photos/mike_miley/4678754542/in/photolist-88rQHL-88oBVp-88oC2B-88rS6J-88rSqm-88oBLv-88oBC4
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Training executable for detection models.
This executable is used to train DetectionModels. There are two ways of
configuring the training job:
1) A single pipeline_pb2.TrainEvalPipelineConfig configuration file
can be specified by --pipeline_config_path.
Example usage:
./train \
--logtostderr \
--train_dir=path/to/train_dir \
--pipeline_config_path=pipeline_config.pbtxt
2) Three configuration files can be provided: a model_pb2.DetectionModel
configuration file to define what type of DetectionModel is being trained, an
input_reader_pb2.InputReader file to specify what training data will be used and
a train_pb2.TrainConfig file to configure training parameters.
Example usage:
./train \
--logtostderr \
--train_dir=path/to/train_dir \
--model_config_path=model_config.pbtxt \
--train_config_path=train_config.pbtxt \
--input_config_path=train_input_config.pbtxt
"""
import functools
import json
import os
import tensorflow as tf
from google.protobuf import text_format
from object_detection import trainer
from object_detection.builders import input_reader_builder
from object_detection.builders import model_builder
from object_detection.protos import input_reader_pb2
from object_detection.protos import model_pb2
from object_detection.protos import pipeline_pb2
from object_detection.protos import train_pb2
tf.logging.set_verbosity(tf.logging.INFO)
flags = tf.app.flags
flags.DEFINE_string('master', '', 'BNS name of the TensorFlow master to use.')
flags.DEFINE_integer('task', 0, 'task id')
flags.DEFINE_integer('num_clones', 1, 'Number of clones to deploy per worker.')
flags.DEFINE_boolean('clone_on_cpu', False,
'Force clones to be deployed on CPU. Note that even if '
'set to False (allowing ops to run on gpu), some ops may '
'still be run on the CPU if they have no GPU kernel.')
flags.DEFINE_integer('worker_replicas', 1, 'Number of worker+trainer '
'replicas.')
flags.DEFINE_integer('ps_tasks', 0,
'Number of parameter server tasks. If None, does not use '
'a parameter server.')
flags.DEFINE_string('train_dir', '',
'Directory to save the checkpoints and training summaries.')
flags.DEFINE_string('pipeline_config_path', '',
'Path to a pipeline_pb2.TrainEvalPipelineConfig config '
'file. If provided, other configs are ignored')
flags.DEFINE_string('train_config_path', '',
'Path to a train_pb2.TrainConfig config file.')
flags.DEFINE_string('input_config_path', '',
'Path to an input_reader_pb2.InputReader config file.')
flags.DEFINE_string('model_config_path', '',
'Path to a model_pb2.DetectionModel config file.')
FLAGS = flags.FLAGS
def get_configs_from_pipeline_file():
"""Reads training configuration from a pipeline_pb2.TrainEvalPipelineConfig.
Reads training config from file specified by pipeline_config_path flag.
Returns:
model_config: model_pb2.DetectionModel
train_config: train_pb2.TrainConfig
input_config: input_reader_pb2.InputReader
"""
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
with tf.gfile.GFile(FLAGS.pipeline_config_path, 'r') as f:
text_format.Merge(f.read(), pipeline_config)
model_config = pipeline_config.model
train_config = pipeline_config.train_config
input_config = pipeline_config.train_input_reader
return model_config, train_config, input_config
def get_configs_from_multiple_files():
"""Reads training configuration from multiple config files.
Reads the training config from the following files:
model_config: Read from --model_config_path
train_config: Read from --train_config_path
input_config: Read from --input_config_path
Returns:
model_config: model_pb2.DetectionModel
train_config: train_pb2.TrainConfig
input_config: input_reader_pb2.InputReader
"""
train_config = train_pb2.TrainConfig()
with tf.gfile.GFile(FLAGS.train_config_path, 'r') as f:
text_format.Merge(f.read(), train_config)
model_config = model_pb2.DetectionModel()
with tf.gfile.GFile(FLAGS.model_config_path, 'r') as f:
text_format.Merge(f.read(), model_config)
input_config = input_reader_pb2.InputReader()
with tf.gfile.GFile(FLAGS.input_config_path, 'r') as f:
text_format.Merge(f.read(), input_config)
return model_config, train_config, input_config
def main(_):
assert FLAGS.train_dir, '`train_dir` is missing.'
if FLAGS.pipeline_config_path:
model_config, train_config, input_config = get_configs_from_pipeline_file()
else:
model_config, train_config, input_config = get_configs_from_multiple_files()
model_fn = functools.partial(
model_builder.build,
model_config=model_config,
is_training=True)
create_input_dict_fn = functools.partial(
input_reader_builder.build, input_config)
env = json.loads(os.environ.get('TF_CONFIG', '{}'))
cluster_data = env.get('cluster', None)
cluster = tf.train.ClusterSpec(cluster_data) if cluster_data else None
task_data = env.get('task', None) or {'type': 'master', 'index': 0}
task_info = type('TaskSpec', (object,), task_data)
# Parameters for a single worker.
ps_tasks = 0
worker_replicas = 1
worker_job_name = 'lonely_worker'
task = 0
is_chief = True
master = ''
if cluster_data and 'worker' in cluster_data:
# Number of total worker replicas include "worker"s and the "master".
worker_replicas = len(cluster_data['worker']) + 1
if cluster_data and 'ps' in cluster_data:
ps_tasks = len(cluster_data['ps'])
if worker_replicas > 1 and ps_tasks < 1:
raise ValueError('At least 1 ps task is needed for distributed training.')
if worker_replicas >= 1 and ps_tasks > 0:
# Set up distributed training.
server = tf.train.Server(tf.train.ClusterSpec(cluster), protocol='grpc',
job_name=task_info.type,
task_index=task_info.index)
if task_info.type == 'ps':
server.join()
return
worker_job_name = '%s/task:%d' % (task_info.type, task_info.index)
task = task_info.index
is_chief = (task_info.type == 'master')
master = server.target
trainer.train(create_input_dict_fn, model_fn, train_config, master, task,
FLAGS.num_clones, worker_replicas, FLAGS.clone_on_cpu, ps_tasks,
worker_job_name, is_chief, FLAGS.train_dir)
if __name__ == '__main__':
tf.app.run()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Detection model trainer.
This file provides a generic training method that can be used to train a
DetectionModel.
"""
import functools
import tensorflow as tf
from object_detection.builders import optimizer_builder
from object_detection.builders import preprocessor_builder
from object_detection.core import batcher
from object_detection.core import preprocessor
from object_detection.core import standard_fields as fields
from object_detection.utils import ops as util_ops
from object_detection.utils import variables_helper
from deployment import model_deploy
slim = tf.contrib.slim
def _create_input_queue(batch_size_per_clone, create_tensor_dict_fn,
batch_queue_capacity, num_batch_queue_threads,
prefetch_queue_capacity, data_augmentation_options):
"""Sets up reader, prefetcher and returns input queue.
Args:
batch_size_per_clone: batch size to use per clone.
create_tensor_dict_fn: function to create tensor dictionary.
batch_queue_capacity: maximum number of elements to store within a queue.
num_batch_queue_threads: number of threads to use for batching.
prefetch_queue_capacity: maximum capacity of the queue used to prefetch
assembled batches.
data_augmentation_options: a list of tuples, where each tuple contains a
data augmentation function and a dictionary containing arguments and their
values (see preprocessor.py).
Returns:
input queue: a batcher.BatchQueue object holding enqueued tensor_dicts
(which hold images, boxes and targets). To get a batch of tensor_dicts,
call input_queue.Dequeue().
"""
tensor_dict = create_tensor_dict_fn()
tensor_dict[fields.InputDataFields.image] = tf.expand_dims(
tensor_dict[fields.InputDataFields.image], 0)
images = tensor_dict[fields.InputDataFields.image]
float_images = tf.to_float(images)
tensor_dict[fields.InputDataFields.image] = float_images
if data_augmentation_options:
tensor_dict = preprocessor.preprocess(tensor_dict,
data_augmentation_options)
input_queue = batcher.BatchQueue(
tensor_dict,
batch_size=batch_size_per_clone,
batch_queue_capacity=batch_queue_capacity,
num_batch_queue_threads=num_batch_queue_threads,
prefetch_queue_capacity=prefetch_queue_capacity)
return input_queue
def _get_inputs(input_queue, num_classes):
"""Dequeue batch and construct inputs to object detection model.
Args:
input_queue: BatchQueue object holding enqueued tensor_dicts.
num_classes: Number of classes.
Returns:
images: a list of 3-D float tensor of images.
locations_list: a list of tensors of shape [num_boxes, 4]
containing the corners of the groundtruth boxes.
classes_list: a list of padded one-hot tensors containing target classes.
masks_list: a list of 3-D float tensors of shape [num_boxes, image_height,
image_width] containing instance masks for objects if present in the
input_queue. Else returns None.
"""
read_data_list = input_queue.dequeue()
label_id_offset = 1
def extract_images_and_targets(read_data):
image = read_data[fields.InputDataFields.image]
location_gt = read_data[fields.InputDataFields.groundtruth_boxes]
classes_gt = tf.cast(read_data[fields.InputDataFields.groundtruth_classes],
tf.int32)
classes_gt -= label_id_offset
classes_gt = util_ops.padded_one_hot_encoding(indices=classes_gt,
depth=num_classes, left_pad=0)
masks_gt = read_data.get(fields.InputDataFields.groundtruth_instance_masks)
return image, location_gt, classes_gt, masks_gt
return zip(*map(extract_images_and_targets, read_data_list))
def _create_losses(input_queue, create_model_fn):
"""Creates loss function for a DetectionModel.
Args:
input_queue: BatchQueue object holding enqueued tensor_dicts.
create_model_fn: A function to create the DetectionModel.
"""
detection_model = create_model_fn()
(images, groundtruth_boxes_list, groundtruth_classes_list,
groundtruth_masks_list
) = _get_inputs(input_queue, detection_model.num_classes)
images = [detection_model.preprocess(image) for image in images]
images = tf.concat(images, 0)
if any(mask is None for mask in groundtruth_masks_list):
groundtruth_masks_list = None
detection_model.provide_groundtruth(groundtruth_boxes_list,
groundtruth_classes_list,
groundtruth_masks_list)
prediction_dict = detection_model.predict(images)
losses_dict = detection_model.loss(prediction_dict)
for loss_tensor in losses_dict.values():
tf.losses.add_loss(loss_tensor)
def train(create_tensor_dict_fn, create_model_fn, train_config, master, task,
num_clones, worker_replicas, clone_on_cpu, ps_tasks, worker_job_name,
is_chief, train_dir):
"""Training function for detection models.
Args:
create_tensor_dict_fn: a function to create a tensor input dictionary.
create_model_fn: a function that creates a DetectionModel and generates
losses.
train_config: a train_pb2.TrainConfig protobuf.
master: BNS name of the TensorFlow master to use.
task: The task id of this training instance.
num_clones: The number of clones to run per machine.
worker_replicas: The number of work replicas to train with.
clone_on_cpu: True if clones should be forced to run on CPU.
ps_tasks: Number of parameter server tasks.
worker_job_name: Name of the worker job.
is_chief: Whether this replica is the chief replica.
train_dir: Directory to write checkpoints and training summaries to.
"""
detection_model = create_model_fn()
data_augmentation_options = [
preprocessor_builder.build(step)
for step in train_config.data_augmentation_options]
with tf.Graph().as_default():
# Build a configuration specifying multi-GPU and multi-replicas.
deploy_config = model_deploy.DeploymentConfig(
num_clones=num_clones,
clone_on_cpu=clone_on_cpu,
replica_id=task,
num_replicas=worker_replicas,
num_ps_tasks=ps_tasks,
worker_job_name=worker_job_name)
# Place the global step on the device storing the variables.
with tf.device(deploy_config.variables_device()):
global_step = slim.create_global_step()
with tf.device(deploy_config.inputs_device()):
input_queue = _create_input_queue(train_config.batch_size // num_clones,
create_tensor_dict_fn,
train_config.batch_queue_capacity,
train_config.num_batch_queue_threads,
train_config.prefetch_queue_capacity,
data_augmentation_options)
# Gather initial summaries.
summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))
global_summaries = set([])
model_fn = functools.partial(_create_losses,
create_model_fn=create_model_fn)
clones = model_deploy.create_clones(deploy_config, model_fn, [input_queue])
first_clone_scope = clones[0].scope
# Gather update_ops from the first clone. These contain, for example,
# the updates for the batch_norm variables created by model_fn.
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, first_clone_scope)
with tf.device(deploy_config.optimizer_device()):
training_optimizer = optimizer_builder.build(train_config.optimizer,
global_summaries)
sync_optimizer = None
if train_config.sync_replicas:
training_optimizer = tf.SyncReplicasOptimizer(
training_optimizer,
replicas_to_aggregate=train_config.replicas_to_aggregate,
total_num_replicas=train_config.worker_replicas)
sync_optimizer = training_optimizer
# Create ops required to initialize the model from a given checkpoint.
init_fn = None
if train_config.fine_tune_checkpoint:
init_fn = detection_model.restore_fn(
train_config.fine_tune_checkpoint,
from_detection_checkpoint=train_config.from_detection_checkpoint)
with tf.device(deploy_config.optimizer_device()):
total_loss, grads_and_vars = model_deploy.optimize_clones(
clones, training_optimizer, regularization_losses=None)
total_loss = tf.check_numerics(total_loss, 'LossTensor is inf or nan.')
# Optionally multiply bias gradients by train_config.bias_grad_multiplier.
if train_config.bias_grad_multiplier:
biases_regex_list = ['.*/biases']
grads_and_vars = variables_helper.multiply_gradients_matching_regex(
grads_and_vars,
biases_regex_list,
multiplier=train_config.bias_grad_multiplier)
# Optionally freeze some layers by setting their gradients to be zero.
if train_config.freeze_variables:
grads_and_vars = variables_helper.freeze_gradients_matching_regex(
grads_and_vars, train_config.freeze_variables)
# Optionally clip gradients
if train_config.gradient_clipping_by_norm > 0:
with tf.name_scope('clip_grads'):
grads_and_vars = slim.learning.clip_gradient_norms(
grads_and_vars, train_config.gradient_clipping_by_norm)
# Create gradient updates.
grad_updates = training_optimizer.apply_gradients(grads_and_vars,
global_step=global_step)
update_ops.append(grad_updates)
update_op = tf.group(*update_ops)
with tf.control_dependencies([update_op]):
train_tensor = tf.identity(total_loss, name='train_op')
# Add summaries.
for model_var in slim.get_model_variables():
global_summaries.add(tf.summary.histogram(model_var.op.name, model_var))
for loss_tensor in tf.losses.get_losses():
global_summaries.add(tf.summary.scalar(loss_tensor.op.name, loss_tensor))
global_summaries.add(
tf.summary.scalar('TotalLoss', tf.losses.get_total_loss()))
# Add the summaries from the first clone. These contain the summaries
# created by model_fn and either optimize_clones() or _gather_clone_loss().
summaries |= set(tf.get_collection(tf.GraphKeys.SUMMARIES,
first_clone_scope))
summaries |= global_summaries
# Merge all summaries together.
summary_op = tf.summary.merge(list(summaries), name='summary_op')
# Soft placement allows placing on CPU ops without GPU implementation.
session_config = tf.ConfigProto(allow_soft_placement=True,
log_device_placement=False)
# Save checkpoints regularly.
keep_checkpoint_every_n_hours = train_config.keep_checkpoint_every_n_hours
saver = tf.train.Saver(
keep_checkpoint_every_n_hours=keep_checkpoint_every_n_hours)
slim.learning.train(
train_tensor,
logdir=train_dir,
master=master,
is_chief=is_chief,
session_config=session_config,
startup_delay_steps=train_config.startup_delay_steps,
init_fn=init_fn,
summary_op=summary_op,
number_of_steps=(
train_config.num_steps if train_config.num_steps else None),
save_summaries_secs=120,
sync_optimizer=sync_optimizer,
saver=saver)
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.trainer."""
import tensorflow as tf
from google.protobuf import text_format
from object_detection import trainer
from object_detection.core import losses
from object_detection.core import model
from object_detection.core import standard_fields as fields
from object_detection.protos import train_pb2
NUMBER_OF_CLASSES = 2
def get_input_function():
"""A function to get test inputs. Returns an image with one box."""
image = tf.random_uniform([32, 32, 3], dtype=tf.float32)
class_label = tf.random_uniform(
[1], minval=0, maxval=NUMBER_OF_CLASSES, dtype=tf.int32)
box_label = tf.random_uniform(
[1, 4], minval=0.4, maxval=0.6, dtype=tf.float32)
return {
fields.InputDataFields.image: image,
fields.InputDataFields.groundtruth_classes: class_label,
fields.InputDataFields.groundtruth_boxes: box_label
}
class FakeDetectionModel(model.DetectionModel):
"""A simple (and poor) DetectionModel for use in test."""
def __init__(self):
super(FakeDetectionModel, self).__init__(num_classes=NUMBER_OF_CLASSES)
self._classification_loss = losses.WeightedSigmoidClassificationLoss(
anchorwise_output=True)
self._localization_loss = losses.WeightedSmoothL1LocalizationLoss(
anchorwise_output=True)
def preprocess(self, inputs):
"""Input preprocessing, resizes images to 28x28.
Args:
inputs: a [batch, height_in, width_in, channels] float32 tensor
representing a batch of images with values between 0 and 255.0.
Returns:
preprocessed_inputs: a [batch, 28, 28, channels] float32 tensor.
"""
return tf.image.resize_images(inputs, [28, 28])
def predict(self, preprocessed_inputs):
"""Prediction tensors from inputs tensor.
Args:
preprocessed_inputs: a [batch, 28, 28, channels] float32 tensor.
Returns:
prediction_dict: a dictionary holding prediction tensors to be
passed to the Loss or Postprocess functions.
"""
flattened_inputs = tf.contrib.layers.flatten(preprocessed_inputs)
class_prediction = tf.contrib.layers.fully_connected(
flattened_inputs, self._num_classes)
box_prediction = tf.contrib.layers.fully_connected(flattened_inputs, 4)
return {
'class_predictions_with_background': tf.reshape(
class_prediction, [-1, 1, self._num_classes]),
'box_encodings': tf.reshape(box_prediction, [-1, 1, 4])
}
def postprocess(self, prediction_dict, **params):
"""Convert predicted output tensors to final detections. Unused.
Args:
prediction_dict: a dictionary holding prediction tensors.
**params: Additional keyword arguments for specific implementations of
DetectionModel.
Returns:
detections: a dictionary with empty fields.
"""
return {
'detection_boxes': None,
'detection_scores': None,
'detection_classes': None,
'num_detections': None
}
def loss(self, prediction_dict):
"""Compute scalar loss tensors with respect to provided groundtruth.
Calling this function requires that groundtruth tensors have been
provided via the provide_groundtruth function.
Args:
prediction_dict: a dictionary holding predicted tensors
Returns:
a dictionary mapping strings (loss names) to scalar tensors representing
loss values.
"""
batch_reg_targets = tf.stack(
self.groundtruth_lists(fields.BoxListFields.boxes))
batch_cls_targets = tf.stack(
self.groundtruth_lists(fields.BoxListFields.classes))
weights = tf.constant(
1.0, dtype=tf.float32,
shape=[len(self.groundtruth_lists(fields.BoxListFields.boxes)), 1])
location_losses = self._localization_loss(
prediction_dict['box_encodings'], batch_reg_targets,
weights=weights)
cls_losses = self._classification_loss(
prediction_dict['class_predictions_with_background'], batch_cls_targets,
weights=weights)
loss_dict = {
'localization_loss': tf.reduce_sum(location_losses),
'classification_loss': tf.reduce_sum(cls_losses),
}
return loss_dict
def restore_fn(self, checkpoint_path, from_detection_checkpoint=True):
"""Return callable for loading a checkpoint into the tensorflow graph.
Args:
checkpoint_path: path to checkpoint to restore.
from_detection_checkpoint: whether to restore from a full detection
checkpoint (with compatible variable names) or to restore from a
classification checkpoint for initialization prior to training.
Returns:
a callable which takes a tf.Session and does nothing.
"""
def restore(unused_sess):
return
return restore
class TrainerTest(tf.test.TestCase):
def test_configure_trainer_and_train_two_steps(self):
train_config_text_proto = """
optimizer {
adam_optimizer {
learning_rate {
constant_learning_rate {
learning_rate: 0.01
}
}
}
}
data_augmentation_options {
random_adjust_brightness {
max_delta: 0.2
}
}
data_augmentation_options {
random_adjust_contrast {
min_delta: 0.7
max_delta: 1.1
}
}
num_steps: 2
"""
train_config = train_pb2.TrainConfig()
text_format.Merge(train_config_text_proto, train_config)
train_dir = self.get_temp_dir()
trainer.train(create_tensor_dict_fn=get_input_function,
create_model_fn=FakeDetectionModel,
train_config=train_config,
master='',
task=0,
num_clones=1,
worker_replicas=1,
clone_on_cpu=True,
ps_tasks=0,
worker_job_name='worker',
is_chief=True,
train_dir=train_dir)
if __name__ == '__main__':
tf.test.main()
# Tensorflow Object Detection API: Utility functions.
package(
default_visibility = ["//visibility:public"],
)
licenses(["notice"])
# Apache 2.0
py_library(
name = "category_util",
srcs = ["category_util.py"],
deps = ["//tensorflow"],
)
py_library(
name = "dataset_util",
srcs = ["dataset_util.py"],
deps = [
"//tensorflow",
],
)
py_library(
name = "label_map_util",
srcs = ["label_map_util.py"],
deps = [
"//third_party/py/google/protobuf",
"//tensorflow",
"//tensorflow_models/object_detection/protos:string_int_label_map_py_pb2",
],
)
py_library(
name = "learning_schedules",
srcs = ["learning_schedules.py"],
deps = ["//tensorflow"],
)
py_library(
name = "metrics",
srcs = ["metrics.py"],
deps = ["//third_party/py/numpy"],
)
py_library(
name = "np_box_list",
srcs = ["np_box_list.py"],
deps = ["//tensorflow"],
)
py_library(
name = "np_box_list_ops",
srcs = ["np_box_list_ops.py"],
deps = [
":np_box_list",
":np_box_ops",
"//tensorflow",
],
)
py_library(
name = "np_box_ops",
srcs = ["np_box_ops.py"],
deps = ["//tensorflow"],
)
py_library(
name = "object_detection_evaluation",
srcs = ["object_detection_evaluation.py"],
deps = [
":metrics",
":per_image_evaluation",
"//tensorflow",
],
)
py_library(
name = "ops",
srcs = ["ops.py"],
deps = [
":static_shape",
"//tensorflow",
"//tensorflow_models/object_detection/core:box_list",
"//tensorflow_models/object_detection/core:box_list_ops",
"//tensorflow_models/object_detection/core:standard_fields",
],
)
py_library(
name = "per_image_evaluation",
srcs = ["per_image_evaluation.py"],
deps = [
":np_box_list",
":np_box_list_ops",
"//tensorflow",
],
)
py_library(
name = "shape_utils",
srcs = ["shape_utils.py"],
deps = ["//tensorflow"],
)
py_library(
name = "static_shape",
srcs = ["static_shape.py"],
deps = [],
)
py_library(
name = "test_utils",
srcs = ["test_utils.py"],
deps = [
"//tensorflow",
"//tensorflow_models/object_detection/core:anchor_generator",
"//tensorflow_models/object_detection/core:box_coder",
"//tensorflow_models/object_detection/core:box_list",
"//tensorflow_models/object_detection/core:box_predictor",
"//tensorflow_models/object_detection/core:matcher",
],
)
py_library(
name = "variables_helper",
srcs = ["variables_helper.py"],
deps = [
"//tensorflow",
],
)
py_library(
name = "visualization_utils",
srcs = ["visualization_utils.py"],
deps = [
"//third_party/py/PIL:pil",
"//tensorflow",
],
)
py_test(
name = "category_util_test",
srcs = ["category_util_test.py"],
deps = [
":category_util",
"//tensorflow",
],
)
py_test(
name = "dataset_util_test",
srcs = ["dataset_util_test.py"],
deps = [
":dataset_util",
"//tensorflow",
],
)
py_test(
name = "label_map_util_test",
srcs = ["label_map_util_test.py"],
deps = [
":label_map_util",
"//tensorflow",
],
)
py_test(
name = "learning_schedules_test",
srcs = ["learning_schedules_test.py"],
deps = [
":learning_schedules",
"//tensorflow",
],
)
py_test(
name = "metrics_test",
srcs = ["metrics_test.py"],
deps = [
":metrics",
"//tensorflow",
],
)
py_test(
name = "np_box_list_test",
srcs = ["np_box_list_test.py"],
deps = [
":np_box_list",
"//tensorflow",
],
)
py_test(
name = "np_box_list_ops_test",
srcs = ["np_box_list_ops_test.py"],
deps = [
":np_box_list",
":np_box_list_ops",
"//tensorflow",
],
)
py_test(
name = "np_box_ops_test",
srcs = ["np_box_ops_test.py"],
deps = [
":np_box_ops",
"//tensorflow",
],
)
py_test(
name = "object_detection_evaluation_test",
srcs = ["object_detection_evaluation_test.py"],
deps = [
":object_detection_evaluation",
"//tensorflow",
],
)
py_test(
name = "ops_test",
srcs = ["ops_test.py"],
deps = [
":ops",
"//tensorflow",
"//tensorflow_models/object_detection/core:standard_fields",
],
)
py_test(
name = "per_image_evaluation_test",
srcs = ["per_image_evaluation_test.py"],
deps = [
":per_image_evaluation",
"//tensorflow",
],
)
py_test(
name = "shape_utils_test",
srcs = ["shape_utils_test.py"],
deps = [
":shape_utils",
"//tensorflow",
],
)
py_test(
name = "static_shape_test",
srcs = ["static_shape_test.py"],
deps = [
":static_shape",
"//tensorflow",
],
)
py_test(
name = "test_utils_test",
srcs = ["test_utils_test.py"],
deps = [
":test_utils",
"//tensorflow",
],
)
py_test(
name = "variables_helper_test",
srcs = ["variables_helper_test.py"],
deps = [
":variables_helper",
"//tensorflow",
],
)
py_test(
name = "visualization_utils_test",
srcs = ["visualization_utils_test.py"],
deps = [
":visualization_utils",
"//third_party/py/PIL:pil",
],
)
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Functions for importing/exporting Object Detection categories."""
import csv
import tensorflow as tf
def load_categories_from_csv_file(csv_path):
"""Loads categories from a csv file.
The CSV file should have one comma delimited numeric category id and string
category name pair per line. For example:
0,"cat"
1,"dog"
2,"bird"
...
Args:
csv_path: Path to the csv file to be parsed into categories.
Returns:
categories: A list of dictionaries representing all possible categories.
The categories will contain an integer 'id' field and a string
'name' field.
Raises:
ValueError: If the csv file is incorrectly formatted.
"""
categories = []
with tf.gfile.Open(csv_path, 'r') as csvfile:
reader = csv.reader(csvfile, delimiter=',', quotechar='"')
for row in reader:
if not row:
continue
if len(row) != 2:
raise ValueError('Expected 2 fields per row in csv: %s' % ','.join(row))
category_id = int(row[0])
category_name = row[1]
categories.append({'id': category_id, 'name': category_name})
return categories
def save_categories_to_csv_file(categories, csv_path):
"""Saves categories to a csv file.
Args:
categories: A list of dictionaries representing categories to save to file.
Each category must contain an 'id' and 'name' field.
csv_path: Path to the csv file to be parsed into categories.
"""
categories.sort(key=lambda x: x['id'])
with tf.gfile.Open(csv_path, 'w') as csvfile:
writer = csv.writer(csvfile, delimiter=',', quotechar='"')
for category in categories:
writer.writerow([category['id'], category['name']])
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.utils.category_util."""
import os
import tensorflow as tf
from object_detection.utils import category_util
class EvalUtilTest(tf.test.TestCase):
def test_load_categories_from_csv_file(self):
csv_data = """
0,"cat"
1,"dog"
2,"bird"
""".strip(' ')
csv_path = os.path.join(self.get_temp_dir(), 'test.csv')
with tf.gfile.Open(csv_path, 'wb') as f:
f.write(csv_data)
categories = category_util.load_categories_from_csv_file(csv_path)
self.assertTrue({'id': 0, 'name': 'cat'} in categories)
self.assertTrue({'id': 1, 'name': 'dog'} in categories)
self.assertTrue({'id': 2, 'name': 'bird'} in categories)
def test_save_categories_to_csv_file(self):
categories = [
{'id': 0, 'name': 'cat'},
{'id': 1, 'name': 'dog'},
{'id': 2, 'name': 'bird'},
]
csv_path = os.path.join(self.get_temp_dir(), 'test.csv')
category_util.save_categories_to_csv_file(categories, csv_path)
saved_categories = category_util.load_categories_from_csv_file(csv_path)
self.assertEqual(saved_categories, categories)
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Utility functions for creating TFRecord data sets."""
import tensorflow as tf
def int64_feature(value):
return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))
def int64_list_feature(value):
return tf.train.Feature(int64_list=tf.train.Int64List(value=value))
def bytes_feature(value):
return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
def bytes_list_feature(value):
return tf.train.Feature(bytes_list=tf.train.BytesList(value=value))
def float_list_feature(value):
return tf.train.Feature(float_list=tf.train.FloatList(value=value))
def read_examples_list(path):
"""Read list of training or validation examples.
The file is assumed to contain a single example per line where the first
token in the line is an identifier that allows us to find the image and
annotation xml for that example.
For example, the line:
xyz 3
would allow us to find files xyz.jpg and xyz.xml (the 3 would be ignored).
Args:
path: absolute path to examples list file.
Returns:
list of example identifiers (strings).
"""
with tf.gfile.GFile(path) as fid:
lines = fid.readlines()
return [line.strip().split(' ')[0] for line in lines]
def recursive_parse_xml_to_dict(xml):
"""Recursively parses XML contents to python dict.
We assume that `object` tags are the only ones that can appear
multiple times at the same level of a tree.
Args:
xml: xml tree obtained by parsing XML file contents using lxml.etree
Returns:
Python dictionary holding XML contents.
"""
if not xml:
return {xml.tag: xml.text}
result = {}
for child in xml:
child_result = recursive_parse_xml_to_dict(child)
if child.tag != 'object':
result[child.tag] = child_result[child.tag]
else:
if child.tag not in result:
result[child.tag] = []
result[child.tag].append(child_result[child.tag])
return {xml.tag: result}
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.utils.dataset_util."""
import os
import tensorflow as tf
from object_detection.utils import dataset_util
class DatasetUtilTest(tf.test.TestCase):
def test_read_examples_list(self):
example_list_data = """example1 1\nexample2 2"""
example_list_path = os.path.join(self.get_temp_dir(), 'examples.txt')
with tf.gfile.Open(example_list_path, 'wb') as f:
f.write(example_list_data)
examples = dataset_util.read_examples_list(example_list_path)
self.assertListEqual(['example1', 'example2'], examples)
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Label map utility functions."""
import logging
import tensorflow as tf
from google.protobuf import text_format
from object_detection.protos import string_int_label_map_pb2
def create_category_index(categories):
"""Creates dictionary of COCO compatible categories keyed by category id.
Args:
categories: a list of dicts, each of which has the following keys:
'id': (required) an integer id uniquely identifying this category.
'name': (required) string representing category name
e.g., 'cat', 'dog', 'pizza'.
Returns:
category_index: a dict containing the same entries as categories, but keyed
by the 'id' field of each category.
"""
category_index = {}
for cat in categories:
category_index[cat['id']] = cat
return category_index
def convert_label_map_to_categories(label_map,
max_num_classes,
use_display_name=True):
"""Loads label map proto and returns categories list compatible with eval.
This function loads a label map and returns a list of dicts, each of which
has the following keys:
'id': (required) an integer id uniquely identifying this category.
'name': (required) string representing category name
e.g., 'cat', 'dog', 'pizza'.
We only allow class into the list if its id-label_id_offset is
between 0 (inclusive) and max_num_classes (exclusive).
If there are several items mapping to the same id in the label map,
we will only keep the first one in the categories list.
Args:
label_map: a StringIntLabelMapProto or None. If None, a default categories
list is created with max_num_classes categories.
max_num_classes: maximum number of (consecutive) label indices to include.
use_display_name: (boolean) choose whether to load 'display_name' field
as category name. If False of if the display_name field does not exist,
uses 'name' field as category names instead.
Returns:
categories: a list of dictionaries representing all possible categories.
"""
categories = []
list_of_ids_already_added = []
if not label_map:
label_id_offset = 1
for class_id in range(max_num_classes):
categories.append({
'id': class_id + label_id_offset,
'name': 'category_{}'.format(class_id + label_id_offset)
})
return categories
for item in label_map.item:
if not 0 < item.id <= max_num_classes:
logging.info('Ignore item %d since it falls outside of requested '
'label range.', item.id)
continue
if use_display_name and item.HasField('display_name'):
name = item.display_name
else:
name = item.name
if item.id not in list_of_ids_already_added:
list_of_ids_already_added.append(item.id)
categories.append({'id': item.id, 'name': name})
return categories
# TODO: double check documentaion.
def load_labelmap(path):
"""Loads label map proto.
Args:
path: path to StringIntLabelMap proto text file.
Returns:
a StringIntLabelMapProto
"""
with tf.gfile.GFile(path, 'r') as fid:
label_map_string = fid.read()
label_map = string_int_label_map_pb2.StringIntLabelMap()
try:
text_format.Merge(label_map_string, label_map)
except text_format.ParseError:
label_map.ParseFromString(label_map_string)
return label_map
def get_label_map_dict(label_map_path):
"""Reads a label map and returns a dictionary of label names to id.
Args:
label_map_path: path to label_map.
Returns:
A dictionary mapping label names to id.
"""
label_map = load_labelmap(label_map_path)
label_map_dict = {}
for item in label_map.item:
label_map_dict[item.name] = item.id
return label_map_dict
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.utils.label_map_util."""
import os
import tensorflow as tf
from google.protobuf import text_format
from object_detection.protos import string_int_label_map_pb2
from object_detection.utils import label_map_util
class LabelMapUtilTest(tf.test.TestCase):
def _generate_label_map(self, num_classes):
label_map_proto = string_int_label_map_pb2.StringIntLabelMap()
for i in range(1, num_classes + 1):
item = label_map_proto.item.add()
item.id = i
item.name = 'label_' + str(i)
item.display_name = str(i)
return label_map_proto
def test_get_label_map_dict(self):
label_map_string = """
item {
id:2
name:'cat'
}
item {
id:1
name:'dog'
}
"""
label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt')
with tf.gfile.Open(label_map_path, 'wb') as f:
f.write(label_map_string)
label_map_dict = label_map_util.get_label_map_dict(label_map_path)
self.assertEqual(label_map_dict['dog'], 1)
self.assertEqual(label_map_dict['cat'], 2)
def test_keep_categories_with_unique_id(self):
label_map_proto = string_int_label_map_pb2.StringIntLabelMap()
label_map_string = """
item {
id:2
name:'cat'
}
item {
id:1
name:'child'
}
item {
id:1
name:'person'
}
item {
id:1
name:'n00007846'
}
"""
text_format.Merge(label_map_string, label_map_proto)
categories = label_map_util.convert_label_map_to_categories(
label_map_proto, max_num_classes=3)
self.assertListEqual([{
'id': 2,
'name': u'cat'
}, {
'id': 1,
'name': u'child'
}], categories)
def test_convert_label_map_to_categories_no_label_map(self):
categories = label_map_util.convert_label_map_to_categories(
None, max_num_classes=3)
expected_categories_list = [{
'name': u'category_1',
'id': 1
}, {
'name': u'category_2',
'id': 2
}, {
'name': u'category_3',
'id': 3
}]
self.assertListEqual(expected_categories_list, categories)
def test_convert_label_map_to_coco_categories(self):
label_map_proto = self._generate_label_map(num_classes=4)
categories = label_map_util.convert_label_map_to_categories(
label_map_proto, max_num_classes=3)
expected_categories_list = [{
'name': u'1',
'id': 1
}, {
'name': u'2',
'id': 2
}, {
'name': u'3',
'id': 3
}]
self.assertListEqual(expected_categories_list, categories)
def test_convert_label_map_to_coco_categories_with_few_classes(self):
label_map_proto = self._generate_label_map(num_classes=4)
cat_no_offset = label_map_util.convert_label_map_to_categories(
label_map_proto, max_num_classes=2)
expected_categories_list = [{
'name': u'1',
'id': 1
}, {
'name': u'2',
'id': 2
}]
self.assertListEqual(expected_categories_list, cat_no_offset)
def test_create_category_index(self):
categories = [{'name': u'1', 'id': 1}, {'name': u'2', 'id': 2}]
category_index = label_map_util.create_category_index(categories)
self.assertDictEqual({
1: {
'name': u'1',
'id': 1
},
2: {
'name': u'2',
'id': 2
}
}, category_index)
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Library of common learning rate schedules."""
import tensorflow as tf
def exponential_decay_with_burnin(global_step,
learning_rate_base,
learning_rate_decay_steps,
learning_rate_decay_factor,
burnin_learning_rate=0.0,
burnin_steps=0):
"""Exponential decay schedule with burn-in period.
In this schedule, learning rate is fixed at burnin_learning_rate
for a fixed period, before transitioning to a regular exponential
decay schedule.
Args:
global_step: int tensor representing global step.
learning_rate_base: base learning rate.
learning_rate_decay_steps: steps to take between decaying the learning rate.
Note that this includes the number of burn-in steps.
learning_rate_decay_factor: multiplicative factor by which to decay
learning rate.
burnin_learning_rate: initial learning rate during burn-in period. If
0.0 (which is the default), then the burn-in learning rate is simply
set to learning_rate_base.
burnin_steps: number of steps to use burnin learning rate.
Returns:
a (scalar) float tensor representing learning rate
"""
if burnin_learning_rate == 0:
burnin_learning_rate = learning_rate_base
post_burnin_learning_rate = tf.train.exponential_decay(
learning_rate_base,
global_step,
learning_rate_decay_steps,
learning_rate_decay_factor,
staircase=True)
return tf.cond(
tf.less(global_step, burnin_steps),
lambda: tf.convert_to_tensor(burnin_learning_rate),
lambda: post_burnin_learning_rate)
def manual_stepping(global_step, boundaries, rates):
"""Manually stepped learning rate schedule.
This function provides fine grained control over learning rates. One must
specify a sequence of learning rates as well as a set of integer steps
at which the current learning rate must transition to the next. For example,
if boundaries = [5, 10] and rates = [.1, .01, .001], then the learning
rate returned by this function is .1 for global_step=0,...,4, .01 for
global_step=5...9, and .001 for global_step=10 and onward.
Args:
global_step: int64 (scalar) tensor representing global step.
boundaries: a list of global steps at which to switch learning
rates. This list is assumed to consist of increasing positive integers.
rates: a list of (float) learning rates corresponding to intervals between
the boundaries. The length of this list must be exactly
len(boundaries) + 1.
Returns:
a (scalar) float tensor representing learning rate
Raises:
ValueError: if one of the following checks fails:
1. boundaries is a strictly increasing list of positive integers
2. len(rates) == len(boundaries) + 1
"""
if any([b < 0 for b in boundaries]) or any(
[not isinstance(b, int) for b in boundaries]):
raise ValueError('boundaries must be a list of positive integers')
if any([bnext <= b for bnext, b in zip(boundaries[1:], boundaries[:-1])]):
raise ValueError('Entries in boundaries must be strictly increasing.')
if any([not isinstance(r, float) for r in rates]):
raise ValueError('Learning rates must be floats')
if len(rates) != len(boundaries) + 1:
raise ValueError('Number of provided learning rates must exceed '
'number of boundary points by exactly 1.')
step_boundaries = tf.constant(boundaries, tf.int64)
learning_rates = tf.constant(rates, tf.float32)
unreached_boundaries = tf.reshape(tf.where(
tf.greater(step_boundaries, global_step)), [-1])
unreached_boundaries = tf.concat([unreached_boundaries, [len(boundaries)]], 0)
index = tf.reshape(tf.reduce_min(unreached_boundaries), [1])
return tf.reshape(tf.slice(learning_rates, index, [1]), [])
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.utils.learning_schedules."""
import tensorflow as tf
from object_detection.utils import learning_schedules
class LearningSchedulesTest(tf.test.TestCase):
def testExponentialDecayWithBurnin(self):
global_step = tf.placeholder(tf.int32, [])
learning_rate_base = 1.0
learning_rate_decay_steps = 3
learning_rate_decay_factor = .1
burnin_learning_rate = .5
burnin_steps = 2
exp_rates = [.5, .5, 1, .1, .1, .1, .01, .01]
learning_rate = learning_schedules.exponential_decay_with_burnin(
global_step, learning_rate_base, learning_rate_decay_steps,
learning_rate_decay_factor, burnin_learning_rate, burnin_steps)
with self.test_session() as sess:
output_rates = []
for input_global_step in range(8):
output_rate = sess.run(learning_rate,
feed_dict={global_step: input_global_step})
output_rates.append(output_rate)
self.assertAllClose(output_rates, exp_rates)
def testManualStepping(self):
global_step = tf.placeholder(tf.int64, [])
boundaries = [2, 3, 7]
rates = [1.0, 2.0, 3.0, 4.0]
exp_rates = [1.0, 1.0, 2.0, 3.0, 3.0, 3.0, 3.0, 4.0, 4.0, 4.0]
learning_rate = learning_schedules.manual_stepping(global_step, boundaries,
rates)
with self.test_session() as sess:
output_rates = []
for input_global_step in range(10):
output_rate = sess.run(learning_rate,
feed_dict={global_step: input_global_step})
output_rates.append(output_rate)
self.assertAllClose(output_rates, exp_rates)
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Functions for computing metrics like precision, recall, CorLoc and etc."""
from __future__ import division
import numpy as np
def compute_precision_recall(scores, labels, num_gt):
"""Compute precision and recall.
Args:
scores: A float numpy array representing detection score
labels: A boolean numpy array representing true/false positive labels
num_gt: Number of ground truth instances
Raises:
ValueError: if the input is not of the correct format
Returns:
precision: Fraction of positive instances over detected ones. This value is
None if no ground truth labels are present.
recall: Fraction of detected positive instance over all positive instances.
This value is None if no ground truth labels are present.
"""
if not isinstance(
labels, np.ndarray) or labels.dtype != np.bool or len(labels.shape) != 1:
raise ValueError("labels must be single dimension bool numpy array")
if not isinstance(
scores, np.ndarray) or len(scores.shape) != 1:
raise ValueError("scores must be single dimension numpy array")
if num_gt < np.sum(labels):
raise ValueError("Number of true positives must be smaller than num_gt.")
if len(scores) != len(labels):
raise ValueError("scores and labels must be of the same size.")
if num_gt == 0:
return None, None
sorted_indices = np.argsort(scores)
sorted_indices = sorted_indices[::-1]
labels = labels.astype(int)
true_positive_labels = labels[sorted_indices]
false_positive_labels = 1 - true_positive_labels
cum_true_positives = np.cumsum(true_positive_labels)
cum_false_positives = np.cumsum(false_positive_labels)
precision = cum_true_positives.astype(float) / (
cum_true_positives + cum_false_positives)
recall = cum_true_positives.astype(float) / num_gt
return precision, recall
def compute_average_precision(precision, recall):
"""Compute Average Precision according to the definition in VOCdevkit.
Precision is modified to ensure that it does not decrease as recall
decrease.
Args:
precision: A float [N, 1] numpy array of precisions
recall: A float [N, 1] numpy array of recalls
Raises:
ValueError: if the input is not of the correct format
Returns:
average_precison: The area under the precision recall curve. NaN if
precision and recall are None.
"""
if precision is None:
if recall is not None:
raise ValueError("If precision is None, recall must also be None")
return np.NAN
if not isinstance(precision, np.ndarray) or not isinstance(recall,
np.ndarray):
raise ValueError("precision and recall must be numpy array")
if precision.dtype != np.float or recall.dtype != np.float:
raise ValueError("input must be float numpy array.")
if len(precision) != len(recall):
raise ValueError("precision and recall must be of the same size.")
if not precision.size:
return 0.0
if np.amin(precision) < 0 or np.amax(precision) > 1:
raise ValueError("Precision must be in the range of [0, 1].")
if np.amin(recall) < 0 or np.amax(recall) > 1:
raise ValueError("recall must be in the range of [0, 1].")
if not all(recall[i] <= recall[i + 1] for i in xrange(len(recall) - 1)):
raise ValueError("recall must be a non-decreasing array")
recall = np.concatenate([[0], recall, [1]])
precision = np.concatenate([[0], precision, [0]])
# Preprocess precision to be a non-decreasing array
for i in range(len(precision) - 2, -1, -1):
precision[i] = np.maximum(precision[i], precision[i + 1])
indices = np.where(recall[1:] != recall[:-1])[0] + 1
average_precision = np.sum(
(recall[indices] - recall[indices - 1]) * precision[indices])
return average_precision
def compute_cor_loc(num_gt_imgs_per_class,
num_images_correctly_detected_per_class):
"""Compute CorLoc according to the definition in the following paper.
https://www.robots.ox.ac.uk/~vgg/rg/papers/deselaers-eccv10.pdf
Returns nans if there are no ground truth images for a class.
Args:
num_gt_imgs_per_class: 1D array, representing number of images containing
at least one object instance of a particular class
num_images_correctly_detected_per_class: 1D array, representing number of
images that are correctly detected at least one object instance of a
particular class
Returns:
corloc_per_class: A float numpy array represents the corloc score of each
class
"""
return np.where(
num_gt_imgs_per_class == 0,
np.nan,
num_images_correctly_detected_per_class / num_gt_imgs_per_class)
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.metrics."""
import numpy as np
import tensorflow as tf
from object_detection.utils import metrics
class MetricsTest(tf.test.TestCase):
def test_compute_cor_loc(self):
num_gt_imgs_per_class = np.array([100, 1, 5, 1, 1], dtype=int)
num_images_correctly_detected_per_class = np.array([10, 0, 1, 0, 0],
dtype=int)
corloc = metrics.compute_cor_loc(num_gt_imgs_per_class,
num_images_correctly_detected_per_class)
expected_corloc = np.array([0.1, 0, 0.2, 0, 0], dtype=float)
self.assertTrue(np.allclose(corloc, expected_corloc))
def test_compute_cor_loc_nans(self):
num_gt_imgs_per_class = np.array([100, 0, 0, 1, 1], dtype=int)
num_images_correctly_detected_per_class = np.array([10, 0, 1, 0, 0],
dtype=int)
corloc = metrics.compute_cor_loc(num_gt_imgs_per_class,
num_images_correctly_detected_per_class)
expected_corloc = np.array([0.1, np.nan, np.nan, 0, 0], dtype=float)
self.assertAllClose(corloc, expected_corloc)
def test_compute_precision_recall(self):
num_gt = 10
scores = np.array([0.4, 0.3, 0.6, 0.2, 0.7, 0.1], dtype=float)
labels = np.array([0, 1, 1, 0, 0, 1], dtype=bool)
accumulated_tp_count = np.array([0, 1, 1, 2, 2, 3], dtype=float)
expected_precision = accumulated_tp_count / np.array([1, 2, 3, 4, 5, 6])
expected_recall = accumulated_tp_count / num_gt
precision, recall = metrics.compute_precision_recall(scores, labels, num_gt)
self.assertAllClose(precision, expected_precision)
self.assertAllClose(recall, expected_recall)
def test_compute_average_precision(self):
precision = np.array([0.8, 0.76, 0.9, 0.65, 0.7, 0.5, 0.55, 0], dtype=float)
recall = np.array([0.3, 0.3, 0.4, 0.4, 0.45, 0.45, 0.5, 0.5], dtype=float)
processed_precision = np.array([0.9, 0.9, 0.9, 0.7, 0.7, 0.55, 0.55, 0],
dtype=float)
recall_interval = np.array([0.3, 0, 0.1, 0, 0.05, 0, 0.05, 0], dtype=float)
expected_mean_ap = np.sum(recall_interval * processed_precision)
mean_ap = metrics.compute_average_precision(precision, recall)
self.assertAlmostEqual(expected_mean_ap, mean_ap)
def test_compute_precision_recall_and_ap_no_groundtruth(self):
num_gt = 0
scores = np.array([0.4, 0.3, 0.6, 0.2, 0.7, 0.1], dtype=float)
labels = np.array([0, 0, 0, 0, 0, 0], dtype=bool)
expected_precision = None
expected_recall = None
precision, recall = metrics.compute_precision_recall(scores, labels, num_gt)
self.assertEquals(precision, expected_precision)
self.assertEquals(recall, expected_recall)
ap = metrics.compute_average_precision(precision, recall)
self.assertTrue(np.isnan(ap))
if __name__ == '__main__':
tf.test.main()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment