Commit c308c03c authored by Mehdi Sharifzadeh, committed by Taylor Robie

Mask R-CNN model added to models/research/mlperf_object_detection/Mask_RCNN (#4678)

* Create README.md

* readme changed

* readme changed

* ResNet backbone completed.

* FPN added

* Create README.md

* initial commit

* files removed

* initial commit

* protobuf file removed
parent 32e7d660
Mask R-CNN implementation adapted from models/research/object_detection/
# Mask R-CNN with Resnet-50 (v1), Atrous version
# Configured for MSCOCO Dataset.
# Users should configure the fine_tune_checkpoint field in the train config as
# well as the label_map_path and input_path fields in the train_input_reader
# and eval_input_reader. This copy uses absolute example paths that must be
# changed for your environment.
model {
faster_rcnn {
num_classes: 81
image_resizer {
keep_aspect_ratio_resizer {
min_dimension: 800
max_dimension: 1365
}
}
number_of_stages: 3
feature_extractor {
type: 'faster_rcnn_resnet50'
first_stage_features_stride: 8
}
first_stage_anchor_generator {
grid_anchor_generator {
scales: [0.125, 0.25, 0.5, 1.0, 2.0]  # base anchor 256x256 => anchor sizes 32, 64, 128, 256, 512
aspect_ratios: [0.5, 1.0, 2.0]
height_stride: 8
width_stride: 8
}
}
first_stage_atrous_rate: 2
first_stage_box_predictor_conv_hyperparams {
op: CONV
regularizer {
l2_regularizer {
weight: 0.0
}
}
initializer {
truncated_normal_initializer {
stddev: 0.01
}
}
}
first_stage_nms_score_threshold: 0.0
first_stage_nms_iou_threshold: 0.7
first_stage_max_proposals: 512
first_stage_localization_loss_weight: 2.0
first_stage_objectness_loss_weight: 1.0
initial_crop_size: 14
maxpool_kernel_size: 2
maxpool_stride: 2
second_stage_batch_size: 512
second_stage_box_predictor {
mask_rcnn_box_predictor {
use_dropout: false
dropout_keep_probability: 1.0
predict_instance_masks: true
mask_height: 14
mask_width: 14
mask_prediction_conv_depth: 0
mask_prediction_num_conv_layers: 3  # from the Mask R-CNN mask heads
fc_hyperparams {
op: FC
regularizer {
l2_regularizer {
weight: 0.0
}
}
initializer {
variance_scaling_initializer {
factor: 1.0
uniform: true
mode: FAN_AVG
}
}
}
conv_hyperparams {
op: CONV
regularizer {
l2_regularizer {
weight: 0.0
}
}
initializer {
truncated_normal_initializer {
stddev: 0.01
}
}
}
}
}
second_stage_post_processing {
batch_non_max_suppression {
score_threshold: 0.0
iou_threshold: 0.6
max_detections_per_class: 2000
max_total_detections: 2000
}
score_converter: SOFTMAX
}
second_stage_localization_loss_weight: 2.0
second_stage_classification_loss_weight: 1.0
second_stage_mask_prediction_loss_weight: 4.0
}
}
train_config: {
batch_size: 4
optimizer {
momentum_optimizer: {
learning_rate: {
manual_step_learning_rate {
initial_learning_rate: 0.01
schedule {
step: 120000
learning_rate: .001
}
schedule {
step: 160000
learning_rate: .0001
}
}
}
momentum_optimizer_value: 0.9
}
use_moving_average: false
}
gradient_clipping_by_norm: 10.0
#fine_tune_checkpoint: "/home/mehdisharif/data/coco/resnet_v1_50.ckpt"
#from_detection_checkpoint: True
# Note: The below line caps the total number of training steps. The training
# script treats this value as the overall step budget and interleaves
# evaluation between chunks of training; if it is removed, the script falls
# back to its default budget.
num_steps: 20000000
data_augmentation_options {
random_horizontal_flip {
}
}
}
train_input_reader: {
tf_record_input_reader {
input_path: "/home/mehdisharif/data/coco/output2017/coco_train.record"
}
label_map_path: "/home/mehdisharif/data/coco/output2017/mscoco_label_map.pbtxt"
load_instance_masks: true
mask_type: PNG_MASKS
}
eval_config: {
metrics_set: ['coco_detection_metrics', 'coco_mask_metrics']
num_examples: 50
# Note: The below line limits the evaluation process to a single evaluation.
# Remove the below line to evaluate indefinitely.
max_evals: 1
num_visualizations: 50
eval_interval_secs: 120
}
eval_input_reader: {
tf_record_input_reader {
input_path: "/home/mehdisharif/data/coco/output2017/coco_val.record"
}
label_map_path: "/home/mehdisharif/data/coco/output2017/mscoco_label_map.pbtxt"
load_instance_masks: true
mask_type: PNG_MASKS
shuffle: false
num_readers: 1
}
# Mask R-CNN with Resnet-50 (v1), Atrous version
# Configured for MSCOCO Dataset.
# Users should configure the fine_tune_checkpoint field in the train config as
# well as the label_map_path and input_path fields in the train_input_reader and
# eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that
# should be configured.
model {
faster_rcnn {
num_classes: 90
image_resizer {
keep_aspect_ratio_resizer {
min_dimension: 800
max_dimension: 1365
}
}
number_of_stages: 3
feature_extractor {
type: 'faster_rcnn_resnet50'
first_stage_features_stride: 8
}
first_stage_anchor_generator {
grid_anchor_generator {
scales: [0.25, 0.5, 1.0, 2.0]
aspect_ratios: [0.5, 1.0, 2.0]
height_stride: 8
width_stride: 8
}
}
first_stage_atrous_rate: 2
first_stage_box_predictor_conv_hyperparams {
op: CONV
regularizer {
l2_regularizer {
weight: 0.0
}
}
initializer {
truncated_normal_initializer {
stddev: 0.01
}
}
}
first_stage_nms_score_threshold: 0.0
first_stage_nms_iou_threshold: 0.7
first_stage_max_proposals: 300
first_stage_localization_loss_weight: 2.0
first_stage_objectness_loss_weight: 1.0
initial_crop_size: 14
maxpool_kernel_size: 2
maxpool_stride: 2
second_stage_box_predictor {
mask_rcnn_box_predictor {
use_dropout: false
dropout_keep_probability: 1.0
predict_instance_masks: true
mask_height: 33
mask_width: 33
mask_prediction_conv_depth: 0
mask_prediction_num_conv_layers: 4
fc_hyperparams {
op: FC
regularizer {
l2_regularizer {
weight: 0.0
}
}
initializer {
variance_scaling_initializer {
factor: 1.0
uniform: true
mode: FAN_AVG
}
}
}
conv_hyperparams {
op: CONV
regularizer {
l2_regularizer {
weight: 0.0
}
}
initializer {
truncated_normal_initializer {
stddev: 0.01
}
}
}
}
}
second_stage_post_processing {
batch_non_max_suppression {
score_threshold: 0.0
iou_threshold: 0.6
max_detections_per_class: 100
max_total_detections: 300
}
score_converter: SOFTMAX
}
second_stage_localization_loss_weight: 2.0
second_stage_classification_loss_weight: 1.0
second_stage_mask_prediction_loss_weight: 4.0
}
}
train_config: {
batch_size: 2
optimizer {
momentum_optimizer: {
learning_rate: {
manual_step_learning_rate {
initial_learning_rate: 0.0003
schedule {
step: 900000
learning_rate: .00003
}
schedule {
step: 1200000
learning_rate: .000003
}
}
}
momentum_optimizer_value: 0.9
}
use_moving_average: false
}
gradient_clipping_by_norm: 10.0
#fine_tune_checkpoint: ""
from_detection_checkpoint: false
# Note: The below line limits the training process to 200K steps. This
# effectively bypasses the learning rate schedule (the learning rate will
# never decay). Remove the below line to train indefinitely.
#num_steps: 200000
data_augmentation_options {
random_horizontal_flip {
}
}
}
train_input_reader: {
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/coco_train.record"
}
label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt"
load_instance_masks: true
mask_type: PNG_MASKS
}
eval_config: {
metrics_set: ['coco_detection_metrics', 'coco_mask_metrics']
num_examples: 50
# Note: The below line limits the evaluation process to a single evaluation.
# Remove the below line to evaluate indefinitely.
max_evals: 1
num_visualizations: 50
eval_interval_secs: 120
}
eval_input_reader: {
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/coco_val.record"
}
label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt"
load_instance_masks: true
mask_type: PNG_MASKS
shuffle: true
num_readers: 1
}
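For reference, a pipeline file such as the two configs above can be parsed into its component protos with the same `config_util` helper that the training script below uses (a minimal sketch; the path is a placeholder):

```python
from object_detection.utils import config_util

# Returns a dict with 'model', 'train_config', 'train_input_config',
# 'eval_config', and 'eval_input_config' entries.
configs = config_util.get_configs_from_pipeline_file(
    'path/to/mask_rcnn_resnet50_atrous_coco.config')  # placeholder path
print(configs['train_config'].batch_size)
```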
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Training and evaluation for Mask_RCNN.
This module repeatedly runs 1 training epoch and then evaluation
##add explanation for all the options!!!!!!!
"""
import functools
import json
import os
from object_detection import evaluator
from object_detection import trainer
from object_detection.builders import dataset_builder
from object_detection.builders import graph_rewriter_builder
from object_detection.builders import model_builder
from object_detection.utils import config_util
from object_detection.utils import dataset_util
from object_detection.utils import label_map_util
import tensorflow as tf
tf.logging.set_verbosity(tf.logging.INFO)
flags = tf.app.flags
flags.DEFINE_string('master', '', 'Name of the TensorFlow master to use.')
flags.DEFINE_integer('task', 0, 'task id')
flags.DEFINE_integer('num_clones', 1, 'Number of clones to deploy per worker.')
flags.DEFINE_boolean('clone_on_cpu', False,
'Force clones to be deployed on CPU. Note that even if '
'set to False (allowing ops to run on gpu), some ops may '
'still be run on the CPU if they have no GPU kernel.')
flags.DEFINE_integer('worker_replicas', 1, 'Number of worker+trainer '
'replicas.')
flags.DEFINE_integer('parameter_server_tasks', 0,
                     'Number of parameter server tasks. If 0, does not use '
                     'a parameter server.')
flags.DEFINE_string('train_dir', '',
'Directory to save the checkpoints and training summaries.')
flags.DEFINE_string('pipeline_config_path', '',
'Path to a pipeline_pb2.TrainEvalPipelineConfig config '
'file. If provided, other configs are ignored')
flags.DEFINE_boolean('eval_training_data', False,
'If training data should be evaluated for this job.')
flags.DEFINE_string('eval_dir', '',
'Directory to write eval summaries to.')
flags.DEFINE_boolean('run_once', False, 'Option to only run a single pass of '
'evaluation. Overrides the `max_evals`'
' parameter in the provided config.')
flags.DEFINE_float('box_min_ap', -1, 'Option to run until the box average '
                   'precision reaches this number')
flags.DEFINE_float('mask_min_ap', -1, 'Option to run until the mask average '
                   'precision reaches this number')
flags.DEFINE_integer('epochs_between_evals', 1, 'Number of training steps to '
                     'run before each evaluation.')
FLAGS = flags.FLAGS
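# Example invocation (a sketch; the script name and paths are placeholders,
# the flags are those defined above):
#   python mask_rcnn_main.py \
#     --pipeline_config_path=path/to/pipeline.config \
#     --train_dir=/tmp/mask_rcnn/train \
#     --eval_dir=/tmp/mask_rcnn/eval \
#     --epochs_between_evals=1000 \
#     --box_min_ap=0.37 --mask_min_ap=0.33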
def stopping_criteria_met(eval_metrics, mask_min_ap, box_min_ap):
"""Returns true if both of the min precision criteria are met in the given
evaluation metrics.
Args:
eval_metrics: dict of metrics names as keys and their corresponding values,
containing "DetectionMasks_Precision/mAP", and
"DetectionBoxes_Precision/mAP" fields.
mask_min_ap: minimum desired mask average precision, will be ignored if -1
box_min_ap: minimum desired box average precision, will be ignored if -1
Returns:
    True if every criterion that is not -1 is met, False otherwise. Returns
    False when both criteria are -1.
"""
assert mask_min_ap == -1 or 0 < mask_min_ap < 1
assert box_min_ap == -1 or 0 < box_min_ap < 1
try:
mask_mAP_reached = eval_metrics['DetectionMasks_Precision/mAP']
box_mAP_reached = eval_metrics['DetectionBoxes_Precision/mAP']
except KeyError as err:
raise Exception('eval_metrics dict does not contain the mAP field') from err
  return ((mask_min_ap == -1 or mask_mAP_reached > mask_min_ap) and
          (box_min_ap == -1 or box_mAP_reached > box_min_ap) and
          (mask_min_ap != -1 or box_min_ap != -1))
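# Usage sketch for the helper above (hypothetical metric values): training
# stops only once every requested threshold is exceeded.
#   stopping_criteria_met(
#       {'DetectionMasks_Precision/mAP': 0.34,
#        'DetectionBoxes_Precision/mAP': 0.38},
#       mask_min_ap=0.33, box_min_ap=0.37)  # -> True
#   stopping_criteria_met(same_metrics, mask_min_ap=-1, box_min_ap=-1)  # -> False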
def main(_):
assert FLAGS.train_dir, '`train_dir` is missing.'
assert FLAGS.pipeline_config_path, '`pipeline_config_path` is missing'
assert FLAGS.eval_dir, '`eval_dir` is missing.'
configs = config_util.get_configs_from_pipeline_file(
FLAGS.pipeline_config_path)
if FLAGS.task == 0:
tf.gfile.MakeDirs(FLAGS.train_dir)
tf.gfile.Copy(FLAGS.pipeline_config_path,
os.path.join(FLAGS.train_dir, 'pipeline.config'),
overwrite=True)
tf.gfile.MakeDirs(FLAGS.eval_dir)
tf.gfile.Copy(FLAGS.pipeline_config_path,
os.path.join(FLAGS.eval_dir, 'pipeline.config'),
overwrite=True)
model_config = configs['model']
train_config = configs['train_config']
train_input_config = configs['train_input_config']
eval_config = configs['eval_config']
if FLAGS.eval_training_data:
eval_input_config = configs['train_input_config']
else:
eval_input_config = configs['eval_input_config']
  # Run evaluation after every FLAGS.epochs_between_evals training steps; the
  # total number of training steps is taken from num_steps in the config.
if train_config.num_steps:
total_num_epochs = train_config.num_steps
train_config.num_steps = FLAGS.epochs_between_evals
total_training_cycle = total_num_epochs // train_config.num_steps
else:
    # TODO(mehdi): make it run indefinitely.
total_num_epochs = 20000000
train_config.num_steps = FLAGS.epochs_between_evals
total_training_cycle = total_num_epochs // train_config.num_steps
train_model_fn = functools.partial(model_builder.build,
model_config=model_config,
is_training=True)
eval_model_fn = functools.partial(model_builder.build,
model_config=model_config,
is_training=False)
def get_next(config):
return dataset_util.make_initializable_iterator(
dataset_builder.build(config)).get_next()
# functions to create a tensor input dictionary for both training & evaluation
train_input_dict_fn = functools.partial(get_next, train_input_config)
eval_input_dict_fn = functools.partial(get_next, eval_input_config)
# If not explicitly specified in the constructor and the TF_CONFIG
# environment variable is present, load cluster_spec from TF_CONFIG.
env = json.loads(os.environ.get('TF_CONFIG', '{}'))
cluster_data = env.get('cluster', None)
cluster = tf.train.ClusterSpec(cluster_data) if cluster_data else None
task_data = env.get('task', {'type': 'master', 'index': 0})
task_info = type('TaskSpec', (object,), task_data)
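  # A typical TF_CONFIG value looks like the following (an illustration; the
  # hosts are placeholders):
  #   {"cluster": {"master": ["host0:2222"],
  #                "worker": ["host1:2222", "host2:2222"],
  #                "ps": ["host3:2222"]},
  #    "task": {"type": "worker", "index": 0}}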
# Parameters for a single worker.
parameter_server_tasks = 0
worker_replicas = 1
worker_job_name = 'lonely_worker'
task = 0
is_chief = True
master = ''
if cluster_data and 'worker' in cluster_data:
    # Number of total worker replicas includes "worker"s and the "master".
worker_replicas = len(cluster_data['worker']) + 1
if cluster_data and 'ps' in cluster_data:
parameter_server_tasks = len(cluster_data['ps'])
if worker_replicas > 1 and parameter_server_tasks < 1:
raise ValueError('At least 1 ps task is needed for distributed training.')
if worker_replicas >= 1 and parameter_server_tasks > 0:
# Set up distributed training.
server = tf.train.Server(tf.train.ClusterSpec(cluster), protocol='grpc',
job_name=task_info.type,
task_index=task_info.index)
if task_info.type == 'ps':
server.join()
return
worker_job_name = '%s/task:%d' % (task_info.type, task_info.index)
task = task_info.index
is_chief = (task_info.type == 'master')
master = server.target
label_map = label_map_util.load_labelmap(eval_input_config.label_map_path)
max_num_classes = max([item.id for item in label_map.item])
categories = label_map_util.convert_label_map_to_categories(label_map,
max_num_classes)
if FLAGS.run_once:
eval_config.max_evals = 1
train_graph_rewriter_fn = eval_graph_rewriter_fn = None
if 'graph_rewriter_config' in configs:
train_graph_rewriter_fn = graph_rewriter_builder.build(
configs['graph_rewriter_config'], is_training=True)
    eval_graph_rewriter_fn = graph_rewriter_builder.build(
        configs['graph_rewriter_config'], is_training=False)
def train():
return trainer.train(create_tensor_dict_fn=train_input_dict_fn,
create_model_fn=train_model_fn,
train_config=train_config, master=master, task=task,
num_clones=FLAGS.num_clones,
worker_replicas=worker_replicas,
clone_on_cpu=FLAGS.clone_on_cpu,
ps_tasks=parameter_server_tasks,
worker_job_name=worker_job_name,
is_chief=is_chief, train_dir=FLAGS.train_dir,
graph_hook_fn=train_graph_rewriter_fn)
def evaluate():
return evaluator.evaluate(eval_input_dict_fn, eval_model_fn, eval_config,
categories, FLAGS.train_dir, FLAGS.eval_dir,
graph_hook_fn=eval_graph_rewriter_fn)
for cycle_index in range(total_training_cycle):
tf.logging.info('Starting a training cycle: %d/%d',
cycle_index, total_training_cycle)
train()
tf.logging.info('Starting to evaluate.')
eval_metrics = evaluate()
if stopping_criteria_met(eval_metrics, FLAGS.mask_min_ap, FLAGS.box_min_ap):
tf.logging.info('Stopping criteria met. Training stopped')
break
if __name__ == '__main__':
tf.app.run()
# Contributing to the Tensorflow Object Detection API
Patches to Tensorflow Object Detection API are welcome!
We require contributors to fill out either the individual or corporate
Contributor License Agreement (CLA).
* If you are an individual writing original source code and you're sure you own the intellectual property, then you'll need to sign an [individual CLA](http://code.google.com/legal/individual-cla-v1.0.html).
* If you work for a company that wants to allow you to contribute your work, then you'll need to sign a [corporate CLA](http://code.google.com/legal/corporate-cla-v1.0.html).
Please follow the
[Tensorflow contributing guidelines](https://github.com/tensorflow/tensorflow/blob/master/CONTRIBUTING.md)
when submitting pull requests.
# Tensorflow Object Detection API
Creating accurate machine learning models capable of localizing and identifying
multiple objects in a single image remains a core challenge in computer vision.
The TensorFlow Object Detection API is an open source framework built on top of
TensorFlow that makes it easy to construct, train and deploy object detection
models. At Google we’ve certainly found this codebase to be useful for our
computer vision needs, and we hope that you will as well.
<p align="center">
<img src="g3doc/img/kites_detections_output.jpg" width=676 height=450>
</p>
Contributions to the codebase are welcome and we would love to hear back from
you if you find this API useful. Finally if you use the Tensorflow Object
Detection API for a research publication, please consider citing:
```
"Speed/accuracy trade-offs for modern convolutional object detectors."
Huang J, Rathod V, Sun C, Zhu M, Korattikara A, Fathi A, Fischer I, Wojna Z,
Song Y, Guadarrama S, Murphy K, CVPR 2017
```
\[[link](https://arxiv.org/abs/1611.10012)\]\[[bibtex](
https://scholar.googleusercontent.com/scholar.bib?q=info:l291WsrB-hQJ:scholar.google.com/&output=citation&scisig=AAGBfm0AAAAAWUIIlnPZ_L9jxvPwcC49kDlELtaeIyU-&scisf=4&ct=citation&cd=-1&hl=en&scfhb=1)\]
<p align="center">
<img src="g3doc/img/tf-od-api-logo.png" width=140 height=195>
</p>
## Maintainers
* Jonathan Huang, github: [jch1](https://github.com/jch1)
* Vivek Rathod, github: [tombstone](https://github.com/tombstone)
* Ronny Votel, github: [ronnyvotel](https://github.com/ronnyvotel)
* Derek Chow, github: [derekjchow](https://github.com/derekjchow)
* Chen Sun, github: [jesu9](https://github.com/jesu9)
* Menglong Zhu, github: [dreamdragon](https://github.com/dreamdragon)
* Alireza Fathi, github: [afathi3](https://github.com/afathi3)
* Zhichao Lu, github: [pkulzc](https://github.com/pkulzc)
## Table of contents
Quick Start:
* <a href='object_detection_tutorial.ipynb'>
Quick Start: Jupyter notebook for off-the-shelf inference</a><br>
* <a href="g3doc/running_pets.md">Quick Start: Training a pet detector</a><br>
Setup:
* <a href='g3doc/installation.md'>Installation</a><br>
* <a href='g3doc/configuring_jobs.md'>
Configuring an object detection pipeline</a><br>
* <a href='g3doc/preparing_inputs.md'>Preparing inputs</a><br>
Running:
* <a href='g3doc/running_locally.md'>Running locally</a><br>
* <a href='g3doc/running_on_cloud.md'>Running on the cloud</a><br>
Extras:
* <a href='g3doc/detection_model_zoo.md'>Tensorflow detection model zoo</a><br>
* <a href='g3doc/exporting_models.md'>
Exporting a trained model for inference</a><br>
* <a href='g3doc/defining_your_own_model.md'>
Defining your own model architecture</a><br>
* <a href='g3doc/using_your_own_dataset.md'>
Bringing in your own dataset</a><br>
* <a href='g3doc/evaluation_protocols.md'>
Supported object detection evaluation protocols</a><br>
* <a href='g3doc/oid_inference_and_evaluation.md'>
Inference and evaluation on the Open Images dataset</a><br>
* <a href='g3doc/instance_segmentation.md'>
Run an instance segmentation model</a><br>
## Getting Help
To get help with issues you may encounter using the Tensorflow Object Detection
API, create a new question on [StackOverflow](https://stackoverflow.com/) with
the tags "tensorflow" and "object-detection".
Please report bugs (actually broken code, not usage questions) to the
tensorflow/models GitHub
[issue tracker](https://github.com/tensorflow/models/issues), prefixing the
issue name with "object_detection".
Please check [FAQ](g3doc/faq.md) for frequently asked questions before
reporting an issue.
## Release information
### April 30, 2018
We have released a Faster R-CNN detector with ResNet-101 feature extractor trained on [AVA](https://research.google.com/ava/) v2.1.
Compared with other commonly used object detectors, it changes the action classification loss function to per-class Sigmoid loss to handle boxes with multiple labels.
The model is trained on the training split of AVA v2.1 for 1.5M iterations, achieving a mean AP of 11.25% over 60 classes on the validation split of AVA v2.1.
For more details please refer to this [paper](https://arxiv.org/abs/1705.08421).
<b>Thanks to contributors</b>: Chen Sun, David Ross
### April 2, 2018
Supercharge your mobile phones with the next generation mobile object detector!
We are adding support for MobileNet V2 with SSDLite presented in
[MobileNetV2: Inverted Residuals and Linear Bottlenecks](https://arxiv.org/abs/1801.04381).
This model is 35% faster than Mobilenet V1 SSD on a Google Pixel phone CPU (200ms vs. 270ms) at the same accuracy.
Along with the model definition, we are also releasing a model checkpoint trained on the COCO dataset.
<b>Thanks to contributors</b>: Menglong Zhu, Mark Sandler, Zhichao Lu, Vivek Rathod, Jonathan Huang
### February 9, 2018
We now support instance segmentation!! In this API update we support a number of instance segmentation models similar to those discussed in the [Mask R-CNN paper](https://arxiv.org/abs/1703.06870). For further details refer to
[our slides](http://presentations.cocodataset.org/Places17-GMRI.pdf) from the 2017 Coco + Places Workshop.
Refer to the section on [Running an Instance Segmentation Model](g3doc/instance_segmentation.md) for instructions on how to configure a model
that predicts masks in addition to object bounding boxes.
<b>Thanks to contributors</b>: Alireza Fathi, Zhichao Lu, Vivek Rathod, Ronny Votel, Jonathan Huang
### November 17, 2017
As a part of the Open Images V3 release we have released:
* An implementation of the Open Images evaluation metric and the [protocol](g3doc/evaluation_protocols.md#open-images).
* Additional tools to separate inference of detection and evaluation (see [this tutorial](g3doc/oid_inference_and_evaluation.md)).
* A new detection model trained on the Open Images V2 data release (see [Open Images model](g3doc/detection_model_zoo.md#open-images-models)).
See more information on the [Open Images website](https://github.com/openimages/dataset)!
<b>Thanks to contributors</b>: Stefan Popov, Alina Kuznetsova
### November 6, 2017
We have re-released faster versions of our (pre-trained) models in the
<a href='g3doc/detection_model_zoo.md'>model zoo</a>. In addition to what
was available before, we are also adding Faster R-CNN models trained on COCO
with Inception V2 and Resnet-50 feature extractors, as well as a Faster R-CNN
with Resnet-101 model trained on the KITTI dataset.
<b>Thanks to contributors</b>: Jonathan Huang, Vivek Rathod, Derek Chow,
Tal Remez, Chen Sun.
### October 31, 2017
We have released a new state-of-the-art model for object detection using
the Faster-RCNN with the
[NASNet-A image featurization](https://arxiv.org/abs/1707.07012). This
model achieves mAP of 43.1% on the test-dev validation dataset for COCO,
improving on the best available model in the zoo by 6% in terms
of absolute mAP.
<b>Thanks to contributors</b>: Barret Zoph, Vijay Vasudevan, Jonathon Shlens, Quoc Le
### August 11, 2017
We have released an update to the [Android Detect
demo](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/examples/android)
which will now run models trained using the Tensorflow Object
Detection API on an Android device. By default, it currently runs a
frozen SSD w/Mobilenet detector trained on COCO, but we encourage
you to try out other detection models!
<b>Thanks to contributors</b>: Jonathan Huang, Andrew Harp
### June 15, 2017
In addition to our base Tensorflow detection model definitions, this
release includes:
* A selection of trainable detection models, including:
* Single Shot Multibox Detector (SSD) with MobileNet,
* SSD with Inception V2,
* Region-Based Fully Convolutional Networks (R-FCN) with Resnet 101,
* Faster RCNN with Resnet 101,
* Faster RCNN with Inception Resnet v2
* Frozen weights (trained on the COCO dataset) for each of the above models to
be used for out-of-the-box inference purposes.
* A [Jupyter notebook](object_detection_tutorial.ipynb) for performing
out-of-the-box inference with one of our released models
* Convenient [local training](g3doc/running_locally.md) scripts as well as
distributed training and evaluation pipelines via
[Google Cloud](g3doc/running_on_cloud.md).
<b>Thanks to contributors</b>: Jonathan Huang, Vivek Rathod, Derek Chow,
Chen Sun, Menglong Zhu, Matthew Tang, Anoop Korattikara, Alireza Fathi, Ian Fischer, Zbigniew Wojna, Yang Song, Sergio Guadarrama, Jasper Uijlings,
Viacheslav Kovalevskyi, Kevin Murphy
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Generates grid anchors on the fly as used in Faster RCNN.
Generates grid anchors on the fly as described in:
"Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks"
Shaoqing Ren, Kaiming He, Ross Girshick, and Jian Sun.
"""
import tensorflow as tf
from object_detection.core import anchor_generator
from object_detection.core import box_list
from object_detection.utils import ops
class GridAnchorGenerator(anchor_generator.AnchorGenerator):
"""Generates a grid of anchors at given scales and aspect ratios."""
def __init__(self,
scales=(0.5, 1.0, 2.0),
aspect_ratios=(0.5, 1.0, 2.0),
base_anchor_size=None,
anchor_stride=None,
anchor_offset=None):
"""Constructs a GridAnchorGenerator.
Args:
scales: a list of (float) scales, default=(0.5, 1.0, 2.0)
aspect_ratios: a list of (float) aspect ratios, default=(0.5, 1.0, 2.0)
      base_anchor_size: base anchor size as [height, width]
                        (length-2 float32 list or tensor, default=[256, 256])
anchor_stride: difference in centers between base anchors for adjacent
grid positions (length-2 float32 list or tensor,
default=[16, 16])
anchor_offset: center of the anchor with scale and aspect ratio 1 for the
upper left element of the grid, this should be zero for
feature networks with only VALID padding and even receptive
field size, but may need additional calculation if other
padding is used (length-2 float32 list or tensor,
default=[0, 0])
"""
# Handle argument defaults
if base_anchor_size is None:
base_anchor_size = [256, 256]
base_anchor_size = tf.to_float(tf.convert_to_tensor(base_anchor_size))
if anchor_stride is None:
anchor_stride = [16, 16]
anchor_stride = tf.to_float(tf.convert_to_tensor(anchor_stride))
if anchor_offset is None:
anchor_offset = [0, 0]
anchor_offset = tf.to_float(tf.convert_to_tensor(anchor_offset))
self._scales = scales
self._aspect_ratios = aspect_ratios
self._base_anchor_size = base_anchor_size
self._anchor_stride = anchor_stride
self._anchor_offset = anchor_offset
def name_scope(self):
return 'GridAnchorGenerator'
def num_anchors_per_location(self):
"""Returns the number of anchors per spatial location.
Returns:
a list of integers, one for each expected feature map to be passed to
the `generate` function.
"""
return [len(self._scales) * len(self._aspect_ratios)]
def _generate(self, feature_map_shape_list):
"""Generates a collection of bounding boxes to be used as anchors.
Args:
feature_map_shape_list: list of pairs of convnet layer resolutions in the
format [(height_0, width_0)]. For example, setting
feature_map_shape_list=[(8, 8)] asks for anchors that correspond
to an 8x8 layer. For this anchor generator, only lists of length 1 are
allowed.
Returns:
boxes_list: a list of BoxLists each holding anchor boxes corresponding to
the input feature map shapes.
    Raises:
      ValueError: if feature_map_shape_list is not a list of length 1, or does
        not consist of pairs of integers.
"""
if not (isinstance(feature_map_shape_list, list)
and len(feature_map_shape_list) == 1):
raise ValueError('feature_map_shape_list must be a list of length 1.')
if not all([isinstance(list_item, tuple) and len(list_item) == 2
for list_item in feature_map_shape_list]):
raise ValueError('feature_map_shape_list must be a list of pairs.')
grid_height, grid_width = feature_map_shape_list[0]
scales_grid, aspect_ratios_grid = ops.meshgrid(self._scales,
self._aspect_ratios)
scales_grid = tf.reshape(scales_grid, [-1])
aspect_ratios_grid = tf.reshape(aspect_ratios_grid, [-1])
anchors = tile_anchors(grid_height,
grid_width,
scales_grid,
aspect_ratios_grid,
self._base_anchor_size,
self._anchor_stride,
self._anchor_offset)
num_anchors = anchors.num_boxes_static()
if num_anchors is None:
num_anchors = anchors.num_boxes()
anchor_indices = tf.zeros([num_anchors])
anchors.add_field('feature_map_index', anchor_indices)
return [anchors]
def tile_anchors(grid_height,
grid_width,
scales,
aspect_ratios,
base_anchor_size,
anchor_stride,
anchor_offset):
"""Create a tiled set of anchors strided along a grid in image space.
This op creates a set of anchor boxes by placing a "basis" collection of
boxes with user-specified scales and aspect ratios centered at evenly
distributed points along a grid. The basis collection is specified via the
scale and aspect_ratios arguments. For example, setting scales=[.1, .2, .2]
and aspect ratios = [2,2,1/2] means that we create three boxes: one with scale
.1, aspect ratio 2, one with scale .2, aspect ratio 2, and one with scale .2
and aspect ratio 1/2. Each box is multiplied by "base_anchor_size" before
placing it over its respective center.
Grid points are specified via grid_height, grid_width parameters as well as
the anchor_stride and anchor_offset parameters.
Args:
grid_height: size of the grid in the y direction (int or int scalar tensor)
grid_width: size of the grid in the x direction (int or int scalar tensor)
scales: a 1-d (float) tensor representing the scale of each box in the
basis set.
aspect_ratios: a 1-d (float) tensor representing the aspect ratio of each
box in the basis set. The length of the scales and aspect_ratios tensors
must be equal.
base_anchor_size: base anchor size as [height, width]
(float tensor of shape [2])
anchor_stride: difference in centers between base anchors for adjacent grid
positions (float tensor of shape [2])
anchor_offset: center of the anchor with scale and aspect ratio 1 for the
upper left element of the grid, this should be zero for
feature networks with only VALID padding and even receptive
field size, but may need some additional calculation if other
padding is used (float tensor of shape [2])
Returns:
a BoxList holding a collection of N anchor boxes
"""
ratio_sqrts = tf.sqrt(aspect_ratios)
heights = scales / ratio_sqrts * base_anchor_size[0]
widths = scales * ratio_sqrts * base_anchor_size[1]
# Get a grid of box centers
y_centers = tf.to_float(tf.range(grid_height))
y_centers = y_centers * anchor_stride[0] + anchor_offset[0]
x_centers = tf.to_float(tf.range(grid_width))
x_centers = x_centers * anchor_stride[1] + anchor_offset[1]
x_centers, y_centers = ops.meshgrid(x_centers, y_centers)
widths_grid, x_centers_grid = ops.meshgrid(widths, x_centers)
heights_grid, y_centers_grid = ops.meshgrid(heights, y_centers)
bbox_centers = tf.stack([y_centers_grid, x_centers_grid], axis=3)
bbox_sizes = tf.stack([heights_grid, widths_grid], axis=3)
bbox_centers = tf.reshape(bbox_centers, [-1, 2])
bbox_sizes = tf.reshape(bbox_sizes, [-1, 2])
bbox_corners = _center_size_bbox_to_corners_bbox(bbox_centers, bbox_sizes)
return box_list.BoxList(bbox_corners)
def _center_size_bbox_to_corners_bbox(centers, sizes):
"""Converts bbox center-size representation to corners representation.
Args:
centers: a tensor with shape [N, 2] representing bounding box centers
    sizes: a tensor with shape [N, 2] representing bounding box sizes as
      [height, width]
Returns:
corners: tensor with shape [N, 4] representing bounding boxes in corners
representation
"""
return tf.concat([centers - .5 * sizes, centers + .5 * sizes], 1)
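

# Minimal usage sketch (an illustration, not part of the library): tiling the
# three-box basis from the tile_anchors docstring over a 2x2 grid yields
# 3 * 2 * 2 = 12 anchors. Assumes graph-mode (TF1) execution.
if __name__ == '__main__':
  _demo_anchors = tile_anchors(
      grid_height=2, grid_width=2,
      scales=tf.constant([0.1, 0.2, 0.2]),
      aspect_ratios=tf.constant([2.0, 2.0, 0.5]),
      base_anchor_size=tf.constant([256.0, 256.0]),
      anchor_stride=tf.constant([16.0, 16.0]),
      anchor_offset=tf.constant([0.0, 0.0]))
  with tf.Session() as sess:
    # Each row is [ymin, xmin, ymax, xmax]; 3 boxes x 4 grid cells = 12 rows.
    print(sess.run(_demo_anchors.get()).shape)  # (12, 4)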
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.grid_anchor_generator."""
import numpy as np
import tensorflow as tf
from object_detection.anchor_generators import grid_anchor_generator
from object_detection.utils import test_case
class GridAnchorGeneratorTest(test_case.TestCase):
def test_construct_single_anchor(self):
"""Builds a 1x1 anchor grid to test the size of the output boxes."""
def graph_fn():
scales = [0.5, 1.0, 2.0]
aspect_ratios = [0.25, 1.0, 4.0]
anchor_offset = [7, -3]
anchor_generator = grid_anchor_generator.GridAnchorGenerator(
scales, aspect_ratios, anchor_offset=anchor_offset)
anchors_list = anchor_generator.generate(feature_map_shape_list=[(1, 1)])
anchor_corners = anchors_list[0].get()
return (anchor_corners,)
exp_anchor_corners = [[-121, -35, 135, 29], [-249, -67, 263, 61],
[-505, -131, 519, 125], [-57, -67, 71, 61],
[-121, -131, 135, 125], [-249, -259, 263, 253],
[-25, -131, 39, 125], [-57, -259, 71, 253],
[-121, -515, 135, 509]]
anchor_corners_out = self.execute(graph_fn, [])
self.assertAllClose(anchor_corners_out, exp_anchor_corners)
def test_construct_anchor_grid(self):
def graph_fn():
base_anchor_size = [10, 10]
anchor_stride = [19, 19]
anchor_offset = [0, 0]
scales = [0.5, 1.0, 2.0]
aspect_ratios = [1.0]
anchor_generator = grid_anchor_generator.GridAnchorGenerator(
scales,
aspect_ratios,
base_anchor_size=base_anchor_size,
anchor_stride=anchor_stride,
anchor_offset=anchor_offset)
anchors_list = anchor_generator.generate(feature_map_shape_list=[(2, 2)])
anchor_corners = anchors_list[0].get()
return (anchor_corners,)
exp_anchor_corners = [[-2.5, -2.5, 2.5, 2.5], [-5., -5., 5., 5.],
[-10., -10., 10., 10.], [-2.5, 16.5, 2.5, 21.5],
[-5., 14., 5, 24], [-10., 9., 10, 29],
[16.5, -2.5, 21.5, 2.5], [14., -5., 24, 5],
[9., -10., 29, 10], [16.5, 16.5, 21.5, 21.5],
[14., 14., 24, 24], [9., 9., 29, 29]]
anchor_corners_out = self.execute(graph_fn, [])
self.assertAllClose(anchor_corners_out, exp_anchor_corners)
def test_construct_anchor_grid_with_dynamic_feature_map_shapes(self):
def graph_fn(feature_map_height, feature_map_width):
base_anchor_size = [10, 10]
anchor_stride = [19, 19]
anchor_offset = [0, 0]
scales = [0.5, 1.0, 2.0]
aspect_ratios = [1.0]
anchor_generator = grid_anchor_generator.GridAnchorGenerator(
scales,
aspect_ratios,
base_anchor_size=base_anchor_size,
anchor_stride=anchor_stride,
anchor_offset=anchor_offset)
anchors_list = anchor_generator.generate(
feature_map_shape_list=[(feature_map_height, feature_map_width)])
anchor_corners = anchors_list[0].get()
return (anchor_corners,)
exp_anchor_corners = [[-2.5, -2.5, 2.5, 2.5], [-5., -5., 5., 5.],
[-10., -10., 10., 10.], [-2.5, 16.5, 2.5, 21.5],
[-5., 14., 5, 24], [-10., 9., 10, 29],
[16.5, -2.5, 21.5, 2.5], [14., -5., 24, 5],
[9., -10., 29, 10], [16.5, 16.5, 21.5, 21.5],
[14., 14., 24, 24], [9., 9., 29, 29]]
anchor_corners_out = self.execute_cpu(graph_fn,
[np.array(2, dtype=np.int32),
np.array(2, dtype=np.int32)])
self.assertAllClose(anchor_corners_out, exp_anchor_corners)
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Generates grid anchors on the fly corresponding to multiple CNN layers.
Generates grid anchors on the fly corresponding to multiple CNN layers as
described in:
"SSD: Single Shot MultiBox Detector"
Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy, Scott Reed,
Cheng-Yang Fu, Alexander C. Berg
(see Section 2.2: Choosing scales and aspect ratios for default boxes)
"""
import numpy as np
import tensorflow as tf
from object_detection.anchor_generators import grid_anchor_generator
from object_detection.core import anchor_generator
from object_detection.core import box_list_ops
class MultipleGridAnchorGenerator(anchor_generator.AnchorGenerator):
"""Generate a grid of anchors for multiple CNN layers."""
def __init__(self,
box_specs_list,
base_anchor_size=None,
anchor_strides=None,
anchor_offsets=None,
clip_window=None):
"""Constructs a MultipleGridAnchorGenerator.
    To construct anchors at multiple grid resolutions, one must provide a
    feature_map_shape_list (e.g., [(8, 8), (4, 4)]) and, for each grid size,
    a corresponding list of (scale, aspect ratio) box specifications.
For example:
box_specs_list = [[(.1, 1.0), (.1, 2.0)], # for 8x8 grid
[(.2, 1.0), (.3, 1.0), (.2, 2.0)]] # for 4x4 grid
To support the fully convolutional setting, we pass grid sizes in at
generation time, while scale and aspect ratios are fixed at construction
time.
Args:
box_specs_list: list of list of (scale, aspect ratio) pairs with the
outside list having the same number of entries as feature_map_shape_list
(which is passed in at generation time).
base_anchor_size: base anchor size as [height, width]
(length-2 float tensor, default=[1.0, 1.0]).
The height and width values are normalized to the
minimum dimension of the input height and width, so that
when the base anchor height equals the base anchor
width, the resulting anchor is square even if the input
image is not square.
anchor_strides: list of pairs of strides in pixels (in y and x directions
respectively). For example, setting anchor_strides=[(25, 25), (50, 50)]
means that we want the anchors corresponding to the first layer to be
strided by 25 pixels and those in the second layer to be strided by 50
pixels in both y and x directions. If anchor_strides=None, they are set
to be the reciprocal of the corresponding feature map shapes.
anchor_offsets: list of pairs of offsets in pixels (in y and x directions
respectively). The offset specifies where we want the center of the
(0, 0)-th anchor to lie for each layer. For example, setting
        anchor_offsets=[(10, 10), (20, 20)] means that we want the
        (0, 0)-th anchor of the first layer to lie at (10, 10) in pixel space
        and likewise that we want the (0, 0)-th anchor of the second layer to
        lie at (20, 20) in pixel space. If anchor_offsets=None, then they are
set to be half of the corresponding anchor stride.
clip_window: a tensor of shape [4] specifying a window to which all
anchors should be clipped. If clip_window is None, then no clipping
is performed.
Raises:
ValueError: if box_specs_list is not a list of list of pairs
ValueError: if clip_window is not either None or a tensor of shape [4]
"""
if isinstance(box_specs_list, list) and all(
[isinstance(list_item, list) for list_item in box_specs_list]):
self._box_specs = box_specs_list
else:
raise ValueError('box_specs_list is expected to be a '
'list of lists of pairs')
if base_anchor_size is None:
base_anchor_size = tf.constant([256, 256], dtype=tf.float32)
self._base_anchor_size = base_anchor_size
self._anchor_strides = anchor_strides
self._anchor_offsets = anchor_offsets
if clip_window is not None and clip_window.get_shape().as_list() != [4]:
raise ValueError('clip_window must either be None or a shape [4] tensor')
self._clip_window = clip_window
self._scales = []
self._aspect_ratios = []
for box_spec in self._box_specs:
if not all([isinstance(entry, tuple) and len(entry) == 2
for entry in box_spec]):
raise ValueError('box_specs_list is expected to be a '
'list of lists of pairs')
scales, aspect_ratios = zip(*box_spec)
self._scales.append(scales)
self._aspect_ratios.append(aspect_ratios)
for arg, arg_name in zip([self._anchor_strides, self._anchor_offsets],
['anchor_strides', 'anchor_offsets']):
if arg and not (isinstance(arg, list) and
len(arg) == len(self._box_specs)):
raise ValueError('%s must be a list with the same length '
'as self._box_specs' % arg_name)
if arg and not all([
isinstance(list_item, tuple) and len(list_item) == 2
for list_item in arg
]):
raise ValueError('%s must be a list of pairs.' % arg_name)
def name_scope(self):
return 'MultipleGridAnchorGenerator'
def num_anchors_per_location(self):
"""Returns the number of anchors per spatial location.
Returns:
a list of integers, one for each expected feature map to be passed to
the Generate function.
"""
return [len(box_specs) for box_specs in self._box_specs]
def _generate(self, feature_map_shape_list, im_height=1, im_width=1):
"""Generates a collection of bounding boxes to be used as anchors.
The number of anchors generated for a single grid with shape MxM where we
place k boxes over each grid center is k*M^2 and thus the total number of
anchors is the sum over all grids. In our box_specs_list example
(see the constructor docstring), we would place two boxes over each grid
point on an 8x8 grid and three boxes over each grid point on a 4x4 grid and
thus end up with 2*8^2 + 3*4^2 = 176 anchors in total. The layout of the
output anchors follows the order of how the grid sizes and box_specs are
specified (with box_spec index varying the fastest, followed by width
index, then height index, then grid index).
Args:
feature_map_shape_list: list of pairs of convnet layer resolutions in the
format [(height_0, width_0), (height_1, width_1), ...]. For example,
setting feature_map_shape_list=[(8, 8), (7, 7)] asks for anchors that
correspond to an 8x8 layer followed by a 7x7 layer.
im_height: the height of the image to generate the grid for. If both
im_height and im_width are 1, the generated anchors default to
normalized coordinates, otherwise absolute coordinates are used for the
grid.
im_width: the width of the image to generate the grid for. If both
im_height and im_width are 1, the generated anchors default to
normalized coordinates, otherwise absolute coordinates are used for the
grid.
Returns:
boxes_list: a list of BoxLists each holding anchor boxes corresponding to
the input feature map shapes.
Raises:
ValueError: if feature_map_shape_list, box_specs_list do not have the same
length.
ValueError: if feature_map_shape_list does not consist of pairs of
integers
"""
if not (isinstance(feature_map_shape_list, list)
and len(feature_map_shape_list) == len(self._box_specs)):
raise ValueError('feature_map_shape_list must be a list with the same '
'length as self._box_specs')
if not all([isinstance(list_item, tuple) and len(list_item) == 2
for list_item in feature_map_shape_list]):
raise ValueError('feature_map_shape_list must be a list of pairs.')
im_height = tf.to_float(im_height)
im_width = tf.to_float(im_width)
if not self._anchor_strides:
anchor_strides = [(1.0 / tf.to_float(pair[0]), 1.0 / tf.to_float(pair[1]))
for pair in feature_map_shape_list]
else:
anchor_strides = [(tf.to_float(stride[0]) / im_height,
tf.to_float(stride[1]) / im_width)
for stride in self._anchor_strides]
if not self._anchor_offsets:
anchor_offsets = [(0.5 * stride[0], 0.5 * stride[1])
for stride in anchor_strides]
else:
anchor_offsets = [(tf.to_float(offset[0]) / im_height,
tf.to_float(offset[1]) / im_width)
for offset in self._anchor_offsets]
for arg, arg_name in zip([anchor_strides, anchor_offsets],
['anchor_strides', 'anchor_offsets']):
if not (isinstance(arg, list) and len(arg) == len(self._box_specs)):
raise ValueError('%s must be a list with the same length '
'as self._box_specs' % arg_name)
if not all([isinstance(list_item, tuple) and len(list_item) == 2
for list_item in arg]):
raise ValueError('%s must be a list of pairs.' % arg_name)
anchor_grid_list = []
min_im_shape = tf.minimum(im_height, im_width)
scale_height = min_im_shape / im_height
scale_width = min_im_shape / im_width
base_anchor_size = [
scale_height * self._base_anchor_size[0],
scale_width * self._base_anchor_size[1]
]
for feature_map_index, (grid_size, scales, aspect_ratios, stride,
offset) in enumerate(
zip(feature_map_shape_list, self._scales,
self._aspect_ratios, anchor_strides,
anchor_offsets)):
tiled_anchors = grid_anchor_generator.tile_anchors(
grid_height=grid_size[0],
grid_width=grid_size[1],
scales=scales,
aspect_ratios=aspect_ratios,
base_anchor_size=base_anchor_size,
anchor_stride=stride,
anchor_offset=offset)
if self._clip_window is not None:
tiled_anchors = box_list_ops.clip_to_window(
tiled_anchors, self._clip_window, filter_nonoverlapping=False)
num_anchors_in_layer = tiled_anchors.num_boxes_static()
if num_anchors_in_layer is None:
num_anchors_in_layer = tiled_anchors.num_boxes()
anchor_indices = feature_map_index * tf.ones([num_anchors_in_layer])
tiled_anchors.add_field('feature_map_index', anchor_indices)
anchor_grid_list.append(tiled_anchors)
return anchor_grid_list
def create_ssd_anchors(num_layers=6,
min_scale=0.2,
max_scale=0.95,
scales=None,
aspect_ratios=(1.0, 2.0, 3.0, 1.0 / 2, 1.0 / 3),
interpolated_scale_aspect_ratio=1.0,
base_anchor_size=None,
anchor_strides=None,
anchor_offsets=None,
reduce_boxes_in_lowest_layer=True):
"""Creates MultipleGridAnchorGenerator for SSD anchors.
This function instantiates a MultipleGridAnchorGenerator that reproduces
``default box`` construction proposed by Liu et al in the SSD paper.
See Section 2.2 for details. Grid sizes are assumed to be passed in
at generation time from finest resolution to coarsest resolution --- this is
used to (linearly) interpolate scales of anchor boxes corresponding to the
intermediate grid sizes.
Anchors that are returned by calling the `generate` method on the returned
MultipleGridAnchorGenerator object are always in normalized coordinates
and clipped to the unit square: (i.e. all coordinates lie in [0, 1]x[0, 1]).
Args:
num_layers: integer number of grid layers to create anchors for (actual
grid sizes passed in at generation time)
min_scale: scale of anchors corresponding to finest resolution (float)
max_scale: scale of anchors corresponding to coarsest resolution (float)
    scales: A list of anchor scales to use. When not None and not empty,
      min_scale and max_scale are ignored.
aspect_ratios: list or tuple of (float) aspect ratios to place on each
grid point.
interpolated_scale_aspect_ratio: An additional anchor is added with this
aspect ratio and a scale interpolated between the scale for a layer
and the scale for the next layer (1.0 for the last layer).
This anchor is not included if this value is 0.
base_anchor_size: base anchor size as [height, width].
The height and width values are normalized to the minimum dimension of the
input height and width, so that when the base anchor height equals the
base anchor width, the resulting anchor is square even if the input image
is not square.
anchor_strides: list of pairs of strides in pixels (in y and x directions
respectively). For example, setting anchor_strides=[(25, 25), (50, 50)]
means that we want the anchors corresponding to the first layer to be
strided by 25 pixels and those in the second layer to be strided by 50
pixels in both y and x directions. If anchor_strides=None, they are set to
be the reciprocal of the corresponding feature map shapes.
anchor_offsets: list of pairs of offsets in pixels (in y and x directions
respectively). The offset specifies where we want the center of the
(0, 0)-th anchor to lie for each layer. For example, setting
      anchor_offsets=[(10, 10), (20, 20)] means that we want the
      (0, 0)-th anchor of the first layer to lie at (10, 10) in pixel space
      and likewise that we want the (0, 0)-th anchor of the second layer to lie
      at (20, 20) in pixel space. If anchor_offsets=None, then they are set to
be half of the corresponding anchor stride.
    reduce_boxes_in_lowest_layer: a boolean indicating whether a fixed set of
      3 boxes per location is used in the lowest layer.
Returns:
a MultipleGridAnchorGenerator
"""
if base_anchor_size is None:
base_anchor_size = [1.0, 1.0]
base_anchor_size = tf.constant(base_anchor_size, dtype=tf.float32)
box_specs_list = []
if scales is None or not scales:
scales = [min_scale + (max_scale - min_scale) * i / (num_layers - 1)
for i in range(num_layers)] + [1.0]
else:
# Add 1.0 to the end, which will only be used in scale_next below and used
# for computing an interpolated scale for the largest scale in the list.
scales += [1.0]
for layer, scale, scale_next in zip(
range(num_layers), scales[:-1], scales[1:]):
layer_box_specs = []
if layer == 0 and reduce_boxes_in_lowest_layer:
layer_box_specs = [(0.1, 1.0), (scale, 2.0), (scale, 0.5)]
else:
for aspect_ratio in aspect_ratios:
layer_box_specs.append((scale, aspect_ratio))
# Add one more anchor, with a scale between the current scale, and the
# scale for the next layer, with a specified aspect ratio (1.0 by
# default).
if interpolated_scale_aspect_ratio > 0.0:
layer_box_specs.append((np.sqrt(scale*scale_next),
interpolated_scale_aspect_ratio))
box_specs_list.append(layer_box_specs)
return MultipleGridAnchorGenerator(box_specs_list, base_anchor_size,
anchor_strides, anchor_offsets)
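

# Minimal usage sketch (an illustration, not part of the library): with the
# defaults num_layers=6, min_scale=0.2, max_scale=0.95, the linearly
# interpolated scales are 0.2 + 0.15 * i for i in 0..5, i.e.
# [0.2, 0.35, 0.5, 0.65, 0.8, 0.95]. The lowest layer uses the reduced 3-box
# spec; every other layer gets the 5 default aspect ratios plus one
# interpolated-scale box.
if __name__ == '__main__':
  _ssd_generator = create_ssd_anchors()
  print(_ssd_generator.num_anchors_per_location())  # [3, 6, 6, 6, 6, 6]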
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for anchor_generators.multiple_grid_anchor_generator_test.py."""
import numpy as np
import tensorflow as tf
from object_detection.anchor_generators import multiple_grid_anchor_generator as ag
from object_detection.utils import test_case
class MultipleGridAnchorGeneratorTest(test_case.TestCase):
def test_construct_single_anchor_grid(self):
"""Builds a 1x1 anchor grid to test the size of the output boxes."""
def graph_fn():
box_specs_list = [[(.5, .25), (1.0, .25), (2.0, .25),
(.5, 1.0), (1.0, 1.0), (2.0, 1.0),
(.5, 4.0), (1.0, 4.0), (2.0, 4.0)]]
anchor_generator = ag.MultipleGridAnchorGenerator(
box_specs_list,
base_anchor_size=tf.constant([256, 256], dtype=tf.float32),
anchor_strides=[(16, 16)],
anchor_offsets=[(7, -3)])
anchors_list = anchor_generator.generate(feature_map_shape_list=[(1, 1)])
return anchors_list[0].get()
exp_anchor_corners = [[-121, -35, 135, 29], [-249, -67, 263, 61],
[-505, -131, 519, 125], [-57, -67, 71, 61],
[-121, -131, 135, 125], [-249, -259, 263, 253],
[-25, -131, 39, 125], [-57, -259, 71, 253],
[-121, -515, 135, 509]]
anchor_corners_out = self.execute(graph_fn, [])
self.assertAllClose(anchor_corners_out, exp_anchor_corners)
def test_construct_anchor_grid(self):
def graph_fn():
box_specs_list = [[(0.5, 1.0), (1.0, 1.0), (2.0, 1.0)]]
anchor_generator = ag.MultipleGridAnchorGenerator(
box_specs_list,
base_anchor_size=tf.constant([10, 10], dtype=tf.float32),
anchor_strides=[(19, 19)],
anchor_offsets=[(0, 0)])
anchors_list = anchor_generator.generate(feature_map_shape_list=[(2, 2)])
return anchors_list[0].get()
exp_anchor_corners = [[-2.5, -2.5, 2.5, 2.5], [-5., -5., 5., 5.],
[-10., -10., 10., 10.], [-2.5, 16.5, 2.5, 21.5],
[-5., 14., 5, 24], [-10., 9., 10, 29],
[16.5, -2.5, 21.5, 2.5], [14., -5., 24, 5],
[9., -10., 29, 10], [16.5, 16.5, 21.5, 21.5],
[14., 14., 24, 24], [9., 9., 29, 29]]
anchor_corners_out = self.execute(graph_fn, [])
self.assertAllClose(anchor_corners_out, exp_anchor_corners)
def test_construct_anchor_grid_non_square(self):
def graph_fn():
box_specs_list = [[(1.0, 1.0)]]
anchor_generator = ag.MultipleGridAnchorGenerator(
box_specs_list, base_anchor_size=tf.constant([1, 1],
dtype=tf.float32))
anchors_list = anchor_generator.generate(feature_map_shape_list=[(
tf.constant(1, dtype=tf.int32), tf.constant(2, dtype=tf.int32))])
return anchors_list[0].get()
exp_anchor_corners = [[0., -0.25, 1., 0.75], [0., 0.25, 1., 1.25]]
anchor_corners_out = self.execute(graph_fn, [])
self.assertAllClose(anchor_corners_out, exp_anchor_corners)
def test_construct_dynamic_size_anchor_grid(self):
def graph_fn(height, width):
box_specs_list = [[(1.0, 1.0)]]
anchor_generator = ag.MultipleGridAnchorGenerator(
box_specs_list, base_anchor_size=tf.constant([1, 1],
dtype=tf.float32))
anchors_list = anchor_generator.generate(feature_map_shape_list=[(height,
width)])
return anchors_list[0].get()
exp_anchor_corners = [[0., -0.25, 1., 0.75], [0., 0.25, 1., 1.25]]
anchor_corners_out = self.execute_cpu(graph_fn,
[np.array(1, dtype=np.int32),
np.array(2, dtype=np.int32)])
self.assertAllClose(anchor_corners_out, exp_anchor_corners)
def test_construct_anchor_grid_normalized(self):
def graph_fn():
box_specs_list = [[(1.0, 1.0)]]
anchor_generator = ag.MultipleGridAnchorGenerator(
box_specs_list, base_anchor_size=tf.constant([1, 1],
dtype=tf.float32))
anchors_list = anchor_generator.generate(
feature_map_shape_list=[(tf.constant(1, dtype=tf.int32), tf.constant(
2, dtype=tf.int32))],
im_height=320,
im_width=640)
return anchors_list[0].get()
exp_anchor_corners = [[0., 0., 1., 0.5], [0., 0.5, 1., 1.]]
anchor_corners_out = self.execute(graph_fn, [])
self.assertAllClose(anchor_corners_out, exp_anchor_corners)
def test_construct_multiple_grids(self):
def graph_fn():
box_specs_list = [[(1.0, 1.0), (2.0, 1.0), (1.0, 0.5)],
[(1.0, 1.0), (1.0, 0.5)]]
anchor_generator = ag.MultipleGridAnchorGenerator(
box_specs_list,
base_anchor_size=tf.constant([1.0, 1.0], dtype=tf.float32),
anchor_strides=[(.25, .25), (.5, .5)],
anchor_offsets=[(.125, .125), (.25, .25)])
anchors_list = anchor_generator.generate(feature_map_shape_list=[(4, 4), (
2, 2)])
return [anchors.get() for anchors in anchors_list]
# height and width of box with .5 aspect ratio
h = np.sqrt(2)
w = 1.0/np.sqrt(2)
exp_small_grid_corners = [[-.25, -.25, .75, .75],
[.25-.5*h, .25-.5*w, .25+.5*h, .25+.5*w],
[-.25, .25, .75, 1.25],
[.25-.5*h, .75-.5*w, .25+.5*h, .75+.5*w],
[.25, -.25, 1.25, .75],
[.75-.5*h, .25-.5*w, .75+.5*h, .25+.5*w],
[.25, .25, 1.25, 1.25],
[.75-.5*h, .75-.5*w, .75+.5*h, .75+.5*w]]
# only test first entry of larger set of anchors
exp_big_grid_corners = [[.125-.5, .125-.5, .125+.5, .125+.5],
[.125-1.0, .125-1.0, .125+1.0, .125+1.0],
[.125-.5*h, .125-.5*w, .125+.5*h, .125+.5*w],]
anchor_corners_out = np.concatenate(self.execute(graph_fn, []), axis=0)
    self.assertEqual(anchor_corners_out.shape, (56, 4))
big_grid_corners = anchor_corners_out[0:3, :]
small_grid_corners = anchor_corners_out[48:, :]
self.assertAllClose(small_grid_corners, exp_small_grid_corners)
self.assertAllClose(big_grid_corners, exp_big_grid_corners)
def test_construct_multiple_grids_with_clipping(self):
def graph_fn():
box_specs_list = [[(1.0, 1.0), (2.0, 1.0), (1.0, 0.5)],
[(1.0, 1.0), (1.0, 0.5)]]
clip_window = tf.constant([0, 0, 1, 1], dtype=tf.float32)
anchor_generator = ag.MultipleGridAnchorGenerator(
box_specs_list,
base_anchor_size=tf.constant([1.0, 1.0], dtype=tf.float32),
clip_window=clip_window)
anchors_list = anchor_generator.generate(feature_map_shape_list=[(4, 4), (
2, 2)])
return [anchors.get() for anchors in anchors_list]
# height and width of box with .5 aspect ratio
h = np.sqrt(2)
w = 1.0/np.sqrt(2)
exp_small_grid_corners = [[0, 0, .75, .75],
[0, 0, .25+.5*h, .25+.5*w],
[0, .25, .75, 1],
[0, .75-.5*w, .25+.5*h, 1],
[.25, 0, 1, .75],
[.75-.5*h, 0, 1, .25+.5*w],
[.25, .25, 1, 1],
[.75-.5*h, .75-.5*w, 1, 1]]
anchor_corners_out = np.concatenate(self.execute(graph_fn, []), axis=0)
small_grid_corners = anchor_corners_out[48:, :]
self.assertAllClose(small_grid_corners, exp_small_grid_corners)
def test_invalid_box_specs(self):
# not all box specs are pairs
box_specs_list = [[(1.0, 1.0), (2.0, 1.0), (1.0, 0.5)],
[(1.0, 1.0), (1.0, 0.5, .3)]]
with self.assertRaises(ValueError):
ag.MultipleGridAnchorGenerator(box_specs_list)
# box_specs_list is not a list of lists
box_specs_list = [(1.0, 1.0), (2.0, 1.0), (1.0, 0.5)]
with self.assertRaises(ValueError):
ag.MultipleGridAnchorGenerator(box_specs_list)
def test_invalid_generate_arguments(self):
box_specs_list = [[(1.0, 1.0), (2.0, 1.0), (1.0, 0.5)],
[(1.0, 1.0), (1.0, 0.5)]]
# incompatible lengths with box_specs_list
with self.assertRaises(ValueError):
anchor_generator = ag.MultipleGridAnchorGenerator(
box_specs_list,
base_anchor_size=tf.constant([1.0, 1.0], dtype=tf.float32),
anchor_strides=[(.25, .25)],
anchor_offsets=[(.125, .125), (.25, .25)])
anchor_generator.generate(feature_map_shape_list=[(4, 4), (2, 2)])
with self.assertRaises(ValueError):
anchor_generator = ag.MultipleGridAnchorGenerator(
box_specs_list,
base_anchor_size=tf.constant([1.0, 1.0], dtype=tf.float32),
anchor_strides=[(.25, .25), (.5, .5)],
anchor_offsets=[(.125, .125), (.25, .25)])
anchor_generator.generate(feature_map_shape_list=[(4, 4), (2, 2), (1, 1)])
with self.assertRaises(ValueError):
anchor_generator = ag.MultipleGridAnchorGenerator(
box_specs_list,
base_anchor_size=tf.constant([1.0, 1.0], dtype=tf.float32),
anchor_strides=[(.5, .5)],
anchor_offsets=[(.25, .25)])
anchor_generator.generate(feature_map_shape_list=[(4, 4), (2, 2)])
# not pairs
with self.assertRaises(ValueError):
anchor_generator = ag.MultipleGridAnchorGenerator(
box_specs_list,
base_anchor_size=tf.constant([1.0, 1.0], dtype=tf.float32),
anchor_strides=[(.25, .25), (.5, .5)],
anchor_offsets=[(.125, .125), (.25, .25)])
anchor_generator.generate(feature_map_shape_list=[(4, 4, 4), (2, 2)])
with self.assertRaises(ValueError):
anchor_generator = ag.MultipleGridAnchorGenerator(
box_specs_list,
base_anchor_size=tf.constant([1.0, 1.0], dtype=tf.float32),
anchor_strides=[(.25, .25, .1), (.5, .5)],
anchor_offsets=[(.125, .125), (.25, .25)])
anchor_generator.generate(feature_map_shape_list=[(4, 4), (2, 2)])
with self.assertRaises(ValueError):
anchor_generator = ag.MultipleGridAnchorGenerator(
box_specs_list,
base_anchor_size=tf.constant([1.0, 1.0], dtype=tf.float32),
anchor_strides=[(.25, .25), (.5, .5)],
anchor_offsets=[(.125, .125), (.25, .25)])
anchor_generator.generate(feature_map_shape_list=[(4), (2, 2)])
class CreateSSDAnchorsTest(test_case.TestCase):
def test_create_ssd_anchors_returns_correct_shape(self):
def graph_fn1():
anchor_generator = ag.create_ssd_anchors(
num_layers=6,
min_scale=0.2,
max_scale=0.95,
aspect_ratios=(1.0, 2.0, 3.0, 1.0 / 2, 1.0 / 3),
reduce_boxes_in_lowest_layer=True)
feature_map_shape_list = [(38, 38), (19, 19), (10, 10),
(5, 5), (3, 3), (1, 1)]
anchors_list = anchor_generator.generate(
feature_map_shape_list=feature_map_shape_list)
return [anchors.get() for anchors in anchors_list]
anchor_corners_out = np.concatenate(self.execute(graph_fn1, []), axis=0)
self.assertEqual(anchor_corners_out.shape, (7308, 4))
def graph_fn2():
anchor_generator = ag.create_ssd_anchors(
num_layers=6, min_scale=0.2, max_scale=0.95,
aspect_ratios=(1.0, 2.0, 3.0, 1.0/2, 1.0/3),
reduce_boxes_in_lowest_layer=False)
feature_map_shape_list = [(38, 38), (19, 19), (10, 10),
(5, 5), (3, 3), (1, 1)]
anchors_list = anchor_generator.generate(
feature_map_shape_list=feature_map_shape_list)
return [anchors.get() for anchors in anchors_list]
anchor_corners_out = np.concatenate(self.execute(graph_fn2, []), axis=0)
self.assertEqual(anchor_corners_out.shape, (11640, 4))
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Generates grid anchors on the fly corresponding to multiple CNN layers.
Generates grid anchors on the fly corresponding to multiple CNN layers as
described in:
"Focal Loss for Dense Object Detection" (https://arxiv.org/abs/1708.02002)
T.-Y. Lin, P. Goyal, R. Girshick, K. He, P. Dollar
"""
from object_detection.anchor_generators import grid_anchor_generator
from object_detection.core import anchor_generator
from object_detection.core import box_list_ops
class MultiscaleGridAnchorGenerator(anchor_generator.AnchorGenerator):
"""Generate a grid of anchors for multiple CNN layers of different scale."""
def __init__(self, min_level, max_level, anchor_scale, aspect_ratios,
scales_per_octave, normalize_coordinates=True):
"""Constructs a MultiscaleGridAnchorGenerator.
To construct anchors at multiple scale resolutions, one must provide the
minimum and maximum levels of a scale pyramid. The anchor scale determines
the size of the base anchor relative to the stride of the corresponding
feature map. The generator maps each pixel location on a feature map to
multiple anchors with different aspect ratios and intermediate scales.
Args:
min_level: minimum level in feature pyramid.
max_level: maximum level in feature pyramid.
anchor_scale: anchor scale and feature stride define the size of the base
anchor on an image. For example, given a feature pyramid with strides
[2^3, ..., 2^7] and an anchor scale of 4, the base anchor sizes are
4 * [2^3, ..., 2^7].
aspect_ratios: list or tuple of (float) aspect ratios to place on each
grid point.
scales_per_octave: integer number of intermediate scales per scale octave.
normalize_coordinates: whether to produce anchors in normalized
coordinates. (defaults to True).
"""
self._anchor_grid_info = []
self._aspect_ratios = aspect_ratios
self._scales_per_octave = scales_per_octave
self._normalize_coordinates = normalize_coordinates
for level in range(min_level, max_level + 1):
anchor_stride = [2**level, 2**level]
scales = []
aspects = []
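# Intermediate scales subdivide each octave geometrically:
# 2**(i / scales_per_octave) for i in [0, scales_per_octave).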
for scale in range(scales_per_octave):
scales.append(2**(float(scale) / scales_per_octave))
for aspect_ratio in aspect_ratios:
aspects.append(aspect_ratio)
base_anchor_size = [2**level * anchor_scale, 2**level * anchor_scale]
self._anchor_grid_info.append({
'level': level,
'info': [scales, aspects, base_anchor_size, anchor_stride]
})
def name_scope(self):
return 'MultiscaleGridAnchorGenerator'
def num_anchors_per_location(self):
"""Returns the number of anchors per spatial location.
Returns:
a list of integers, one for each expected feature map to be passed to
the Generate function.
"""
return len(self._anchor_grid_info) * [
len(self._aspect_ratios) * self._scales_per_octave]
def _generate(self, feature_map_shape_list, im_height, im_width):
"""Generates a collection of bounding boxes to be used as anchors.
Currently we require the input image shape to be statically defined. That
is, im_height and im_width should be integers rather than tensors.
Args:
feature_map_shape_list: list of pairs of convnet layer resolutions in the
format [(height_0, width_0), (height_1, width_1), ...]. For example,
setting feature_map_shape_list=[(8, 8), (7, 7)] asks for anchors that
correspond to an 8x8 layer followed by a 7x7 layer.
im_height: the height of the image to generate the grid for.
im_width: the width of the image to generate the grid for.
Returns:
boxes_list: a list of BoxLists each holding anchor boxes corresponding to
the input feature map shapes.
Raises:
ValueError: if im_height and im_width are not integers.
"""
if not isinstance(im_height, int) or not isinstance(im_width, int):
raise ValueError('MultiscaleGridAnchorGenerator currently requires '
'input image shape to be statically defined.')
anchor_grid_list = []
for feat_shape, grid_info in zip(feature_map_shape_list,
self._anchor_grid_info):
# TODO(rathodv) check the feature_map_shape_list is consistent with
# self._anchor_grid_info
level = grid_info['level']
stride = 2**level
scales, aspect_ratios, base_anchor_size, anchor_stride = grid_info['info']
feat_h = feat_shape[0]
feat_w = feat_shape[1]
anchor_offset = [0, 0]
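# When the image dimension divides evenly by the stride at this level,
# shift the anchors by half a stride so they are centered on feature cells.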
if im_height % 2.0**level == 0:
anchor_offset[0] = stride / 2.0
if im_width % 2.0**level == 0:
anchor_offset[1] = stride / 2.0
ag = grid_anchor_generator.GridAnchorGenerator(
scales,
aspect_ratios,
base_anchor_size=base_anchor_size,
anchor_stride=anchor_stride,
anchor_offset=anchor_offset)
(anchor_grid,) = ag.generate(feature_map_shape_list=[(feat_h, feat_w)])
if self._normalize_coordinates:
anchor_grid = box_list_ops.to_normalized_coordinates(
anchor_grid, im_height, im_width, check_range=False)
anchor_grid_list.append(anchor_grid)
return anchor_grid_list
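A minimal usage sketch of the generator above (not part of the library; the values mirror the unit tests that follow, assuming a 64x64 image and pyramid levels 5 and 6, i.e. strides 32 and 64 and base anchors of 4 * 32 = 128 and 4 * 64 = 256 pixels):

import tensorflow as tf
from object_detection.anchor_generators import multiscale_grid_anchor_generator as mg

generator = mg.MultiscaleGridAnchorGenerator(
    min_level=5, max_level=6, anchor_scale=4.0, aspect_ratios=[1.0],
    scales_per_octave=1, normalize_coordinates=False)
# One (height, width) pair per pyramid level, finest level first.
anchors_list = generator.generate([(2, 2), (1, 1)], im_height=64, im_width=64)
with tf.Session() as sess:
  corners = sess.run([anchors.get() for anchors in anchors_list])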
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for anchor_generators.multiscale_grid_anchor_generator_test.py."""
import numpy as np
import tensorflow as tf
from object_detection.anchor_generators import multiscale_grid_anchor_generator as mg
from object_detection.utils import test_case
class MultiscaleGridAnchorGeneratorTest(test_case.TestCase):
def test_construct_single_anchor(self):
min_level = 5
max_level = 5
anchor_scale = 4.0
aspect_ratios = [1.0]
scales_per_octave = 1
im_height = 64
im_width = 64
feature_map_shape_list = [(2, 2)]
exp_anchor_corners = [[-48, -48, 80, 80],
[-48, -16, 80, 112],
[-16, -48, 112, 80],
[-16, -16, 112, 112]]
anchor_generator = mg.MultiscaleGridAnchorGenerator(
min_level, max_level, anchor_scale, aspect_ratios, scales_per_octave,
normalize_coordinates=False)
anchors_list = anchor_generator.generate(
feature_map_shape_list, im_height=im_height, im_width=im_width)
anchor_corners = anchors_list[0].get()
with self.test_session():
anchor_corners_out = anchor_corners.eval()
self.assertAllClose(anchor_corners_out, exp_anchor_corners)
def test_construct_single_anchor_in_normalized_coordinates(self):
min_level = 5
max_level = 5
anchor_scale = 4.0
aspect_ratios = [1.0]
scales_per_octave = 1
im_height = 64
im_width = 128
feature_map_shape_list = [(2, 2)]
exp_anchor_corners = [[-48./64, -48./128, 80./64, 80./128],
[-48./64, -16./128, 80./64, 112./128],
[-16./64, -48./128, 112./64, 80./128],
[-16./64, -16./128, 112./64, 112./128]]
anchor_generator = mg.MultiscaleGridAnchorGenerator(
min_level, max_level, anchor_scale, aspect_ratios, scales_per_octave,
normalize_coordinates=True)
anchors_list = anchor_generator.generate(
feature_map_shape_list, im_height=im_height, im_width=im_width)
anchor_corners = anchors_list[0].get()
with self.test_session():
anchor_corners_out = anchor_corners.eval()
self.assertAllClose(anchor_corners_out, exp_anchor_corners)
def test_num_anchors_per_location(self):
min_level = 5
max_level = 6
anchor_scale = 4.0
aspect_ratios = [1.0, 2.0]
scales_per_octave = 3
anchor_generator = mg.MultiscaleGridAnchorGenerator(
min_level, max_level, anchor_scale, aspect_ratios, scales_per_octave,
normalize_coordinates=False)
self.assertEqual(anchor_generator.num_anchors_per_location(), [6, 6])
def test_construct_single_anchor_fails_with_tensor_image_size(self):
min_level = 5
max_level = 5
anchor_scale = 4.0
aspect_ratios = [1.0]
scales_per_octave = 1
im_height = tf.constant(64)
im_width = tf.constant(64)
feature_map_shape_list = [(2, 2)]
anchor_generator = mg.MultiscaleGridAnchorGenerator(
min_level, max_level, anchor_scale, aspect_ratios, scales_per_octave,
normalize_coordinates=False)
with self.assertRaises(ValueError):
anchor_generator.generate(
feature_map_shape_list, im_height=im_height, im_width=im_width)
def test_construct_single_anchor_with_odd_input_dimension(self):
def graph_fn():
min_level = 5
max_level = 5
anchor_scale = 4.0
aspect_ratios = [1.0]
scales_per_octave = 1
im_height = 65
im_width = 65
feature_map_shape_list = [(3, 3)]
anchor_generator = mg.MultiscaleGridAnchorGenerator(
min_level, max_level, anchor_scale, aspect_ratios, scales_per_octave,
normalize_coordinates=False)
anchors_list = anchor_generator.generate(
feature_map_shape_list, im_height=im_height, im_width=im_width)
anchor_corners = anchors_list[0].get()
return (anchor_corners,)
anchor_corners_out = self.execute(graph_fn, [])
exp_anchor_corners = [[-64, -64, 64, 64],
[-64, -32, 64, 96],
[-64, 0, 64, 128],
[-32, -64, 96, 64],
[-32, -32, 96, 96],
[-32, 0, 96, 128],
[0, -64, 128, 64],
[0, -32, 128, 96],
[0, 0, 128, 128]]
self.assertAllClose(anchor_corners_out, exp_anchor_corners)
def test_construct_single_anchor_on_two_feature_maps(self):
def graph_fn():
min_level = 5
max_level = 6
anchor_scale = 4.0
aspect_ratios = [1.0]
scales_per_octave = 1
im_height = 64
im_width = 64
feature_map_shape_list = [(2, 2), (1, 1)]
anchor_generator = mg.MultiscaleGridAnchorGenerator(
min_level, max_level, anchor_scale, aspect_ratios, scales_per_octave,
normalize_coordinates=False)
anchors_list = anchor_generator.generate(feature_map_shape_list,
im_height=im_height,
im_width=im_width)
anchor_corners = [anchors.get() for anchors in anchors_list]
return anchor_corners
anchor_corners_out = np.concatenate(self.execute(graph_fn, []), axis=0)
exp_anchor_corners = [[-48, -48, 80, 80],
[-48, -16, 80, 112],
[-16, -48, 112, 80],
[-16, -16, 112, 112],
[-96, -96, 160, 160]]
self.assertAllClose(anchor_corners_out, exp_anchor_corners)
def test_construct_single_anchor_with_two_scales_per_octave(self):
def graph_fn():
min_level = 6
max_level = 6
anchor_scale = 4.0
aspect_ratios = [1.0]
scales_per_octave = 2
im_height = 64
im_width = 64
feature_map_shape_list = [(1, 1)]
anchor_generator = mg.MultiscaleGridAnchorGenerator(
min_level, max_level, anchor_scale, aspect_ratios, scales_per_octave,
normalize_coordinates=False)
anchors_list = anchor_generator.generate(feature_map_shape_list,
im_height=im_height,
im_width=im_width)
anchor_corners = [anchors.get() for anchors in anchors_list]
return anchor_corners
# There are 2 sets of anchors in this configuration. The order is:
# [[2**0.0 intermediate scale + 1.0 aspect],
# [2**0.5 intermediate scale + 1.0 aspect]]
exp_anchor_corners = [[-96., -96., 160., 160.],
[-149.0193, -149.0193, 213.0193, 213.0193]]
anchor_corners_out = self.execute(graph_fn, [])
self.assertAllClose(anchor_corners_out, exp_anchor_corners)
def test_construct_single_anchor_with_two_scales_per_octave_and_aspect(self):
def graph_fn():
min_level = 6
max_level = 6
anchor_scale = 4.0
aspect_ratios = [1.0, 2.0]
scales_per_octave = 2
im_height = 64
im_width = 64
feature_map_shape_list = [(1, 1)]
anchor_generator = mg.MultiscaleGridAnchorGenerator(
min_level, max_level, anchor_scale, aspect_ratios, scales_per_octave,
normalize_coordinates=False)
anchors_list = anchor_generator.generate(feature_map_shape_list,
im_height=im_height,
im_width=im_width)
anchor_corners = [anchors.get() for anchors in anchors_list]
return anchor_corners
# There are 4 sets of anchors in this configuration. The order is:
# [[2**0.0 intermediate scale + 1.0 aspect],
# [2**0.5 intermediate scale + 1.0 aspect],
# [2**0.0 intermediate scale + 2.0 aspect],
# [2**0.5 intermediate scale + 2.0 aspect]]
exp_anchor_corners = [[-96., -96., 160., 160.],
[-149.0193, -149.0193, 213.0193, 213.0193],
[-58.50967, -149.0193, 122.50967, 213.0193],
[-96., -224., 160., 288.]]
anchor_corners_out = self.execute(graph_fn, [])
self.assertAllClose(anchor_corners_out, exp_anchor_corners)
def test_construct_single_anchors_on_feature_maps_with_dynamic_shape(self):
def graph_fn(feature_map1_height, feature_map1_width, feature_map2_height,
feature_map2_width):
min_level = 5
max_level = 6
anchor_scale = 4.0
aspect_ratios = [1.0]
scales_per_octave = 1
im_height = 64
im_width = 64
feature_map_shape_list = [(feature_map1_height, feature_map1_width),
(feature_map2_height, feature_map2_width)]
anchor_generator = mg.MultiscaleGridAnchorGenerator(
min_level, max_level, anchor_scale, aspect_ratios, scales_per_octave,
normalize_coordinates=False)
anchors_list = anchor_generator.generate(feature_map_shape_list,
im_height=im_height,
im_width=im_width)
anchor_corners = [anchors.get() for anchors in anchors_list]
return anchor_corners
anchor_corners_out = np.concatenate(
self.execute_cpu(graph_fn, [
np.array(2, dtype=np.int32),
np.array(2, dtype=np.int32),
np.array(1, dtype=np.int32),
np.array(1, dtype=np.int32)
]),
axis=0)
exp_anchor_corners = [[-48, -48, 80, 80],
[-48, -16, 80, 112],
[-16, -48, 112, 80],
[-16, -16, 112, 112],
[-96, -96, 160, 160]]
self.assertAllClose(anchor_corners_out, exp_anchor_corners)
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Faster RCNN box coder.
Faster RCNN box coder follows the coding schema described below:
ty = (y - ya) / ha
tx = (x - xa) / wa
th = log(h / ha)
tw = log(w / wa)
where x, y, w, h denote the box's center coordinates, width and height
respectively. Similarly, xa, ya, wa, ha denote the anchor's center
coordinates, width and height. tx, ty, tw and th denote the anchor-encoded
center, width and height respectively.
See http://arxiv.org/abs/1506.01497 for details.
"""
import tensorflow as tf
from object_detection.core import box_coder
from object_detection.core import box_list
EPSILON = 1e-8
class FasterRcnnBoxCoder(box_coder.BoxCoder):
"""Faster RCNN box coder."""
def __init__(self, scale_factors=None):
"""Constructor for FasterRcnnBoxCoder.
Args:
scale_factors: List of 4 positive scalars to scale ty, tx, th and tw.
If set to None, does not perform scaling. For Faster RCNN,
the open-source implementation recommends using [10.0, 10.0, 5.0, 5.0].
"""
if scale_factors:
assert len(scale_factors) == 4
for scalar in scale_factors:
assert scalar > 0
self._scale_factors = scale_factors
@property
def code_size(self):
return 4
def _encode(self, boxes, anchors):
"""Encode a box collection with respect to anchor collection.
Args:
boxes: BoxList holding N boxes to be encoded.
anchors: BoxList of anchors.
Returns:
a tensor representing N anchor-encoded boxes of the format
[ty, tx, th, tw].
"""
# Convert anchors to the center coordinate representation.
ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes()
ycenter, xcenter, h, w = boxes.get_center_coordinates_and_sizes()
# Avoid NaN in division and log below.
ha += EPSILON
wa += EPSILON
h += EPSILON
w += EPSILON
tx = (xcenter - xcenter_a) / wa
ty = (ycenter - ycenter_a) / ha
tw = tf.log(w / wa)
th = tf.log(h / ha)
# Scales location targets as used in paper for joint training.
if self._scale_factors:
ty *= self._scale_factors[0]
tx *= self._scale_factors[1]
th *= self._scale_factors[2]
tw *= self._scale_factors[3]
return tf.transpose(tf.stack([ty, tx, th, tw]))
def _decode(self, rel_codes, anchors):
"""Decode relative codes to boxes.
Args:
rel_codes: a tensor representing N anchor-encoded boxes.
anchors: BoxList of anchors.
Returns:
boxes: BoxList holding N bounding boxes.
"""
ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes()
ty, tx, th, tw = tf.unstack(tf.transpose(rel_codes))
if self._scale_factors:
ty /= self._scale_factors[0]
tx /= self._scale_factors[1]
th /= self._scale_factors[2]
tw /= self._scale_factors[3]
w = tf.exp(tw) * wa
h = tf.exp(th) * ha
ycenter = ty * ha + ycenter_a
xcenter = tx * wa + xcenter_a
ymin = ycenter - h / 2.
xmin = xcenter - w / 2.
ymax = ycenter + h / 2.
xmax = xcenter + w / 2.
return box_list.BoxList(tf.transpose(tf.stack([ymin, xmin, ymax, xmax])))
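As a sanity check on the coding schema above, the encode equations can be reproduced with plain NumPy; the box and anchor values here are borrowed from the tests below:

import numpy as np

def center_size(box):
  # box is [ymin, xmin, ymax, xmax]
  ymin, xmin, ymax, xmax = box
  return (ymin + ymax) / 2., (xmin + xmax) / 2., ymax - ymin, xmax - xmin

ycenter, xcenter, h, w = center_size([10.0, 10.0, 20.0, 15.0])
ycenter_a, xcenter_a, ha, wa = center_size([15.0, 12.0, 30.0, 18.0])
ty = (ycenter - ycenter_a) / ha  # -0.5
tx = (xcenter - xcenter_a) / wa  # -0.416666
th = np.log(h / ha)              # -0.405465
tw = np.log(w / wa)              # -0.182321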
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.box_coder.faster_rcnn_box_coder."""
import tensorflow as tf
from object_detection.box_coders import faster_rcnn_box_coder
from object_detection.core import box_list
class FasterRcnnBoxCoderTest(tf.test.TestCase):
def test_get_correct_relative_codes_after_encoding(self):
boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]]
anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]]
expected_rel_codes = [[-0.5, -0.416666, -0.405465, -0.182321],
[-0.083333, -0.222222, -0.693147, -1.098612]]
boxes = box_list.BoxList(tf.constant(boxes))
anchors = box_list.BoxList(tf.constant(anchors))
coder = faster_rcnn_box_coder.FasterRcnnBoxCoder()
rel_codes = coder.encode(boxes, anchors)
with self.test_session() as sess:
rel_codes_out, = sess.run([rel_codes])
self.assertAllClose(rel_codes_out, expected_rel_codes)
def test_get_correct_relative_codes_after_encoding_with_scaling(self):
boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]]
anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]]
scale_factors = [2, 3, 4, 5]
expected_rel_codes = [[-1., -1.25, -1.62186, -0.911608],
[-0.166667, -0.666667, -2.772588, -5.493062]]
boxes = box_list.BoxList(tf.constant(boxes))
anchors = box_list.BoxList(tf.constant(anchors))
coder = faster_rcnn_box_coder.FasterRcnnBoxCoder(
scale_factors=scale_factors)
rel_codes = coder.encode(boxes, anchors)
with self.test_session() as sess:
rel_codes_out, = sess.run([rel_codes])
self.assertAllClose(rel_codes_out, expected_rel_codes)
def test_get_correct_boxes_after_decoding(self):
anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]]
rel_codes = [[-0.5, -0.416666, -0.405465, -0.182321],
[-0.083333, -0.222222, -0.693147, -1.098612]]
expected_boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]]
anchors = box_list.BoxList(tf.constant(anchors))
coder = faster_rcnn_box_coder.FasterRcnnBoxCoder()
boxes = coder.decode(rel_codes, anchors)
with self.test_session() as sess:
boxes_out, = sess.run([boxes.get()])
self.assertAllClose(boxes_out, expected_boxes)
def test_get_correct_boxes_after_decoding_with_scaling(self):
anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]]
rel_codes = [[-1., -1.25, -1.62186, -0.911608],
[-0.166667, -0.666667, -2.772588, -5.493062]]
scale_factors = [2, 3, 4, 5]
expected_boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]]
anchors = box_list.BoxList(tf.constant(anchors))
coder = faster_rcnn_box_coder.FasterRcnnBoxCoder(
scale_factors=scale_factors)
boxes = coder.decode(rel_codes, anchors)
with self.test_session() as sess:
boxes_out, = sess.run([boxes.get()])
self.assertAllClose(boxes_out, expected_boxes)
def test_very_small_width_nan_after_encoding(self):
boxes = [[10.0, 10.0, 10.0000001, 20.0]]
anchors = [[15.0, 12.0, 30.0, 18.0]]
expected_rel_codes = [[-0.833333, 0., -21.128731, 0.510826]]
boxes = box_list.BoxList(tf.constant(boxes))
anchors = box_list.BoxList(tf.constant(anchors))
coder = faster_rcnn_box_coder.FasterRcnnBoxCoder()
rel_codes = coder.encode(boxes, anchors)
with self.test_session() as sess:
rel_codes_out, = sess.run([rel_codes])
self.assertAllClose(rel_codes_out, expected_rel_codes)
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Keypoint box coder.
The keypoint box coder follows the coding schema described below (this is
similar to the FasterRcnnBoxCoder, except that it encodes keypoints in addition
to box coordinates):
ty = (y - ya) / ha
tx = (x - xa) / wa
th = log(h / ha)
tw = log(w / wa)
tky0 = (ky0 - ya) / ha
tkx0 = (kx0 - xa) / wa
tky1 = (ky1 - ya) / ha
tkx1 = (kx1 - xa) / wa
...
where x, y, w, h denote the box's center coordinates, width and height
respectively. Similarly, xa, ya, wa, ha denote the anchor's center
coordinates, width and height. tx, ty, tw and th denote the anchor-encoded
center, width and height respectively. ky0, kx0, ky1, kx1, ... denote the
keypoints' coordinates, and tky0, tkx0, tky1, tkx1, ... denote the
anchor-encoded keypoint coordinates.
"""
import tensorflow as tf
from object_detection.core import box_coder
from object_detection.core import box_list
from object_detection.core import standard_fields as fields
EPSILON = 1e-8
class KeypointBoxCoder(box_coder.BoxCoder):
"""Keypoint box coder."""
def __init__(self, num_keypoints, scale_factors=None):
"""Constructor for KeypointBoxCoder.
Args:
num_keypoints: Number of keypoints to encode/decode.
scale_factors: List of 4 positive scalars to scale ty, tx, th and tw.
In addition to scaling ty and tx, the first 2 scalars are used to scale
the y and x coordinates of the keypoints as well. If set to None, does
not perform scaling.
"""
self._num_keypoints = num_keypoints
if scale_factors:
assert len(scale_factors) == 4
for scalar in scale_factors:
assert scalar > 0
self._scale_factors = scale_factors
self._keypoint_scale_factors = None
if scale_factors is not None:
self._keypoint_scale_factors = tf.expand_dims(tf.tile(
[tf.to_float(scale_factors[0]), tf.to_float(scale_factors[1])],
[num_keypoints]), 1)
@property
def code_size(self):
return 4 + self._num_keypoints * 2
def _encode(self, boxes, anchors):
"""Encode a box and keypoint collection with respect to anchor collection.
Args:
boxes: BoxList holding N boxes and keypoints to be encoded. Boxes are
tensors with the shape [N, 4], and keypoints are tensors with the shape
[N, num_keypoints, 2].
anchors: BoxList of anchors.
Returns:
a tensor representing N anchor-encoded boxes of the format
[ty, tx, th, tw, tky0, tkx0, tky1, tkx1, ...] where tky0 and tkx0
represent the y and x coordinates of the first keypoint, tky1 and tkx1
represent the y and x coordinates of the second keypoint, and so on.
"""
# Convert anchors to the center coordinate representation.
ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes()
ycenter, xcenter, h, w = boxes.get_center_coordinates_and_sizes()
keypoints = boxes.get_field(fields.BoxListFields.keypoints)
keypoints = tf.transpose(tf.reshape(keypoints,
[-1, self._num_keypoints * 2]))
num_boxes = boxes.num_boxes()
# Avoid NaN in division and log below.
ha += EPSILON
wa += EPSILON
h += EPSILON
w += EPSILON
tx = (xcenter - xcenter_a) / wa
ty = (ycenter - ycenter_a) / ha
tw = tf.log(w / wa)
th = tf.log(h / ha)
tiled_anchor_centers = tf.tile(
tf.stack([ycenter_a, xcenter_a]), [self._num_keypoints, 1])
tiled_anchor_sizes = tf.tile(
tf.stack([ha, wa]), [self._num_keypoints, 1])
tkeypoints = (keypoints - tiled_anchor_centers) / tiled_anchor_sizes
# Scales location targets as used in paper for joint training.
if self._scale_factors:
ty *= self._scale_factors[0]
tx *= self._scale_factors[1]
th *= self._scale_factors[2]
tw *= self._scale_factors[3]
tkeypoints *= tf.tile(self._keypoint_scale_factors, [1, num_boxes])
tboxes = tf.stack([ty, tx, th, tw])
return tf.transpose(tf.concat([tboxes, tkeypoints], 0))
def _decode(self, rel_codes, anchors):
"""Decode relative codes to boxes and keypoints.
Args:
rel_codes: a tensor with shape [N, 4 + 2 * num_keypoints] representing N
anchor-encoded boxes and keypoints
anchors: BoxList of anchors.
Returns:
boxes: BoxList holding N bounding boxes and keypoints.
"""
ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes()
num_codes = tf.shape(rel_codes)[0]
result = tf.unstack(tf.transpose(rel_codes))
ty, tx, th, tw = result[:4]
tkeypoints = result[4:]
if self._scale_factors:
ty /= self._scale_factors[0]
tx /= self._scale_factors[1]
th /= self._scale_factors[2]
tw /= self._scale_factors[3]
tkeypoints /= tf.tile(self._keypoint_scale_factors, [1, num_codes])
w = tf.exp(tw) * wa
h = tf.exp(th) * ha
ycenter = ty * ha + ycenter_a
xcenter = tx * wa + xcenter_a
ymin = ycenter - h / 2.
xmin = xcenter - w / 2.
ymax = ycenter + h / 2.
xmax = xcenter + w / 2.
decoded_boxes_keypoints = box_list.BoxList(
tf.transpose(tf.stack([ymin, xmin, ymax, xmax])))
tiled_anchor_centers = tf.tile(
tf.stack([ycenter_a, xcenter_a]), [self._num_keypoints, 1])
tiled_anchor_sizes = tf.tile(
tf.stack([ha, wa]), [self._num_keypoints, 1])
keypoints = tkeypoints * tiled_anchor_sizes + tiled_anchor_centers
keypoints = tf.reshape(tf.transpose(keypoints),
[-1, self._num_keypoints, 2])
decoded_boxes_keypoints.add_field(fields.BoxListFields.keypoints, keypoints)
return decoded_boxes_keypoints
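The keypoint terms use the same normalization as the box center. A small NumPy sketch (values borrowed from the tests below) makes the arithmetic concrete:

import numpy as np

anchor = [15., 12., 30., 18.]             # [ymin, xmin, ymax, xmax]
ycenter_a = (anchor[0] + anchor[2]) / 2.  # 22.5
xcenter_a = (anchor[1] + anchor[3]) / 2.  # 15.0
ha = anchor[2] - anchor[0]                # 15.0
wa = anchor[3] - anchor[1]                # 6.0
keypoints = np.array([[15., 12.], [10., 15.]])
tky = (keypoints[:, 0] - ycenter_a) / ha  # [-0.5, -0.833333]
tkx = (keypoints[:, 1] - xcenter_a) / wa  # [-0.5, 0.]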
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.box_coder.keypoint_box_coder."""
import tensorflow as tf
from object_detection.box_coders import keypoint_box_coder
from object_detection.core import box_list
from object_detection.core import standard_fields as fields
class KeypointBoxCoderTest(tf.test.TestCase):
def test_get_correct_relative_codes_after_encoding(self):
boxes = [[10., 10., 20., 15.],
[0.2, 0.1, 0.5, 0.4]]
keypoints = [[[15., 12.], [10., 15.]],
[[0.5, 0.3], [0.2, 0.4]]]
num_keypoints = len(keypoints[0])
anchors = [[15., 12., 30., 18.],
[0.1, 0.0, 0.7, 0.9]]
expected_rel_codes = [
[-0.5, -0.416666, -0.405465, -0.182321,
-0.5, -0.5, -0.833333, 0.],
[-0.083333, -0.222222, -0.693147, -1.098612,
0.166667, -0.166667, -0.333333, -0.055556]
]
boxes = box_list.BoxList(tf.constant(boxes))
boxes.add_field(fields.BoxListFields.keypoints, tf.constant(keypoints))
anchors = box_list.BoxList(tf.constant(anchors))
coder = keypoint_box_coder.KeypointBoxCoder(num_keypoints)
rel_codes = coder.encode(boxes, anchors)
with self.test_session() as sess:
rel_codes_out, = sess.run([rel_codes])
self.assertAllClose(rel_codes_out, expected_rel_codes)
def test_get_correct_relative_codes_after_encoding_with_scaling(self):
boxes = [[10., 10., 20., 15.],
[0.2, 0.1, 0.5, 0.4]]
keypoints = [[[15., 12.], [10., 15.]],
[[0.5, 0.3], [0.2, 0.4]]]
num_keypoints = len(keypoints[0])
anchors = [[15., 12., 30., 18.],
[0.1, 0.0, 0.7, 0.9]]
scale_factors = [2, 3, 4, 5]
expected_rel_codes = [
[-1., -1.25, -1.62186, -0.911608,
-1.0, -1.5, -1.666667, 0.],
[-0.166667, -0.666667, -2.772588, -5.493062,
0.333333, -0.5, -0.666667, -0.166667]
]
boxes = box_list.BoxList(tf.constant(boxes))
boxes.add_field(fields.BoxListFields.keypoints, tf.constant(keypoints))
anchors = box_list.BoxList(tf.constant(anchors))
coder = keypoint_box_coder.KeypointBoxCoder(
num_keypoints, scale_factors=scale_factors)
rel_codes = coder.encode(boxes, anchors)
with self.test_session() as sess:
rel_codes_out, = sess.run([rel_codes])
self.assertAllClose(rel_codes_out, expected_rel_codes)
def test_get_correct_boxes_after_decoding(self):
anchors = [[15., 12., 30., 18.],
[0.1, 0.0, 0.7, 0.9]]
rel_codes = [
[-0.5, -0.416666, -0.405465, -0.182321,
-0.5, -0.5, -0.833333, 0.],
[-0.083333, -0.222222, -0.693147, -1.098612,
0.166667, -0.166667, -0.333333, -0.055556]
]
expected_boxes = [[10., 10., 20., 15.],
[0.2, 0.1, 0.5, 0.4]]
expected_keypoints = [[[15., 12.], [10., 15.]],
[[0.5, 0.3], [0.2, 0.4]]]
num_keypoints = len(expected_keypoints[0])
anchors = box_list.BoxList(tf.constant(anchors))
coder = keypoint_box_coder.KeypointBoxCoder(num_keypoints)
boxes = coder.decode(rel_codes, anchors)
with self.test_session() as sess:
boxes_out, keypoints_out = sess.run(
[boxes.get(), boxes.get_field(fields.BoxListFields.keypoints)])
self.assertAllClose(boxes_out, expected_boxes)
self.assertAllClose(keypoints_out, expected_keypoints)
def test_get_correct_boxes_after_decoding_with_scaling(self):
anchors = [[15., 12., 30., 18.],
[0.1, 0.0, 0.7, 0.9]]
rel_codes = [
[-1., -1.25, -1.62186, -0.911608,
-1.0, -1.5, -1.666667, 0.],
[-0.166667, -0.666667, -2.772588, -5.493062,
0.333333, -0.5, -0.666667, -0.166667]
]
scale_factors = [2, 3, 4, 5]
expected_boxes = [[10., 10., 20., 15.],
[0.2, 0.1, 0.5, 0.4]]
expected_keypoints = [[[15., 12.], [10., 15.]],
[[0.5, 0.3], [0.2, 0.4]]]
num_keypoints = len(expected_keypoints[0])
anchors = box_list.BoxList(tf.constant(anchors))
coder = keypoint_box_coder.KeypointBoxCoder(
num_keypoints, scale_factors=scale_factors)
boxes = coder.decode(rel_codes, anchors)
with self.test_session() as sess:
boxes_out, keypoints_out = sess.run(
[boxes.get(), boxes.get_field(fields.BoxListFields.keypoints)])
self.assertAllClose(boxes_out, expected_boxes)
self.assertAllClose(keypoints_out, expected_keypoints)
def test_very_small_width_nan_after_encoding(self):
boxes = [[10., 10., 10.0000001, 20.]]
keypoints = [[[10., 10.], [10.0000001, 20.]]]
anchors = [[15., 12., 30., 18.]]
expected_rel_codes = [[-0.833333, 0., -21.128731, 0.510826,
-0.833333, -0.833333, -0.833333, 0.833333]]
boxes = box_list.BoxList(tf.constant(boxes))
boxes.add_field(fields.BoxListFields.keypoints, tf.constant(keypoints))
anchors = box_list.BoxList(tf.constant(anchors))
coder = keypoint_box_coder.KeypointBoxCoder(2)
rel_codes = coder.encode(boxes, anchors)
with self.test_session() as sess:
rel_codes_out, = sess.run([rel_codes])
self.assertAllClose(rel_codes_out, expected_rel_codes)
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Mean stddev box coder.
This box coder uses the following coding schema to encode boxes:
rel_code = (box_corner - anchor_corner_mean) / anchor_corner_stddev.
"""
from object_detection.core import box_coder
from object_detection.core import box_list
class MeanStddevBoxCoder(box_coder.BoxCoder):
"""Mean stddev box coder."""
def __init__(self, stddev=0.01):
"""Constructor for MeanStddevBoxCoder.
Args:
stddev: The standard deviation used to encode and decode boxes.
"""
self._stddev = stddev
@property
def code_size(self):
return 4
def _encode(self, boxes, anchors):
"""Encode a box collection with respect to anchor collection.
Args:
boxes: BoxList holding N boxes to be encoded.
anchors: BoxList of N anchors.
Returns:
a tensor representing N anchor-encoded boxes
Raises:
ValueError: if the anchors still have the deprecated stddev field.
"""
box_corners = boxes.get()
if anchors.has_field('stddev'):
raise ValueError("'stddev' is a parameter of MeanStddevBoxCoder and "
"should not be specified in the box list.")
means = anchors.get()
return (box_corners - means) / self._stddev
def _decode(self, rel_codes, anchors):
"""Decode.
Args:
rel_codes: a tensor representing N anchor-encoded boxes.
anchors: BoxList of anchors.
Returns:
boxes: BoxList holding N bounding boxes
Raises:
ValueError: if the anchors still have the deprecated stddev field and
expect the decode method to use the stddev value from that field.
"""
means = anchors.get()
if anchors.has_field('stddev'):
raise ValueError("'stddev' is a parameter of MeanStddevBoxCoder and "
"should not be specified in the box list.")
box_corners = rel_codes * self._stddev + means
return box_list.BoxList(box_corners)
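The schema is simple enough to verify by hand; a minimal NumPy sketch, assuming the default stddev of 0.01:

import numpy as np

stddev = 0.01
box = np.array([0.10, 0.10, 0.40, 0.60])
anchor = np.array([0.15, 0.12, 0.30, 0.40])
rel_code = (box - anchor) / stddev    # [-5., -2., 10., 20.]
decoded = rel_code * stddev + anchor  # recovers the original box
assert np.allclose(decoded, box)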