Commit a4944a57 authored by derekjchow's avatar derekjchow Committed by Sergio Guadarrama
Browse files

Add Tensorflow Object Detection API. (#1561)

For details see our paper:
"Speed/accuracy trade-offs for modern convolutional object detectors."
Huang J, Rathod V, Sun C, Zhu M, Korattikara A, Fathi A, Fischer I,
Wojna Z, Song Y, Guadarrama S, Murphy K, CVPR 2017
https://arxiv.org/abs/1611.10012
parent 60c3ed2e
# Tensorflow Object Detection API: main runnables.
package(
default_visibility = ["//visibility:public"],
)
licenses(["notice"])
# Apache 2.0
py_binary(
name = "train",
srcs = [
"train.py",
],
deps = [
":trainer",
"//tensorflow",
"//tensorflow_models/object_detection/builders:input_reader_builder",
"//tensorflow_models/object_detection/builders:model_builder",
"//tensorflow_models/object_detection/protos:input_reader_py_pb2",
"//tensorflow_models/object_detection/protos:model_py_pb2",
"//tensorflow_models/object_detection/protos:pipeline_py_pb2",
"//tensorflow_models/object_detection/protos:train_py_pb2",
],
)
py_library(
name = "trainer",
srcs = ["trainer.py"],
deps = [
"//tensorflow",
"//tensorflow_models/object_detection/builders:optimizer_builder",
"//tensorflow_models/object_detection/builders:preprocessor_builder",
"//tensorflow_models/object_detection/core:batcher",
"//tensorflow_models/object_detection/core:standard_fields",
"//tensorflow_models/object_detection/utils:ops",
"//tensorflow_models/object_detection/utils:variables_helper",
"//tensorflow_models/slim:model_deploy",
],
)
py_test(
name = "trainer_test",
srcs = ["trainer_test.py"],
deps = [
":trainer",
"//tensorflow",
"//tensorflow_models/object_detection/core:losses",
"//tensorflow_models/object_detection/core:model",
"//tensorflow_models/object_detection/core:standard_fields",
"//tensorflow_models/object_detection/protos:train_py_pb2",
],
)
py_library(
name = "eval_util",
srcs = [
"eval_util.py",
],
deps = [
"//tensorflow",
"//tensorflow_models/object_detection/utils:label_map_util",
"//tensorflow_models/object_detection/utils:object_detection_evaluation",
"//tensorflow_models/object_detection/utils:visualization_utils",
],
)
py_library(
name = "evaluator",
srcs = ["evaluator.py"],
deps = [
"//tensorflow",
"//tensorflow_models/object_detection:eval_util",
"//tensorflow_models/object_detection/core:box_list",
"//tensorflow_models/object_detection/core:box_list_ops",
"//tensorflow_models/object_detection/core:prefetcher",
"//tensorflow_models/object_detection/core:standard_fields",
"//tensorflow_models/object_detection/protos:eval_py_pb2",
],
)
py_binary(
name = "eval",
srcs = [
"eval.py",
],
deps = [
":evaluator",
"//tensorflow",
"//tensorflow_models/object_detection/builders:input_reader_builder",
"//tensorflow_models/object_detection/builders:model_builder",
"//tensorflow_models/object_detection/protos:eval_py_pb2",
"//tensorflow_models/object_detection/protos:input_reader_py_pb2",
"//tensorflow_models/object_detection/protos:model_py_pb2",
"//tensorflow_models/object_detection/protos:pipeline_py_pb2",
"//tensorflow_models/object_detection/utils:label_map_util",
],
)
py_library(
name = "exporter",
srcs = [
"exporter.py",
],
deps = [
"//tensorflow",
"//tensorflow/python/tools:freeze_graph_lib",
"//tensorflow_models/object_detection/builders:model_builder",
"//tensorflow_models/object_detection/core:standard_fields",
"//tensorflow_models/object_detection/data_decoders:tf_example_decoder",
],
)
py_test(
name = "exporter_test",
srcs = [
"exporter_test.py",
],
deps = [
":exporter",
"//tensorflow",
"//tensorflow_models/object_detection/builders:model_builder",
"//tensorflow_models/object_detection/core:model",
"//tensorflow_models/object_detection/protos:pipeline_py_pb2",
],
)
py_binary(
name = "export_inference_graph",
srcs = [
"export_inference_graph.py",
],
deps = [
":exporter",
"//tensorflow",
"//tensorflow_models/object_detection/protos:pipeline_py_pb2",
],
)
py_binary(
name = "create_pascal_tf_record",
srcs = [
"create_pascal_tf_record.py",
],
deps = [
"//third_party/py/PIL:pil",
"//third_party/py/lxml",
"//tensorflow",
"//tensorflow_models/object_detection/utils:dataset_util",
"//tensorflow_models/object_detection/utils:label_map_util",
],
)
py_test(
name = "create_pascal_tf_record_test",
srcs = [
"create_pascal_tf_record_test.py",
],
deps = [
":create_pascal_tf_record",
"//tensorflow",
],
)
py_binary(
name = "create_pet_tf_record",
srcs = [
"create_pet_tf_record.py",
],
deps = [
"//third_party/py/PIL:pil",
"//third_party/py/lxml",
"//tensorflow",
"//tensorflow_models/object_detection/utils:dataset_util",
"//tensorflow_models/object_detection/utils:label_map_util",
],
)
# Contributing to the Tensorflow Object Detection API
Patches to Tensorflow Object Detection API are welcome!
We require contributors to fill out either the individual or corporate
Contributor License Agreement (CLA).
* If you are an individual writing original source code and you're sure you own the intellectual property, then you'll need to sign an [individual CLA](http://code.google.com/legal/individual-cla-v1.0.html).
* If you work for a company that wants to allow you to contribute your work, then you'll need to sign a [corporate CLA](http://code.google.com/legal/corporate-cla-v1.0.html).
Please follow the
[Tensorflow contributing guidelines](https://github.com/tensorflow/tensorflow/blob/master/CONTRIBUTING.md)
when submitting pull requests.
# Tensorflow Object Detection API
Creating accurate machine learning models capable of localizing and identifying
multiple objects in a single image remains a core challenge in computer vision.
The TensorFlow Object Detection API is an open source framework built on top of
TensorFlow that makes it easy to construct, train and deploy object detection
models. At Google we’ve certainly found this codebase to be useful for our
computer vision needs, and we hope that you will as well.
<p align="center">
<img src="g3doc/img/kites_detections_output.jpg" width=676 height=450>
</p>
Contributions to the codebase are welcome and we would love to hear back from
you if you find this API useful. Finally if you use the Tensorflow Object
Detection API for a research publication, please consider citing:
```
"Speed/accuracy trade-offs for modern convolutional object detectors."
Huang J, Rathod V, Sun C, Zhu M, Korattikara A, Fathi A, Fischer I, Wojna Z,
Song Y, Guadarrama S, Murphy K, CVPR 2017
```
\[[link](https://arxiv.org/abs/1611.10012)\]\[[bibtex](
https://scholar.googleusercontent.com/scholar.bib?q=info:l291WsrB-hQJ:scholar.google.com/&output=citation&scisig=AAGBfm0AAAAAWUIIlnPZ_L9jxvPwcC49kDlELtaeIyU-&scisf=4&ct=citation&cd=-1&hl=en&scfhb=1)\]
## Maintainers
* Jonathan Huang, github: [jch1](https://github.com/jch1)
* Vivek Rathod, github: [tombstone](https://github.com/tombstone)
* Derek Chow, github: [derekjchow](https://github.com/derekjchow)
* Chen Sun, github: [jesu9](https://github.com/jesu9)
* Menglong Zhu, github: [dreamdragon](https://github.com/dreamdragon)
## Table of contents
Quick Start:
* <a href='object_detection_tutorial.ipynb'>
Quick Start: Jupyter notebook for off-the-shelf inference</a><br>
* <a href="g3doc/running_pets.md">Quick Start: Training on a pet detector</a><br>
Setup:
* <a href='g3doc/installation.md'>Installation</a><br>
* <a href='g3doc/configuring_jobs.md'>
Configuring an object detection pipeline</a><br>
* <a href='g3doc/preparing_inputs.md'>Preparing inputs</a><br>
Running:
* <a href='g3doc/running_locally.md'>Running locally</a><br>
* <a href='g3doc/running_on_cloud.md'>Running on the cloud</a><br>
Extras:
* <a href='g3doc/detection_model_zoo.md'>Tensorflow detection model zoo</a><br>
* <a href='g3doc/exporting_models.md'>
Exporting a trained model for inference</a><br>
* <a href='g3doc/defining_your_own_model.md'>
Defining your own model architecture</a><br>
## Release information
### June 15, 2017
In addition to our base Tensorflow detection model definitions, this
release includes:
* A selection of trainable detection models, including:
* Single Shot Multibox Detector (SSD) with MobileNet,
* SSD with Inception V2,
* Region-Based Fully Convolutional Networks (R-FCN) with Resnet 101,
* Faster RCNN with Resnet 101,
* Faster RCNN with Inception Resnet v2
* Mask R-CNN with Resnet 101.
* Frozen weights (trained on the COCO dataset) for each of the above models to
be used for out-of-the-box inference purposes.
* A [Jupyter notebook](object_detection_tutorial.ipynb) for performing
out-of-the-box inference with one of our released models
* Convenient [local training](g3doc/running_locally.md) scripts as well as
distributed training and evaluation pipelines via
[Google Cloud](g3doc/running_on_cloud.md).
<b>Thanks to contributors</b>: Jonathan Huang, Vivek Rathod, Derek Chow,
Chen Sun, Menglong Zhu, Matthew Tang, Anoop Korattikara, Alireza Fathi, Ian Fischer, Zbigniew Wojna, Yang Song, Sergio Guadarrama, Jasper Uijlings,
Viacheslav Kovalevskyi, Kevin Murphy
# Tensorflow Object Detection API: Anchor Generator implementations.
package(
default_visibility = ["//visibility:public"],
)
licenses(["notice"])
# Apache 2.0
py_library(
name = "grid_anchor_generator",
srcs = [
"grid_anchor_generator.py",
],
deps = [
"//tensorflow",
"//tensorflow_models/object_detection/core:anchor_generator",
"//tensorflow_models/object_detection/core:box_list",
"//tensorflow_models/object_detection/utils:ops",
],
)
py_test(
name = "grid_anchor_generator_test",
srcs = [
"grid_anchor_generator_test.py",
],
deps = [
":grid_anchor_generator",
"//tensorflow",
],
)
py_library(
name = "multiple_grid_anchor_generator",
srcs = [
"multiple_grid_anchor_generator.py",
],
deps = [
":grid_anchor_generator",
"//tensorflow",
"//tensorflow_models/object_detection/core:anchor_generator",
"//tensorflow_models/object_detection/core:box_list_ops",
],
)
py_test(
name = "multiple_grid_anchor_generator_test",
srcs = [
"multiple_grid_anchor_generator_test.py",
],
deps = [
":multiple_grid_anchor_generator",
"//third_party/py/numpy",
],
)
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Generates grid anchors on the fly as used in Faster RCNN.
Generates grid anchors on the fly as described in:
"Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks"
Shaoqing Ren, Kaiming He, Ross Girshick, and Jian Sun.
"""
import tensorflow as tf
from object_detection.core import anchor_generator
from object_detection.core import box_list
from object_detection.utils import ops
class GridAnchorGenerator(anchor_generator.AnchorGenerator):
"""Generates a grid of anchors at given scales and aspect ratios."""
def __init__(self,
scales=(0.5, 1.0, 2.0),
aspect_ratios=(0.5, 1.0, 2.0),
base_anchor_size=None,
anchor_stride=None,
anchor_offset=None):
"""Constructs a GridAnchorGenerator.
Args:
scales: a list of (float) scales, default=(0.5, 1.0, 2.0)
aspect_ratios: a list of (float) aspect ratios, default=(0.5, 1.0, 2.0)
base_anchor_size: base anchor size as height, width (
(length-2 float32 list, default=[256, 256])
anchor_stride: difference in centers between base anchors for adjacent
grid positions (length-2 float32 list, default=[16, 16])
anchor_offset: center of the anchor with scale and aspect ratio 1 for the
upper left element of the grid, this should be zero for
feature networks with only VALID padding and even receptive
field size, but may need additional calculation if other
padding is used (length-2 float32 tensor, default=[0, 0])
"""
# Handle argument defaults
if base_anchor_size is None:
base_anchor_size = [256, 256]
base_anchor_size = tf.constant(base_anchor_size, tf.float32)
if anchor_stride is None:
anchor_stride = [16, 16]
anchor_stride = tf.constant(anchor_stride, dtype=tf.float32)
if anchor_offset is None:
anchor_offset = [0, 0]
anchor_offset = tf.constant(anchor_offset, dtype=tf.float32)
self._scales = scales
self._aspect_ratios = aspect_ratios
self._base_anchor_size = base_anchor_size
self._anchor_stride = anchor_stride
self._anchor_offset = anchor_offset
def name_scope(self):
return 'GridAnchorGenerator'
def num_anchors_per_location(self):
"""Returns the number of anchors per spatial location.
Returns:
a list of integers, one for each expected feature map to be passed to
the `generate` function.
"""
return [len(self._scales) * len(self._aspect_ratios)]
def _generate(self, feature_map_shape_list):
"""Generates a collection of bounding boxes to be used as anchors.
Args:
feature_map_shape_list: list of pairs of convnet layer resolutions in the
format [(height_0, width_0)]. For example, setting
feature_map_shape_list=[(8, 8)] asks for anchors that correspond
to an 8x8 layer. For this anchor generator, only lists of length 1 are
allowed.
Returns:
boxes: a BoxList holding a collection of N anchor boxes
Raises:
ValueError: if feature_map_shape_list, box_specs_list do not have the same
length.
ValueError: if feature_map_shape_list does not consist of pairs of
integers
"""
if not (isinstance(feature_map_shape_list, list)
and len(feature_map_shape_list) == 1):
raise ValueError('feature_map_shape_list must be a list of length 1.')
if not all([isinstance(list_item, tuple) and len(list_item) == 2
for list_item in feature_map_shape_list]):
raise ValueError('feature_map_shape_list must be a list of pairs.')
grid_height, grid_width = feature_map_shape_list[0]
scales_grid, aspect_ratios_grid = ops.meshgrid(self._scales,
self._aspect_ratios)
scales_grid = tf.reshape(scales_grid, [-1])
aspect_ratios_grid = tf.reshape(aspect_ratios_grid, [-1])
return tile_anchors(grid_height,
grid_width,
scales_grid,
aspect_ratios_grid,
self._base_anchor_size,
self._anchor_stride,
self._anchor_offset)
def tile_anchors(grid_height,
grid_width,
scales,
aspect_ratios,
base_anchor_size,
anchor_stride,
anchor_offset):
"""Create a tiled set of anchors strided along a grid in image space.
This op creates a set of anchor boxes by placing a "basis" collection of
boxes with user-specified scales and aspect ratios centered at evenly
distributed points along a grid. The basis collection is specified via the
scale and aspect_ratios arguments. For example, setting scales=[.1, .2, .2]
and aspect ratios = [2,2,1/2] means that we create three boxes: one with scale
.1, aspect ratio 2, one with scale .2, aspect ratio 2, and one with scale .2
and aspect ratio 1/2. Each box is multiplied by "base_anchor_size" before
placing it over its respective center.
Grid points are specified via grid_height, grid_width parameters as well as
the anchor_stride and anchor_offset parameters.
Args:
grid_height: size of the grid in the y direction (int or int scalar tensor)
grid_width: size of the grid in the x direction (int or int scalar tensor)
scales: a 1-d (float) tensor representing the scale of each box in the
basis set.
aspect_ratios: a 1-d (float) tensor representing the aspect ratio of each
box in the basis set. The length of the scales and aspect_ratios tensors
must be equal.
base_anchor_size: base anchor size as [height, width]
(float tensor of shape [2])
anchor_stride: difference in centers between base anchors for adjacent grid
positions (float tensor of shape [2])
anchor_offset: center of the anchor with scale and aspect ratio 1 for the
upper left element of the grid, this should be zero for
feature networks with only VALID padding and even receptive
field size, but may need some additional calculation if other
padding is used (float tensor of shape [2])
Returns:
a BoxList holding a collection of N anchor boxes
"""
ratio_sqrts = tf.sqrt(aspect_ratios)
heights = scales / ratio_sqrts * base_anchor_size[0]
widths = scales * ratio_sqrts * base_anchor_size[1]
# Get a grid of box centers
y_centers = tf.to_float(tf.range(grid_height))
y_centers = y_centers * anchor_stride[0] + anchor_offset[0]
x_centers = tf.to_float(tf.range(grid_width))
x_centers = x_centers * anchor_stride[1] + anchor_offset[1]
x_centers, y_centers = ops.meshgrid(x_centers, y_centers)
widths_grid, x_centers_grid = ops.meshgrid(widths, x_centers)
heights_grid, y_centers_grid = ops.meshgrid(heights, y_centers)
bbox_centers = tf.stack([y_centers_grid, x_centers_grid], axis=3)
bbox_sizes = tf.stack([heights_grid, widths_grid], axis=3)
bbox_centers = tf.reshape(bbox_centers, [-1, 2])
bbox_sizes = tf.reshape(bbox_sizes, [-1, 2])
bbox_corners = _center_size_bbox_to_corners_bbox(bbox_centers, bbox_sizes)
return box_list.BoxList(bbox_corners)
def _center_size_bbox_to_corners_bbox(centers, sizes):
"""Converts bbox center-size representation to corners representation.
Args:
centers: a tensor with shape [N, 2] representing bounding box centers
sizes: a tensor with shape [N, 2] representing bounding boxes
Returns:
corners: tensor with shape [N, 4] representing bounding boxes in corners
representation
"""
return tf.concat([centers - .5 * sizes, centers + .5 * sizes], 1)
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.grid_anchor_generator."""
import tensorflow as tf
from object_detection.anchor_generators import grid_anchor_generator
class GridAnchorGeneratorTest(tf.test.TestCase):
def test_construct_single_anchor(self):
"""Builds a 1x1 anchor grid to test the size of the output boxes."""
scales = [0.5, 1.0, 2.0]
aspect_ratios = [0.25, 1.0, 4.0]
anchor_offset = [7, -3]
exp_anchor_corners = [[-121, -35, 135, 29], [-249, -67, 263, 61],
[-505, -131, 519, 125], [-57, -67, 71, 61],
[-121, -131, 135, 125], [-249, -259, 263, 253],
[-25, -131, 39, 125], [-57, -259, 71, 253],
[-121, -515, 135, 509]]
anchor_generator = grid_anchor_generator.GridAnchorGenerator(
scales, aspect_ratios,
anchor_offset=anchor_offset)
anchors = anchor_generator.generate(feature_map_shape_list=[(1, 1)])
anchor_corners = anchors.get()
with self.test_session():
anchor_corners_out = anchor_corners.eval()
self.assertAllClose(anchor_corners_out, exp_anchor_corners)
def test_construct_anchor_grid(self):
base_anchor_size = [10, 10]
anchor_stride = [19, 19]
anchor_offset = [0, 0]
scales = [0.5, 1.0, 2.0]
aspect_ratios = [1.0]
exp_anchor_corners = [[-2.5, -2.5, 2.5, 2.5], [-5., -5., 5., 5.],
[-10., -10., 10., 10.], [-2.5, 16.5, 2.5, 21.5],
[-5., 14., 5, 24], [-10., 9., 10, 29],
[16.5, -2.5, 21.5, 2.5], [14., -5., 24, 5],
[9., -10., 29, 10], [16.5, 16.5, 21.5, 21.5],
[14., 14., 24, 24], [9., 9., 29, 29]]
anchor_generator = grid_anchor_generator.GridAnchorGenerator(
scales,
aspect_ratios,
base_anchor_size=base_anchor_size,
anchor_stride=anchor_stride,
anchor_offset=anchor_offset)
anchors = anchor_generator.generate(feature_map_shape_list=[(2, 2)])
anchor_corners = anchors.get()
with self.test_session():
anchor_corners_out = anchor_corners.eval()
self.assertAllClose(anchor_corners_out, exp_anchor_corners)
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Generates grid anchors on the fly corresponding to multiple CNN layers.
Generates grid anchors on the fly corresponding to multiple CNN layers as
described in:
"SSD: Single Shot MultiBox Detector"
Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy, Scott Reed,
Cheng-Yang Fu, Alexander C. Berg
(see Section 2.2: Choosing scales and aspect ratios for default boxes)
"""
import numpy as np
import tensorflow as tf
from object_detection.anchor_generators import grid_anchor_generator
from object_detection.core import anchor_generator
from object_detection.core import box_list_ops
class MultipleGridAnchorGenerator(anchor_generator.AnchorGenerator):
"""Generate a grid of anchors for multiple CNN layers."""
def __init__(self,
box_specs_list,
base_anchor_size=None,
clip_window=None):
"""Constructs a MultipleGridAnchorGenerator.
To construct anchors, at multiple grid resolutions, one must provide a
list of feature_map_shape_list (e.g., [(8, 8), (4, 4)]), and for each grid
size, a corresponding list of (scale, aspect ratio) box specifications.
For example:
box_specs_list = [[(.1, 1.0), (.1, 2.0)], # for 8x8 grid
[(.2, 1.0), (.3, 1.0), (.2, 2.0)]] # for 4x4 grid
To support the fully convolutional setting, we pass grid sizes in at
generation time, while scale and aspect ratios are fixed at construction
time.
Args:
box_specs_list: list of list of (scale, aspect ratio) pairs with the
outside list having the same number of entries as feature_map_shape_list
(which is passed in at generation time).
base_anchor_size: base anchor size as [height, width]
(length-2 float tensor, default=[256, 256]).
clip_window: a tensor of shape [4] specifying a window to which all
anchors should be clipped. If clip_window is None, then no clipping
is performed.
Raises:
ValueError: if box_specs_list is not a list of list of pairs
ValueError: if clip_window is not either None or a tensor of shape [4]
"""
if isinstance(box_specs_list, list) and all(
[isinstance(list_item, list) for list_item in box_specs_list]):
self._box_specs = box_specs_list
else:
raise ValueError('box_specs_list is expected to be a '
'list of lists of pairs')
if base_anchor_size is None:
base_anchor_size = tf.constant([256, 256], dtype=tf.float32)
self._base_anchor_size = base_anchor_size
if clip_window is not None and clip_window.get_shape().as_list() != [4]:
raise ValueError('clip_window must either be None or a shape [4] tensor')
self._clip_window = clip_window
self._scales = []
self._aspect_ratios = []
for box_spec in self._box_specs:
if not all([isinstance(entry, tuple) and len(entry) == 2
for entry in box_spec]):
raise ValueError('box_specs_list is expected to be a '
'list of lists of pairs')
scales, aspect_ratios = zip(*box_spec)
self._scales.append(scales)
self._aspect_ratios.append(aspect_ratios)
def name_scope(self):
return 'MultipleGridAnchorGenerator'
def num_anchors_per_location(self):
"""Returns the number of anchors per spatial location.
Returns:
a list of integers, one for each expected feature map to be passed to
the Generate function.
"""
return [len(box_specs) for box_specs in self._box_specs]
def _generate(self,
feature_map_shape_list,
im_height=1,
im_width=1,
anchor_strides=None,
anchor_offsets=None):
"""Generates a collection of bounding boxes to be used as anchors.
The number of anchors generated for a single grid with shape MxM where we
place k boxes over each grid center is k*M^2 and thus the total number of
anchors is the sum over all grids. In our box_specs_list example
(see the constructor docstring), we would place two boxes over each grid
point on an 8x8 grid and three boxes over each grid point on a 4x4 grid and
thus end up with 2*8^2 + 3*4^2 = 176 anchors in total. The layout of the
output anchors follows the order of how the grid sizes and box_specs are
specified (with box_spec index varying the fastest, followed by width
index, then height index, then grid index).
Args:
feature_map_shape_list: list of pairs of convnet layer resolutions in the
format [(height_0, width_0), (height_1, width_1), ...]. For example,
setting feature_map_shape_list=[(8, 8), (7, 7)] asks for anchors that
correspond to an 8x8 layer followed by a 7x7 layer.
im_height: the height of the image to generate the grid for. If both
im_height and im_width are 1, the generated anchors default to
normalized coordinates, otherwise absolute coordinates are used for the
grid.
im_width: the width of the image to generate the grid for. If both
im_height and im_width are 1, the generated anchors default to
normalized coordinates, otherwise absolute coordinates are used for the
grid.
anchor_strides: list of pairs of strides (in y and x directions
respectively). For example, setting
anchor_strides=[(.25, .25), (.5, .5)] means that we want the anchors
corresponding to the first layer to be strided by .25 and those in the
second layer to be strided by .5 in both y and x directions. By
default, if anchor_strides=None, then they are set to be the reciprocal
of the corresponding grid sizes. The pairs can also be specified as
dynamic tf.int or tf.float numbers, e.g. for variable shape input
images.
anchor_offsets: list of pairs of offsets (in y and x directions
respectively). The offset specifies where we want the center of the
(0, 0)-th anchor to lie for each layer. For example, setting
anchor_offsets=[(.125, .125), (.25, .25)]) means that we want the
(0, 0)-th anchor of the first layer to lie at (.125, .125) in image
space and likewise that we want the (0, 0)-th anchor of the second
layer to lie at (.25, .25) in image space. By default, if
anchor_offsets=None, then they are set to be half of the corresponding
anchor stride. The pairs can also be specified as dynamic tf.int or
tf.float numbers, e.g. for variable shape input images.
Returns:
boxes: a BoxList holding a collection of N anchor boxes
Raises:
ValueError: if feature_map_shape_list, box_specs_list do not have the same
length.
ValueError: if feature_map_shape_list does not consist of pairs of
integers
"""
if not (isinstance(feature_map_shape_list, list)
and len(feature_map_shape_list) == len(self._box_specs)):
raise ValueError('feature_map_shape_list must be a list with the same '
'length as self._box_specs')
if not all([isinstance(list_item, tuple) and len(list_item) == 2
for list_item in feature_map_shape_list]):
raise ValueError('feature_map_shape_list must be a list of pairs.')
if not anchor_strides:
anchor_strides = [(tf.to_float(im_height) / tf.to_float(pair[0]),
tf.to_float(im_width) / tf.to_float(pair[1]))
for pair in feature_map_shape_list]
if not anchor_offsets:
anchor_offsets = [(0.5 * stride[0], 0.5 * stride[1])
for stride in anchor_strides]
for arg, arg_name in zip([anchor_strides, anchor_offsets],
['anchor_strides', 'anchor_offsets']):
if not (isinstance(arg, list) and len(arg) == len(self._box_specs)):
raise ValueError('%s must be a list with the same length '
'as self._box_specs' % arg_name)
if not all([isinstance(list_item, tuple) and len(list_item) == 2
for list_item in arg]):
raise ValueError('%s must be a list of pairs.' % arg_name)
anchor_grid_list = []
min_im_shape = tf.to_float(tf.minimum(im_height, im_width))
base_anchor_size = min_im_shape * self._base_anchor_size
for grid_size, scales, aspect_ratios, stride, offset in zip(
feature_map_shape_list, self._scales, self._aspect_ratios,
anchor_strides, anchor_offsets):
anchor_grid_list.append(
grid_anchor_generator.tile_anchors(
grid_height=grid_size[0],
grid_width=grid_size[1],
scales=scales,
aspect_ratios=aspect_ratios,
base_anchor_size=base_anchor_size,
anchor_stride=stride,
anchor_offset=offset))
concatenated_anchors = box_list_ops.concatenate(anchor_grid_list)
num_anchors = concatenated_anchors.num_boxes_static()
if num_anchors is None:
num_anchors = concatenated_anchors.num_boxes()
if self._clip_window is not None:
clip_window = tf.multiply(
tf.to_float([im_height, im_width, im_height, im_width]),
self._clip_window)
concatenated_anchors = box_list_ops.clip_to_window(
concatenated_anchors, clip_window, filter_nonoverlapping=False)
# TODO: make reshape an option for the clip_to_window op
concatenated_anchors.set(
tf.reshape(concatenated_anchors.get(), [num_anchors, 4]))
stddevs_tensor = 0.01 * tf.ones(
[num_anchors, 4], dtype=tf.float32, name='stddevs')
concatenated_anchors.add_field('stddev', stddevs_tensor)
return concatenated_anchors
def create_ssd_anchors(num_layers=6,
min_scale=0.2,
max_scale=0.95,
aspect_ratios=(1.0, 2.0, 3.0, 1.0/2, 1.0/3),
base_anchor_size=None,
reduce_boxes_in_lowest_layer=True):
"""Creates MultipleGridAnchorGenerator for SSD anchors.
This function instantiates a MultipleGridAnchorGenerator that reproduces
``default box`` construction proposed by Liu et al in the SSD paper.
See Section 2.2 for details. Grid sizes are assumed to be passed in
at generation time from finest resolution to coarsest resolution --- this is
used to (linearly) interpolate scales of anchor boxes corresponding to the
intermediate grid sizes.
Anchors that are returned by calling the `generate` method on the returned
MultipleGridAnchorGenerator object are always in normalized coordinates
and clipped to the unit square: (i.e. all coordinates lie in [0, 1]x[0, 1]).
Args:
num_layers: integer number of grid layers to create anchors for (actual
grid sizes passed in at generation time)
min_scale: scale of anchors corresponding to finest resolution (float)
max_scale: scale of anchors corresponding to coarsest resolution (float)
aspect_ratios: list or tuple of (float) aspect ratios to place on each
grid point.
base_anchor_size: base anchor size as [height, width].
reduce_boxes_in_lowest_layer: a boolean to indicate whether the fixed 3
boxes per location is used in the lowest layer.
Returns:
a MultipleGridAnchorGenerator
"""
if base_anchor_size is None:
base_anchor_size = [1.0, 1.0]
base_anchor_size = tf.constant(base_anchor_size, dtype=tf.float32)
box_specs_list = []
scales = [min_scale + (max_scale - min_scale) * i / (num_layers - 1)
for i in range(num_layers)] + [1.0]
for layer, scale, scale_next in zip(
range(num_layers), scales[:-1], scales[1:]):
layer_box_specs = []
if layer == 0 and reduce_boxes_in_lowest_layer:
layer_box_specs = [(0.1, 1.0), (scale, 2.0), (scale, 0.5)]
else:
for aspect_ratio in aspect_ratios:
layer_box_specs.append((scale, aspect_ratio))
if aspect_ratio == 1.0:
layer_box_specs.append((np.sqrt(scale*scale_next), 1.0))
box_specs_list.append(layer_box_specs)
return MultipleGridAnchorGenerator(box_specs_list, base_anchor_size)
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for anchor_generators.multiple_grid_anchor_generator_test.py."""
import numpy as np
import tensorflow as tf
from object_detection.anchor_generators import multiple_grid_anchor_generator as ag
class MultipleGridAnchorGeneratorTest(tf.test.TestCase):
def test_construct_single_anchor_grid(self):
"""Builds a 1x1 anchor grid to test the size of the output boxes."""
exp_anchor_corners = [[-121, -35, 135, 29], [-249, -67, 263, 61],
[-505, -131, 519, 125], [-57, -67, 71, 61],
[-121, -131, 135, 125], [-249, -259, 263, 253],
[-25, -131, 39, 125], [-57, -259, 71, 253],
[-121, -515, 135, 509]]
base_anchor_size = tf.constant([256, 256], dtype=tf.float32)
box_specs_list = [[(.5, .25), (1.0, .25), (2.0, .25),
(.5, 1.0), (1.0, 1.0), (2.0, 1.0),
(.5, 4.0), (1.0, 4.0), (2.0, 4.0)]]
anchor_generator = ag.MultipleGridAnchorGenerator(
box_specs_list, base_anchor_size)
anchors = anchor_generator.generate(feature_map_shape_list=[(1, 1)],
anchor_strides=[(16, 16)],
anchor_offsets=[(7, -3)])
anchor_corners = anchors.get()
with self.test_session():
anchor_corners_out = anchor_corners.eval()
self.assertAllClose(anchor_corners_out, exp_anchor_corners)
def test_construct_anchor_grid(self):
base_anchor_size = tf.constant([10, 10], dtype=tf.float32)
box_specs_list = [[(0.5, 1.0), (1.0, 1.0), (2.0, 1.0)]]
exp_anchor_corners = [[-2.5, -2.5, 2.5, 2.5], [-5., -5., 5., 5.],
[-10., -10., 10., 10.], [-2.5, 16.5, 2.5, 21.5],
[-5., 14., 5, 24], [-10., 9., 10, 29],
[16.5, -2.5, 21.5, 2.5], [14., -5., 24, 5],
[9., -10., 29, 10], [16.5, 16.5, 21.5, 21.5],
[14., 14., 24, 24], [9., 9., 29, 29]]
anchor_generator = ag.MultipleGridAnchorGenerator(
box_specs_list, base_anchor_size)
anchors = anchor_generator.generate(feature_map_shape_list=[(2, 2)],
anchor_strides=[(19, 19)],
anchor_offsets=[(0, 0)])
anchor_corners = anchors.get()
with self.test_session():
anchor_corners_out = anchor_corners.eval()
self.assertAllClose(anchor_corners_out, exp_anchor_corners)
def test_construct_anchor_grid_non_square(self):
base_anchor_size = tf.constant([1, 1], dtype=tf.float32)
box_specs_list = [[(1.0, 1.0)]]
exp_anchor_corners = [[0., -0.25, 1., 0.75], [0., 0.25, 1., 1.25]]
anchor_generator = ag.MultipleGridAnchorGenerator(box_specs_list,
base_anchor_size)
anchors = anchor_generator.generate(feature_map_shape_list=[(tf.constant(
1, dtype=tf.int32), tf.constant(2, dtype=tf.int32))])
anchor_corners = anchors.get()
with self.test_session():
anchor_corners_out = anchor_corners.eval()
self.assertAllClose(anchor_corners_out, exp_anchor_corners)
def test_construct_anchor_grid_unnormalized(self):
base_anchor_size = tf.constant([1, 1], dtype=tf.float32)
box_specs_list = [[(1.0, 1.0)]]
exp_anchor_corners = [[0., 0., 320., 320.], [0., 320., 320., 640.]]
anchor_generator = ag.MultipleGridAnchorGenerator(box_specs_list,
base_anchor_size)
anchors = anchor_generator.generate(
feature_map_shape_list=[(tf.constant(1, dtype=tf.int32), tf.constant(
2, dtype=tf.int32))],
im_height=320,
im_width=640)
anchor_corners = anchors.get()
with self.test_session():
anchor_corners_out = anchor_corners.eval()
self.assertAllClose(anchor_corners_out, exp_anchor_corners)
def test_construct_multiple_grids(self):
base_anchor_size = tf.constant([1.0, 1.0], dtype=tf.float32)
box_specs_list = [[(1.0, 1.0), (2.0, 1.0), (1.0, 0.5)],
[(1.0, 1.0), (1.0, 0.5)]]
# height and width of box with .5 aspect ratio
h = np.sqrt(2)
w = 1.0/np.sqrt(2)
exp_small_grid_corners = [[-.25, -.25, .75, .75],
[.25-.5*h, .25-.5*w, .25+.5*h, .25+.5*w],
[-.25, .25, .75, 1.25],
[.25-.5*h, .75-.5*w, .25+.5*h, .75+.5*w],
[.25, -.25, 1.25, .75],
[.75-.5*h, .25-.5*w, .75+.5*h, .25+.5*w],
[.25, .25, 1.25, 1.25],
[.75-.5*h, .75-.5*w, .75+.5*h, .75+.5*w]]
# only test first entry of larger set of anchors
exp_big_grid_corners = [[.125-.5, .125-.5, .125+.5, .125+.5],
[.125-1.0, .125-1.0, .125+1.0, .125+1.0],
[.125-.5*h, .125-.5*w, .125+.5*h, .125+.5*w],]
anchor_generator = ag.MultipleGridAnchorGenerator(
box_specs_list, base_anchor_size)
anchors = anchor_generator.generate(feature_map_shape_list=[(4, 4), (2, 2)],
anchor_strides=[(.25, .25), (.5, .5)],
anchor_offsets=[(.125, .125),
(.25, .25)])
anchor_corners = anchors.get()
with self.test_session():
anchor_corners_out = anchor_corners.eval()
self.assertEquals(anchor_corners_out.shape, (56, 4))
big_grid_corners = anchor_corners_out[0:3, :]
small_grid_corners = anchor_corners_out[48:, :]
self.assertAllClose(small_grid_corners, exp_small_grid_corners)
self.assertAllClose(big_grid_corners, exp_big_grid_corners)
def test_construct_multiple_grids_with_clipping(self):
base_anchor_size = tf.constant([1.0, 1.0], dtype=tf.float32)
box_specs_list = [[(1.0, 1.0), (2.0, 1.0), (1.0, 0.5)],
[(1.0, 1.0), (1.0, 0.5)]]
# height and width of box with .5 aspect ratio
h = np.sqrt(2)
w = 1.0/np.sqrt(2)
exp_small_grid_corners = [[0, 0, .75, .75],
[0, 0, .25+.5*h, .25+.5*w],
[0, .25, .75, 1],
[0, .75-.5*w, .25+.5*h, 1],
[.25, 0, 1, .75],
[.75-.5*h, 0, 1, .25+.5*w],
[.25, .25, 1, 1],
[.75-.5*h, .75-.5*w, 1, 1]]
clip_window = tf.constant([0, 0, 1, 1], dtype=tf.float32)
anchor_generator = ag.MultipleGridAnchorGenerator(
box_specs_list, base_anchor_size, clip_window=clip_window)
anchors = anchor_generator.generate(feature_map_shape_list=[(4, 4), (2, 2)])
anchor_corners = anchors.get()
with self.test_session():
anchor_corners_out = anchor_corners.eval()
small_grid_corners = anchor_corners_out[48:, :]
self.assertAllClose(small_grid_corners, exp_small_grid_corners)
def test_invalid_box_specs(self):
# not all box specs are pairs
box_specs_list = [[(1.0, 1.0), (2.0, 1.0), (1.0, 0.5)],
[(1.0, 1.0), (1.0, 0.5, .3)]]
with self.assertRaises(ValueError):
ag.MultipleGridAnchorGenerator(box_specs_list)
# box_specs_list is not a list of lists
box_specs_list = [(1.0, 1.0), (2.0, 1.0), (1.0, 0.5)]
with self.assertRaises(ValueError):
ag.MultipleGridAnchorGenerator(box_specs_list)
def test_invalid_generate_arguments(self):
base_anchor_size = tf.constant([1.0, 1.0], dtype=tf.float32)
box_specs_list = [[(1.0, 1.0), (2.0, 1.0), (1.0, 0.5)],
[(1.0, 1.0), (1.0, 0.5)]]
anchor_generator = ag.MultipleGridAnchorGenerator(
box_specs_list, base_anchor_size)
# incompatible lengths with box_specs_list
with self.assertRaises(ValueError):
anchor_generator.generate(feature_map_shape_list=[(4, 4), (2, 2)],
anchor_strides=[(.25, .25)],
anchor_offsets=[(.125, .125), (.25, .25)])
with self.assertRaises(ValueError):
anchor_generator.generate(feature_map_shape_list=[(4, 4), (2, 2), (1, 1)],
anchor_strides=[(.25, .25), (.5, .5)],
anchor_offsets=[(.125, .125), (.25, .25)])
with self.assertRaises(ValueError):
anchor_generator.generate(feature_map_shape_list=[(4, 4), (2, 2)],
anchor_strides=[(.5, .5)],
anchor_offsets=[(.25, .25)])
# not pairs
with self.assertRaises(ValueError):
anchor_generator.generate(feature_map_shape_list=[(4, 4, 4), (2, 2)],
anchor_strides=[(.25, .25), (.5, .5)],
anchor_offsets=[(.125, .125), (.25, .25)])
with self.assertRaises(ValueError):
anchor_generator.generate(feature_map_shape_list=[(4, 4), (2, 2)],
anchor_strides=[(.25, .25, .1), (.5, .5)],
anchor_offsets=[(.125, .125),
(.25, .25)])
with self.assertRaises(ValueError):
anchor_generator.generate(feature_map_shape_list=[(4), (2, 2)],
anchor_strides=[(.25, .25), (.5, .5)],
anchor_offsets=[(.125), (.25)])
class CreateSSDAnchorsTest(tf.test.TestCase):
def test_create_ssd_anchors_returns_correct_shape(self):
anchor_generator = ag.create_ssd_anchors(
num_layers=6, min_scale=0.2, max_scale=0.95,
aspect_ratios=(1.0, 2.0, 3.0, 1.0/2, 1.0/3),
reduce_boxes_in_lowest_layer=True)
feature_map_shape_list = [(38, 38), (19, 19), (10, 10),
(5, 5), (3, 3), (1, 1)]
anchors = anchor_generator.generate(
feature_map_shape_list=feature_map_shape_list)
anchor_corners = anchors.get()
with self.test_session():
anchor_corners_out = anchor_corners.eval()
self.assertEquals(anchor_corners_out.shape, (7308, 4))
anchor_generator = ag.create_ssd_anchors(
num_layers=6, min_scale=0.2, max_scale=0.95,
aspect_ratios=(1.0, 2.0, 3.0, 1.0/2, 1.0/3),
reduce_boxes_in_lowest_layer=False)
feature_map_shape_list = [(38, 38), (19, 19), (10, 10),
(5, 5), (3, 3), (1, 1)]
anchors = anchor_generator.generate(
feature_map_shape_list=feature_map_shape_list)
anchor_corners = anchors.get()
with self.test_session():
anchor_corners_out = anchor_corners.eval()
self.assertEquals(anchor_corners_out.shape, (11640, 4))
if __name__ == '__main__':
tf.test.main()
# Tensorflow Object Detection API: Box Coder implementations.
package(
default_visibility = ["//visibility:public"],
)
licenses(["notice"])
# Apache 2.0
py_library(
name = "faster_rcnn_box_coder",
srcs = [
"faster_rcnn_box_coder.py",
],
deps = [
"//tensorflow_models/object_detection/core:box_coder",
"//tensorflow_models/object_detection/core:box_list",
],
)
py_test(
name = "faster_rcnn_box_coder_test",
srcs = [
"faster_rcnn_box_coder_test.py",
],
deps = [
":faster_rcnn_box_coder",
"//tensorflow",
"//tensorflow_models/object_detection/core:box_list",
],
)
py_library(
name = "keypoint_box_coder",
srcs = [
"keypoint_box_coder.py",
],
deps = [
"//tensorflow_models/object_detection/core:box_coder",
"//tensorflow_models/object_detection/core:box_list",
"//tensorflow_models/object_detection/core:standard_fields",
],
)
py_test(
name = "keypoint_box_coder_test",
srcs = [
"keypoint_box_coder_test.py",
],
deps = [
":keypoint_box_coder",
"//tensorflow",
"//tensorflow_models/object_detection/core:box_list",
"//tensorflow_models/object_detection/core:standard_fields",
],
)
py_library(
name = "mean_stddev_box_coder",
srcs = [
"mean_stddev_box_coder.py",
],
deps = [
"//tensorflow_models/object_detection/core:box_coder",
"//tensorflow_models/object_detection/core:box_list",
],
)
py_test(
name = "mean_stddev_box_coder_test",
srcs = [
"mean_stddev_box_coder_test.py",
],
deps = [
":mean_stddev_box_coder",
"//tensorflow",
"//tensorflow_models/object_detection/core:box_list",
],
)
py_library(
name = "square_box_coder",
srcs = [
"square_box_coder.py",
],
deps = [
"//tensorflow_models/object_detection/core:box_coder",
"//tensorflow_models/object_detection/core:box_list",
],
)
py_test(
name = "square_box_coder_test",
srcs = [
"square_box_coder_test.py",
],
deps = [
":square_box_coder",
"//tensorflow",
"//tensorflow_models/object_detection/core:box_list",
],
)
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Faster RCNN box coder.
Faster RCNN box coder follows the coding schema described below:
ty = (y - ya) / ha
tx = (x - xa) / wa
th = log(h / ha)
tw = log(w / wa)
where x, y, w, h denote the box's center coordinates, width and height
respectively. Similarly, xa, ya, wa, ha denote the anchor's center
coordinates, width and height. tx, ty, tw and th denote the anchor-encoded
center, width and height respectively.
See http://arxiv.org/abs/1506.01497 for details.
"""
import tensorflow as tf
from object_detection.core import box_coder
from object_detection.core import box_list
EPSILON = 1e-8
class FasterRcnnBoxCoder(box_coder.BoxCoder):
"""Faster RCNN box coder."""
def __init__(self, scale_factors=None):
"""Constructor for FasterRcnnBoxCoder.
Args:
scale_factors: List of 4 positive scalars to scale ty, tx, th and tw.
If set to None, does not perform scaling. For Faster RCNN,
the open-source implementation recommends using [10.0, 10.0, 5.0, 5.0].
"""
if scale_factors:
assert len(scale_factors) == 4
for scalar in scale_factors:
assert scalar > 0
self._scale_factors = scale_factors
@property
def code_size(self):
return 4
def _encode(self, boxes, anchors):
"""Encode a box collection with respect to anchor collection.
Args:
boxes: BoxList holding N boxes to be encoded.
anchors: BoxList of anchors.
Returns:
a tensor representing N anchor-encoded boxes of the format
[ty, tx, th, tw].
"""
# Convert anchors to the center coordinate representation.
ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes()
ycenter, xcenter, h, w = boxes.get_center_coordinates_and_sizes()
# Avoid NaN in division and log below.
ha += EPSILON
wa += EPSILON
h += EPSILON
w += EPSILON
tx = (xcenter - xcenter_a) / wa
ty = (ycenter - ycenter_a) / ha
tw = tf.log(w / wa)
th = tf.log(h / ha)
# Scales location targets as used in paper for joint training.
if self._scale_factors:
ty *= self._scale_factors[0]
tx *= self._scale_factors[1]
th *= self._scale_factors[2]
tw *= self._scale_factors[3]
return tf.transpose(tf.stack([ty, tx, th, tw]))
def _decode(self, rel_codes, anchors):
"""Decode relative codes to boxes.
Args:
rel_codes: a tensor representing N anchor-encoded boxes.
anchors: BoxList of anchors.
Returns:
boxes: BoxList holding N bounding boxes.
"""
ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes()
ty, tx, th, tw = tf.unstack(tf.transpose(rel_codes))
if self._scale_factors:
ty /= self._scale_factors[0]
tx /= self._scale_factors[1]
th /= self._scale_factors[2]
tw /= self._scale_factors[3]
w = tf.exp(tw) * wa
h = tf.exp(th) * ha
ycenter = ty * ha + ycenter_a
xcenter = tx * wa + xcenter_a
ymin = ycenter - h / 2.
xmin = xcenter - w / 2.
ymax = ycenter + h / 2.
xmax = xcenter + w / 2.
return box_list.BoxList(tf.transpose(tf.stack([ymin, xmin, ymax, xmax])))
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.box_coder.faster_rcnn_box_coder."""
import tensorflow as tf
from object_detection.box_coders import faster_rcnn_box_coder
from object_detection.core import box_list
class FasterRcnnBoxCoderTest(tf.test.TestCase):
def test_get_correct_relative_codes_after_encoding(self):
boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]]
anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]]
expected_rel_codes = [[-0.5, -0.416666, -0.405465, -0.182321],
[-0.083333, -0.222222, -0.693147, -1.098612]]
boxes = box_list.BoxList(tf.constant(boxes))
anchors = box_list.BoxList(tf.constant(anchors))
coder = faster_rcnn_box_coder.FasterRcnnBoxCoder()
rel_codes = coder.encode(boxes, anchors)
with self.test_session() as sess:
rel_codes_out, = sess.run([rel_codes])
self.assertAllClose(rel_codes_out, expected_rel_codes)
def test_get_correct_relative_codes_after_encoding_with_scaling(self):
boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]]
anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]]
scale_factors = [2, 3, 4, 5]
expected_rel_codes = [[-1., -1.25, -1.62186, -0.911608],
[-0.166667, -0.666667, -2.772588, -5.493062]]
boxes = box_list.BoxList(tf.constant(boxes))
anchors = box_list.BoxList(tf.constant(anchors))
coder = faster_rcnn_box_coder.FasterRcnnBoxCoder(
scale_factors=scale_factors)
rel_codes = coder.encode(boxes, anchors)
with self.test_session() as sess:
rel_codes_out, = sess.run([rel_codes])
self.assertAllClose(rel_codes_out, expected_rel_codes)
def test_get_correct_boxes_after_decoding(self):
anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]]
rel_codes = [[-0.5, -0.416666, -0.405465, -0.182321],
[-0.083333, -0.222222, -0.693147, -1.098612]]
expected_boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]]
anchors = box_list.BoxList(tf.constant(anchors))
coder = faster_rcnn_box_coder.FasterRcnnBoxCoder()
boxes = coder.decode(rel_codes, anchors)
with self.test_session() as sess:
boxes_out, = sess.run([boxes.get()])
self.assertAllClose(boxes_out, expected_boxes)
def test_get_correct_boxes_after_decoding_with_scaling(self):
anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]]
rel_codes = [[-1., -1.25, -1.62186, -0.911608],
[-0.166667, -0.666667, -2.772588, -5.493062]]
scale_factors = [2, 3, 4, 5]
expected_boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]]
anchors = box_list.BoxList(tf.constant(anchors))
coder = faster_rcnn_box_coder.FasterRcnnBoxCoder(
scale_factors=scale_factors)
boxes = coder.decode(rel_codes, anchors)
with self.test_session() as sess:
boxes_out, = sess.run([boxes.get()])
self.assertAllClose(boxes_out, expected_boxes)
def test_very_small_Width_nan_after_encoding(self):
boxes = [[10.0, 10.0, 10.0000001, 20.0]]
anchors = [[15.0, 12.0, 30.0, 18.0]]
expected_rel_codes = [[-0.833333, 0., -21.128731, 0.510826]]
boxes = box_list.BoxList(tf.constant(boxes))
anchors = box_list.BoxList(tf.constant(anchors))
coder = faster_rcnn_box_coder.FasterRcnnBoxCoder()
rel_codes = coder.encode(boxes, anchors)
with self.test_session() as sess:
rel_codes_out, = sess.run([rel_codes])
self.assertAllClose(rel_codes_out, expected_rel_codes)
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Keypoint box coder.
The keypoint box coder follows the coding schema described below (this is
similar to the FasterRcnnBoxCoder, except that it encodes keypoints in addition
to box coordinates):
ty = (y - ya) / ha
tx = (x - xa) / wa
th = log(h / ha)
tw = log(w / wa)
tky0 = (ky0 - ya) / ha
tkx0 = (kx0 - xa) / ha
tky1 = (ky1 - ya) / ha
tkx1 = (kx1 - xa) / ha
...
where x, y, w, h denote the box's center coordinates, width and height
respectively. Similarly, xa, ya, wa, ha denote the anchor's center
coordinates, width and height. tx, ty, tw and th denote the anchor-encoded
center, width and height respectively. ky0, kx0, ky1, kx1, ... denote the
keypoints' coordinates, and tky0, tkx0, tky1, tkx1, ... denote the
anchor-encoded keypoint coordinates.
"""
import tensorflow as tf
from object_detection.core import box_coder
from object_detection.core import box_list
from object_detection.core import standard_fields as fields
EPSILON = 1e-8
class KeypointBoxCoder(box_coder.BoxCoder):
"""Keypoint box coder."""
def __init__(self, num_keypoints, scale_factors=None):
"""Constructor for KeypointBoxCoder.
Args:
num_keypoints: Number of keypoints to encode/decode.
scale_factors: List of 4 positive scalars to scale ty, tx, th and tw.
In addition to scaling ty and tx, the first 2 scalars are used to scale
the y and x coordinates of the keypoints as well. If set to None, does
not perform scaling.
"""
self._num_keypoints = num_keypoints
if scale_factors:
assert len(scale_factors) == 4
for scalar in scale_factors:
assert scalar > 0
self._scale_factors = scale_factors
self._keypoint_scale_factors = None
if scale_factors is not None:
self._keypoint_scale_factors = tf.expand_dims(tf.tile(
[tf.to_float(scale_factors[0]), tf.to_float(scale_factors[1])],
[num_keypoints]), 1)
@property
def code_size(self):
return 4 + self._num_keypoints * 2
def _encode(self, boxes, anchors):
"""Encode a box and keypoint collection with respect to anchor collection.
Args:
boxes: BoxList holding N boxes and keypoints to be encoded. Boxes are
tensors with the shape [N, 4], and keypoints are tensors with the shape
[N, num_keypoints, 2].
anchors: BoxList of anchors.
Returns:
a tensor representing N anchor-encoded boxes of the format
[ty, tx, th, tw, tky0, tkx0, tky1, tkx1, ...] where tky0 and tkx0
represent the y and x coordinates of the first keypoint, tky1 and tkx1
represent the y and x coordinates of the second keypoint, and so on.
"""
# Convert anchors to the center coordinate representation.
ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes()
ycenter, xcenter, h, w = boxes.get_center_coordinates_and_sizes()
keypoints = boxes.get_field(fields.BoxListFields.keypoints)
keypoints = tf.transpose(tf.reshape(keypoints,
[-1, self._num_keypoints * 2]))
num_boxes = boxes.num_boxes()
# Avoid NaN in division and log below.
ha += EPSILON
wa += EPSILON
h += EPSILON
w += EPSILON
tx = (xcenter - xcenter_a) / wa
ty = (ycenter - ycenter_a) / ha
tw = tf.log(w / wa)
th = tf.log(h / ha)
tiled_anchor_centers = tf.tile(
tf.stack([ycenter_a, xcenter_a]), [self._num_keypoints, 1])
tiled_anchor_sizes = tf.tile(
tf.stack([ha, wa]), [self._num_keypoints, 1])
tkeypoints = (keypoints - tiled_anchor_centers) / tiled_anchor_sizes
# Scales location targets as used in paper for joint training.
if self._scale_factors:
ty *= self._scale_factors[0]
tx *= self._scale_factors[1]
th *= self._scale_factors[2]
tw *= self._scale_factors[3]
tkeypoints *= tf.tile(self._keypoint_scale_factors, [1, num_boxes])
tboxes = tf.stack([ty, tx, th, tw])
return tf.transpose(tf.concat([tboxes, tkeypoints], 0))
def _decode(self, rel_codes, anchors):
"""Decode relative codes to boxes and keypoints.
Args:
rel_codes: a tensor with shape [N, 4 + 2 * num_keypoints] representing N
anchor-encoded boxes and keypoints
anchors: BoxList of anchors.
Returns:
boxes: BoxList holding N bounding boxes and keypoints.
"""
ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes()
num_codes = tf.shape(rel_codes)[0]
result = tf.unstack(tf.transpose(rel_codes))
ty, tx, th, tw = result[:4]
tkeypoints = result[4:]
if self._scale_factors:
ty /= self._scale_factors[0]
tx /= self._scale_factors[1]
th /= self._scale_factors[2]
tw /= self._scale_factors[3]
tkeypoints /= tf.tile(self._keypoint_scale_factors, [1, num_codes])
w = tf.exp(tw) * wa
h = tf.exp(th) * ha
ycenter = ty * ha + ycenter_a
xcenter = tx * wa + xcenter_a
ymin = ycenter - h / 2.
xmin = xcenter - w / 2.
ymax = ycenter + h / 2.
xmax = xcenter + w / 2.
decoded_boxes_keypoints = box_list.BoxList(
tf.transpose(tf.stack([ymin, xmin, ymax, xmax])))
tiled_anchor_centers = tf.tile(
tf.stack([ycenter_a, xcenter_a]), [self._num_keypoints, 1])
tiled_anchor_sizes = tf.tile(
tf.stack([ha, wa]), [self._num_keypoints, 1])
keypoints = tkeypoints * tiled_anchor_sizes + tiled_anchor_centers
keypoints = tf.reshape(tf.transpose(keypoints),
[-1, self._num_keypoints, 2])
decoded_boxes_keypoints.add_field(fields.BoxListFields.keypoints, keypoints)
return decoded_boxes_keypoints
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.box_coder.keypoint_box_coder."""
import tensorflow as tf
from object_detection.box_coders import keypoint_box_coder
from object_detection.core import box_list
from object_detection.core import standard_fields as fields
class KeypointBoxCoderTest(tf.test.TestCase):
def test_get_correct_relative_codes_after_encoding(self):
boxes = [[10., 10., 20., 15.],
[0.2, 0.1, 0.5, 0.4]]
keypoints = [[[15., 12.], [10., 15.]],
[[0.5, 0.3], [0.2, 0.4]]]
num_keypoints = len(keypoints[0])
anchors = [[15., 12., 30., 18.],
[0.1, 0.0, 0.7, 0.9]]
expected_rel_codes = [
[-0.5, -0.416666, -0.405465, -0.182321,
-0.5, -0.5, -0.833333, 0.],
[-0.083333, -0.222222, -0.693147, -1.098612,
0.166667, -0.166667, -0.333333, -0.055556]
]
boxes = box_list.BoxList(tf.constant(boxes))
boxes.add_field(fields.BoxListFields.keypoints, tf.constant(keypoints))
anchors = box_list.BoxList(tf.constant(anchors))
coder = keypoint_box_coder.KeypointBoxCoder(num_keypoints)
rel_codes = coder.encode(boxes, anchors)
with self.test_session() as sess:
rel_codes_out, = sess.run([rel_codes])
self.assertAllClose(rel_codes_out, expected_rel_codes)
def test_get_correct_relative_codes_after_encoding_with_scaling(self):
boxes = [[10., 10., 20., 15.],
[0.2, 0.1, 0.5, 0.4]]
keypoints = [[[15., 12.], [10., 15.]],
[[0.5, 0.3], [0.2, 0.4]]]
num_keypoints = len(keypoints[0])
anchors = [[15., 12., 30., 18.],
[0.1, 0.0, 0.7, 0.9]]
scale_factors = [2, 3, 4, 5]
expected_rel_codes = [
[-1., -1.25, -1.62186, -0.911608,
-1.0, -1.5, -1.666667, 0.],
[-0.166667, -0.666667, -2.772588, -5.493062,
0.333333, -0.5, -0.666667, -0.166667]
]
boxes = box_list.BoxList(tf.constant(boxes))
boxes.add_field(fields.BoxListFields.keypoints, tf.constant(keypoints))
anchors = box_list.BoxList(tf.constant(anchors))
coder = keypoint_box_coder.KeypointBoxCoder(
num_keypoints, scale_factors=scale_factors)
rel_codes = coder.encode(boxes, anchors)
with self.test_session() as sess:
rel_codes_out, = sess.run([rel_codes])
self.assertAllClose(rel_codes_out, expected_rel_codes)
def test_get_correct_boxes_after_decoding(self):
anchors = [[15., 12., 30., 18.],
[0.1, 0.0, 0.7, 0.9]]
rel_codes = [
[-0.5, -0.416666, -0.405465, -0.182321,
-0.5, -0.5, -0.833333, 0.],
[-0.083333, -0.222222, -0.693147, -1.098612,
0.166667, -0.166667, -0.333333, -0.055556]
]
expected_boxes = [[10., 10., 20., 15.],
[0.2, 0.1, 0.5, 0.4]]
expected_keypoints = [[[15., 12.], [10., 15.]],
[[0.5, 0.3], [0.2, 0.4]]]
num_keypoints = len(expected_keypoints[0])
anchors = box_list.BoxList(tf.constant(anchors))
coder = keypoint_box_coder.KeypointBoxCoder(num_keypoints)
boxes = coder.decode(rel_codes, anchors)
with self.test_session() as sess:
boxes_out, keypoints_out = sess.run(
[boxes.get(), boxes.get_field(fields.BoxListFields.keypoints)])
self.assertAllClose(boxes_out, expected_boxes)
self.assertAllClose(keypoints_out, expected_keypoints)
def test_get_correct_boxes_after_decoding_with_scaling(self):
anchors = [[15., 12., 30., 18.],
[0.1, 0.0, 0.7, 0.9]]
rel_codes = [
[-1., -1.25, -1.62186, -0.911608,
-1.0, -1.5, -1.666667, 0.],
[-0.166667, -0.666667, -2.772588, -5.493062,
0.333333, -0.5, -0.666667, -0.166667]
]
scale_factors = [2, 3, 4, 5]
expected_boxes = [[10., 10., 20., 15.],
[0.2, 0.1, 0.5, 0.4]]
expected_keypoints = [[[15., 12.], [10., 15.]],
[[0.5, 0.3], [0.2, 0.4]]]
num_keypoints = len(expected_keypoints[0])
anchors = box_list.BoxList(tf.constant(anchors))
coder = keypoint_box_coder.KeypointBoxCoder(
num_keypoints, scale_factors=scale_factors)
boxes = coder.decode(rel_codes, anchors)
with self.test_session() as sess:
boxes_out, keypoints_out = sess.run(
[boxes.get(), boxes.get_field(fields.BoxListFields.keypoints)])
self.assertAllClose(boxes_out, expected_boxes)
self.assertAllClose(keypoints_out, expected_keypoints)
def test_very_small_width_nan_after_encoding(self):
boxes = [[10., 10., 10.0000001, 20.]]
keypoints = [[[10., 10.], [10.0000001, 20.]]]
anchors = [[15., 12., 30., 18.]]
expected_rel_codes = [[-0.833333, 0., -21.128731, 0.510826,
-0.833333, -0.833333, -0.833333, 0.833333]]
boxes = box_list.BoxList(tf.constant(boxes))
boxes.add_field(fields.BoxListFields.keypoints, tf.constant(keypoints))
anchors = box_list.BoxList(tf.constant(anchors))
coder = keypoint_box_coder.KeypointBoxCoder(2)
rel_codes = coder.encode(boxes, anchors)
with self.test_session() as sess:
rel_codes_out, = sess.run([rel_codes])
self.assertAllClose(rel_codes_out, expected_rel_codes)
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Mean stddev box coder.
This box coder use the following coding schema to encode boxes:
rel_code = (box_corner - anchor_corner_mean) / anchor_corner_stddev.
"""
from object_detection.core import box_coder
from object_detection.core import box_list
class MeanStddevBoxCoder(box_coder.BoxCoder):
"""Mean stddev box coder."""
@property
def code_size(self):
return 4
def _encode(self, boxes, anchors):
"""Encode a box collection with respect to anchor collection.
Args:
boxes: BoxList holding N boxes to be encoded.
anchors: BoxList of N anchors. We assume that anchors has an associated
stddev field.
Returns:
a tensor representing N anchor-encoded boxes
Raises:
ValueError: if the anchors BoxList does not have a stddev field
"""
if not anchors.has_field('stddev'):
raise ValueError('anchors must have a stddev field')
box_corners = boxes.get()
means = anchors.get()
stddev = anchors.get_field('stddev')
return (box_corners - means) / stddev
def _decode(self, rel_codes, anchors):
"""Decode.
Args:
rel_codes: a tensor representing N anchor-encoded boxes.
anchors: BoxList of anchors. We assume that anchors has an associated
stddev field.
Returns:
boxes: BoxList holding N bounding boxes
Raises:
ValueError: if the anchors BoxList does not have a stddev field
"""
if not anchors.has_field('stddev'):
raise ValueError('anchors must have a stddev field')
means = anchors.get()
stddevs = anchors.get_field('stddev')
box_corners = rel_codes * stddevs + means
return box_list.BoxList(box_corners)
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.box_coder.mean_stddev_boxcoder."""
import tensorflow as tf
from object_detection.box_coders import mean_stddev_box_coder
from object_detection.core import box_list
class MeanStddevBoxCoderTest(tf.test.TestCase):
def testGetCorrectRelativeCodesAfterEncoding(self):
box_corners = [[0.0, 0.0, 0.5, 0.5], [0.0, 0.0, 0.5, 0.5]]
boxes = box_list.BoxList(tf.constant(box_corners))
expected_rel_codes = [[0.0, 0.0, 0.0, 0.0], [-5.0, -5.0, -5.0, -3.0]]
prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5], [0.5, 0.5, 1.0, 0.8]])
prior_stddevs = tf.constant(2 * [4 * [.1]])
priors = box_list.BoxList(prior_means)
priors.add_field('stddev', prior_stddevs)
coder = mean_stddev_box_coder.MeanStddevBoxCoder()
rel_codes = coder.encode(boxes, priors)
with self.test_session() as sess:
rel_codes_out = sess.run(rel_codes)
self.assertAllClose(rel_codes_out, expected_rel_codes)
def testGetCorrectBoxesAfterDecoding(self):
rel_codes = tf.constant([[0.0, 0.0, 0.0, 0.0], [-5.0, -5.0, -5.0, -3.0]])
expected_box_corners = [[0.0, 0.0, 0.5, 0.5], [0.0, 0.0, 0.5, 0.5]]
prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5], [0.5, 0.5, 1.0, 0.8]])
prior_stddevs = tf.constant(2 * [4 * [.1]])
priors = box_list.BoxList(prior_means)
priors.add_field('stddev', prior_stddevs)
coder = mean_stddev_box_coder.MeanStddevBoxCoder()
decoded_boxes = coder.decode(rel_codes, priors)
decoded_box_corners = decoded_boxes.get()
with self.test_session() as sess:
decoded_out = sess.run(decoded_box_corners)
self.assertAllClose(decoded_out, expected_box_corners)
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Square box coder.
Square box coder follows the coding schema described below:
l = sqrt(h * w)
la = sqrt(ha * wa)
ty = (y - ya) / la
tx = (x - xa) / la
tl = log(l / la)
where x, y, w, h denote the box's center coordinates, width, and height,
respectively. Similarly, xa, ya, wa, ha denote the anchor's center
coordinates, width and height. tx, ty, tl denote the anchor-encoded
center, and length, respectively. Because the encoded box is a square, only
one length is encoded.
This has shown to provide performance improvements over the Faster RCNN box
coder when the objects being detected tend to be square (e.g. faces) and when
the input images are not distorted via resizing.
"""
import tensorflow as tf
from object_detection.core import box_coder
from object_detection.core import box_list
EPSILON = 1e-8
class SquareBoxCoder(box_coder.BoxCoder):
"""Encodes a 3-scalar representation of a square box."""
def __init__(self, scale_factors=None):
"""Constructor for SquareBoxCoder.
Args:
scale_factors: List of 3 positive scalars to scale ty, tx, and tl.
If set to None, does not perform scaling. For faster RCNN,
the open-source implementation recommends using [10.0, 10.0, 5.0].
Raises:
ValueError: If scale_factors is not length 3 or contains values less than
or equal to 0.
"""
if scale_factors:
if len(scale_factors) != 3:
raise ValueError('The argument scale_factors must be a list of length '
'3.')
if any(scalar <= 0 for scalar in scale_factors):
raise ValueError('The values in scale_factors must all be greater '
'than 0.')
self._scale_factors = scale_factors
@property
def code_size(self):
return 3
def _encode(self, boxes, anchors):
"""Encodes a box collection with respect to an anchor collection.
Args:
boxes: BoxList holding N boxes to be encoded.
anchors: BoxList of anchors.
Returns:
a tensor representing N anchor-encoded boxes of the format
[ty, tx, tl].
"""
# Convert anchors to the center coordinate representation.
ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes()
la = tf.sqrt(ha * wa)
ycenter, xcenter, h, w = boxes.get_center_coordinates_and_sizes()
l = tf.sqrt(h * w)
# Avoid NaN in division and log below.
la += EPSILON
l += EPSILON
tx = (xcenter - xcenter_a) / la
ty = (ycenter - ycenter_a) / la
tl = tf.log(l / la)
# Scales location targets for joint training.
if self._scale_factors:
ty *= self._scale_factors[0]
tx *= self._scale_factors[1]
tl *= self._scale_factors[2]
return tf.transpose(tf.stack([ty, tx, tl]))
def _decode(self, rel_codes, anchors):
"""Decodes relative codes to boxes.
Args:
rel_codes: a tensor representing N anchor-encoded boxes.
anchors: BoxList of anchors.
Returns:
boxes: BoxList holding N bounding boxes.
"""
ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes()
la = tf.sqrt(ha * wa)
ty, tx, tl = tf.unstack(tf.transpose(rel_codes))
if self._scale_factors:
ty /= self._scale_factors[0]
tx /= self._scale_factors[1]
tl /= self._scale_factors[2]
l = tf.exp(tl) * la
ycenter = ty * la + ycenter_a
xcenter = tx * la + xcenter_a
ymin = ycenter - l / 2.
xmin = xcenter - l / 2.
ymax = ycenter + l / 2.
xmax = xcenter + l / 2.
return box_list.BoxList(tf.transpose(tf.stack([ymin, xmin, ymax, xmax])))
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.box_coder.square_box_coder."""
import tensorflow as tf
from object_detection.box_coders import square_box_coder
from object_detection.core import box_list
class SquareBoxCoderTest(tf.test.TestCase):
def test_correct_relative_codes_with_default_scale(self):
boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]]
anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]]
scale_factors = None
expected_rel_codes = [[-0.790569, -0.263523, -0.293893],
[-0.068041, -0.272166, -0.89588]]
boxes = box_list.BoxList(tf.constant(boxes))
anchors = box_list.BoxList(tf.constant(anchors))
coder = square_box_coder.SquareBoxCoder(scale_factors=scale_factors)
rel_codes = coder.encode(boxes, anchors)
with self.test_session() as sess:
(rel_codes_out,) = sess.run([rel_codes])
self.assertAllClose(rel_codes_out, expected_rel_codes)
def test_correct_relative_codes_with_non_default_scale(self):
boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]]
anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]]
scale_factors = [2, 3, 4]
expected_rel_codes = [[-1.581139, -0.790569, -1.175573],
[-0.136083, -0.816497, -3.583519]]
boxes = box_list.BoxList(tf.constant(boxes))
anchors = box_list.BoxList(tf.constant(anchors))
coder = square_box_coder.SquareBoxCoder(scale_factors=scale_factors)
rel_codes = coder.encode(boxes, anchors)
with self.test_session() as sess:
(rel_codes_out,) = sess.run([rel_codes])
self.assertAllClose(rel_codes_out, expected_rel_codes)
def test_correct_relative_codes_with_small_width(self):
boxes = [[10.0, 10.0, 10.0000001, 20.0]]
anchors = [[15.0, 12.0, 30.0, 18.0]]
scale_factors = None
expected_rel_codes = [[-1.317616, 0., -20.670586]]
boxes = box_list.BoxList(tf.constant(boxes))
anchors = box_list.BoxList(tf.constant(anchors))
coder = square_box_coder.SquareBoxCoder(scale_factors=scale_factors)
rel_codes = coder.encode(boxes, anchors)
with self.test_session() as sess:
(rel_codes_out,) = sess.run([rel_codes])
self.assertAllClose(rel_codes_out, expected_rel_codes)
def test_correct_boxes_with_default_scale(self):
anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]]
rel_codes = [[-0.5, -0.416666, -0.405465],
[-0.083333, -0.222222, -0.693147]]
scale_factors = None
expected_boxes = [[14.594306, 7.884875, 20.918861, 14.209432],
[0.155051, 0.102989, 0.522474, 0.470412]]
anchors = box_list.BoxList(tf.constant(anchors))
coder = square_box_coder.SquareBoxCoder(scale_factors=scale_factors)
boxes = coder.decode(rel_codes, anchors)
with self.test_session() as sess:
(boxes_out,) = sess.run([boxes.get()])
self.assertAllClose(boxes_out, expected_boxes)
def test_correct_boxes_with_non_default_scale(self):
anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]]
rel_codes = [[-1., -1.25, -1.62186], [-0.166667, -0.666667, -2.772588]]
scale_factors = [2, 3, 4]
expected_boxes = [[14.594306, 7.884875, 20.918861, 14.209432],
[0.155051, 0.102989, 0.522474, 0.470412]]
anchors = box_list.BoxList(tf.constant(anchors))
coder = square_box_coder.SquareBoxCoder(scale_factors=scale_factors)
boxes = coder.decode(rel_codes, anchors)
with self.test_session() as sess:
(boxes_out,) = sess.run([boxes.get()])
self.assertAllClose(boxes_out, expected_boxes)
if __name__ == '__main__':
tf.test.main()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment