"...promptdiffusion/pipeline_prompt_diffusion.py" did not exist on "56806cdbfd4613447385e8ba78da30d901abea4d"
Commit f282f6ef authored by Alexander Gorban's avatar Alexander Gorban
Browse files

Merge branch 'master' of github.com:tensorflow/models

parents 58a5da7b a2970b03
# Copyright 2017 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# ==============================================================================
from __future__ import print_function
import os
import h5py
import json
import numpy as np
import tensorflow as tf
def log_sum_exp(x_k):
"""Computes log \sum exp in a numerically stable way.
log ( sum_i exp(x_i) )
log ( sum_i exp(x_i - m + m) ), with m = max(x_i)
log ( sum_i exp(x_i - m)*exp(m) )
log ( sum_i exp(x_i - m) + m
Args:
x_k - k -dimensional list of arguments to log_sum_exp.
Returns:
log_sum_exp of the arguments.
"""
m = tf.reduce_max(x_k)
x1_k = x_k - m
u_k = tf.exp(x1_k)
z = tf.reduce_sum(u_k)
return tf.log(z) + m
def linear(x, out_size, do_bias=True, alpha=1.0, identity_if_possible=False,
normalized=False, name=None, collections=None):
"""Linear (affine) transformation, y = x W + b, for a variety of
configurations.
Args:
x: input The tensor to tranformation.
out_size: The integer size of non-batch output dimension.
do_bias (optional): Add a learnable bias vector to the operation.
alpha (optional): A multiplicative scaling for the weight initialization
of the matrix, in the form \alpha * 1/\sqrt{x.shape[1]}.
identity_if_possible (optional): just return identity,
if x.shape[1] == out_size.
normalized (optional): Option to divide out by the norms of the rows of W.
name (optional): The name prefix to add to variables.
collections (optional): List of additional collections. (Placed in
tf.GraphKeys.GLOBAL_VARIABLES already, so no need for that.)
Returns:
In the equation, y = x W + b, returns the tensorflow op that yields y.
"""
in_size = int(x.get_shape()[1]) # from Dimension(10) -> 10
stddev = alpha/np.sqrt(float(in_size))
mat_init = tf.random_normal_initializer(0.0, stddev)
wname = (name + "/W") if name else "/W"
if identity_if_possible and in_size == out_size:
# Sometimes linear layers are nothing more than size adapters.
return tf.identity(x, name=(wname+'_ident'))
W,b = init_linear(in_size, out_size, do_bias=do_bias, alpha=alpha,
normalized=normalized, name=name, collections=collections)
if do_bias:
return tf.matmul(x, W) + b
else:
return tf.matmul(x, W)
def init_linear(in_size, out_size, do_bias=True, mat_init_value=None, alpha=1.0,
identity_if_possible=False, normalized=False,
name=None, collections=None):
"""Linear (affine) transformation, y = x W + b, for a variety of
configurations.
Args:
in_size: The integer size of the non-batc input dimension. [(x),y]
out_size: The integer size of non-batch output dimension. [x,(y)]
do_bias (optional): Add a learnable bias vector to the operation.
mat_init_value (optional): numpy constant for matrix initialization, if None
, do random, with additional parameters.
alpha (optional): A multiplicative scaling for the weight initialization
of the matrix, in the form \alpha * 1/\sqrt{x.shape[1]}.
identity_if_possible (optional): just return identity,
if x.shape[1] == out_size.
normalized (optional): Option to divide out by the norms of the rows of W.
name (optional): The name prefix to add to variables.
collections (optional): List of additional collections. (Placed in
tf.GraphKeys.GLOBAL_VARIABLES already, so no need for that.)
Returns:
In the equation, y = x W + b, returns the pair (W, b).
"""
if mat_init_value is not None and mat_init_value.shape != (in_size, out_size):
raise ValueError(
'Provided mat_init_value must have shape [%d, %d].'%(in_size, out_size))
if mat_init_value is None:
stddev = alpha/np.sqrt(float(in_size))
mat_init = tf.random_normal_initializer(0.0, stddev)
wname = (name + "/W") if name else "/W"
if identity_if_possible and in_size == out_size:
return (tf.constant(np.eye(in_size).astype(np.float32)),
tf.zeros(in_size))
# Note the use of get_variable vs. tf.Variable. this is because get_variable
# does not allow the initialization of the variable with a value.
if normalized:
w_collections = [tf.GraphKeys.GLOBAL_VARIABLES, "norm-variables"]
if collections:
w_collections += collections
if mat_init_value is not None:
w = tf.Variable(mat_init_value, name=wname, collections=w_collections)
else:
w = tf.get_variable(wname, [in_size, out_size], initializer=mat_init,
collections=w_collections)
w = tf.nn.l2_normalize(w, dim=0) # x W, so xW_j = \sum_i x_bi W_ij
else:
w_collections = [tf.GraphKeys.GLOBAL_VARIABLES]
if collections:
w_collections += collections
if mat_init_value is not None:
w = tf.Variable(mat_init_value, name=wname, collections=w_collections)
else:
w = tf.get_variable(wname, [in_size, out_size], initializer=mat_init,
collections=w_collections)
if do_bias:
b_collections = [tf.GraphKeys.GLOBAL_VARIABLES]
if collections:
b_collections += collections
bname = (name + "/b") if name else "/b"
b = tf.get_variable(bname, [1, out_size],
initializer=tf.zeros_initializer(),
collections=b_collections)
else:
b = None
return (w, b)
def write_data(data_fname, data_dict, use_json=False, compression=None):
"""Write data in HD5F format.
Args:
data_fname: The filename of teh file in which to write the data.
data_dict: The dictionary of data to write. The keys are strings
and the values are numpy arrays.
use_json (optional): human readable format for simple items
compression (optional): The compression to use for h5py (disabled by
default because the library borks on scalars, otherwise try 'gzip').
"""
dir_name = os.path.dirname(data_fname)
if not os.path.exists(dir_name):
os.makedirs(dir_name)
if use_json:
the_file = open(data_fname,'w')
json.dump(data_dict, the_file)
the_file.close()
else:
try:
with h5py.File(data_fname, 'w') as hf:
for k, v in data_dict.items():
clean_k = k.replace('/', '_')
if clean_k is not k:
print('Warning: saving variable with name: ', k, ' as ', clean_k)
else:
print('Saving variable with name: ', clean_k)
hf.create_dataset(clean_k, data=v, compression=compression)
except IOError:
print("Cannot open %s for writing.", data_fname)
raise
def read_data(data_fname):
""" Read saved data in HDF5 format.
Args:
data_fname: The filename of the file from which to read the data.
Returns:
A dictionary whose keys will vary depending on dataset (but should
always contain the keys 'train_data' and 'valid_data') and whose
values are numpy arrays.
"""
try:
with h5py.File(data_fname, 'r') as hf:
data_dict = {k: np.array(v) for k, v in hf.items()}
return data_dict
except IOError:
print("Cannot open %s for reading." % data_fname)
raise
def write_datasets(data_path, data_fname_stem, dataset_dict, compression=None):
"""Write datasets in HD5F format.
This function assumes the dataset_dict is a mapping ( string ->
to data_dict ). It calls write_data for each data dictionary,
post-fixing the data filename with the key of the dataset.
Args:
data_path: The path to the save directory.
data_fname_stem: The filename stem of the file in which to write the data.
dataset_dict: The dictionary of datasets. The keys are strings
and the values data dictionaries (str -> numpy arrays) associations.
compression (optional): The compression to use for h5py (disabled by
default because the library borks on scalars, otherwise try 'gzip').
"""
full_name_stem = os.path.join(data_path, data_fname_stem)
for s, data_dict in dataset_dict.items():
write_data(full_name_stem + "_" + s, data_dict, compression=compression)
def read_datasets(data_path, data_fname_stem):
"""Read dataset sin HD5F format.
This function assumes the dataset_dict is a mapping ( string ->
to data_dict ). It calls write_data for each data dictionary,
post-fixing the data filename with the key of the dataset.
Args:
data_path: The path to the save directory.
data_fname_stem: The filename stem of the file in which to write the data.
"""
dataset_dict = {}
fnames = os.listdir(data_path)
print ('loading data from ' + data_path + ' with stem ' + data_fname_stem)
for fname in fnames:
if fname.startswith(data_fname_stem):
data_dict = read_data(os.path.join(data_path,fname))
idx = len(data_fname_stem) + 1
key = fname[idx:]
data_dict['data_dim'] = data_dict['train_data'].shape[2]
data_dict['num_steps'] = data_dict['train_data'].shape[1]
dataset_dict[key] = data_dict
if len(dataset_dict) == 0:
raise ValueError("Failed to load any datasets, are you sure that the "
"'--data_dir' and '--data_filename_stem' flag values "
"are correct?")
print (str(len(dataset_dict)) + ' datasets loaded')
return dataset_dict
# NUMPY utility functions
def list_t_bxn_to_list_b_txn(values_t_bxn):
"""Convert a length T list of BxN numpy tensors of length B list of TxN numpy
tensors.
Args:
values_t_bxn: The length T list of BxN numpy tensors.
Returns:
The length B list of TxN numpy tensors.
"""
T = len(values_t_bxn)
B, N = values_t_bxn[0].shape
values_b_txn = []
for b in range(B):
values_pb_txn = np.zeros([T,N])
for t in range(T):
values_pb_txn[t,:] = values_t_bxn[t][b,:]
values_b_txn.append(values_pb_txn)
return values_b_txn
def list_t_bxn_to_tensor_bxtxn(values_t_bxn):
"""Convert a length T list of BxN numpy tensors to single numpy tensor with
shape BxTxN.
Args:
values_t_bxn: The length T list of BxN numpy tensors.
Returns:
values_bxtxn: The BxTxN numpy tensor.
"""
T = len(values_t_bxn)
B, N = values_t_bxn[0].shape
values_bxtxn = np.zeros([B,T,N])
for t in range(T):
values_bxtxn[:,t,:] = values_t_bxn[t]
return values_bxtxn
def tensor_bxtxn_to_list_t_bxn(tensor_bxtxn):
"""Convert a numpy tensor with shape BxTxN to a length T list of numpy tensors
with shape BxT.
Args:
tensor_bxtxn: The BxTxN numpy tensor.
Returns:
A length T list of numpy tensors with shape BxT.
"""
values_t_bxn = []
B, T, N = tensor_bxtxn.shape
for t in range(T):
values_t_bxn.append(np.squeeze(tensor_bxtxn[:,t,:]))
return values_t_bxn
def flatten(list_of_lists):
"""Takes a list of lists and returns a list of the elements.
Args:
list_of_lists: List of lists.
Returns:
flat_list: Flattened list.
flat_list_idxs: Flattened list indices.
"""
flat_list = []
flat_list_idxs = []
start_idx = 0
for item in list_of_lists:
if isinstance(item, list):
flat_list += item
l = len(item)
idxs = range(start_idx, start_idx+l)
start_idx = start_idx+l
else: # a value
flat_list.append(item)
idxs = [start_idx]
start_idx += 1
flat_list_idxs.append(idxs)
return flat_list, flat_list_idxs
# NeuralGPU
Code for the Neural GPU model described in [[http://arxiv.org/abs/1511.08228]].
The extended version was described in [[https://arxiv.org/abs/1610.08613]].
Code for the Neural GPU model described in http://arxiv.org/abs/1511.08228.
The extended version was described in https://arxiv.org/abs/1610.08613.
Requirements:
* TensorFlow (see tensorflow.org for how to install)
......
......@@ -478,8 +478,10 @@ class NeuralGPU(object):
# This is just for running a baseline RNN seq2seq model.
if do_rnn:
self.after_enc_step.append(step) # Not meaningful here, but needed.
lstm_cell = tf.contrib.rnn.BasicLSTMCell(height * nmaps)
cell = tf.contrib.rnn.MultiRNNCell([lstm_cell] * nconvs)
def lstm_cell():
return tf.contrib.rnn.BasicLSTMCell(height * nmaps)
cell = tf.contrib.rnn.MultiRNNCell(
[lstm_cell() for _ in range(nconvs)])
with tf.variable_scope("encoder"):
encoder_outputs, encoder_state = tf.nn.dynamic_rnn(
cell, tf.reshape(step, [batch_size, length, height * nmaps]),
......
# Tensorflow Object Detection API: main runnables.
package(
default_visibility = ["//visibility:public"],
)
licenses(["notice"])
# Apache 2.0
py_binary(
name = "train",
srcs = [
"train.py",
],
deps = [
":trainer",
"//tensorflow",
"//tensorflow_models/object_detection/builders:input_reader_builder",
"//tensorflow_models/object_detection/builders:model_builder",
"//tensorflow_models/object_detection/protos:input_reader_py_pb2",
"//tensorflow_models/object_detection/protos:model_py_pb2",
"//tensorflow_models/object_detection/protos:pipeline_py_pb2",
"//tensorflow_models/object_detection/protos:train_py_pb2",
],
)
py_library(
name = "trainer",
srcs = ["trainer.py"],
deps = [
"//tensorflow",
"//tensorflow_models/object_detection/builders:optimizer_builder",
"//tensorflow_models/object_detection/builders:preprocessor_builder",
"//tensorflow_models/object_detection/core:batcher",
"//tensorflow_models/object_detection/core:standard_fields",
"//tensorflow_models/object_detection/utils:ops",
"//tensorflow_models/object_detection/utils:variables_helper",
"//tensorflow_models/slim:model_deploy",
],
)
py_test(
name = "trainer_test",
srcs = ["trainer_test.py"],
deps = [
":trainer",
"//tensorflow",
"//tensorflow_models/object_detection/core:losses",
"//tensorflow_models/object_detection/core:model",
"//tensorflow_models/object_detection/core:standard_fields",
"//tensorflow_models/object_detection/protos:train_py_pb2",
],
)
py_library(
name = "eval_util",
srcs = [
"eval_util.py",
],
deps = [
"//tensorflow",
"//tensorflow_models/object_detection/utils:label_map_util",
"//tensorflow_models/object_detection/utils:object_detection_evaluation",
"//tensorflow_models/object_detection/utils:visualization_utils",
],
)
py_library(
name = "evaluator",
srcs = ["evaluator.py"],
deps = [
"//tensorflow",
"//tensorflow_models/object_detection:eval_util",
"//tensorflow_models/object_detection/core:box_list",
"//tensorflow_models/object_detection/core:box_list_ops",
"//tensorflow_models/object_detection/core:prefetcher",
"//tensorflow_models/object_detection/core:standard_fields",
"//tensorflow_models/object_detection/protos:eval_py_pb2",
],
)
py_binary(
name = "eval",
srcs = [
"eval.py",
],
deps = [
":evaluator",
"//tensorflow",
"//tensorflow_models/object_detection/builders:input_reader_builder",
"//tensorflow_models/object_detection/builders:model_builder",
"//tensorflow_models/object_detection/protos:eval_py_pb2",
"//tensorflow_models/object_detection/protos:input_reader_py_pb2",
"//tensorflow_models/object_detection/protos:model_py_pb2",
"//tensorflow_models/object_detection/protos:pipeline_py_pb2",
"//tensorflow_models/object_detection/utils:label_map_util",
],
)
py_library(
name = "exporter",
srcs = [
"exporter.py",
],
deps = [
"//tensorflow",
"//tensorflow/python/tools:freeze_graph_lib",
"//tensorflow_models/object_detection/builders:model_builder",
"//tensorflow_models/object_detection/core:standard_fields",
"//tensorflow_models/object_detection/data_decoders:tf_example_decoder",
],
)
py_test(
name = "exporter_test",
srcs = [
"exporter_test.py",
],
deps = [
":exporter",
"//tensorflow",
"//tensorflow_models/object_detection/builders:model_builder",
"//tensorflow_models/object_detection/core:model",
"//tensorflow_models/object_detection/protos:pipeline_py_pb2",
],
)
py_binary(
name = "export_inference_graph",
srcs = [
"export_inference_graph.py",
],
deps = [
":exporter",
"//tensorflow",
"//tensorflow_models/object_detection/protos:pipeline_py_pb2",
],
)
py_binary(
name = "create_pascal_tf_record",
srcs = [
"create_pascal_tf_record.py",
],
deps = [
"//third_party/py/PIL:pil",
"//third_party/py/lxml",
"//tensorflow",
"//tensorflow_models/object_detection/utils:dataset_util",
"//tensorflow_models/object_detection/utils:label_map_util",
],
)
py_test(
name = "create_pascal_tf_record_test",
srcs = [
"create_pascal_tf_record_test.py",
],
deps = [
":create_pascal_tf_record",
"//tensorflow",
],
)
py_binary(
name = "create_pet_tf_record",
srcs = [
"create_pet_tf_record.py",
],
deps = [
"//third_party/py/PIL:pil",
"//third_party/py/lxml",
"//tensorflow",
"//tensorflow_models/object_detection/utils:dataset_util",
"//tensorflow_models/object_detection/utils:label_map_util",
],
)
# Contributing to the Tensorflow Object Detection API
Patches to Tensorflow Object Detection API are welcome!
We require contributors to fill out either the individual or corporate
Contributor License Agreement (CLA).
* If you are an individual writing original source code and you're sure you own the intellectual property, then you'll need to sign an [individual CLA](http://code.google.com/legal/individual-cla-v1.0.html).
* If you work for a company that wants to allow you to contribute your work, then you'll need to sign a [corporate CLA](http://code.google.com/legal/corporate-cla-v1.0.html).
Please follow the
[Tensorflow contributing guidelines](https://github.com/tensorflow/tensorflow/blob/master/CONTRIBUTING.md)
when submitting pull requests.
# Tensorflow Object Detection API
Creating accurate machine learning models capable of localizing and identifying
multiple objects in a single image remains a core challenge in computer vision.
The TensorFlow Object Detection API is an open source framework built on top of
TensorFlow that makes it easy to construct, train and deploy object detection
models. At Google we’ve certainly found this codebase to be useful for our
computer vision needs, and we hope that you will as well.
<p align="center">
<img src="g3doc/img/kites_detections_output.jpg" width=676 height=450>
</p>
Contributions to the codebase are welcome and we would love to hear back from
you if you find this API useful. Finally if you use the Tensorflow Object
Detection API for a research publication, please consider citing:
```
"Speed/accuracy trade-offs for modern convolutional object detectors."
Huang J, Rathod V, Sun C, Zhu M, Korattikara A, Fathi A, Fischer I, Wojna Z,
Song Y, Guadarrama S, Murphy K, CVPR 2017
```
\[[link](https://arxiv.org/abs/1611.10012)\]\[[bibtex](
https://scholar.googleusercontent.com/scholar.bib?q=info:l291WsrB-hQJ:scholar.google.com/&output=citation&scisig=AAGBfm0AAAAAWUIIlnPZ_L9jxvPwcC49kDlELtaeIyU-&scisf=4&ct=citation&cd=-1&hl=en&scfhb=1)\]
## Maintainers
* Jonathan Huang, github: [jch1](https://github.com/jch1)
* Vivek Rathod, github: [tombstone](https://github.com/tombstone)
* Derek Chow, github: [derekjchow](https://github.com/derekjchow)
* Chen Sun, github: [jesu9](https://github.com/jesu9)
* Menglong Zhu, github: [dreamdragon](https://github.com/dreamdragon)
## Table of contents
Before You Start:
* <a href='g3doc/installation.md'>Installation</a><br>
Quick Start:
* <a href='object_detection_tutorial.ipynb'>
Quick Start: Jupyter notebook for off-the-shelf inference</a><br>
* <a href="g3doc/running_pets.md">Quick Start: Training a pet detector</a><br>
Setup:
* <a href='g3doc/configuring_jobs.md'>
Configuring an object detection pipeline</a><br>
* <a href='g3doc/preparing_inputs.md'>Preparing inputs</a><br>
Running:
* <a href='g3doc/running_locally.md'>Running locally</a><br>
* <a href='g3doc/running_on_cloud.md'>Running on the cloud</a><br>
Extras:
* <a href='g3doc/detection_model_zoo.md'>Tensorflow detection model zoo</a><br>
* <a href='g3doc/exporting_models.md'>
Exporting a trained model for inference</a><br>
* <a href='g3doc/defining_your_own_model.md'>
Defining your own model architecture</a><br>
## Release information
### June 15, 2017
In addition to our base Tensorflow detection model definitions, this
release includes:
* A selection of trainable detection models, including:
* Single Shot Multibox Detector (SSD) with MobileNet,
* SSD with Inception V2,
* Region-Based Fully Convolutional Networks (R-FCN) with Resnet 101,
* Faster RCNN with Resnet 101,
* Faster RCNN with Inception Resnet v2
* Frozen weights (trained on the COCO dataset) for each of the above models to
be used for out-of-the-box inference purposes.
* A [Jupyter notebook](object_detection_tutorial.ipynb) for performing
out-of-the-box inference with one of our released models
* Convenient [local training](g3doc/running_locally.md) scripts as well as
distributed training and evaluation pipelines via
[Google Cloud](g3doc/running_on_cloud.md).
<b>Thanks to contributors</b>: Jonathan Huang, Vivek Rathod, Derek Chow,
Chen Sun, Menglong Zhu, Matthew Tang, Anoop Korattikara, Alireza Fathi, Ian Fischer, Zbigniew Wojna, Yang Song, Sergio Guadarrama, Jasper Uijlings,
Viacheslav Kovalevskyi, Kevin Murphy
# Tensorflow Object Detection API: Anchor Generator implementations.
package(
default_visibility = ["//visibility:public"],
)
licenses(["notice"])
# Apache 2.0
py_library(
name = "grid_anchor_generator",
srcs = [
"grid_anchor_generator.py",
],
deps = [
"//tensorflow",
"//tensorflow_models/object_detection/core:anchor_generator",
"//tensorflow_models/object_detection/core:box_list",
"//tensorflow_models/object_detection/utils:ops",
],
)
py_test(
name = "grid_anchor_generator_test",
srcs = [
"grid_anchor_generator_test.py",
],
deps = [
":grid_anchor_generator",
"//tensorflow",
],
)
py_library(
name = "multiple_grid_anchor_generator",
srcs = [
"multiple_grid_anchor_generator.py",
],
deps = [
":grid_anchor_generator",
"//tensorflow",
"//tensorflow_models/object_detection/core:anchor_generator",
"//tensorflow_models/object_detection/core:box_list_ops",
],
)
py_test(
name = "multiple_grid_anchor_generator_test",
srcs = [
"multiple_grid_anchor_generator_test.py",
],
deps = [
":multiple_grid_anchor_generator",
"//third_party/py/numpy",
],
)
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Generates grid anchors on the fly as used in Faster RCNN.
Generates grid anchors on the fly as described in:
"Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks"
Shaoqing Ren, Kaiming He, Ross Girshick, and Jian Sun.
"""
import tensorflow as tf
from object_detection.core import anchor_generator
from object_detection.core import box_list
from object_detection.utils import ops
class GridAnchorGenerator(anchor_generator.AnchorGenerator):
"""Generates a grid of anchors at given scales and aspect ratios."""
def __init__(self,
scales=(0.5, 1.0, 2.0),
aspect_ratios=(0.5, 1.0, 2.0),
base_anchor_size=None,
anchor_stride=None,
anchor_offset=None):
"""Constructs a GridAnchorGenerator.
Args:
scales: a list of (float) scales, default=(0.5, 1.0, 2.0)
aspect_ratios: a list of (float) aspect ratios, default=(0.5, 1.0, 2.0)
base_anchor_size: base anchor size as height, width (
(length-2 float32 list, default=[256, 256])
anchor_stride: difference in centers between base anchors for adjacent
grid positions (length-2 float32 list, default=[16, 16])
anchor_offset: center of the anchor with scale and aspect ratio 1 for the
upper left element of the grid, this should be zero for
feature networks with only VALID padding and even receptive
field size, but may need additional calculation if other
padding is used (length-2 float32 tensor, default=[0, 0])
"""
# Handle argument defaults
if base_anchor_size is None:
base_anchor_size = [256, 256]
base_anchor_size = tf.constant(base_anchor_size, tf.float32)
if anchor_stride is None:
anchor_stride = [16, 16]
anchor_stride = tf.constant(anchor_stride, dtype=tf.float32)
if anchor_offset is None:
anchor_offset = [0, 0]
anchor_offset = tf.constant(anchor_offset, dtype=tf.float32)
self._scales = scales
self._aspect_ratios = aspect_ratios
self._base_anchor_size = base_anchor_size
self._anchor_stride = anchor_stride
self._anchor_offset = anchor_offset
def name_scope(self):
return 'GridAnchorGenerator'
def num_anchors_per_location(self):
"""Returns the number of anchors per spatial location.
Returns:
a list of integers, one for each expected feature map to be passed to
the `generate` function.
"""
return [len(self._scales) * len(self._aspect_ratios)]
def _generate(self, feature_map_shape_list):
"""Generates a collection of bounding boxes to be used as anchors.
Args:
feature_map_shape_list: list of pairs of convnet layer resolutions in the
format [(height_0, width_0)]. For example, setting
feature_map_shape_list=[(8, 8)] asks for anchors that correspond
to an 8x8 layer. For this anchor generator, only lists of length 1 are
allowed.
Returns:
boxes: a BoxList holding a collection of N anchor boxes
Raises:
ValueError: if feature_map_shape_list, box_specs_list do not have the same
length.
ValueError: if feature_map_shape_list does not consist of pairs of
integers
"""
if not (isinstance(feature_map_shape_list, list)
and len(feature_map_shape_list) == 1):
raise ValueError('feature_map_shape_list must be a list of length 1.')
if not all([isinstance(list_item, tuple) and len(list_item) == 2
for list_item in feature_map_shape_list]):
raise ValueError('feature_map_shape_list must be a list of pairs.')
grid_height, grid_width = feature_map_shape_list[0]
scales_grid, aspect_ratios_grid = ops.meshgrid(self._scales,
self._aspect_ratios)
scales_grid = tf.reshape(scales_grid, [-1])
aspect_ratios_grid = tf.reshape(aspect_ratios_grid, [-1])
return tile_anchors(grid_height,
grid_width,
scales_grid,
aspect_ratios_grid,
self._base_anchor_size,
self._anchor_stride,
self._anchor_offset)
def tile_anchors(grid_height,
grid_width,
scales,
aspect_ratios,
base_anchor_size,
anchor_stride,
anchor_offset):
"""Create a tiled set of anchors strided along a grid in image space.
This op creates a set of anchor boxes by placing a "basis" collection of
boxes with user-specified scales and aspect ratios centered at evenly
distributed points along a grid. The basis collection is specified via the
scale and aspect_ratios arguments. For example, setting scales=[.1, .2, .2]
and aspect ratios = [2,2,1/2] means that we create three boxes: one with scale
.1, aspect ratio 2, one with scale .2, aspect ratio 2, and one with scale .2
and aspect ratio 1/2. Each box is multiplied by "base_anchor_size" before
placing it over its respective center.
Grid points are specified via grid_height, grid_width parameters as well as
the anchor_stride and anchor_offset parameters.
Args:
grid_height: size of the grid in the y direction (int or int scalar tensor)
grid_width: size of the grid in the x direction (int or int scalar tensor)
scales: a 1-d (float) tensor representing the scale of each box in the
basis set.
aspect_ratios: a 1-d (float) tensor representing the aspect ratio of each
box in the basis set. The length of the scales and aspect_ratios tensors
must be equal.
base_anchor_size: base anchor size as [height, width]
(float tensor of shape [2])
anchor_stride: difference in centers between base anchors for adjacent grid
positions (float tensor of shape [2])
anchor_offset: center of the anchor with scale and aspect ratio 1 for the
upper left element of the grid, this should be zero for
feature networks with only VALID padding and even receptive
field size, but may need some additional calculation if other
padding is used (float tensor of shape [2])
Returns:
a BoxList holding a collection of N anchor boxes
"""
ratio_sqrts = tf.sqrt(aspect_ratios)
heights = scales / ratio_sqrts * base_anchor_size[0]
widths = scales * ratio_sqrts * base_anchor_size[1]
# Get a grid of box centers
y_centers = tf.to_float(tf.range(grid_height))
y_centers = y_centers * anchor_stride[0] + anchor_offset[0]
x_centers = tf.to_float(tf.range(grid_width))
x_centers = x_centers * anchor_stride[1] + anchor_offset[1]
x_centers, y_centers = ops.meshgrid(x_centers, y_centers)
widths_grid, x_centers_grid = ops.meshgrid(widths, x_centers)
heights_grid, y_centers_grid = ops.meshgrid(heights, y_centers)
bbox_centers = tf.stack([y_centers_grid, x_centers_grid], axis=3)
bbox_sizes = tf.stack([heights_grid, widths_grid], axis=3)
bbox_centers = tf.reshape(bbox_centers, [-1, 2])
bbox_sizes = tf.reshape(bbox_sizes, [-1, 2])
bbox_corners = _center_size_bbox_to_corners_bbox(bbox_centers, bbox_sizes)
return box_list.BoxList(bbox_corners)
def _center_size_bbox_to_corners_bbox(centers, sizes):
"""Converts bbox center-size representation to corners representation.
Args:
centers: a tensor with shape [N, 2] representing bounding box centers
sizes: a tensor with shape [N, 2] representing bounding boxes
Returns:
corners: tensor with shape [N, 4] representing bounding boxes in corners
representation
"""
return tf.concat([centers - .5 * sizes, centers + .5 * sizes], 1)
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.grid_anchor_generator."""
import tensorflow as tf
from object_detection.anchor_generators import grid_anchor_generator
class GridAnchorGeneratorTest(tf.test.TestCase):
def test_construct_single_anchor(self):
"""Builds a 1x1 anchor grid to test the size of the output boxes."""
scales = [0.5, 1.0, 2.0]
aspect_ratios = [0.25, 1.0, 4.0]
anchor_offset = [7, -3]
exp_anchor_corners = [[-121, -35, 135, 29], [-249, -67, 263, 61],
[-505, -131, 519, 125], [-57, -67, 71, 61],
[-121, -131, 135, 125], [-249, -259, 263, 253],
[-25, -131, 39, 125], [-57, -259, 71, 253],
[-121, -515, 135, 509]]
anchor_generator = grid_anchor_generator.GridAnchorGenerator(
scales, aspect_ratios,
anchor_offset=anchor_offset)
anchors = anchor_generator.generate(feature_map_shape_list=[(1, 1)])
anchor_corners = anchors.get()
with self.test_session():
anchor_corners_out = anchor_corners.eval()
self.assertAllClose(anchor_corners_out, exp_anchor_corners)
def test_construct_anchor_grid(self):
base_anchor_size = [10, 10]
anchor_stride = [19, 19]
anchor_offset = [0, 0]
scales = [0.5, 1.0, 2.0]
aspect_ratios = [1.0]
exp_anchor_corners = [[-2.5, -2.5, 2.5, 2.5], [-5., -5., 5., 5.],
[-10., -10., 10., 10.], [-2.5, 16.5, 2.5, 21.5],
[-5., 14., 5, 24], [-10., 9., 10, 29],
[16.5, -2.5, 21.5, 2.5], [14., -5., 24, 5],
[9., -10., 29, 10], [16.5, 16.5, 21.5, 21.5],
[14., 14., 24, 24], [9., 9., 29, 29]]
anchor_generator = grid_anchor_generator.GridAnchorGenerator(
scales,
aspect_ratios,
base_anchor_size=base_anchor_size,
anchor_stride=anchor_stride,
anchor_offset=anchor_offset)
anchors = anchor_generator.generate(feature_map_shape_list=[(2, 2)])
anchor_corners = anchors.get()
with self.test_session():
anchor_corners_out = anchor_corners.eval()
self.assertAllClose(anchor_corners_out, exp_anchor_corners)
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Generates grid anchors on the fly corresponding to multiple CNN layers.
Generates grid anchors on the fly corresponding to multiple CNN layers as
described in:
"SSD: Single Shot MultiBox Detector"
Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy, Scott Reed,
Cheng-Yang Fu, Alexander C. Berg
(see Section 2.2: Choosing scales and aspect ratios for default boxes)
"""
import numpy as np
import tensorflow as tf
from object_detection.anchor_generators import grid_anchor_generator
from object_detection.core import anchor_generator
from object_detection.core import box_list_ops
class MultipleGridAnchorGenerator(anchor_generator.AnchorGenerator):
"""Generate a grid of anchors for multiple CNN layers."""
def __init__(self,
box_specs_list,
base_anchor_size=None,
clip_window=None):
"""Constructs a MultipleGridAnchorGenerator.
To construct anchors, at multiple grid resolutions, one must provide a
list of feature_map_shape_list (e.g., [(8, 8), (4, 4)]), and for each grid
size, a corresponding list of (scale, aspect ratio) box specifications.
For example:
box_specs_list = [[(.1, 1.0), (.1, 2.0)], # for 8x8 grid
[(.2, 1.0), (.3, 1.0), (.2, 2.0)]] # for 4x4 grid
To support the fully convolutional setting, we pass grid sizes in at
generation time, while scale and aspect ratios are fixed at construction
time.
Args:
box_specs_list: list of list of (scale, aspect ratio) pairs with the
outside list having the same number of entries as feature_map_shape_list
(which is passed in at generation time).
base_anchor_size: base anchor size as [height, width]
(length-2 float tensor, default=[256, 256]).
clip_window: a tensor of shape [4] specifying a window to which all
anchors should be clipped. If clip_window is None, then no clipping
is performed.
Raises:
ValueError: if box_specs_list is not a list of list of pairs
ValueError: if clip_window is not either None or a tensor of shape [4]
"""
if isinstance(box_specs_list, list) and all(
[isinstance(list_item, list) for list_item in box_specs_list]):
self._box_specs = box_specs_list
else:
raise ValueError('box_specs_list is expected to be a '
'list of lists of pairs')
if base_anchor_size is None:
base_anchor_size = tf.constant([256, 256], dtype=tf.float32)
self._base_anchor_size = base_anchor_size
if clip_window is not None and clip_window.get_shape().as_list() != [4]:
raise ValueError('clip_window must either be None or a shape [4] tensor')
self._clip_window = clip_window
self._scales = []
self._aspect_ratios = []
for box_spec in self._box_specs:
if not all([isinstance(entry, tuple) and len(entry) == 2
for entry in box_spec]):
raise ValueError('box_specs_list is expected to be a '
'list of lists of pairs')
scales, aspect_ratios = zip(*box_spec)
self._scales.append(scales)
self._aspect_ratios.append(aspect_ratios)
def name_scope(self):
return 'MultipleGridAnchorGenerator'
def num_anchors_per_location(self):
"""Returns the number of anchors per spatial location.
Returns:
a list of integers, one for each expected feature map to be passed to
the Generate function.
"""
return [len(box_specs) for box_specs in self._box_specs]
def _generate(self,
feature_map_shape_list,
im_height=1,
im_width=1,
anchor_strides=None,
anchor_offsets=None):
"""Generates a collection of bounding boxes to be used as anchors.
The number of anchors generated for a single grid with shape MxM where we
place k boxes over each grid center is k*M^2 and thus the total number of
anchors is the sum over all grids. In our box_specs_list example
(see the constructor docstring), we would place two boxes over each grid
point on an 8x8 grid and three boxes over each grid point on a 4x4 grid and
thus end up with 2*8^2 + 3*4^2 = 176 anchors in total. The layout of the
output anchors follows the order of how the grid sizes and box_specs are
specified (with box_spec index varying the fastest, followed by width
index, then height index, then grid index).
Args:
feature_map_shape_list: list of pairs of convnet layer resolutions in the
format [(height_0, width_0), (height_1, width_1), ...]. For example,
setting feature_map_shape_list=[(8, 8), (7, 7)] asks for anchors that
correspond to an 8x8 layer followed by a 7x7 layer.
im_height: the height of the image to generate the grid for. If both
im_height and im_width are 1, the generated anchors default to
normalized coordinates, otherwise absolute coordinates are used for the
grid.
im_width: the width of the image to generate the grid for. If both
im_height and im_width are 1, the generated anchors default to
normalized coordinates, otherwise absolute coordinates are used for the
grid.
anchor_strides: list of pairs of strides (in y and x directions
respectively). For example, setting
anchor_strides=[(.25, .25), (.5, .5)] means that we want the anchors
corresponding to the first layer to be strided by .25 and those in the
second layer to be strided by .5 in both y and x directions. By
default, if anchor_strides=None, then they are set to be the reciprocal
of the corresponding grid sizes. The pairs can also be specified as
dynamic tf.int or tf.float numbers, e.g. for variable shape input
images.
anchor_offsets: list of pairs of offsets (in y and x directions
respectively). The offset specifies where we want the center of the
(0, 0)-th anchor to lie for each layer. For example, setting
anchor_offsets=[(.125, .125), (.25, .25)]) means that we want the
(0, 0)-th anchor of the first layer to lie at (.125, .125) in image
space and likewise that we want the (0, 0)-th anchor of the second
layer to lie at (.25, .25) in image space. By default, if
anchor_offsets=None, then they are set to be half of the corresponding
anchor stride. The pairs can also be specified as dynamic tf.int or
tf.float numbers, e.g. for variable shape input images.
Returns:
boxes: a BoxList holding a collection of N anchor boxes
Raises:
ValueError: if feature_map_shape_list, box_specs_list do not have the same
length.
ValueError: if feature_map_shape_list does not consist of pairs of
integers
"""
if not (isinstance(feature_map_shape_list, list)
and len(feature_map_shape_list) == len(self._box_specs)):
raise ValueError('feature_map_shape_list must be a list with the same '
'length as self._box_specs')
if not all([isinstance(list_item, tuple) and len(list_item) == 2
for list_item in feature_map_shape_list]):
raise ValueError('feature_map_shape_list must be a list of pairs.')
if not anchor_strides:
anchor_strides = [(tf.to_float(im_height) / tf.to_float(pair[0]),
tf.to_float(im_width) / tf.to_float(pair[1]))
for pair in feature_map_shape_list]
if not anchor_offsets:
anchor_offsets = [(0.5 * stride[0], 0.5 * stride[1])
for stride in anchor_strides]
for arg, arg_name in zip([anchor_strides, anchor_offsets],
['anchor_strides', 'anchor_offsets']):
if not (isinstance(arg, list) and len(arg) == len(self._box_specs)):
raise ValueError('%s must be a list with the same length '
'as self._box_specs' % arg_name)
if not all([isinstance(list_item, tuple) and len(list_item) == 2
for list_item in arg]):
raise ValueError('%s must be a list of pairs.' % arg_name)
anchor_grid_list = []
min_im_shape = tf.to_float(tf.minimum(im_height, im_width))
base_anchor_size = min_im_shape * self._base_anchor_size
for grid_size, scales, aspect_ratios, stride, offset in zip(
feature_map_shape_list, self._scales, self._aspect_ratios,
anchor_strides, anchor_offsets):
anchor_grid_list.append(
grid_anchor_generator.tile_anchors(
grid_height=grid_size[0],
grid_width=grid_size[1],
scales=scales,
aspect_ratios=aspect_ratios,
base_anchor_size=base_anchor_size,
anchor_stride=stride,
anchor_offset=offset))
concatenated_anchors = box_list_ops.concatenate(anchor_grid_list)
num_anchors = concatenated_anchors.num_boxes_static()
if num_anchors is None:
num_anchors = concatenated_anchors.num_boxes()
if self._clip_window is not None:
clip_window = tf.multiply(
tf.to_float([im_height, im_width, im_height, im_width]),
self._clip_window)
concatenated_anchors = box_list_ops.clip_to_window(
concatenated_anchors, clip_window, filter_nonoverlapping=False)
# TODO: make reshape an option for the clip_to_window op
concatenated_anchors.set(
tf.reshape(concatenated_anchors.get(), [num_anchors, 4]))
stddevs_tensor = 0.01 * tf.ones(
[num_anchors, 4], dtype=tf.float32, name='stddevs')
concatenated_anchors.add_field('stddev', stddevs_tensor)
return concatenated_anchors
def create_ssd_anchors(num_layers=6,
min_scale=0.2,
max_scale=0.95,
aspect_ratios=(1.0, 2.0, 3.0, 1.0/2, 1.0/3),
base_anchor_size=None,
reduce_boxes_in_lowest_layer=True):
"""Creates MultipleGridAnchorGenerator for SSD anchors.
This function instantiates a MultipleGridAnchorGenerator that reproduces
``default box`` construction proposed by Liu et al in the SSD paper.
See Section 2.2 for details. Grid sizes are assumed to be passed in
at generation time from finest resolution to coarsest resolution --- this is
used to (linearly) interpolate scales of anchor boxes corresponding to the
intermediate grid sizes.
Anchors that are returned by calling the `generate` method on the returned
MultipleGridAnchorGenerator object are always in normalized coordinates
and clipped to the unit square: (i.e. all coordinates lie in [0, 1]x[0, 1]).
Args:
num_layers: integer number of grid layers to create anchors for (actual
grid sizes passed in at generation time)
min_scale: scale of anchors corresponding to finest resolution (float)
max_scale: scale of anchors corresponding to coarsest resolution (float)
aspect_ratios: list or tuple of (float) aspect ratios to place on each
grid point.
base_anchor_size: base anchor size as [height, width].
reduce_boxes_in_lowest_layer: a boolean to indicate whether the fixed 3
boxes per location is used in the lowest layer.
Returns:
a MultipleGridAnchorGenerator
"""
if base_anchor_size is None:
base_anchor_size = [1.0, 1.0]
base_anchor_size = tf.constant(base_anchor_size, dtype=tf.float32)
box_specs_list = []
scales = [min_scale + (max_scale - min_scale) * i / (num_layers - 1)
for i in range(num_layers)] + [1.0]
for layer, scale, scale_next in zip(
range(num_layers), scales[:-1], scales[1:]):
layer_box_specs = []
if layer == 0 and reduce_boxes_in_lowest_layer:
layer_box_specs = [(0.1, 1.0), (scale, 2.0), (scale, 0.5)]
else:
for aspect_ratio in aspect_ratios:
layer_box_specs.append((scale, aspect_ratio))
if aspect_ratio == 1.0:
layer_box_specs.append((np.sqrt(scale*scale_next), 1.0))
box_specs_list.append(layer_box_specs)
return MultipleGridAnchorGenerator(box_specs_list, base_anchor_size)
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for anchor_generators.multiple_grid_anchor_generator_test.py."""
import numpy as np
import tensorflow as tf
from object_detection.anchor_generators import multiple_grid_anchor_generator as ag
class MultipleGridAnchorGeneratorTest(tf.test.TestCase):
def test_construct_single_anchor_grid(self):
"""Builds a 1x1 anchor grid to test the size of the output boxes."""
exp_anchor_corners = [[-121, -35, 135, 29], [-249, -67, 263, 61],
[-505, -131, 519, 125], [-57, -67, 71, 61],
[-121, -131, 135, 125], [-249, -259, 263, 253],
[-25, -131, 39, 125], [-57, -259, 71, 253],
[-121, -515, 135, 509]]
base_anchor_size = tf.constant([256, 256], dtype=tf.float32)
box_specs_list = [[(.5, .25), (1.0, .25), (2.0, .25),
(.5, 1.0), (1.0, 1.0), (2.0, 1.0),
(.5, 4.0), (1.0, 4.0), (2.0, 4.0)]]
anchor_generator = ag.MultipleGridAnchorGenerator(
box_specs_list, base_anchor_size)
anchors = anchor_generator.generate(feature_map_shape_list=[(1, 1)],
anchor_strides=[(16, 16)],
anchor_offsets=[(7, -3)])
anchor_corners = anchors.get()
with self.test_session():
anchor_corners_out = anchor_corners.eval()
self.assertAllClose(anchor_corners_out, exp_anchor_corners)
def test_construct_anchor_grid(self):
base_anchor_size = tf.constant([10, 10], dtype=tf.float32)
box_specs_list = [[(0.5, 1.0), (1.0, 1.0), (2.0, 1.0)]]
exp_anchor_corners = [[-2.5, -2.5, 2.5, 2.5], [-5., -5., 5., 5.],
[-10., -10., 10., 10.], [-2.5, 16.5, 2.5, 21.5],
[-5., 14., 5, 24], [-10., 9., 10, 29],
[16.5, -2.5, 21.5, 2.5], [14., -5., 24, 5],
[9., -10., 29, 10], [16.5, 16.5, 21.5, 21.5],
[14., 14., 24, 24], [9., 9., 29, 29]]
anchor_generator = ag.MultipleGridAnchorGenerator(
box_specs_list, base_anchor_size)
anchors = anchor_generator.generate(feature_map_shape_list=[(2, 2)],
anchor_strides=[(19, 19)],
anchor_offsets=[(0, 0)])
anchor_corners = anchors.get()
with self.test_session():
anchor_corners_out = anchor_corners.eval()
self.assertAllClose(anchor_corners_out, exp_anchor_corners)
def test_construct_anchor_grid_non_square(self):
base_anchor_size = tf.constant([1, 1], dtype=tf.float32)
box_specs_list = [[(1.0, 1.0)]]
exp_anchor_corners = [[0., -0.25, 1., 0.75], [0., 0.25, 1., 1.25]]
anchor_generator = ag.MultipleGridAnchorGenerator(box_specs_list,
base_anchor_size)
anchors = anchor_generator.generate(feature_map_shape_list=[(tf.constant(
1, dtype=tf.int32), tf.constant(2, dtype=tf.int32))])
anchor_corners = anchors.get()
with self.test_session():
anchor_corners_out = anchor_corners.eval()
self.assertAllClose(anchor_corners_out, exp_anchor_corners)
def test_construct_anchor_grid_unnormalized(self):
base_anchor_size = tf.constant([1, 1], dtype=tf.float32)
box_specs_list = [[(1.0, 1.0)]]
exp_anchor_corners = [[0., 0., 320., 320.], [0., 320., 320., 640.]]
anchor_generator = ag.MultipleGridAnchorGenerator(box_specs_list,
base_anchor_size)
anchors = anchor_generator.generate(
feature_map_shape_list=[(tf.constant(1, dtype=tf.int32), tf.constant(
2, dtype=tf.int32))],
im_height=320,
im_width=640)
anchor_corners = anchors.get()
with self.test_session():
anchor_corners_out = anchor_corners.eval()
self.assertAllClose(anchor_corners_out, exp_anchor_corners)
def test_construct_multiple_grids(self):
base_anchor_size = tf.constant([1.0, 1.0], dtype=tf.float32)
box_specs_list = [[(1.0, 1.0), (2.0, 1.0), (1.0, 0.5)],
[(1.0, 1.0), (1.0, 0.5)]]
# height and width of box with .5 aspect ratio
h = np.sqrt(2)
w = 1.0/np.sqrt(2)
exp_small_grid_corners = [[-.25, -.25, .75, .75],
[.25-.5*h, .25-.5*w, .25+.5*h, .25+.5*w],
[-.25, .25, .75, 1.25],
[.25-.5*h, .75-.5*w, .25+.5*h, .75+.5*w],
[.25, -.25, 1.25, .75],
[.75-.5*h, .25-.5*w, .75+.5*h, .25+.5*w],
[.25, .25, 1.25, 1.25],
[.75-.5*h, .75-.5*w, .75+.5*h, .75+.5*w]]
# only test first entry of larger set of anchors
exp_big_grid_corners = [[.125-.5, .125-.5, .125+.5, .125+.5],
[.125-1.0, .125-1.0, .125+1.0, .125+1.0],
[.125-.5*h, .125-.5*w, .125+.5*h, .125+.5*w],]
anchor_generator = ag.MultipleGridAnchorGenerator(
box_specs_list, base_anchor_size)
anchors = anchor_generator.generate(feature_map_shape_list=[(4, 4), (2, 2)],
anchor_strides=[(.25, .25), (.5, .5)],
anchor_offsets=[(.125, .125),
(.25, .25)])
anchor_corners = anchors.get()
with self.test_session():
anchor_corners_out = anchor_corners.eval()
self.assertEquals(anchor_corners_out.shape, (56, 4))
big_grid_corners = anchor_corners_out[0:3, :]
small_grid_corners = anchor_corners_out[48:, :]
self.assertAllClose(small_grid_corners, exp_small_grid_corners)
self.assertAllClose(big_grid_corners, exp_big_grid_corners)
def test_construct_multiple_grids_with_clipping(self):
base_anchor_size = tf.constant([1.0, 1.0], dtype=tf.float32)
box_specs_list = [[(1.0, 1.0), (2.0, 1.0), (1.0, 0.5)],
[(1.0, 1.0), (1.0, 0.5)]]
# height and width of box with .5 aspect ratio
h = np.sqrt(2)
w = 1.0/np.sqrt(2)
exp_small_grid_corners = [[0, 0, .75, .75],
[0, 0, .25+.5*h, .25+.5*w],
[0, .25, .75, 1],
[0, .75-.5*w, .25+.5*h, 1],
[.25, 0, 1, .75],
[.75-.5*h, 0, 1, .25+.5*w],
[.25, .25, 1, 1],
[.75-.5*h, .75-.5*w, 1, 1]]
clip_window = tf.constant([0, 0, 1, 1], dtype=tf.float32)
anchor_generator = ag.MultipleGridAnchorGenerator(
box_specs_list, base_anchor_size, clip_window=clip_window)
anchors = anchor_generator.generate(feature_map_shape_list=[(4, 4), (2, 2)])
anchor_corners = anchors.get()
with self.test_session():
anchor_corners_out = anchor_corners.eval()
small_grid_corners = anchor_corners_out[48:, :]
self.assertAllClose(small_grid_corners, exp_small_grid_corners)
def test_invalid_box_specs(self):
# not all box specs are pairs
box_specs_list = [[(1.0, 1.0), (2.0, 1.0), (1.0, 0.5)],
[(1.0, 1.0), (1.0, 0.5, .3)]]
with self.assertRaises(ValueError):
ag.MultipleGridAnchorGenerator(box_specs_list)
# box_specs_list is not a list of lists
box_specs_list = [(1.0, 1.0), (2.0, 1.0), (1.0, 0.5)]
with self.assertRaises(ValueError):
ag.MultipleGridAnchorGenerator(box_specs_list)
def test_invalid_generate_arguments(self):
base_anchor_size = tf.constant([1.0, 1.0], dtype=tf.float32)
box_specs_list = [[(1.0, 1.0), (2.0, 1.0), (1.0, 0.5)],
[(1.0, 1.0), (1.0, 0.5)]]
anchor_generator = ag.MultipleGridAnchorGenerator(
box_specs_list, base_anchor_size)
# incompatible lengths with box_specs_list
with self.assertRaises(ValueError):
anchor_generator.generate(feature_map_shape_list=[(4, 4), (2, 2)],
anchor_strides=[(.25, .25)],
anchor_offsets=[(.125, .125), (.25, .25)])
with self.assertRaises(ValueError):
anchor_generator.generate(feature_map_shape_list=[(4, 4), (2, 2), (1, 1)],
anchor_strides=[(.25, .25), (.5, .5)],
anchor_offsets=[(.125, .125), (.25, .25)])
with self.assertRaises(ValueError):
anchor_generator.generate(feature_map_shape_list=[(4, 4), (2, 2)],
anchor_strides=[(.5, .5)],
anchor_offsets=[(.25, .25)])
# not pairs
with self.assertRaises(ValueError):
anchor_generator.generate(feature_map_shape_list=[(4, 4, 4), (2, 2)],
anchor_strides=[(.25, .25), (.5, .5)],
anchor_offsets=[(.125, .125), (.25, .25)])
with self.assertRaises(ValueError):
anchor_generator.generate(feature_map_shape_list=[(4, 4), (2, 2)],
anchor_strides=[(.25, .25, .1), (.5, .5)],
anchor_offsets=[(.125, .125),
(.25, .25)])
with self.assertRaises(ValueError):
anchor_generator.generate(feature_map_shape_list=[(4), (2, 2)],
anchor_strides=[(.25, .25), (.5, .5)],
anchor_offsets=[(.125), (.25)])
class CreateSSDAnchorsTest(tf.test.TestCase):
def test_create_ssd_anchors_returns_correct_shape(self):
anchor_generator = ag.create_ssd_anchors(
num_layers=6, min_scale=0.2, max_scale=0.95,
aspect_ratios=(1.0, 2.0, 3.0, 1.0/2, 1.0/3),
reduce_boxes_in_lowest_layer=True)
feature_map_shape_list = [(38, 38), (19, 19), (10, 10),
(5, 5), (3, 3), (1, 1)]
anchors = anchor_generator.generate(
feature_map_shape_list=feature_map_shape_list)
anchor_corners = anchors.get()
with self.test_session():
anchor_corners_out = anchor_corners.eval()
self.assertEquals(anchor_corners_out.shape, (7308, 4))
anchor_generator = ag.create_ssd_anchors(
num_layers=6, min_scale=0.2, max_scale=0.95,
aspect_ratios=(1.0, 2.0, 3.0, 1.0/2, 1.0/3),
reduce_boxes_in_lowest_layer=False)
feature_map_shape_list = [(38, 38), (19, 19), (10, 10),
(5, 5), (3, 3), (1, 1)]
anchors = anchor_generator.generate(
feature_map_shape_list=feature_map_shape_list)
anchor_corners = anchors.get()
with self.test_session():
anchor_corners_out = anchor_corners.eval()
self.assertEquals(anchor_corners_out.shape, (11640, 4))
if __name__ == '__main__':
tf.test.main()
# Tensorflow Object Detection API: Box Coder implementations.
package(
default_visibility = ["//visibility:public"],
)
licenses(["notice"])
# Apache 2.0
py_library(
name = "faster_rcnn_box_coder",
srcs = [
"faster_rcnn_box_coder.py",
],
deps = [
"//tensorflow_models/object_detection/core:box_coder",
"//tensorflow_models/object_detection/core:box_list",
],
)
py_test(
name = "faster_rcnn_box_coder_test",
srcs = [
"faster_rcnn_box_coder_test.py",
],
deps = [
":faster_rcnn_box_coder",
"//tensorflow",
"//tensorflow_models/object_detection/core:box_list",
],
)
py_library(
name = "keypoint_box_coder",
srcs = [
"keypoint_box_coder.py",
],
deps = [
"//tensorflow_models/object_detection/core:box_coder",
"//tensorflow_models/object_detection/core:box_list",
"//tensorflow_models/object_detection/core:standard_fields",
],
)
py_test(
name = "keypoint_box_coder_test",
srcs = [
"keypoint_box_coder_test.py",
],
deps = [
":keypoint_box_coder",
"//tensorflow",
"//tensorflow_models/object_detection/core:box_list",
"//tensorflow_models/object_detection/core:standard_fields",
],
)
py_library(
name = "mean_stddev_box_coder",
srcs = [
"mean_stddev_box_coder.py",
],
deps = [
"//tensorflow_models/object_detection/core:box_coder",
"//tensorflow_models/object_detection/core:box_list",
],
)
py_test(
name = "mean_stddev_box_coder_test",
srcs = [
"mean_stddev_box_coder_test.py",
],
deps = [
":mean_stddev_box_coder",
"//tensorflow",
"//tensorflow_models/object_detection/core:box_list",
],
)
py_library(
name = "square_box_coder",
srcs = [
"square_box_coder.py",
],
deps = [
"//tensorflow_models/object_detection/core:box_coder",
"//tensorflow_models/object_detection/core:box_list",
],
)
py_test(
name = "square_box_coder_test",
srcs = [
"square_box_coder_test.py",
],
deps = [
":square_box_coder",
"//tensorflow",
"//tensorflow_models/object_detection/core:box_list",
],
)
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Faster RCNN box coder.
Faster RCNN box coder follows the coding schema described below:
ty = (y - ya) / ha
tx = (x - xa) / wa
th = log(h / ha)
tw = log(w / wa)
where x, y, w, h denote the box's center coordinates, width and height
respectively. Similarly, xa, ya, wa, ha denote the anchor's center
coordinates, width and height. tx, ty, tw and th denote the anchor-encoded
center, width and height respectively.
See http://arxiv.org/abs/1506.01497 for details.
"""
import tensorflow as tf
from object_detection.core import box_coder
from object_detection.core import box_list
EPSILON = 1e-8
class FasterRcnnBoxCoder(box_coder.BoxCoder):
"""Faster RCNN box coder."""
def __init__(self, scale_factors=None):
"""Constructor for FasterRcnnBoxCoder.
Args:
scale_factors: List of 4 positive scalars to scale ty, tx, th and tw.
If set to None, does not perform scaling. For Faster RCNN,
the open-source implementation recommends using [10.0, 10.0, 5.0, 5.0].
"""
if scale_factors:
assert len(scale_factors) == 4
for scalar in scale_factors:
assert scalar > 0
self._scale_factors = scale_factors
@property
def code_size(self):
return 4
def _encode(self, boxes, anchors):
"""Encode a box collection with respect to anchor collection.
Args:
boxes: BoxList holding N boxes to be encoded.
anchors: BoxList of anchors.
Returns:
a tensor representing N anchor-encoded boxes of the format
[ty, tx, th, tw].
"""
# Convert anchors to the center coordinate representation.
ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes()
ycenter, xcenter, h, w = boxes.get_center_coordinates_and_sizes()
# Avoid NaN in division and log below.
ha += EPSILON
wa += EPSILON
h += EPSILON
w += EPSILON
tx = (xcenter - xcenter_a) / wa
ty = (ycenter - ycenter_a) / ha
tw = tf.log(w / wa)
th = tf.log(h / ha)
# Scales location targets as used in paper for joint training.
if self._scale_factors:
ty *= self._scale_factors[0]
tx *= self._scale_factors[1]
th *= self._scale_factors[2]
tw *= self._scale_factors[3]
return tf.transpose(tf.stack([ty, tx, th, tw]))
def _decode(self, rel_codes, anchors):
"""Decode relative codes to boxes.
Args:
rel_codes: a tensor representing N anchor-encoded boxes.
anchors: BoxList of anchors.
Returns:
boxes: BoxList holding N bounding boxes.
"""
ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes()
ty, tx, th, tw = tf.unstack(tf.transpose(rel_codes))
if self._scale_factors:
ty /= self._scale_factors[0]
tx /= self._scale_factors[1]
th /= self._scale_factors[2]
tw /= self._scale_factors[3]
w = tf.exp(tw) * wa
h = tf.exp(th) * ha
ycenter = ty * ha + ycenter_a
xcenter = tx * wa + xcenter_a
ymin = ycenter - h / 2.
xmin = xcenter - w / 2.
ymax = ycenter + h / 2.
xmax = xcenter + w / 2.
return box_list.BoxList(tf.transpose(tf.stack([ymin, xmin, ymax, xmax])))
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.box_coder.faster_rcnn_box_coder."""
import tensorflow as tf
from object_detection.box_coders import faster_rcnn_box_coder
from object_detection.core import box_list
class FasterRcnnBoxCoderTest(tf.test.TestCase):
def test_get_correct_relative_codes_after_encoding(self):
boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]]
anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]]
expected_rel_codes = [[-0.5, -0.416666, -0.405465, -0.182321],
[-0.083333, -0.222222, -0.693147, -1.098612]]
boxes = box_list.BoxList(tf.constant(boxes))
anchors = box_list.BoxList(tf.constant(anchors))
coder = faster_rcnn_box_coder.FasterRcnnBoxCoder()
rel_codes = coder.encode(boxes, anchors)
with self.test_session() as sess:
rel_codes_out, = sess.run([rel_codes])
self.assertAllClose(rel_codes_out, expected_rel_codes)
def test_get_correct_relative_codes_after_encoding_with_scaling(self):
boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]]
anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]]
scale_factors = [2, 3, 4, 5]
expected_rel_codes = [[-1., -1.25, -1.62186, -0.911608],
[-0.166667, -0.666667, -2.772588, -5.493062]]
boxes = box_list.BoxList(tf.constant(boxes))
anchors = box_list.BoxList(tf.constant(anchors))
coder = faster_rcnn_box_coder.FasterRcnnBoxCoder(
scale_factors=scale_factors)
rel_codes = coder.encode(boxes, anchors)
with self.test_session() as sess:
rel_codes_out, = sess.run([rel_codes])
self.assertAllClose(rel_codes_out, expected_rel_codes)
def test_get_correct_boxes_after_decoding(self):
anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]]
rel_codes = [[-0.5, -0.416666, -0.405465, -0.182321],
[-0.083333, -0.222222, -0.693147, -1.098612]]
expected_boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]]
anchors = box_list.BoxList(tf.constant(anchors))
coder = faster_rcnn_box_coder.FasterRcnnBoxCoder()
boxes = coder.decode(rel_codes, anchors)
with self.test_session() as sess:
boxes_out, = sess.run([boxes.get()])
self.assertAllClose(boxes_out, expected_boxes)
def test_get_correct_boxes_after_decoding_with_scaling(self):
anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]]
rel_codes = [[-1., -1.25, -1.62186, -0.911608],
[-0.166667, -0.666667, -2.772588, -5.493062]]
scale_factors = [2, 3, 4, 5]
expected_boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]]
anchors = box_list.BoxList(tf.constant(anchors))
coder = faster_rcnn_box_coder.FasterRcnnBoxCoder(
scale_factors=scale_factors)
boxes = coder.decode(rel_codes, anchors)
with self.test_session() as sess:
boxes_out, = sess.run([boxes.get()])
self.assertAllClose(boxes_out, expected_boxes)
def test_very_small_Width_nan_after_encoding(self):
boxes = [[10.0, 10.0, 10.0000001, 20.0]]
anchors = [[15.0, 12.0, 30.0, 18.0]]
expected_rel_codes = [[-0.833333, 0., -21.128731, 0.510826]]
boxes = box_list.BoxList(tf.constant(boxes))
anchors = box_list.BoxList(tf.constant(anchors))
coder = faster_rcnn_box_coder.FasterRcnnBoxCoder()
rel_codes = coder.encode(boxes, anchors)
with self.test_session() as sess:
rel_codes_out, = sess.run([rel_codes])
self.assertAllClose(rel_codes_out, expected_rel_codes)
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Keypoint box coder.
The keypoint box coder follows the coding schema described below (this is
similar to the FasterRcnnBoxCoder, except that it encodes keypoints in addition
to box coordinates):
ty = (y - ya) / ha
tx = (x - xa) / wa
th = log(h / ha)
tw = log(w / wa)
tky0 = (ky0 - ya) / ha
tkx0 = (kx0 - xa) / ha
tky1 = (ky1 - ya) / ha
tkx1 = (kx1 - xa) / ha
...
where x, y, w, h denote the box's center coordinates, width and height
respectively. Similarly, xa, ya, wa, ha denote the anchor's center
coordinates, width and height. tx, ty, tw and th denote the anchor-encoded
center, width and height respectively. ky0, kx0, ky1, kx1, ... denote the
keypoints' coordinates, and tky0, tkx0, tky1, tkx1, ... denote the
anchor-encoded keypoint coordinates.
"""
import tensorflow as tf
from object_detection.core import box_coder
from object_detection.core import box_list
from object_detection.core import standard_fields as fields
EPSILON = 1e-8
class KeypointBoxCoder(box_coder.BoxCoder):
"""Keypoint box coder."""
def __init__(self, num_keypoints, scale_factors=None):
"""Constructor for KeypointBoxCoder.
Args:
num_keypoints: Number of keypoints to encode/decode.
scale_factors: List of 4 positive scalars to scale ty, tx, th and tw.
In addition to scaling ty and tx, the first 2 scalars are used to scale
the y and x coordinates of the keypoints as well. If set to None, does
not perform scaling.
"""
self._num_keypoints = num_keypoints
if scale_factors:
assert len(scale_factors) == 4
for scalar in scale_factors:
assert scalar > 0
self._scale_factors = scale_factors
self._keypoint_scale_factors = None
if scale_factors is not None:
self._keypoint_scale_factors = tf.expand_dims(tf.tile(
[tf.to_float(scale_factors[0]), tf.to_float(scale_factors[1])],
[num_keypoints]), 1)
@property
def code_size(self):
return 4 + self._num_keypoints * 2
def _encode(self, boxes, anchors):
"""Encode a box and keypoint collection with respect to anchor collection.
Args:
boxes: BoxList holding N boxes and keypoints to be encoded. Boxes are
tensors with the shape [N, 4], and keypoints are tensors with the shape
[N, num_keypoints, 2].
anchors: BoxList of anchors.
Returns:
a tensor representing N anchor-encoded boxes of the format
[ty, tx, th, tw, tky0, tkx0, tky1, tkx1, ...] where tky0 and tkx0
represent the y and x coordinates of the first keypoint, tky1 and tkx1
represent the y and x coordinates of the second keypoint, and so on.
"""
# Convert anchors to the center coordinate representation.
ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes()
ycenter, xcenter, h, w = boxes.get_center_coordinates_and_sizes()
keypoints = boxes.get_field(fields.BoxListFields.keypoints)
keypoints = tf.transpose(tf.reshape(keypoints,
[-1, self._num_keypoints * 2]))
num_boxes = boxes.num_boxes()
# Avoid NaN in division and log below.
ha += EPSILON
wa += EPSILON
h += EPSILON
w += EPSILON
tx = (xcenter - xcenter_a) / wa
ty = (ycenter - ycenter_a) / ha
tw = tf.log(w / wa)
th = tf.log(h / ha)
tiled_anchor_centers = tf.tile(
tf.stack([ycenter_a, xcenter_a]), [self._num_keypoints, 1])
tiled_anchor_sizes = tf.tile(
tf.stack([ha, wa]), [self._num_keypoints, 1])
tkeypoints = (keypoints - tiled_anchor_centers) / tiled_anchor_sizes
# Scales location targets as used in paper for joint training.
if self._scale_factors:
ty *= self._scale_factors[0]
tx *= self._scale_factors[1]
th *= self._scale_factors[2]
tw *= self._scale_factors[3]
tkeypoints *= tf.tile(self._keypoint_scale_factors, [1, num_boxes])
tboxes = tf.stack([ty, tx, th, tw])
return tf.transpose(tf.concat([tboxes, tkeypoints], 0))
def _decode(self, rel_codes, anchors):
"""Decode relative codes to boxes and keypoints.
Args:
rel_codes: a tensor with shape [N, 4 + 2 * num_keypoints] representing N
anchor-encoded boxes and keypoints
anchors: BoxList of anchors.
Returns:
boxes: BoxList holding N bounding boxes and keypoints.
"""
ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes()
num_codes = tf.shape(rel_codes)[0]
result = tf.unstack(tf.transpose(rel_codes))
ty, tx, th, tw = result[:4]
tkeypoints = result[4:]
if self._scale_factors:
ty /= self._scale_factors[0]
tx /= self._scale_factors[1]
th /= self._scale_factors[2]
tw /= self._scale_factors[3]
tkeypoints /= tf.tile(self._keypoint_scale_factors, [1, num_codes])
w = tf.exp(tw) * wa
h = tf.exp(th) * ha
ycenter = ty * ha + ycenter_a
xcenter = tx * wa + xcenter_a
ymin = ycenter - h / 2.
xmin = xcenter - w / 2.
ymax = ycenter + h / 2.
xmax = xcenter + w / 2.
decoded_boxes_keypoints = box_list.BoxList(
tf.transpose(tf.stack([ymin, xmin, ymax, xmax])))
tiled_anchor_centers = tf.tile(
tf.stack([ycenter_a, xcenter_a]), [self._num_keypoints, 1])
tiled_anchor_sizes = tf.tile(
tf.stack([ha, wa]), [self._num_keypoints, 1])
keypoints = tkeypoints * tiled_anchor_sizes + tiled_anchor_centers
keypoints = tf.reshape(tf.transpose(keypoints),
[-1, self._num_keypoints, 2])
decoded_boxes_keypoints.add_field(fields.BoxListFields.keypoints, keypoints)
return decoded_boxes_keypoints
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.box_coder.keypoint_box_coder."""
import tensorflow as tf
from object_detection.box_coders import keypoint_box_coder
from object_detection.core import box_list
from object_detection.core import standard_fields as fields
class KeypointBoxCoderTest(tf.test.TestCase):
def test_get_correct_relative_codes_after_encoding(self):
boxes = [[10., 10., 20., 15.],
[0.2, 0.1, 0.5, 0.4]]
keypoints = [[[15., 12.], [10., 15.]],
[[0.5, 0.3], [0.2, 0.4]]]
num_keypoints = len(keypoints[0])
anchors = [[15., 12., 30., 18.],
[0.1, 0.0, 0.7, 0.9]]
expected_rel_codes = [
[-0.5, -0.416666, -0.405465, -0.182321,
-0.5, -0.5, -0.833333, 0.],
[-0.083333, -0.222222, -0.693147, -1.098612,
0.166667, -0.166667, -0.333333, -0.055556]
]
boxes = box_list.BoxList(tf.constant(boxes))
boxes.add_field(fields.BoxListFields.keypoints, tf.constant(keypoints))
anchors = box_list.BoxList(tf.constant(anchors))
coder = keypoint_box_coder.KeypointBoxCoder(num_keypoints)
rel_codes = coder.encode(boxes, anchors)
with self.test_session() as sess:
rel_codes_out, = sess.run([rel_codes])
self.assertAllClose(rel_codes_out, expected_rel_codes)
def test_get_correct_relative_codes_after_encoding_with_scaling(self):
boxes = [[10., 10., 20., 15.],
[0.2, 0.1, 0.5, 0.4]]
keypoints = [[[15., 12.], [10., 15.]],
[[0.5, 0.3], [0.2, 0.4]]]
num_keypoints = len(keypoints[0])
anchors = [[15., 12., 30., 18.],
[0.1, 0.0, 0.7, 0.9]]
scale_factors = [2, 3, 4, 5]
expected_rel_codes = [
[-1., -1.25, -1.62186, -0.911608,
-1.0, -1.5, -1.666667, 0.],
[-0.166667, -0.666667, -2.772588, -5.493062,
0.333333, -0.5, -0.666667, -0.166667]
]
boxes = box_list.BoxList(tf.constant(boxes))
boxes.add_field(fields.BoxListFields.keypoints, tf.constant(keypoints))
anchors = box_list.BoxList(tf.constant(anchors))
coder = keypoint_box_coder.KeypointBoxCoder(
num_keypoints, scale_factors=scale_factors)
rel_codes = coder.encode(boxes, anchors)
with self.test_session() as sess:
rel_codes_out, = sess.run([rel_codes])
self.assertAllClose(rel_codes_out, expected_rel_codes)
def test_get_correct_boxes_after_decoding(self):
anchors = [[15., 12., 30., 18.],
[0.1, 0.0, 0.7, 0.9]]
rel_codes = [
[-0.5, -0.416666, -0.405465, -0.182321,
-0.5, -0.5, -0.833333, 0.],
[-0.083333, -0.222222, -0.693147, -1.098612,
0.166667, -0.166667, -0.333333, -0.055556]
]
expected_boxes = [[10., 10., 20., 15.],
[0.2, 0.1, 0.5, 0.4]]
expected_keypoints = [[[15., 12.], [10., 15.]],
[[0.5, 0.3], [0.2, 0.4]]]
num_keypoints = len(expected_keypoints[0])
anchors = box_list.BoxList(tf.constant(anchors))
coder = keypoint_box_coder.KeypointBoxCoder(num_keypoints)
boxes = coder.decode(rel_codes, anchors)
with self.test_session() as sess:
boxes_out, keypoints_out = sess.run(
[boxes.get(), boxes.get_field(fields.BoxListFields.keypoints)])
self.assertAllClose(boxes_out, expected_boxes)
self.assertAllClose(keypoints_out, expected_keypoints)
def test_get_correct_boxes_after_decoding_with_scaling(self):
anchors = [[15., 12., 30., 18.],
[0.1, 0.0, 0.7, 0.9]]
rel_codes = [
[-1., -1.25, -1.62186, -0.911608,
-1.0, -1.5, -1.666667, 0.],
[-0.166667, -0.666667, -2.772588, -5.493062,
0.333333, -0.5, -0.666667, -0.166667]
]
scale_factors = [2, 3, 4, 5]
expected_boxes = [[10., 10., 20., 15.],
[0.2, 0.1, 0.5, 0.4]]
expected_keypoints = [[[15., 12.], [10., 15.]],
[[0.5, 0.3], [0.2, 0.4]]]
num_keypoints = len(expected_keypoints[0])
anchors = box_list.BoxList(tf.constant(anchors))
coder = keypoint_box_coder.KeypointBoxCoder(
num_keypoints, scale_factors=scale_factors)
boxes = coder.decode(rel_codes, anchors)
with self.test_session() as sess:
boxes_out, keypoints_out = sess.run(
[boxes.get(), boxes.get_field(fields.BoxListFields.keypoints)])
self.assertAllClose(boxes_out, expected_boxes)
self.assertAllClose(keypoints_out, expected_keypoints)
def test_very_small_width_nan_after_encoding(self):
boxes = [[10., 10., 10.0000001, 20.]]
keypoints = [[[10., 10.], [10.0000001, 20.]]]
anchors = [[15., 12., 30., 18.]]
expected_rel_codes = [[-0.833333, 0., -21.128731, 0.510826,
-0.833333, -0.833333, -0.833333, 0.833333]]
boxes = box_list.BoxList(tf.constant(boxes))
boxes.add_field(fields.BoxListFields.keypoints, tf.constant(keypoints))
anchors = box_list.BoxList(tf.constant(anchors))
coder = keypoint_box_coder.KeypointBoxCoder(2)
rel_codes = coder.encode(boxes, anchors)
with self.test_session() as sess:
rel_codes_out, = sess.run([rel_codes])
self.assertAllClose(rel_codes_out, expected_rel_codes)
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Mean stddev box coder.
This box coder use the following coding schema to encode boxes:
rel_code = (box_corner - anchor_corner_mean) / anchor_corner_stddev.
"""
from object_detection.core import box_coder
from object_detection.core import box_list
class MeanStddevBoxCoder(box_coder.BoxCoder):
"""Mean stddev box coder."""
@property
def code_size(self):
return 4
def _encode(self, boxes, anchors):
"""Encode a box collection with respect to anchor collection.
Args:
boxes: BoxList holding N boxes to be encoded.
anchors: BoxList of N anchors. We assume that anchors has an associated
stddev field.
Returns:
a tensor representing N anchor-encoded boxes
Raises:
ValueError: if the anchors BoxList does not have a stddev field
"""
if not anchors.has_field('stddev'):
raise ValueError('anchors must have a stddev field')
box_corners = boxes.get()
means = anchors.get()
stddev = anchors.get_field('stddev')
return (box_corners - means) / stddev
def _decode(self, rel_codes, anchors):
"""Decode.
Args:
rel_codes: a tensor representing N anchor-encoded boxes.
anchors: BoxList of anchors. We assume that anchors has an associated
stddev field.
Returns:
boxes: BoxList holding N bounding boxes
Raises:
ValueError: if the anchors BoxList does not have a stddev field
"""
if not anchors.has_field('stddev'):
raise ValueError('anchors must have a stddev field')
means = anchors.get()
stddevs = anchors.get_field('stddev')
box_corners = rel_codes * stddevs + means
return box_list.BoxList(box_corners)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment