Commit 30aeec75 authored by Toby Boyd's avatar Toby Boyd Committed by GitHub
Browse files

Merge pull request #2 from tensorflow/master

Sync to tensorflow-master
parents 68a18b70 78007443
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Preprocess images and bounding boxes for detection.
We perform two sets of operations in preprocessing stage:
(a) operations that are applied to both training and testing data,
(b) operations that are applied only to training data for the purpose of
data augmentation.
A preprocessing function receives a set of inputs,
e.g. an image and bounding boxes,
performs an operation on them, and returns them.
Some examples are: randomly cropping the image, randomly mirroring the image,
randomly changing the brightness, contrast, hue and
randomly jittering the bounding boxes.
The preprocess function receives a tensor_dict which is a dictionary that maps
different field names to their tensors. For example,
tensor_dict[fields.InputDataFields.image] holds the image tensor.
The image is a rank 4 tensor: [1, height, width, channels] with
dtype=tf.float32. The groundtruth_boxes is a rank 2 tensor: [N, 4] where
in each row there is a box with [ymin xmin ymax xmax].
Boxes are in normalized coordinates meaning
their coordinate values range in [0, 1]
Important Note: In tensor_dict, images is a rank 4 tensor, but preprocessing
functions receive a rank 3 tensor for processing the image. Thus, inside the
preprocess function we squeeze the image to become a rank 3 tensor and then
we pass it to the functions. At the end of the preprocess we expand the image
back to rank 4.
"""
import sys
import tensorflow as tf
from tensorflow.python.ops import control_flow_ops
from object_detection.core import box_list
from object_detection.core import box_list_ops
from object_detection.core import keypoint_ops
from object_detection.core import standard_fields as fields
def _apply_with_random_selector(x, func, num_cases):
"""Computes func(x, sel), with sel sampled from [0...num_cases-1].
Args:
x: input Tensor.
func: Python function to apply.
num_cases: Python int32, number of cases to sample sel from.
Returns:
The result of func(x, sel), where func receives the value of the
selector as a python integer, but sel is sampled dynamically.
"""
rand_sel = tf.random_uniform([], maxval=num_cases, dtype=tf.int32)
# Pass the real x only to one of the func calls.
return control_flow_ops.merge([func(
control_flow_ops.switch(x, tf.equal(rand_sel, case))[1], case)
for case in range(num_cases)])[0]
def _apply_with_random_selector_tuples(x, func, num_cases):
"""Computes func(x, sel), with sel sampled from [0...num_cases-1].
Args:
x: A tuple of input tensors.
func: Python function to apply.
num_cases: Python int32, number of cases to sample sel from.
Returns:
The result of func(x, sel), where func receives the value of the
selector as a python integer, but sel is sampled dynamically.
"""
num_inputs = len(x)
rand_sel = tf.random_uniform([], maxval=num_cases, dtype=tf.int32)
# Pass the real x only to one of the func calls.
tuples = [list() for t in x]
for case in range(num_cases):
new_x = [control_flow_ops.switch(t, tf.equal(rand_sel, case))[1] for t in x]
output = func(tuple(new_x), case)
for j in range(num_inputs):
tuples[j].append(output[j])
for i in range(num_inputs):
tuples[i] = control_flow_ops.merge(tuples[i])[0]
return tuple(tuples)
def _random_integer(minval, maxval, seed):
"""Returns a random 0-D tensor between minval and maxval.
Args:
minval: minimum value of the random tensor.
maxval: maximum value of the random tensor.
seed: random seed.
Returns:
A random 0-D tensor between minval and maxval.
"""
return tf.random_uniform(
[], minval=minval, maxval=maxval, dtype=tf.int32, seed=seed)
def normalize_image(image, original_minval, original_maxval, target_minval,
target_maxval):
"""Normalizes pixel values in the image.
Moves the pixel values from the current [original_minval, original_maxval]
range to a the [target_minval, target_maxval] range.
Args:
image: rank 3 float32 tensor containing 1
image -> [height, width, channels].
original_minval: current image minimum value.
original_maxval: current image maximum value.
target_minval: target image minimum value.
target_maxval: target image maximum value.
Returns:
image: image which is the same shape as input image.
"""
with tf.name_scope('NormalizeImage', values=[image]):
original_minval = float(original_minval)
original_maxval = float(original_maxval)
target_minval = float(target_minval)
target_maxval = float(target_maxval)
image = tf.to_float(image)
image = tf.subtract(image, original_minval)
image = tf.multiply(image, (target_maxval - target_minval) /
(original_maxval - original_minval))
image = tf.add(image, target_minval)
return image
def flip_boxes(boxes):
"""Left-right flip the boxes.
Args:
boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
Boxes are in normalized form meaning their coordinates vary
between [0, 1].
Each row is in the form of [ymin, xmin, ymax, xmax].
Returns:
Flipped boxes.
"""
# Flip boxes.
ymin, xmin, ymax, xmax = tf.split(value=boxes, num_or_size_splits=4, axis=1)
flipped_xmin = tf.subtract(1.0, xmax)
flipped_xmax = tf.subtract(1.0, xmin)
flipped_boxes = tf.concat([ymin, flipped_xmin, ymax, flipped_xmax], 1)
return flipped_boxes
def retain_boxes_above_threshold(
boxes, labels, label_scores, masks=None, keypoints=None, threshold=0.0):
"""Retains boxes whose label score is above a given threshold.
If the label score for a box is missing (represented by NaN), the box is
retained. The boxes that don't pass the threshold will not appear in the
returned tensor.
Args:
boxes: float32 tensor of shape [num_instance, 4] representing boxes
location in normalized coordinates.
labels: rank 1 int32 tensor of shape [num_instance] containing the object
classes.
label_scores: float32 tensor of shape [num_instance] representing the
score for each box.
masks: (optional) rank 3 float32 tensor with shape
[num_instances, height, width] containing instance masks. The masks are of
the same height, width as the input `image`.
keypoints: (optional) rank 3 float32 tensor with shape
[num_instances, num_keypoints, 2]. The keypoints are in y-x normalized
coordinates.
threshold: scalar python float.
Returns:
retained_boxes: [num_retained_instance, 4]
retianed_labels: [num_retained_instance]
retained_label_scores: [num_retained_instance]
If masks, or keypoints are not None, the function also returns:
retained_masks: [num_retained_instance, height, width]
retained_keypoints: [num_retained_instance, num_keypoints, 2]
"""
with tf.name_scope('RetainBoxesAboveThreshold',
values=[boxes, labels, label_scores]):
indices = tf.where(
tf.logical_or(label_scores > threshold, tf.is_nan(label_scores)))
indices = tf.squeeze(indices, axis=1)
retained_boxes = tf.gather(boxes, indices)
retained_labels = tf.gather(labels, indices)
retained_label_scores = tf.gather(label_scores, indices)
result = [retained_boxes, retained_labels, retained_label_scores]
if masks is not None:
retained_masks = tf.gather(masks, indices)
result.append(retained_masks)
if keypoints is not None:
retained_keypoints = tf.gather(keypoints, indices)
result.append(retained_keypoints)
return result
def _flip_masks(masks):
"""Left-right flips masks.
Args:
masks: rank 3 float32 tensor with shape
[num_instances, height, width] representing instance masks.
Returns:
flipped masks: rank 3 float32 tensor with shape
[num_instances, height, width] representing instance masks.
"""
return masks[:, :, ::-1]
def random_horizontal_flip(
image,
boxes=None,
masks=None,
keypoints=None,
keypoint_flip_permutation=None,
seed=None):
"""Randomly decides whether to mirror the image and detections or not.
The probability of flipping the image is 50%.
Args:
image: rank 3 float32 tensor with shape [height, width, channels].
boxes: (optional) rank 2 float32 tensor with shape [N, 4]
containing the bounding boxes.
Boxes are in normalized form meaning their coordinates vary
between [0, 1].
Each row is in the form of [ymin, xmin, ymax, xmax].
masks: (optional) rank 3 float32 tensor with shape
[num_instances, height, width] containing instance masks. The masks
are of the same height, width as the input `image`.
keypoints: (optional) rank 3 float32 tensor with shape
[num_instances, num_keypoints, 2]. The keypoints are in y-x
normalized coordinates.
keypoint_flip_permutation: rank 1 int32 tensor containing keypoint flip
permutation.
seed: random seed
Returns:
image: image which is the same shape as input image.
If boxes, masks, keypoints, and keypoint_flip_permutation is not None,
the function also returns the following tensors.
boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
Boxes are in normalized form meaning their coordinates vary
between [0, 1].
masks: rank 3 float32 tensor with shape [num_instances, height, width]
containing instance masks.
keypoints: rank 3 float32 tensor with shape
[num_instances, num_keypoints, 2]
Raises:
ValueError: if keypoints are provided but keypoint_flip_permutation is not.
"""
def _flip_image(image):
# flip image
image_flipped = tf.image.flip_left_right(image)
return image_flipped
if keypoints is not None and keypoint_flip_permutation is None:
raise ValueError(
'keypoints are provided but keypoints_flip_permutation is not provided')
with tf.name_scope('RandomHorizontalFlip', values=[image, boxes]):
result = []
# random variable defining whether to do flip or not
do_a_flip_random = tf.random_uniform([], seed=seed)
# flip only if there are bounding boxes in image!
do_a_flip_random = tf.logical_and(
tf.greater(tf.size(boxes), 0), tf.greater(do_a_flip_random, 0.5))
# flip image
image = tf.cond(do_a_flip_random, lambda: _flip_image(image), lambda: image)
result.append(image)
# flip boxes
if boxes is not None:
boxes = tf.cond(
do_a_flip_random, lambda: flip_boxes(boxes), lambda: boxes)
result.append(boxes)
# flip masks
if masks is not None:
masks = tf.cond(
do_a_flip_random, lambda: _flip_masks(masks), lambda: masks)
result.append(masks)
# flip keypoints
if keypoints is not None and keypoint_flip_permutation is not None:
permutation = keypoint_flip_permutation
keypoints = tf.cond(
do_a_flip_random,
lambda: keypoint_ops.flip_horizontal(keypoints, 0.5, permutation),
lambda: keypoints)
result.append(keypoints)
return tuple(result)
def random_pixel_value_scale(image, minval=0.9, maxval=1.1, seed=None):
"""Scales each value in the pixels of the image.
This function scales each pixel independent of the other ones.
For each value in image tensor, draws a random number between
minval and maxval and multiples the values with them.
Args:
image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
with pixel values varying between [0, 1].
minval: lower ratio of scaling pixel values.
maxval: upper ratio of scaling pixel values.
seed: random seed.
Returns:
image: image which is the same shape as input image.
"""
with tf.name_scope('RandomPixelValueScale', values=[image]):
color_coef = tf.random_uniform(
tf.shape(image),
minval=minval,
maxval=maxval,
dtype=tf.float32,
seed=seed)
image = tf.multiply(image, color_coef)
image = tf.clip_by_value(image, 0.0, 1.0)
return image
def random_image_scale(image,
masks=None,
min_scale_ratio=0.5,
max_scale_ratio=2.0,
seed=None):
"""Scales the image size.
Args:
image: rank 3 float32 tensor contains 1 image -> [height, width, channels].
masks: (optional) rank 3 float32 tensor containing masks with
size [height, width, num_masks]. The value is set to None if there are no
masks.
min_scale_ratio: minimum scaling ratio.
max_scale_ratio: maximum scaling ratio.
seed: random seed.
Returns:
image: image which is the same rank as input image.
masks: If masks is not none, resized masks which are the same rank as input
masks will be returned.
"""
with tf.name_scope('RandomImageScale', values=[image]):
result = []
image_shape = tf.shape(image)
image_height = image_shape[0]
image_width = image_shape[1]
size_coef = tf.random_uniform([],
minval=min_scale_ratio,
maxval=max_scale_ratio,
dtype=tf.float32, seed=seed)
image_newysize = tf.to_int32(
tf.multiply(tf.to_float(image_height), size_coef))
image_newxsize = tf.to_int32(
tf.multiply(tf.to_float(image_width), size_coef))
image = tf.image.resize_images(
image, [image_newysize, image_newxsize], align_corners=True)
result.append(image)
if masks:
masks = tf.image.resize_nearest_neighbor(
masks, [image_newysize, image_newxsize], align_corners=True)
result.append(masks)
return tuple(result)
def random_rgb_to_gray(image, probability=0.1, seed=None):
"""Changes the image from RGB to Grayscale with the given probability.
Args:
image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
with pixel values varying between [0, 1].
probability: the probability of returning a grayscale image.
The probability should be a number between [0, 1].
seed: random seed.
Returns:
image: image which is the same shape as input image.
"""
def _image_to_gray(image):
image_gray1 = tf.image.rgb_to_grayscale(image)
image_gray3 = tf.image.grayscale_to_rgb(image_gray1)
return image_gray3
with tf.name_scope('RandomRGBtoGray', values=[image]):
# random variable defining whether to do flip or not
do_gray_random = tf.random_uniform([], seed=seed)
image = tf.cond(
tf.greater(do_gray_random, probability), lambda: image,
lambda: _image_to_gray(image))
return image
def random_adjust_brightness(image, max_delta=0.2):
"""Randomly adjusts brightness.
Makes sure the output image is still between 0 and 1.
Args:
image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
with pixel values varying between [0, 1].
max_delta: how much to change the brightness. A value between [0, 1).
Returns:
image: image which is the same shape as input image.
boxes: boxes which is the same shape as input boxes.
"""
with tf.name_scope('RandomAdjustBrightness', values=[image]):
image = tf.image.random_brightness(image, max_delta)
image = tf.clip_by_value(image, clip_value_min=0.0, clip_value_max=1.0)
return image
def random_adjust_contrast(image, min_delta=0.8, max_delta=1.25):
"""Randomly adjusts contrast.
Makes sure the output image is still between 0 and 1.
Args:
image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
with pixel values varying between [0, 1].
min_delta: see max_delta.
max_delta: how much to change the contrast. Contrast will change with a
value between min_delta and max_delta. This value will be
multiplied to the current contrast of the image.
Returns:
image: image which is the same shape as input image.
"""
with tf.name_scope('RandomAdjustContrast', values=[image]):
image = tf.image.random_contrast(image, min_delta, max_delta)
image = tf.clip_by_value(image, clip_value_min=0.0, clip_value_max=1.0)
return image
def random_adjust_hue(image, max_delta=0.02):
"""Randomly adjusts hue.
Makes sure the output image is still between 0 and 1.
Args:
image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
with pixel values varying between [0, 1].
max_delta: change hue randomly with a value between 0 and max_delta.
Returns:
image: image which is the same shape as input image.
"""
with tf.name_scope('RandomAdjustHue', values=[image]):
image = tf.image.random_hue(image, max_delta)
image = tf.clip_by_value(image, clip_value_min=0.0, clip_value_max=1.0)
return image
def random_adjust_saturation(image, min_delta=0.8, max_delta=1.25):
"""Randomly adjusts saturation.
Makes sure the output image is still between 0 and 1.
Args:
image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
with pixel values varying between [0, 1].
min_delta: see max_delta.
max_delta: how much to change the saturation. Saturation will change with a
value between min_delta and max_delta. This value will be
multiplied to the current saturation of the image.
Returns:
image: image which is the same shape as input image.
"""
with tf.name_scope('RandomAdjustSaturation', values=[image]):
image = tf.image.random_saturation(image, min_delta, max_delta)
image = tf.clip_by_value(image, clip_value_min=0.0, clip_value_max=1.0)
return image
def random_distort_color(image, color_ordering=0):
"""Randomly distorts color.
Randomly distorts color using a combination of brightness, hue, contrast
and saturation changes. Makes sure the output image is still between 0 and 1.
Args:
image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
with pixel values varying between [0, 1].
color_ordering: Python int, a type of distortion (valid values: 0, 1).
Returns:
image: image which is the same shape as input image.
Raises:
ValueError: if color_ordering is not in {0, 1}.
"""
with tf.name_scope('RandomDistortColor', values=[image]):
if color_ordering == 0:
image = tf.image.random_brightness(image, max_delta=32. / 255.)
image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
image = tf.image.random_hue(image, max_delta=0.2)
image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
elif color_ordering == 1:
image = tf.image.random_brightness(image, max_delta=32. / 255.)
image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
image = tf.image.random_hue(image, max_delta=0.2)
else:
raise ValueError('color_ordering must be in {0, 1}')
# The random_* ops do not necessarily clamp.
image = tf.clip_by_value(image, 0.0, 1.0)
return image
def random_jitter_boxes(boxes, ratio=0.05, seed=None):
"""Randomly jitter boxes in image.
Args:
boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
Boxes are in normalized form meaning their coordinates vary
between [0, 1].
Each row is in the form of [ymin, xmin, ymax, xmax].
ratio: The ratio of the box width and height that the corners can jitter.
For example if the width is 100 pixels and ratio is 0.05,
the corners can jitter up to 5 pixels in the x direction.
seed: random seed.
Returns:
boxes: boxes which is the same shape as input boxes.
"""
def random_jitter_box(box, ratio, seed):
"""Randomly jitter box.
Args:
box: bounding box [1, 1, 4].
ratio: max ratio between jittered box and original box,
a number between [0, 0.5].
seed: random seed.
Returns:
jittered_box: jittered box.
"""
rand_numbers = tf.random_uniform(
[1, 1, 4], minval=-ratio, maxval=ratio, dtype=tf.float32, seed=seed)
box_width = tf.subtract(box[0, 0, 3], box[0, 0, 1])
box_height = tf.subtract(box[0, 0, 2], box[0, 0, 0])
hw_coefs = tf.stack([box_height, box_width, box_height, box_width])
hw_rand_coefs = tf.multiply(hw_coefs, rand_numbers)
jittered_box = tf.add(box, hw_rand_coefs)
jittered_box = tf.clip_by_value(jittered_box, 0.0, 1.0)
return jittered_box
with tf.name_scope('RandomJitterBoxes', values=[boxes]):
# boxes are [N, 4]. Lets first make them [N, 1, 1, 4]
boxes_shape = tf.shape(boxes)
boxes = tf.expand_dims(boxes, 1)
boxes = tf.expand_dims(boxes, 2)
distorted_boxes = tf.map_fn(
lambda x: random_jitter_box(x, ratio, seed), boxes, dtype=tf.float32)
distorted_boxes = tf.reshape(distorted_boxes, boxes_shape)
return distorted_boxes
def _strict_random_crop_image(image,
boxes,
labels,
masks=None,
keypoints=None,
min_object_covered=1.0,
aspect_ratio_range=(0.75, 1.33),
area_range=(0.1, 1.0),
overlap_thresh=0.3):
"""Performs random crop.
Note: boxes will be clipped to the crop. Keypoint coordinates that are
outside the crop will be set to NaN, which is consistent with the original
keypoint encoding for non-existing keypoints. This function always crops
the image and is supposed to be used by `random_crop_image` function which
sometimes returns image unchanged.
Args:
image: rank 3 float32 tensor containing 1 image -> [height, width, channels]
with pixel values varying between [0, 1].
boxes: rank 2 float32 tensor containing the bounding boxes with shape
[num_instances, 4].
Boxes are in normalized form meaning their coordinates vary
between [0, 1].
Each row is in the form of [ymin, xmin, ymax, xmax].
labels: rank 1 int32 tensor containing the object classes.
masks: (optional) rank 3 float32 tensor with shape
[num_instances, height, width] containing instance masks. The masks
are of the same height, width as the input `image`.
keypoints: (optional) rank 3 float32 tensor with shape
[num_instances, num_keypoints, 2]. The keypoints are in y-x
normalized coordinates.
min_object_covered: the cropped image must cover at least this fraction of
at least one of the input bounding boxes.
aspect_ratio_range: allowed range for aspect ratio of cropped image.
area_range: allowed range for area ratio between cropped image and the
original image.
overlap_thresh: minimum overlap thresh with new cropped
image to keep the box.
Returns:
image: image which is the same rank as input image.
boxes: boxes which is the same rank as input boxes.
Boxes are in normalized form.
labels: new labels.
If masks, or keypoints is not None, the function also returns:
masks: rank 3 float32 tensor with shape [num_instances, height, width]
containing instance masks.
keypoints: rank 3 float32 tensor with shape
[num_instances, num_keypoints, 2]
"""
with tf.name_scope('RandomCropImage', values=[image, boxes]):
image_shape = tf.shape(image)
# boxes are [N, 4]. Lets first make them [N, 1, 4].
boxes_expanded = tf.expand_dims(
tf.clip_by_value(
boxes, clip_value_min=0.0, clip_value_max=1.0), 1)
sample_distorted_bounding_box = tf.image.sample_distorted_bounding_box(
image_shape,
bounding_boxes=boxes_expanded,
min_object_covered=min_object_covered,
aspect_ratio_range=aspect_ratio_range,
area_range=area_range,
max_attempts=100,
use_image_if_no_bounding_boxes=True)
im_box_begin, im_box_size, im_box = sample_distorted_bounding_box
new_image = tf.slice(image, im_box_begin, im_box_size)
new_image.set_shape([None, None, image.get_shape()[2]])
# [1, 4]
im_box_rank2 = tf.squeeze(im_box, squeeze_dims=[0])
# [4]
im_box_rank1 = tf.squeeze(im_box)
boxlist = box_list.BoxList(boxes)
boxlist.add_field('labels', labels)
im_boxlist = box_list.BoxList(im_box_rank2)
# remove boxes that are outside cropped image
boxlist, inside_window_ids = box_list_ops.prune_completely_outside_window(
boxlist, im_box_rank1)
# remove boxes that are outside image
overlapping_boxlist, keep_ids = box_list_ops.prune_non_overlapping_boxes(
boxlist, im_boxlist, overlap_thresh)
# change the coordinate of the remaining boxes
new_labels = overlapping_boxlist.get_field('labels')
new_boxlist = box_list_ops.change_coordinate_frame(overlapping_boxlist,
im_box_rank1)
new_boxes = new_boxlist.get()
new_boxes = tf.clip_by_value(
new_boxes, clip_value_min=0.0, clip_value_max=1.0)
result = [new_image, new_boxes, new_labels]
if masks is not None:
masks_of_boxes_inside_window = tf.gather(masks, inside_window_ids)
masks_of_boxes_completely_inside_window = tf.gather(
masks_of_boxes_inside_window, keep_ids)
masks_box_begin = [0, im_box_begin[0], im_box_begin[1]]
masks_box_size = [-1, im_box_size[0], im_box_size[1]]
new_masks = tf.slice(
masks_of_boxes_completely_inside_window,
masks_box_begin, masks_box_size)
result.append(new_masks)
if keypoints is not None:
keypoints_of_boxes_inside_window = tf.gather(keypoints, inside_window_ids)
keypoints_of_boxes_completely_inside_window = tf.gather(
keypoints_of_boxes_inside_window, keep_ids)
new_keypoints = keypoint_ops.change_coordinate_frame(
keypoints_of_boxes_completely_inside_window, im_box_rank1)
new_keypoints = keypoint_ops.prune_outside_window(new_keypoints,
[0.0, 0.0, 1.0, 1.0])
result.append(new_keypoints)
return tuple(result)
def random_crop_image(image,
boxes,
labels,
masks=None,
keypoints=None,
min_object_covered=1.0,
aspect_ratio_range=(0.75, 1.33),
area_range=(0.1, 1.0),
overlap_thresh=0.3,
random_coef=0.0,
seed=None):
"""Randomly crops the image.
Given the input image and its bounding boxes, this op randomly
crops a subimage. Given a user-provided set of input constraints,
the crop window is resampled until it satisfies these constraints.
If within 100 trials it is unable to find a valid crop, the original
image is returned. See the Args section for a description of the input
constraints. Both input boxes and returned Boxes are in normalized
form (e.g., lie in the unit square [0, 1]).
This function will return the original image with probability random_coef.
Note: boxes will be clipped to the crop. Keypoint coordinates that are
outside the crop will be set to NaN, which is consistent with the original
keypoint encoding for non-existing keypoints.
Args:
image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
with pixel values varying between [0, 1].
boxes: rank 2 float32 tensor containing the bounding boxes with shape
[num_instances, 4].
Boxes are in normalized form meaning their coordinates vary
between [0, 1].
Each row is in the form of [ymin, xmin, ymax, xmax].
labels: rank 1 int32 tensor containing the object classes.
masks: (optional) rank 3 float32 tensor with shape
[num_instances, height, width] containing instance masks. The masks
are of the same height, width as the input `image`.
keypoints: (optional) rank 3 float32 tensor with shape
[num_instances, num_keypoints, 2]. The keypoints are in y-x
normalized coordinates.
min_object_covered: the cropped image must cover at least this fraction of
at least one of the input bounding boxes.
aspect_ratio_range: allowed range for aspect ratio of cropped image.
area_range: allowed range for area ratio between cropped image and the
original image.
overlap_thresh: minimum overlap thresh with new cropped
image to keep the box.
random_coef: a random coefficient that defines the chance of getting the
original image. If random_coef is 0, we will always get the
cropped image, and if it is 1.0, we will always get the
original image.
seed: random seed.
Returns:
image: Image shape will be [new_height, new_width, channels].
boxes: boxes which is the same rank as input boxes. Boxes are in normalized
form.
labels: new labels.
If masks, or keypoints are not None, the function also returns:
masks: rank 3 float32 tensor with shape [num_instances, height, width]
containing instance masks.
keypoints: rank 3 float32 tensor with shape
[num_instances, num_keypoints, 2]
"""
def strict_random_crop_image_fn():
return _strict_random_crop_image(
image,
boxes,
labels,
masks=masks,
keypoints=keypoints,
min_object_covered=min_object_covered,
aspect_ratio_range=aspect_ratio_range,
area_range=area_range,
overlap_thresh=overlap_thresh)
# avoids tf.cond to make faster RCNN training on borg. See b/140057645.
if random_coef < sys.float_info.min:
result = strict_random_crop_image_fn()
else:
do_a_crop_random = tf.random_uniform([], seed=seed)
do_a_crop_random = tf.greater(do_a_crop_random, random_coef)
outputs = [image, boxes, labels]
if masks is not None:
outputs.append(masks)
if keypoints is not None:
outputs.append(keypoints)
result = tf.cond(do_a_crop_random,
strict_random_crop_image_fn,
lambda: tuple(outputs))
return result
def random_pad_image(image,
boxes,
min_image_size=None,
max_image_size=None,
pad_color=None,
seed=None):
"""Randomly pads the image.
This function randomly pads the image with zeros. The final size of the
padded image will be between min_image_size and max_image_size.
if min_image_size is smaller than the input image size, min_image_size will
be set to the input image size. The same for max_image_size. The input image
will be located at a uniformly random location inside the padded image.
The relative location of the boxes to the original image will remain the same.
Args:
image: rank 3 float32 tensor containing 1 image -> [height, width, channels]
with pixel values varying between [0, 1].
boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
Boxes are in normalized form meaning their coordinates vary
between [0, 1].
Each row is in the form of [ymin, xmin, ymax, xmax].
min_image_size: a tensor of size [min_height, min_width], type tf.int32.
If passed as None, will be set to image size
[height, width].
max_image_size: a tensor of size [max_height, max_width], type tf.int32.
If passed as None, will be set to twice the
image [height * 2, width * 2].
pad_color: padding color. A rank 1 tensor of [3] with dtype=tf.float32.
if set as None, it will be set to average color of the input
image.
seed: random seed.
Returns:
image: Image shape will be [new_height, new_width, channels].
boxes: boxes which is the same rank as input boxes. Boxes are in normalized
form.
"""
if pad_color is None:
pad_color = tf.reduce_mean(image, reduction_indices=[0, 1])
image_shape = tf.shape(image)
image_height = image_shape[0]
image_width = image_shape[1]
if max_image_size is None:
max_image_size = tf.stack([image_height * 2, image_width * 2])
max_image_size = tf.maximum(max_image_size,
tf.stack([image_height, image_width]))
if min_image_size is None:
min_image_size = tf.stack([image_height, image_width])
min_image_size = tf.maximum(min_image_size,
tf.stack([image_height, image_width]))
target_height = tf.cond(
max_image_size[0] > min_image_size[0],
lambda: _random_integer(min_image_size[0], max_image_size[0], seed),
lambda: max_image_size[0])
target_width = tf.cond(
max_image_size[1] > min_image_size[1],
lambda: _random_integer(min_image_size[1], max_image_size[1], seed),
lambda: max_image_size[1])
offset_height = tf.cond(
target_height > image_height,
lambda: _random_integer(0, target_height - image_height, seed),
lambda: tf.constant(0, dtype=tf.int32))
offset_width = tf.cond(
target_width > image_width,
lambda: _random_integer(0, target_width - image_width, seed),
lambda: tf.constant(0, dtype=tf.int32))
new_image = tf.image.pad_to_bounding_box(
image, offset_height=offset_height, offset_width=offset_width,
target_height=target_height, target_width=target_width)
# Setting color of the padded pixels
image_ones = tf.ones_like(image)
image_ones_padded = tf.image.pad_to_bounding_box(
image_ones, offset_height=offset_height, offset_width=offset_width,
target_height=target_height, target_width=target_width)
image_color_paded = (1.0 - image_ones_padded) * pad_color
new_image += image_color_paded
# setting boxes
new_window = tf.to_float(
tf.stack([
-offset_height, -offset_width, target_height - offset_height,
target_width - offset_width
]))
new_window /= tf.to_float(
tf.stack([image_height, image_width, image_height, image_width]))
boxlist = box_list.BoxList(boxes)
new_boxlist = box_list_ops.change_coordinate_frame(boxlist, new_window)
new_boxes = new_boxlist.get()
return new_image, new_boxes
def random_crop_pad_image(image,
boxes,
labels,
min_object_covered=1.0,
aspect_ratio_range=(0.75, 1.33),
area_range=(0.1, 1.0),
overlap_thresh=0.3,
random_coef=0.0,
min_padded_size_ratio=None,
max_padded_size_ratio=None,
pad_color=None,
seed=None):
"""Randomly crops and pads the image.
Given an input image and its bounding boxes, this op first randomly crops
the image and then randomly pads the image with background values. Parameters
min_padded_size_ratio and max_padded_size_ratio, determine the range of the
final output image size. Specifically, the final image size will have a size
in the range of min_padded_size_ratio * tf.shape(image) and
max_padded_size_ratio * tf.shape(image). Note that these ratios are with
respect to the size of the original image, so we can't capture the same
effect easily by independently applying RandomCropImage
followed by RandomPadImage.
Args:
image: rank 3 float32 tensor containing 1 image -> [height, width, channels]
with pixel values varying between [0, 1].
boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
Boxes are in normalized form meaning their coordinates vary
between [0, 1].
Each row is in the form of [ymin, xmin, ymax, xmax].
labels: rank 1 int32 tensor containing the object classes.
min_object_covered: the cropped image must cover at least this fraction of
at least one of the input bounding boxes.
aspect_ratio_range: allowed range for aspect ratio of cropped image.
area_range: allowed range for area ratio between cropped image and the
original image.
overlap_thresh: minimum overlap thresh with new cropped
image to keep the box.
random_coef: a random coefficient that defines the chance of getting the
original image. If random_coef is 0, we will always get the
cropped image, and if it is 1.0, we will always get the
original image.
min_padded_size_ratio: min ratio of padded image height and width to the
input image's height and width. If None, it will
be set to [0.0, 0.0].
max_padded_size_ratio: max ratio of padded image height and width to the
input image's height and width. If None, it will
be set to [2.0, 2.0].
pad_color: padding color. A rank 1 tensor of [3] with dtype=tf.float32.
if set as None, it will be set to average color of the randomly
cropped image.
seed: random seed.
Returns:
padded_image: padded image.
padded_boxes: boxes which is the same rank as input boxes. Boxes are in
normalized form.
cropped_labels: cropped labels.
"""
image_size = tf.shape(image)
image_height = image_size[0]
image_width = image_size[1]
if min_padded_size_ratio is None:
min_padded_size_ratio = tf.constant([0.0, 0.0], tf.float32)
if max_padded_size_ratio is None:
max_padded_size_ratio = tf.constant([2.0, 2.0], tf.float32)
cropped_image, cropped_boxes, cropped_labels = random_crop_image(
image=image,
boxes=boxes,
labels=labels,
min_object_covered=min_object_covered,
aspect_ratio_range=aspect_ratio_range,
area_range=area_range,
overlap_thresh=overlap_thresh,
random_coef=random_coef,
seed=seed)
min_image_size = tf.to_int32(
tf.to_float(tf.stack([image_height, image_width])) *
min_padded_size_ratio)
max_image_size = tf.to_int32(
tf.to_float(tf.stack([image_height, image_width])) *
max_padded_size_ratio)
padded_image, padded_boxes = random_pad_image(
cropped_image,
cropped_boxes,
min_image_size=min_image_size,
max_image_size=max_image_size,
pad_color=pad_color,
seed=seed)
return padded_image, padded_boxes, cropped_labels
def random_crop_to_aspect_ratio(image,
boxes,
labels,
masks=None,
keypoints=None,
aspect_ratio=1.0,
overlap_thresh=0.3,
seed=None):
"""Randomly crops an image to the specified aspect ratio.
Randomly crops the a portion of the image such that the crop is of the
specified aspect ratio, and the crop is as large as possible. If the specified
aspect ratio is larger than the aspect ratio of the image, this op will
randomly remove rows from the top and bottom of the image. If the specified
aspect ratio is less than the aspect ratio of the image, this op will randomly
remove cols from the left and right of the image. If the specified aspect
ratio is the same as the aspect ratio of the image, this op will return the
image.
Args:
image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
with pixel values varying between [0, 1].
boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
Boxes are in normalized form meaning their coordinates vary
between [0, 1].
Each row is in the form of [ymin, xmin, ymax, xmax].
labels: rank 1 int32 tensor containing the object classes.
masks: (optional) rank 3 float32 tensor with shape
[num_instances, height, width] containing instance masks. The masks
are of the same height, width as the input `image`.
keypoints: (optional) rank 3 float32 tensor with shape
[num_instances, num_keypoints, 2]. The keypoints are in y-x
normalized coordinates.
aspect_ratio: the aspect ratio of cropped image.
overlap_thresh: minimum overlap thresh with new cropped
image to keep the box.
seed: random seed.
Returns:
image: image which is the same rank as input image.
boxes: boxes which is the same rank as input boxes.
Boxes are in normalized form.
labels: new labels.
If masks, or keypoints is not None, the function also returns:
masks: rank 3 float32 tensor with shape [num_instances, height, width]
containing instance masks.
keypoints: rank 3 float32 tensor with shape
[num_instances, num_keypoints, 2]
Raises:
ValueError: If image is not a 3D tensor.
"""
if len(image.get_shape()) != 3:
raise ValueError('Image should be 3D tensor')
with tf.name_scope('RandomCropToAspectRatio', values=[image]):
image_shape = tf.shape(image)
orig_height = image_shape[0]
orig_width = image_shape[1]
orig_aspect_ratio = tf.to_float(orig_width) / tf.to_float(orig_height)
new_aspect_ratio = tf.constant(aspect_ratio, dtype=tf.float32)
def target_height_fn():
return tf.to_int32(
tf.round(
tf.to_float(orig_height) * orig_aspect_ratio / new_aspect_ratio))
target_height = tf.cond(
orig_aspect_ratio >= new_aspect_ratio,
lambda: orig_height,
target_height_fn)
def target_width_fn():
return tf.to_int32(
tf.round(
tf.to_float(orig_width) * new_aspect_ratio / orig_aspect_ratio))
target_width = tf.cond(
orig_aspect_ratio <= new_aspect_ratio,
lambda: orig_width,
target_width_fn)
# either offset_height = 0 and offset_width is randomly chosen from
# [0, offset_width - target_width), or else offset_width = 0 and
# offset_height is randomly chosen from [0, offset_height - target_height)
offset_height = _random_integer(0, orig_height - target_height + 1, seed)
offset_width = _random_integer(0, orig_width - target_width + 1, seed)
new_image = tf.image.crop_to_bounding_box(
image, offset_height, offset_width, target_height, target_width)
im_box = tf.stack([
tf.to_float(offset_height) / tf.to_float(orig_height),
tf.to_float(offset_width) / tf.to_float(orig_width),
tf.to_float(offset_height + target_height) / tf.to_float(orig_height),
tf.to_float(offset_width + target_width) / tf.to_float(orig_width)
])
boxlist = box_list.BoxList(boxes)
boxlist.add_field('labels', labels)
im_boxlist = box_list.BoxList(tf.expand_dims(im_box, 0))
# remove boxes whose overlap with the image is less than overlap_thresh
overlapping_boxlist, keep_ids = box_list_ops.prune_non_overlapping_boxes(
boxlist, im_boxlist, overlap_thresh)
# change the coordinate of the remaining boxes
new_labels = overlapping_boxlist.get_field('labels')
new_boxlist = box_list_ops.change_coordinate_frame(overlapping_boxlist,
im_box)
new_boxlist = box_list_ops.clip_to_window(new_boxlist,
tf.constant(
[0.0, 0.0, 1.0, 1.0],
tf.float32))
new_boxes = new_boxlist.get()
result = [new_image, new_boxes, new_labels]
if masks is not None:
masks_inside_window = tf.gather(masks, keep_ids)
masks_box_begin = tf.stack([0, offset_height, offset_width])
masks_box_size = tf.stack([-1, target_height, target_width])
new_masks = tf.slice(masks_inside_window, masks_box_begin, masks_box_size)
result.append(new_masks)
if keypoints is not None:
keypoints_inside_window = tf.gather(keypoints, keep_ids)
new_keypoints = keypoint_ops.change_coordinate_frame(
keypoints_inside_window, im_box)
new_keypoints = keypoint_ops.prune_outside_window(new_keypoints,
[0.0, 0.0, 1.0, 1.0])
result.append(new_keypoints)
return tuple(result)
def random_black_patches(image,
max_black_patches=10,
probability=0.5,
size_to_image_ratio=0.1,
random_seed=None):
"""Randomly adds some black patches to the image.
This op adds up to max_black_patches square black patches of a fixed size
to the image where size is specified via the size_to_image_ratio parameter.
Args:
image: rank 3 float32 tensor containing 1 image -> [height, width, channels]
with pixel values varying between [0, 1].
max_black_patches: number of times that the function tries to add a
black box to the image.
probability: at each try, what is the chance of adding a box.
size_to_image_ratio: Determines the ratio of the size of the black patches
to the size of the image.
box_size = size_to_image_ratio *
min(image_width, image_height)
random_seed: random seed.
Returns:
image
"""
def add_black_patch_to_image(image):
"""Function for adding one patch to the image.
Args:
image: image
Returns:
image with a randomly added black box
"""
image_shape = tf.shape(image)
image_height = image_shape[0]
image_width = image_shape[1]
box_size = tf.to_int32(
tf.multiply(
tf.minimum(tf.to_float(image_height), tf.to_float(image_width)),
size_to_image_ratio))
normalized_y_min = tf.random_uniform(
[], minval=0.0, maxval=(1.0 - size_to_image_ratio), seed=random_seed)
normalized_x_min = tf.random_uniform(
[], minval=0.0, maxval=(1.0 - size_to_image_ratio), seed=random_seed)
y_min = tf.to_int32(normalized_y_min * tf.to_float(image_height))
x_min = tf.to_int32(normalized_x_min * tf.to_float(image_width))
black_box = tf.ones([box_size, box_size, 3], dtype=tf.float32)
mask = 1.0 - tf.image.pad_to_bounding_box(black_box, y_min, x_min,
image_height, image_width)
image = tf.multiply(image, mask)
return image
with tf.name_scope('RandomBlackPatchInImage', values=[image]):
for _ in range(max_black_patches):
random_prob = tf.random_uniform([], minval=0.0, maxval=1.0,
dtype=tf.float32, seed=random_seed)
image = tf.cond(
tf.greater(random_prob, probability), lambda: image,
lambda: add_black_patch_to_image(image))
return image
def image_to_float(image):
"""Used in Faster R-CNN. Casts image pixel values to float.
Args:
image: input image which might be in tf.uint8 or sth else format
Returns:
image: image in tf.float32 format.
"""
with tf.name_scope('ImageToFloat', values=[image]):
image = tf.to_float(image)
return image
def random_resize_method(image, target_size):
"""Uses a random resize method to resize the image to target size.
Args:
image: a rank 3 tensor.
target_size: a list of [target_height, target_width]
Returns:
resized image.
"""
resized_image = _apply_with_random_selector(
image,
lambda x, method: tf.image.resize_images(x, target_size, method),
num_cases=4)
return resized_image
def resize_to_range(image,
masks=None,
min_dimension=None,
max_dimension=None,
align_corners=False):
"""Resizes an image so its dimensions are within the provided value.
The output size can be described by two cases:
1. If the image can be rescaled so its minimum dimension is equal to the
provided value without the other dimension exceeding max_dimension,
then do so.
2. Otherwise, resize so the largest dimension is equal to max_dimension.
Args:
image: A 3D tensor of shape [height, width, channels]
masks: (optional) rank 3 float32 tensor with shape
[num_instances, height, width] containing instance masks.
min_dimension: (optional) (scalar) desired size of the smaller image
dimension.
max_dimension: (optional) (scalar) maximum allowed size
of the larger image dimension.
align_corners: bool. If true, exactly align all 4 corners of the input
and output. Defaults to False.
Returns:
A 3D tensor of shape [new_height, new_width, channels],
where the image has been resized (with bilinear interpolation) so that
min(new_height, new_width) == min_dimension or
max(new_height, new_width) == max_dimension.
If masks is not None, also outputs masks:
A 3D tensor of shape [num_instances, new_height, new_width]
Raises:
ValueError: if the image is not a 3D tensor.
"""
if len(image.get_shape()) != 3:
raise ValueError('Image should be 3D tensor')
with tf.name_scope('ResizeToRange', values=[image, min_dimension]):
image_shape = tf.shape(image)
orig_height = tf.to_float(image_shape[0])
orig_width = tf.to_float(image_shape[1])
orig_min_dim = tf.minimum(orig_height, orig_width)
# Calculates the larger of the possible sizes
min_dimension = tf.constant(min_dimension, dtype=tf.float32)
large_scale_factor = min_dimension / orig_min_dim
# Scaling orig_(height|width) by large_scale_factor will make the smaller
# dimension equal to min_dimension, save for floating point rounding errors.
# For reasonably-sized images, taking the nearest integer will reliably
# eliminate this error.
large_height = tf.to_int32(tf.round(orig_height * large_scale_factor))
large_width = tf.to_int32(tf.round(orig_width * large_scale_factor))
large_size = tf.stack([large_height, large_width])
if max_dimension:
# Calculates the smaller of the possible sizes, use that if the larger
# is too big.
orig_max_dim = tf.maximum(orig_height, orig_width)
max_dimension = tf.constant(max_dimension, dtype=tf.float32)
small_scale_factor = max_dimension / orig_max_dim
# Scaling orig_(height|width) by small_scale_factor will make the larger
# dimension equal to max_dimension, save for floating point rounding
# errors. For reasonably-sized images, taking the nearest integer will
# reliably eliminate this error.
small_height = tf.to_int32(tf.round(orig_height * small_scale_factor))
small_width = tf.to_int32(tf.round(orig_width * small_scale_factor))
small_size = tf.stack([small_height, small_width])
new_size = tf.cond(
tf.to_float(tf.reduce_max(large_size)) > max_dimension,
lambda: small_size, lambda: large_size)
else:
new_size = large_size
new_image = tf.image.resize_images(image, new_size,
align_corners=align_corners)
result = new_image
if masks is not None:
num_instances = tf.shape(masks)[0]
def resize_masks_branch():
new_masks = tf.expand_dims(masks, 3)
new_masks = tf.image.resize_nearest_neighbor(
new_masks, new_size, align_corners=align_corners)
new_masks = tf.squeeze(new_masks, axis=3)
return new_masks
def reshape_masks_branch():
new_masks = tf.reshape(masks, [0, new_size[0], new_size[1]])
return new_masks
masks = tf.cond(num_instances > 0,
resize_masks_branch,
reshape_masks_branch)
result = [new_image, masks]
return result
def scale_boxes_to_pixel_coordinates(image, boxes, keypoints=None):
"""Scales boxes from normalized to pixel coordinates.
Args:
image: A 3D float32 tensor of shape [height, width, channels].
boxes: A 2D float32 tensor of shape [num_boxes, 4] containing the bounding
boxes in normalized coordinates. Each row is of the form
[ymin, xmin, ymax, xmax].
keypoints: (optional) rank 3 float32 tensor with shape
[num_instances, num_keypoints, 2]. The keypoints are in y-x normalized
coordinates.
Returns:
image: unchanged input image.
scaled_boxes: a 2D float32 tensor of shape [num_boxes, 4] containing the
bounding boxes in pixel coordinates.
scaled_keypoints: a 3D float32 tensor with shape
[num_instances, num_keypoints, 2] containing the keypoints in pixel
coordinates.
"""
boxlist = box_list.BoxList(boxes)
image_height = tf.shape(image)[0]
image_width = tf.shape(image)[1]
scaled_boxes = box_list_ops.scale(boxlist, image_height, image_width).get()
result = [image, scaled_boxes]
if keypoints is not None:
scaled_keypoints = keypoint_ops.scale(keypoints, image_height, image_width)
result.append(scaled_keypoints)
return tuple(result)
# pylint: disable=g-doc-return-or-yield
def resize_image(image,
masks=None,
new_height=600,
new_width=1024,
method=tf.image.ResizeMethod.BILINEAR,
align_corners=False):
"""See `tf.image.resize_images` for detailed doc."""
with tf.name_scope(
'ResizeImage',
values=[image, new_height, new_width, method, align_corners]):
new_image = tf.image.resize_images(image, [new_height, new_width],
method=method,
align_corners=align_corners)
result = new_image
if masks is not None:
num_instances = tf.shape(masks)[0]
new_size = tf.constant([new_height, new_width], dtype=tf.int32)
def resize_masks_branch():
new_masks = tf.expand_dims(masks, 3)
new_masks = tf.image.resize_nearest_neighbor(
new_masks, new_size, align_corners=align_corners)
new_masks = tf.squeeze(new_masks, axis=3)
return new_masks
def reshape_masks_branch():
new_masks = tf.reshape(masks, [0, new_size[0], new_size[1]])
return new_masks
masks = tf.cond(num_instances > 0,
resize_masks_branch,
reshape_masks_branch)
result = [new_image, masks]
return result
def subtract_channel_mean(image, means=None):
"""Normalizes an image by subtracting a mean from each channel.
Args:
image: A 3D tensor of shape [height, width, channels]
means: float list containing a mean for each channel
Returns:
normalized_images: a tensor of shape [height, width, channels]
Raises:
ValueError: if images is not a 4D tensor or if the number of means is not
equal to the number of channels.
"""
with tf.name_scope('SubtractChannelMean', values=[image, means]):
if len(image.get_shape()) != 3:
raise ValueError('Input must be of size [height, width, channels]')
if len(means) != image.get_shape()[-1]:
raise ValueError('len(means) must match the number of channels')
return image - [[means]]
def one_hot_encoding(labels, num_classes=None):
"""One-hot encodes the multiclass labels.
Example usage:
labels = tf.constant([1, 4], dtype=tf.int32)
one_hot = OneHotEncoding(labels, num_classes=5)
one_hot.eval() # evaluates to [0, 1, 0, 0, 1]
Args:
labels: A tensor of shape [None] corresponding to the labels.
num_classes: Number of classes in the dataset.
Returns:
onehot_labels: a tensor of shape [num_classes] corresponding to the one hot
encoding of the labels.
Raises:
ValueError: if num_classes is not specified.
"""
with tf.name_scope('OneHotEncoding', values=[labels]):
if num_classes is None:
raise ValueError('num_classes must be specified')
labels = tf.one_hot(labels, num_classes, 1, 0)
return tf.reduce_max(labels, 0)
def rgb_to_gray(image):
"""Converts a 3 channel RGB image to a 1 channel grayscale image.
Args:
image: Rank 3 float32 tensor containing 1 image -> [height, width, 3]
with pixel values varying between [0, 1].
Returns:
image: A single channel grayscale image -> [image, height, 1].
"""
return tf.image.rgb_to_grayscale(image)
def ssd_random_crop(image,
boxes,
labels,
masks=None,
keypoints=None,
min_object_covered=(0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0),
aspect_ratio_range=((0.5, 2.0),) * 7,
area_range=((0.1, 1.0),) * 7,
overlap_thresh=(0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0),
random_coef=(0.15,) * 7,
seed=None):
"""Random crop preprocessing with default parameters as in SSD paper.
Liu et al., SSD: Single shot multibox detector.
For further information on random crop preprocessing refer to RandomCrop
function above.
Args:
image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
with pixel values varying between [0, 1].
boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
Boxes are in normalized form meaning their coordinates vary
between [0, 1].
Each row is in the form of [ymin, xmin, ymax, xmax].
labels: rank 1 int32 tensor containing the object classes.
masks: (optional) rank 3 float32 tensor with shape
[num_instances, height, width] containing instance masks. The masks
are of the same height, width as the input `image`.
keypoints: (optional) rank 3 float32 tensor with shape
[num_instances, num_keypoints, 2]. The keypoints are in y-x
normalized coordinates.
min_object_covered: the cropped image must cover at least this fraction of
at least one of the input bounding boxes.
aspect_ratio_range: allowed range for aspect ratio of cropped image.
area_range: allowed range for area ratio between cropped image and the
original image.
overlap_thresh: minimum overlap thresh with new cropped
image to keep the box.
random_coef: a random coefficient that defines the chance of getting the
original image. If random_coef is 0, we will always get the
cropped image, and if it is 1.0, we will always get the
original image.
seed: random seed.
Returns:
image: image which is the same rank as input image.
boxes: boxes which is the same rank as input boxes.
Boxes are in normalized form.
labels: new labels.
If masks, or keypoints is not None, the function also returns:
masks: rank 3 float32 tensor with shape [num_instances, height, width]
containing instance masks.
keypoints: rank 3 float32 tensor with shape
[num_instances, num_keypoints, 2]
"""
def random_crop_selector(selected_result, index):
"""Applies random_crop_image to selected result.
Args:
selected_result: A tuple containing image, boxes, labels, keypoints (if
not None), and masks (if not None).
index: The index that was randomly selected.
Returns: A tuple containing image, boxes, labels, keypoints (if not None),
and masks (if not None).
"""
i = 3
image, boxes, labels = selected_result[:i]
selected_masks = None
selected_keypoints = None
if masks is not None:
selected_masks = selected_result[i]
i += 1
if keypoints is not None:
selected_keypoints = selected_result[i]
return random_crop_image(
image=image,
boxes=boxes,
labels=labels,
masks=selected_masks,
keypoints=selected_keypoints,
min_object_covered=min_object_covered[index],
aspect_ratio_range=aspect_ratio_range[index],
area_range=area_range[index],
overlap_thresh=overlap_thresh[index],
random_coef=random_coef[index],
seed=seed)
result = _apply_with_random_selector_tuples(
tuple(
t for t in (image, boxes, labels, masks, keypoints) if t is not None),
random_crop_selector,
num_cases=len(min_object_covered))
return result
def ssd_random_crop_pad(image,
boxes,
labels,
min_object_covered=(0.1, 0.3, 0.5, 0.7, 0.9, 1.0),
aspect_ratio_range=((0.5, 2.0),) * 6,
area_range=((0.1, 1.0),) * 6,
overlap_thresh=(0.1, 0.3, 0.5, 0.7, 0.9, 1.0),
random_coef=(0.15,) * 6,
min_padded_size_ratio=(None,) * 6,
max_padded_size_ratio=(None,) * 6,
pad_color=(None,) * 6,
seed=None):
"""Random crop preprocessing with default parameters as in SSD paper.
Liu et al., SSD: Single shot multibox detector.
For further information on random crop preprocessing refer to RandomCrop
function above.
Args:
image: rank 3 float32 tensor containing 1 image -> [height, width, channels]
with pixel values varying between [0, 1].
boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
Boxes are in normalized form meaning their coordinates vary
between [0, 1].
Each row is in the form of [ymin, xmin, ymax, xmax].
labels: rank 1 int32 tensor containing the object classes.
min_object_covered: the cropped image must cover at least this fraction of
at least one of the input bounding boxes.
aspect_ratio_range: allowed range for aspect ratio of cropped image.
area_range: allowed range for area ratio between cropped image and the
original image.
overlap_thresh: minimum overlap thresh with new cropped
image to keep the box.
random_coef: a random coefficient that defines the chance of getting the
original image. If random_coef is 0, we will always get the
cropped image, and if it is 1.0, we will always get the
original image.
min_padded_size_ratio: min ratio of padded image height and width to the
input image's height and width. If None, it will
be set to [0.0, 0.0].
max_padded_size_ratio: max ratio of padded image height and width to the
input image's height and width. If None, it will
be set to [2.0, 2.0].
pad_color: padding color. A rank 1 tensor of [3] with dtype=tf.float32.
if set as None, it will be set to average color of the randomly
cropped image.
seed: random seed.
Returns:
image: Image shape will be [new_height, new_width, channels].
boxes: boxes which is the same rank as input boxes. Boxes are in normalized
form.
new_labels: new labels.
"""
def random_crop_pad_selector(image_boxes_labels, index):
image, boxes, labels = image_boxes_labels
return random_crop_pad_image(
image,
boxes,
labels,
min_object_covered=min_object_covered[index],
aspect_ratio_range=aspect_ratio_range[index],
area_range=area_range[index],
overlap_thresh=overlap_thresh[index],
random_coef=random_coef[index],
min_padded_size_ratio=min_padded_size_ratio[index],
max_padded_size_ratio=max_padded_size_ratio[index],
pad_color=pad_color[index],
seed=seed)
new_image, new_boxes, new_labels = _apply_with_random_selector_tuples(
(image, boxes, labels),
random_crop_pad_selector,
num_cases=len(min_object_covered))
return new_image, new_boxes, new_labels
def ssd_random_crop_fixed_aspect_ratio(
image,
boxes,
labels,
masks=None,
keypoints=None,
min_object_covered=(0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0),
aspect_ratio=1.0,
area_range=((0.1, 1.0),) * 7,
overlap_thresh=(0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0),
random_coef=(0.15,) * 7,
seed=None):
"""Random crop preprocessing with default parameters as in SSD paper.
Liu et al., SSD: Single shot multibox detector.
For further information on random crop preprocessing refer to RandomCrop
function above.
The only difference is that the aspect ratio of the crops are fixed.
Args:
image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
with pixel values varying between [0, 1].
boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
Boxes are in normalized form meaning their coordinates vary
between [0, 1].
Each row is in the form of [ymin, xmin, ymax, xmax].
labels: rank 1 int32 tensor containing the object classes.
masks: (optional) rank 3 float32 tensor with shape
[num_instances, height, width] containing instance masks. The masks
are of the same height, width as the input `image`.
keypoints: (optional) rank 3 float32 tensor with shape
[num_instances, num_keypoints, 2]. The keypoints are in y-x
normalized coordinates.
min_object_covered: the cropped image must cover at least this fraction of
at least one of the input bounding boxes.
aspect_ratio: aspect ratio of the cropped image.
area_range: allowed range for area ratio between cropped image and the
original image.
overlap_thresh: minimum overlap thresh with new cropped
image to keep the box.
random_coef: a random coefficient that defines the chance of getting the
original image. If random_coef is 0, we will always get the
cropped image, and if it is 1.0, we will always get the
original image.
seed: random seed.
Returns:
image: image which is the same rank as input image.
boxes: boxes which is the same rank as input boxes.
Boxes are in normalized form.
labels: new labels.
If masks, or keypoints is not None, the function also returns:
masks: rank 3 float32 tensor with shape [num_instances, height, width]
containing instance masks.
keypoints: rank 3 float32 tensor with shape
[num_instances, num_keypoints, 2]
"""
aspect_ratio_range = ((aspect_ratio, aspect_ratio),) * len(area_range)
crop_result = ssd_random_crop(image, boxes, labels, masks, keypoints,
min_object_covered, aspect_ratio_range,
area_range, overlap_thresh, random_coef, seed)
i = 3
new_image, new_boxes, new_labels = crop_result[:i]
new_masks = None
new_keypoints = None
if masks is not None:
new_masks = crop_result[i]
i += 1
if keypoints is not None:
new_keypoints = crop_result[i]
result = random_crop_to_aspect_ratio(
new_image,
new_boxes,
new_labels,
new_masks,
new_keypoints,
aspect_ratio=aspect_ratio,
seed=seed)
return result
def get_default_func_arg_map(include_instance_masks=False,
include_keypoints=False):
"""Returns the default mapping from a preprocessor function to its args.
Args:
include_instance_masks: If True, preprocessing functions will modify the
instance masks, too.
include_keypoints: If True, preprocessing functions will modify the
keypoints, too.
Returns:
A map from preprocessing functions to the arguments they receive.
"""
groundtruth_instance_masks = None
if include_instance_masks:
groundtruth_instance_masks = (
fields.InputDataFields.groundtruth_instance_masks)
groundtruth_keypoints = None
if include_keypoints:
groundtruth_keypoints = fields.InputDataFields.groundtruth_keypoints
prep_func_arg_map = {
normalize_image: (fields.InputDataFields.image,),
random_horizontal_flip: (fields.InputDataFields.image,
fields.InputDataFields.groundtruth_boxes,
groundtruth_instance_masks,
groundtruth_keypoints,),
random_pixel_value_scale: (fields.InputDataFields.image,),
random_image_scale: (fields.InputDataFields.image,
groundtruth_instance_masks,),
random_rgb_to_gray: (fields.InputDataFields.image,),
random_adjust_brightness: (fields.InputDataFields.image,),
random_adjust_contrast: (fields.InputDataFields.image,),
random_adjust_hue: (fields.InputDataFields.image,),
random_adjust_saturation: (fields.InputDataFields.image,),
random_distort_color: (fields.InputDataFields.image,),
random_jitter_boxes: (fields.InputDataFields.groundtruth_boxes,),
random_crop_image: (fields.InputDataFields.image,
fields.InputDataFields.groundtruth_boxes,
fields.InputDataFields.groundtruth_classes,
groundtruth_instance_masks,
groundtruth_keypoints,),
random_pad_image: (fields.InputDataFields.image,
fields.InputDataFields.groundtruth_boxes),
random_crop_pad_image: (fields.InputDataFields.image,
fields.InputDataFields.groundtruth_boxes,
fields.InputDataFields.groundtruth_classes),
random_crop_to_aspect_ratio: (fields.InputDataFields.image,
fields.InputDataFields.groundtruth_boxes,
fields.InputDataFields.groundtruth_classes,
groundtruth_instance_masks,
groundtruth_keypoints,),
random_black_patches: (fields.InputDataFields.image,),
retain_boxes_above_threshold: (
fields.InputDataFields.groundtruth_boxes,
fields.InputDataFields.groundtruth_classes,
fields.InputDataFields.groundtruth_label_scores,
groundtruth_instance_masks,
groundtruth_keypoints,),
image_to_float: (fields.InputDataFields.image,),
random_resize_method: (fields.InputDataFields.image,),
resize_to_range: (fields.InputDataFields.image,
groundtruth_instance_masks,),
scale_boxes_to_pixel_coordinates: (
fields.InputDataFields.image,
fields.InputDataFields.groundtruth_boxes,
groundtruth_keypoints,),
flip_boxes: (fields.InputDataFields.groundtruth_boxes,),
resize_image: (fields.InputDataFields.image,
groundtruth_instance_masks,),
subtract_channel_mean: (fields.InputDataFields.image,),
one_hot_encoding: (fields.InputDataFields.groundtruth_image_classes,),
rgb_to_gray: (fields.InputDataFields.image,),
ssd_random_crop: (fields.InputDataFields.image,
fields.InputDataFields.groundtruth_boxes,
fields.InputDataFields.groundtruth_classes,
groundtruth_instance_masks,
groundtruth_keypoints,),
ssd_random_crop_pad: (fields.InputDataFields.image,
fields.InputDataFields.groundtruth_boxes,
fields.InputDataFields.groundtruth_classes),
ssd_random_crop_fixed_aspect_ratio: (
fields.InputDataFields.image,
fields.InputDataFields.groundtruth_boxes,
fields.InputDataFields.groundtruth_classes,
groundtruth_instance_masks,
groundtruth_keypoints,),
}
return prep_func_arg_map
def preprocess(tensor_dict, preprocess_options, func_arg_map=None):
"""Preprocess images and bounding boxes.
Various types of preprocessing (to be implemented) based on the
preprocess_options dictionary e.g. "crop image" (affects image and possibly
boxes), "white balance image" (affects only image), etc. If self._options
is None, no preprocessing is done.
Args:
tensor_dict: dictionary that contains images, boxes, and can contain other
things as well.
images-> rank 4 float32 tensor contains
1 image -> [1, height, width, 3].
with pixel values varying between [0, 1]
boxes-> rank 2 float32 tensor containing
the bounding boxes -> [N, 4].
Boxes are in normalized form meaning
their coordinates vary between [0, 1].
Each row is in the form
of [ymin, xmin, ymax, xmax].
preprocess_options: It is a list of tuples, where each tuple contains a
function and a dictionary that contains arguments and
their values.
func_arg_map: mapping from preprocessing functions to arguments that they
expect to receive and return.
Returns:
tensor_dict: which contains the preprocessed images, bounding boxes, etc.
Raises:
ValueError: (a) If the functions passed to Preprocess
are not in func_arg_map.
(b) If the arguments that a function needs
do not exist in tensor_dict.
(c) If image in tensor_dict is not rank 4
"""
if func_arg_map is None:
func_arg_map = get_default_func_arg_map()
# changes the images to image (rank 4 to rank 3) since the functions
# receive rank 3 tensor for image
if fields.InputDataFields.image in tensor_dict:
images = tensor_dict[fields.InputDataFields.image]
if len(images.get_shape()) != 4:
raise ValueError('images in tensor_dict should be rank 4')
image = tf.squeeze(images, squeeze_dims=[0])
tensor_dict[fields.InputDataFields.image] = image
# Preprocess inputs based on preprocess_options
for option in preprocess_options:
func, params = option
if func not in func_arg_map:
raise ValueError('The function %s does not exist in func_arg_map' %
(func.__name__))
arg_names = func_arg_map[func]
for a in arg_names:
if a is not None and a not in tensor_dict:
raise ValueError('The function %s requires argument %s' %
(func.__name__, a))
def get_arg(key):
return tensor_dict[key] if key is not None else None
args = [get_arg(a) for a in arg_names]
results = func(*args, **params)
if not isinstance(results, (list, tuple)):
results = (results,)
# Removes None args since the return values will not contain those.
arg_names = [arg_name for arg_name in arg_names if arg_name is not None]
for res, arg_name in zip(results, arg_names):
tensor_dict[arg_name] = res
# changes the image to images (rank 3 to rank 4) to be compatible to what
# we received in the first place
if fields.InputDataFields.image in tensor_dict:
image = tensor_dict[fields.InputDataFields.image]
images = tf.expand_dims(image, 0)
tensor_dict[fields.InputDataFields.image] = images
return tensor_dict
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.core.preprocessor."""
import numpy as np
import six
import tensorflow as tf
from object_detection.core import preprocessor
from object_detection.core import standard_fields as fields
if six.PY2:
import mock # pylint: disable=g-import-not-at-top
else:
from unittest import mock # pylint: disable=g-import-not-at-top
class PreprocessorTest(tf.test.TestCase):
def createColorfulTestImage(self):
ch255 = tf.fill([1, 100, 200, 1], tf.constant(255, dtype=tf.uint8))
ch128 = tf.fill([1, 100, 200, 1], tf.constant(128, dtype=tf.uint8))
ch0 = tf.fill([1, 100, 200, 1], tf.constant(0, dtype=tf.uint8))
imr = tf.concat([ch255, ch0, ch0], 3)
img = tf.concat([ch255, ch255, ch0], 3)
imb = tf.concat([ch255, ch0, ch255], 3)
imw = tf.concat([ch128, ch128, ch128], 3)
imu = tf.concat([imr, img], 2)
imd = tf.concat([imb, imw], 2)
im = tf.concat([imu, imd], 1)
return im
def createTestImages(self):
images_r = tf.constant([[[128, 128, 128, 128], [0, 0, 128, 128],
[0, 128, 128, 128], [192, 192, 128, 128]]],
dtype=tf.uint8)
images_r = tf.expand_dims(images_r, 3)
images_g = tf.constant([[[0, 0, 128, 128], [0, 0, 128, 128],
[0, 128, 192, 192], [192, 192, 128, 192]]],
dtype=tf.uint8)
images_g = tf.expand_dims(images_g, 3)
images_b = tf.constant([[[128, 128, 192, 0], [0, 0, 128, 192],
[0, 128, 128, 0], [192, 192, 192, 128]]],
dtype=tf.uint8)
images_b = tf.expand_dims(images_b, 3)
images = tf.concat([images_r, images_g, images_b], 3)
return images
def createTestBoxes(self):
boxes = tf.constant(
[[0.0, 0.25, 0.75, 1.0], [0.25, 0.5, 0.75, 1.0]], dtype=tf.float32)
return boxes
def createTestLabelScores(self):
return tf.constant([1.0, 0.5], dtype=tf.float32)
def createTestLabelScoresWithMissingScore(self):
return tf.constant([0.5, np.nan], dtype=tf.float32)
def createTestMasks(self):
mask = np.array([
[[255.0, 0.0, 0.0],
[255.0, 0.0, 0.0],
[255.0, 0.0, 0.0]],
[[255.0, 255.0, 0.0],
[255.0, 255.0, 0.0],
[255.0, 255.0, 0.0]]])
return tf.constant(mask, dtype=tf.float32)
def createTestKeypoints(self):
keypoints = np.array([
[[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]],
[[0.4, 0.4], [0.5, 0.5], [0.6, 0.6]],
])
return tf.constant(keypoints, dtype=tf.float32)
def createTestKeypointsInsideCrop(self):
keypoints = np.array([
[[0.4, 0.4], [0.5, 0.5], [0.6, 0.6]],
[[0.4, 0.4], [0.5, 0.5], [0.6, 0.6]],
])
return tf.constant(keypoints, dtype=tf.float32)
def createTestKeypointsOutsideCrop(self):
keypoints = np.array([
[[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]],
[[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]],
])
return tf.constant(keypoints, dtype=tf.float32)
def createKeypointFlipPermutation(self):
return np.array([0, 2, 1], dtype=np.int32)
def createTestLabels(self):
labels = tf.constant([1, 2], dtype=tf.int32)
return labels
def createTestBoxesOutOfImage(self):
boxes = tf.constant(
[[-0.1, 0.25, 0.75, 1], [0.25, 0.5, 0.75, 1.1]], dtype=tf.float32)
return boxes
def expectedImagesAfterNormalization(self):
images_r = tf.constant([[[0, 0, 0, 0], [-1, -1, 0, 0],
[-1, 0, 0, 0], [0.5, 0.5, 0, 0]]],
dtype=tf.float32)
images_r = tf.expand_dims(images_r, 3)
images_g = tf.constant([[[-1, -1, 0, 0], [-1, -1, 0, 0],
[-1, 0, 0.5, 0.5], [0.5, 0.5, 0, 0.5]]],
dtype=tf.float32)
images_g = tf.expand_dims(images_g, 3)
images_b = tf.constant([[[0, 0, 0.5, -1], [-1, -1, 0, 0.5],
[-1, 0, 0, -1], [0.5, 0.5, 0.5, 0]]],
dtype=tf.float32)
images_b = tf.expand_dims(images_b, 3)
images = tf.concat([images_r, images_g, images_b], 3)
return images
def expectedMaxImageAfterColorScale(self):
images_r = tf.constant([[[0.1, 0.1, 0.1, 0.1], [-0.9, -0.9, 0.1, 0.1],
[-0.9, 0.1, 0.1, 0.1], [0.6, 0.6, 0.1, 0.1]]],
dtype=tf.float32)
images_r = tf.expand_dims(images_r, 3)
images_g = tf.constant([[[-0.9, -0.9, 0.1, 0.1], [-0.9, -0.9, 0.1, 0.1],
[-0.9, 0.1, 0.6, 0.6], [0.6, 0.6, 0.1, 0.6]]],
dtype=tf.float32)
images_g = tf.expand_dims(images_g, 3)
images_b = tf.constant([[[0.1, 0.1, 0.6, -0.9], [-0.9, -0.9, 0.1, 0.6],
[-0.9, 0.1, 0.1, -0.9], [0.6, 0.6, 0.6, 0.1]]],
dtype=tf.float32)
images_b = tf.expand_dims(images_b, 3)
images = tf.concat([images_r, images_g, images_b], 3)
return images
def expectedMinImageAfterColorScale(self):
images_r = tf.constant([[[-0.1, -0.1, -0.1, -0.1], [-1, -1, -0.1, -0.1],
[-1, -0.1, -0.1, -0.1], [0.4, 0.4, -0.1, -0.1]]],
dtype=tf.float32)
images_r = tf.expand_dims(images_r, 3)
images_g = tf.constant([[[-1, -1, -0.1, -0.1], [-1, -1, -0.1, -0.1],
[-1, -0.1, 0.4, 0.4], [0.4, 0.4, -0.1, 0.4]]],
dtype=tf.float32)
images_g = tf.expand_dims(images_g, 3)
images_b = tf.constant([[[-0.1, -0.1, 0.4, -1], [-1, -1, -0.1, 0.4],
[-1, -0.1, -0.1, -1], [0.4, 0.4, 0.4, -0.1]]],
dtype=tf.float32)
images_b = tf.expand_dims(images_b, 3)
images = tf.concat([images_r, images_g, images_b], 3)
return images
def expectedImagesAfterMirroring(self):
images_r = tf.constant([[[0, 0, 0, 0], [0, 0, -1, -1],
[0, 0, 0, -1], [0, 0, 0.5, 0.5]]],
dtype=tf.float32)
images_r = tf.expand_dims(images_r, 3)
images_g = tf.constant([[[0, 0, -1, -1], [0, 0, -1, -1],
[0.5, 0.5, 0, -1], [0.5, 0, 0.5, 0.5]]],
dtype=tf.float32)
images_g = tf.expand_dims(images_g, 3)
images_b = tf.constant([[[-1, 0.5, 0, 0], [0.5, 0, -1, -1],
[-1, 0, 0, -1], [0, 0.5, 0.5, 0.5]]],
dtype=tf.float32)
images_b = tf.expand_dims(images_b, 3)
images = tf.concat([images_r, images_g, images_b], 3)
return images
def expectedBoxesAfterMirroring(self):
boxes = tf.constant([[0.0, 0.0, 0.75, 0.75], [0.25, 0.0, 0.75, 0.5]],
dtype=tf.float32)
return boxes
def expectedBoxesAfterXY(self):
boxes = tf.constant([[0.25, 0.0, 1.0, 0.75], [0.5, 0.25, 1, 0.75]],
dtype=tf.float32)
return boxes
def expectedMasksAfterMirroring(self):
mask = np.array([
[[0.0, 0.0, 255.0],
[0.0, 0.0, 255.0],
[0.0, 0.0, 255.0]],
[[0.0, 255.0, 255.0],
[0.0, 255.0, 255.0],
[0.0, 255.0, 255.0]]])
return tf.constant(mask, dtype=tf.float32)
def expectedLabelScoresAfterThresholding(self):
return tf.constant([1.0], dtype=tf.float32)
def expectedBoxesAfterThresholding(self):
return tf.constant([[0.0, 0.25, 0.75, 1.0]], dtype=tf.float32)
def expectedLabelsAfterThresholding(self):
return tf.constant([1], dtype=tf.float32)
def expectedMasksAfterThresholding(self):
mask = np.array([
[[255.0, 0.0, 0.0],
[255.0, 0.0, 0.0],
[255.0, 0.0, 0.0]]])
return tf.constant(mask, dtype=tf.float32)
def expectedKeypointsAfterThresholding(self):
keypoints = np.array([
[[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]]
])
return tf.constant(keypoints, dtype=tf.float32)
def expectedLabelScoresAfterThresholdingWithMissingScore(self):
return tf.constant([np.nan], dtype=tf.float32)
def expectedBoxesAfterThresholdingWithMissingScore(self):
return tf.constant([[0.25, 0.5, 0.75, 1]], dtype=tf.float32)
def expectedLabelsAfterThresholdingWithMissingScore(self):
return tf.constant([2], dtype=tf.float32)
def testNormalizeImage(self):
preprocess_options = [(preprocessor.normalize_image, {
'original_minval': 0,
'original_maxval': 256,
'target_minval': -1,
'target_maxval': 1
})]
images = self.createTestImages()
tensor_dict = {fields.InputDataFields.image: images}
tensor_dict = preprocessor.preprocess(tensor_dict, preprocess_options)
images = tensor_dict[fields.InputDataFields.image]
images_expected = self.expectedImagesAfterNormalization()
with self.test_session() as sess:
(images_, images_expected_) = sess.run(
[images, images_expected])
images_shape_ = images_.shape
images_expected_shape_ = images_expected_.shape
expected_shape = [1, 4, 4, 3]
self.assertAllEqual(images_expected_shape_, images_shape_)
self.assertAllEqual(images_shape_, expected_shape)
self.assertAllClose(images_, images_expected_)
def testRetainBoxesAboveThreshold(self):
boxes = self.createTestBoxes()
labels = self.createTestLabels()
label_scores = self.createTestLabelScores()
(retained_boxes, retained_labels,
retained_label_scores) = preprocessor.retain_boxes_above_threshold(
boxes, labels, label_scores, threshold=0.6)
with self.test_session() as sess:
(retained_boxes_, retained_labels_, retained_label_scores_,
expected_retained_boxes_, expected_retained_labels_,
expected_retained_label_scores_) = sess.run([
retained_boxes, retained_labels, retained_label_scores,
self.expectedBoxesAfterThresholding(),
self.expectedLabelsAfterThresholding(),
self.expectedLabelScoresAfterThresholding()])
self.assertAllClose(
retained_boxes_, expected_retained_boxes_)
self.assertAllClose(
retained_labels_, expected_retained_labels_)
self.assertAllClose(
retained_label_scores_, expected_retained_label_scores_)
def testRetainBoxesAboveThresholdWithMasks(self):
boxes = self.createTestBoxes()
labels = self.createTestLabels()
label_scores = self.createTestLabelScores()
masks = self.createTestMasks()
_, _, _, retained_masks = preprocessor.retain_boxes_above_threshold(
boxes, labels, label_scores, masks, threshold=0.6)
with self.test_session() as sess:
retained_masks_, expected_retained_masks_ = sess.run([
retained_masks, self.expectedMasksAfterThresholding()])
self.assertAllClose(
retained_masks_, expected_retained_masks_)
def testRetainBoxesAboveThresholdWithKeypoints(self):
boxes = self.createTestBoxes()
labels = self.createTestLabels()
label_scores = self.createTestLabelScores()
keypoints = self.createTestKeypoints()
(_, _, _, retained_keypoints) = preprocessor.retain_boxes_above_threshold(
boxes, labels, label_scores, keypoints=keypoints, threshold=0.6)
with self.test_session() as sess:
(retained_keypoints_,
expected_retained_keypoints_) = sess.run([
retained_keypoints,
self.expectedKeypointsAfterThresholding()])
self.assertAllClose(
retained_keypoints_, expected_retained_keypoints_)
def testRetainBoxesAboveThresholdWithMissingScore(self):
boxes = self.createTestBoxes()
labels = self.createTestLabels()
label_scores = self.createTestLabelScoresWithMissingScore()
(retained_boxes, retained_labels,
retained_label_scores) = preprocessor.retain_boxes_above_threshold(
boxes, labels, label_scores, threshold=0.6)
with self.test_session() as sess:
(retained_boxes_, retained_labels_, retained_label_scores_,
expected_retained_boxes_, expected_retained_labels_,
expected_retained_label_scores_) = sess.run([
retained_boxes, retained_labels, retained_label_scores,
self.expectedBoxesAfterThresholdingWithMissingScore(),
self.expectedLabelsAfterThresholdingWithMissingScore(),
self.expectedLabelScoresAfterThresholdingWithMissingScore()])
self.assertAllClose(
retained_boxes_, expected_retained_boxes_)
self.assertAllClose(
retained_labels_, expected_retained_labels_)
self.assertAllClose(
retained_label_scores_, expected_retained_label_scores_)
def testRandomFlipBoxes(self):
boxes = self.createTestBoxes()
# Case where the boxes are flipped.
boxes_expected1 = self.expectedBoxesAfterMirroring()
# Case where the boxes are not flipped.
boxes_expected2 = boxes
# After elementwise multiplication, the result should be all-zero since one
# of them is all-zero.
boxes_diff = tf.multiply(
tf.squared_difference(boxes, boxes_expected1),
tf.squared_difference(boxes, boxes_expected2))
expected_result = tf.zeros_like(boxes_diff)
with self.test_session() as sess:
(boxes_diff, expected_result) = sess.run([boxes_diff, expected_result])
self.assertAllEqual(boxes_diff, expected_result)
def testFlipMasks(self):
test_mask = self.createTestMasks()
flipped_mask = preprocessor._flip_masks(test_mask)
expected_mask = self.expectedMasksAfterMirroring()
with self.test_session() as sess:
flipped_mask, expected_mask = sess.run([flipped_mask, expected_mask])
self.assertAllEqual(flipped_mask.flatten(), expected_mask.flatten())
def testRandomHorizontalFlip(self):
preprocess_options = [(preprocessor.random_horizontal_flip, {})]
images = self.expectedImagesAfterNormalization()
boxes = self.createTestBoxes()
tensor_dict = {fields.InputDataFields.image: images,
fields.InputDataFields.groundtruth_boxes: boxes}
images_expected1 = self.expectedImagesAfterMirroring()
boxes_expected1 = self.expectedBoxesAfterMirroring()
images_expected2 = images
boxes_expected2 = boxes
tensor_dict = preprocessor.preprocess(tensor_dict, preprocess_options)
images = tensor_dict[fields.InputDataFields.image]
boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes]
boxes_diff1 = tf.squared_difference(boxes, boxes_expected1)
boxes_diff2 = tf.squared_difference(boxes, boxes_expected2)
boxes_diff = tf.multiply(boxes_diff1, boxes_diff2)
boxes_diff_expected = tf.zeros_like(boxes_diff)
images_diff1 = tf.squared_difference(images, images_expected1)
images_diff2 = tf.squared_difference(images, images_expected2)
images_diff = tf.multiply(images_diff1, images_diff2)
images_diff_expected = tf.zeros_like(images_diff)
with self.test_session() as sess:
(images_diff_, images_diff_expected_, boxes_diff_,
boxes_diff_expected_) = sess.run([images_diff, images_diff_expected,
boxes_diff, boxes_diff_expected])
self.assertAllClose(boxes_diff_, boxes_diff_expected_)
self.assertAllClose(images_diff_, images_diff_expected_)
def testRunRandomHorizontalFlipWithMaskAndKeypoints(self):
preprocess_options = [(preprocessor.random_horizontal_flip, {})]
image_height = 3
image_width = 3
images = tf.random_uniform([1, image_height, image_width, 3])
boxes = self.createTestBoxes()
masks = self.createTestMasks()
keypoints = self.createTestKeypoints()
keypoint_flip_permutation = self.createKeypointFlipPermutation()
tensor_dict = {
fields.InputDataFields.image: images,
fields.InputDataFields.groundtruth_boxes: boxes,
fields.InputDataFields.groundtruth_instance_masks: masks,
fields.InputDataFields.groundtruth_keypoints: keypoints
}
preprocess_options = [
(preprocessor.random_horizontal_flip,
{'keypoint_flip_permutation': keypoint_flip_permutation})]
preprocessor_arg_map = preprocessor.get_default_func_arg_map(
include_instance_masks=True, include_keypoints=True)
tensor_dict = preprocessor.preprocess(
tensor_dict, preprocess_options, func_arg_map=preprocessor_arg_map)
boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes]
masks = tensor_dict[fields.InputDataFields.groundtruth_instance_masks]
keypoints = tensor_dict[fields.InputDataFields.groundtruth_keypoints]
with self.test_session() as sess:
boxes, masks, keypoints = sess.run([boxes, masks, keypoints])
self.assertTrue(boxes is not None)
self.assertTrue(masks is not None)
self.assertTrue(keypoints is not None)
def testRandomPixelValueScale(self):
preprocessing_options = []
preprocessing_options.append((preprocessor.normalize_image, {
'original_minval': 0,
'original_maxval': 255,
'target_minval': 0,
'target_maxval': 1
}))
preprocessing_options.append((preprocessor.random_pixel_value_scale, {}))
images = self.createTestImages()
tensor_dict = {fields.InputDataFields.image: images}
tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
images_min = tf.to_float(images) * 0.9 / 255.0
images_max = tf.to_float(images) * 1.1 / 255.0
images = tensor_dict[fields.InputDataFields.image]
values_greater = tf.greater_equal(images, images_min)
values_less = tf.less_equal(images, images_max)
values_true = tf.fill([1, 4, 4, 3], True)
with self.test_session() as sess:
(values_greater_, values_less_, values_true_) = sess.run(
[values_greater, values_less, values_true])
self.assertAllClose(values_greater_, values_true_)
self.assertAllClose(values_less_, values_true_)
def testRandomImageScale(self):
preprocess_options = [(preprocessor.random_image_scale, {})]
images_original = self.createTestImages()
tensor_dict = {fields.InputDataFields.image: images_original}
tensor_dict = preprocessor.preprocess(tensor_dict, preprocess_options)
images_scaled = tensor_dict[fields.InputDataFields.image]
images_original_shape = tf.shape(images_original)
images_scaled_shape = tf.shape(images_scaled)
with self.test_session() as sess:
(images_original_shape_, images_scaled_shape_) = sess.run(
[images_original_shape, images_scaled_shape])
self.assertTrue(
images_original_shape_[1] * 0.5 <= images_scaled_shape_[1])
self.assertTrue(
images_original_shape_[1] * 2.0 >= images_scaled_shape_[1])
self.assertTrue(
images_original_shape_[2] * 0.5 <= images_scaled_shape_[2])
self.assertTrue(
images_original_shape_[2] * 2.0 >= images_scaled_shape_[2])
def testRandomRGBtoGray(self):
preprocess_options = [(preprocessor.random_rgb_to_gray, {})]
images_original = self.createTestImages()
tensor_dict = {fields.InputDataFields.image: images_original}
tensor_dict = preprocessor.preprocess(tensor_dict, preprocess_options)
images_gray = tensor_dict[fields.InputDataFields.image]
images_gray_r, images_gray_g, images_gray_b = tf.split(
value=images_gray, num_or_size_splits=3, axis=3)
images_r, images_g, images_b = tf.split(
value=images_original, num_or_size_splits=3, axis=3)
images_r_diff1 = tf.squared_difference(tf.to_float(images_r),
tf.to_float(images_gray_r))
images_r_diff2 = tf.squared_difference(tf.to_float(images_gray_r),
tf.to_float(images_gray_g))
images_r_diff = tf.multiply(images_r_diff1, images_r_diff2)
images_g_diff1 = tf.squared_difference(tf.to_float(images_g),
tf.to_float(images_gray_g))
images_g_diff2 = tf.squared_difference(tf.to_float(images_gray_g),
tf.to_float(images_gray_b))
images_g_diff = tf.multiply(images_g_diff1, images_g_diff2)
images_b_diff1 = tf.squared_difference(tf.to_float(images_b),
tf.to_float(images_gray_b))
images_b_diff2 = tf.squared_difference(tf.to_float(images_gray_b),
tf.to_float(images_gray_r))
images_b_diff = tf.multiply(images_b_diff1, images_b_diff2)
image_zero1 = tf.constant(0, dtype=tf.float32, shape=[1, 4, 4, 1])
with self.test_session() as sess:
(images_r_diff_, images_g_diff_, images_b_diff_, image_zero1_) = sess.run(
[images_r_diff, images_g_diff, images_b_diff, image_zero1])
self.assertAllClose(images_r_diff_, image_zero1_)
self.assertAllClose(images_g_diff_, image_zero1_)
self.assertAllClose(images_b_diff_, image_zero1_)
def testRandomAdjustBrightness(self):
preprocessing_options = []
preprocessing_options.append((preprocessor.normalize_image, {
'original_minval': 0,
'original_maxval': 255,
'target_minval': 0,
'target_maxval': 1
}))
preprocessing_options.append((preprocessor.random_adjust_brightness, {}))
images_original = self.createTestImages()
tensor_dict = {fields.InputDataFields.image: images_original}
tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
images_bright = tensor_dict[fields.InputDataFields.image]
image_original_shape = tf.shape(images_original)
image_bright_shape = tf.shape(images_bright)
with self.test_session() as sess:
(image_original_shape_, image_bright_shape_) = sess.run(
[image_original_shape, image_bright_shape])
self.assertAllEqual(image_original_shape_, image_bright_shape_)
def testRandomAdjustContrast(self):
preprocessing_options = []
preprocessing_options.append((preprocessor.normalize_image, {
'original_minval': 0,
'original_maxval': 255,
'target_minval': 0,
'target_maxval': 1
}))
preprocessing_options.append((preprocessor.random_adjust_contrast, {}))
images_original = self.createTestImages()
tensor_dict = {fields.InputDataFields.image: images_original}
tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
images_contrast = tensor_dict[fields.InputDataFields.image]
image_original_shape = tf.shape(images_original)
image_contrast_shape = tf.shape(images_contrast)
with self.test_session() as sess:
(image_original_shape_, image_contrast_shape_) = sess.run(
[image_original_shape, image_contrast_shape])
self.assertAllEqual(image_original_shape_, image_contrast_shape_)
def testRandomAdjustHue(self):
preprocessing_options = []
preprocessing_options.append((preprocessor.normalize_image, {
'original_minval': 0,
'original_maxval': 255,
'target_minval': 0,
'target_maxval': 1
}))
preprocessing_options.append((preprocessor.random_adjust_hue, {}))
images_original = self.createTestImages()
tensor_dict = {fields.InputDataFields.image: images_original}
tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
images_hue = tensor_dict[fields.InputDataFields.image]
image_original_shape = tf.shape(images_original)
image_hue_shape = tf.shape(images_hue)
with self.test_session() as sess:
(image_original_shape_, image_hue_shape_) = sess.run(
[image_original_shape, image_hue_shape])
self.assertAllEqual(image_original_shape_, image_hue_shape_)
def testRandomDistortColor(self):
preprocessing_options = []
preprocessing_options.append((preprocessor.normalize_image, {
'original_minval': 0,
'original_maxval': 255,
'target_minval': 0,
'target_maxval': 1
}))
preprocessing_options.append((preprocessor.random_distort_color, {}))
images_original = self.createTestImages()
images_original_shape = tf.shape(images_original)
tensor_dict = {fields.InputDataFields.image: images_original}
tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
images_distorted_color = tensor_dict[fields.InputDataFields.image]
images_distorted_color_shape = tf.shape(images_distorted_color)
with self.test_session() as sess:
(images_original_shape_, images_distorted_color_shape_) = sess.run(
[images_original_shape, images_distorted_color_shape])
self.assertAllEqual(images_original_shape_, images_distorted_color_shape_)
def testRandomJitterBoxes(self):
preprocessing_options = []
preprocessing_options.append((preprocessor.random_jitter_boxes, {}))
boxes = self.createTestBoxes()
boxes_shape = tf.shape(boxes)
tensor_dict = {fields.InputDataFields.groundtruth_boxes: boxes}
tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
distorted_boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes]
distorted_boxes_shape = tf.shape(distorted_boxes)
with self.test_session() as sess:
(boxes_shape_, distorted_boxes_shape_) = sess.run(
[boxes_shape, distorted_boxes_shape])
self.assertAllEqual(boxes_shape_, distorted_boxes_shape_)
def testRandomCropImage(self):
preprocessing_options = []
preprocessing_options.append((preprocessor.normalize_image, {
'original_minval': 0,
'original_maxval': 255,
'target_minval': 0,
'target_maxval': 1
}))
preprocessing_options.append((preprocessor.random_crop_image, {}))
images = self.createTestImages()
boxes = self.createTestBoxes()
labels = self.createTestLabels()
tensor_dict = {fields.InputDataFields.image: images,
fields.InputDataFields.groundtruth_boxes: boxes,
fields.InputDataFields.groundtruth_classes: labels}
distorted_tensor_dict = preprocessor.preprocess(tensor_dict,
preprocessing_options)
distorted_images = distorted_tensor_dict[fields.InputDataFields.image]
distorted_boxes = distorted_tensor_dict[
fields.InputDataFields.groundtruth_boxes]
boxes_rank = tf.rank(boxes)
distorted_boxes_rank = tf.rank(distorted_boxes)
images_rank = tf.rank(images)
distorted_images_rank = tf.rank(distorted_images)
self.assertEqual(3, distorted_images.get_shape()[3])
with self.test_session() as sess:
(boxes_rank_, distorted_boxes_rank_, images_rank_,
distorted_images_rank_) = sess.run([
boxes_rank, distorted_boxes_rank, images_rank, distorted_images_rank
])
self.assertAllEqual(boxes_rank_, distorted_boxes_rank_)
self.assertAllEqual(images_rank_, distorted_images_rank_)
def testRandomCropImageGrayscale(self):
preprocessing_options = [(preprocessor.rgb_to_gray, {}),
(preprocessor.normalize_image, {
'original_minval': 0,
'original_maxval': 255,
'target_minval': 0,
'target_maxval': 1,
}),
(preprocessor.random_crop_image, {})]
images = self.createTestImages()
boxes = self.createTestBoxes()
labels = self.createTestLabels()
tensor_dict = {
fields.InputDataFields.image: images,
fields.InputDataFields.groundtruth_boxes: boxes,
fields.InputDataFields.groundtruth_classes: labels
}
distorted_tensor_dict = preprocessor.preprocess(
tensor_dict, preprocessing_options)
distorted_images = distorted_tensor_dict[fields.InputDataFields.image]
distorted_boxes = distorted_tensor_dict[
fields.InputDataFields.groundtruth_boxes]
boxes_rank = tf.rank(boxes)
distorted_boxes_rank = tf.rank(distorted_boxes)
images_rank = tf.rank(images)
distorted_images_rank = tf.rank(distorted_images)
self.assertEqual(1, distorted_images.get_shape()[3])
with self.test_session() as sess:
session_results = sess.run([
boxes_rank, distorted_boxes_rank, images_rank, distorted_images_rank
])
(boxes_rank_, distorted_boxes_rank_, images_rank_,
distorted_images_rank_) = session_results
self.assertAllEqual(boxes_rank_, distorted_boxes_rank_)
self.assertAllEqual(images_rank_, distorted_images_rank_)
def testRandomCropImageWithBoxOutOfImage(self):
preprocessing_options = []
preprocessing_options.append((preprocessor.normalize_image, {
'original_minval': 0,
'original_maxval': 255,
'target_minval': 0,
'target_maxval': 1
}))
preprocessing_options.append((preprocessor.random_crop_image, {}))
images = self.createTestImages()
boxes = self.createTestBoxesOutOfImage()
labels = self.createTestLabels()
tensor_dict = {fields.InputDataFields.image: images,
fields.InputDataFields.groundtruth_boxes: boxes,
fields.InputDataFields.groundtruth_classes: labels}
distorted_tensor_dict = preprocessor.preprocess(tensor_dict,
preprocessing_options)
distorted_images = distorted_tensor_dict[fields.InputDataFields.image]
distorted_boxes = distorted_tensor_dict[
fields.InputDataFields.groundtruth_boxes]
boxes_rank = tf.rank(boxes)
distorted_boxes_rank = tf.rank(distorted_boxes)
images_rank = tf.rank(images)
distorted_images_rank = tf.rank(distorted_images)
with self.test_session() as sess:
(boxes_rank_, distorted_boxes_rank_, images_rank_,
distorted_images_rank_) = sess.run(
[boxes_rank, distorted_boxes_rank, images_rank,
distorted_images_rank])
self.assertAllEqual(boxes_rank_, distorted_boxes_rank_)
self.assertAllEqual(images_rank_, distorted_images_rank_)
def testRandomCropImageWithRandomCoefOne(self):
preprocessing_options = [(preprocessor.normalize_image, {
'original_minval': 0,
'original_maxval': 255,
'target_minval': 0,
'target_maxval': 1
})]
images = self.createTestImages()
boxes = self.createTestBoxes()
labels = self.createTestLabels()
tensor_dict = {fields.InputDataFields.image: images,
fields.InputDataFields.groundtruth_boxes: boxes,
fields.InputDataFields.groundtruth_classes: labels}
tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
images = tensor_dict[fields.InputDataFields.image]
preprocessing_options = [(preprocessor.random_crop_image, {
'random_coef': 1.0
})]
distorted_tensor_dict = preprocessor.preprocess(tensor_dict,
preprocessing_options)
distorted_images = distorted_tensor_dict[fields.InputDataFields.image]
distorted_boxes = distorted_tensor_dict[
fields.InputDataFields.groundtruth_boxes]
distorted_labels = distorted_tensor_dict[
fields.InputDataFields.groundtruth_classes]
boxes_shape = tf.shape(boxes)
distorted_boxes_shape = tf.shape(distorted_boxes)
images_shape = tf.shape(images)
distorted_images_shape = tf.shape(distorted_images)
with self.test_session() as sess:
(boxes_shape_, distorted_boxes_shape_, images_shape_,
distorted_images_shape_, images_, distorted_images_,
boxes_, distorted_boxes_, labels_, distorted_labels_) = sess.run(
[boxes_shape, distorted_boxes_shape, images_shape,
distorted_images_shape, images, distorted_images,
boxes, distorted_boxes, labels, distorted_labels])
self.assertAllEqual(boxes_shape_, distorted_boxes_shape_)
self.assertAllEqual(images_shape_, distorted_images_shape_)
self.assertAllClose(images_, distorted_images_)
self.assertAllClose(boxes_, distorted_boxes_)
self.assertAllEqual(labels_, distorted_labels_)
def testRandomCropWithMockSampleDistortedBoundingBox(self):
preprocessing_options = [(preprocessor.normalize_image, {
'original_minval': 0,
'original_maxval': 255,
'target_minval': 0,
'target_maxval': 1
})]
images = self.createColorfulTestImage()
boxes = tf.constant([[0.1, 0.1, 0.8, 0.3],
[0.2, 0.4, 0.75, 0.75],
[0.3, 0.1, 0.4, 0.7]], dtype=tf.float32)
labels = tf.constant([1, 7, 11], dtype=tf.int32)
tensor_dict = {fields.InputDataFields.image: images,
fields.InputDataFields.groundtruth_boxes: boxes,
fields.InputDataFields.groundtruth_classes: labels}
tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
images = tensor_dict[fields.InputDataFields.image]
preprocessing_options = [(preprocessor.random_crop_image, {})]
with mock.patch.object(
tf.image,
'sample_distorted_bounding_box') as mock_sample_distorted_bounding_box:
mock_sample_distorted_bounding_box.return_value = (tf.constant(
[6, 143, 0], dtype=tf.int32), tf.constant(
[190, 237, -1], dtype=tf.int32), tf.constant(
[[[0.03, 0.3575, 0.98, 0.95]]], dtype=tf.float32))
distorted_tensor_dict = preprocessor.preprocess(tensor_dict,
preprocessing_options)
distorted_boxes = distorted_tensor_dict[
fields.InputDataFields.groundtruth_boxes]
distorted_labels = distorted_tensor_dict[
fields.InputDataFields.groundtruth_classes]
expected_boxes = tf.constant([[0.178947, 0.07173, 0.75789469, 0.66244733],
[0.28421, 0.0, 0.38947365, 0.57805908]],
dtype=tf.float32)
expected_labels = tf.constant([7, 11], dtype=tf.int32)
with self.test_session() as sess:
(distorted_boxes_, distorted_labels_,
expected_boxes_, expected_labels_) = sess.run(
[distorted_boxes, distorted_labels,
expected_boxes, expected_labels])
self.assertAllClose(distorted_boxes_, expected_boxes_)
self.assertAllEqual(distorted_labels_, expected_labels_)
def testStrictRandomCropImageWithMasks(self):
image = self.createColorfulTestImage()[0]
boxes = self.createTestBoxes()
labels = self.createTestLabels()
masks = tf.random_uniform([2, 200, 400], dtype=tf.float32)
with mock.patch.object(
tf.image,
'sample_distorted_bounding_box'
) as mock_sample_distorted_bounding_box:
mock_sample_distorted_bounding_box.return_value = (
tf.constant([6, 143, 0], dtype=tf.int32),
tf.constant([190, 237, -1], dtype=tf.int32),
tf.constant([[[0.03, 0.3575, 0.98, 0.95]]], dtype=tf.float32))
(new_image, new_boxes, new_labels,
new_masks) = preprocessor._strict_random_crop_image(
image, boxes, labels, masks=masks)
with self.test_session() as sess:
new_image, new_boxes, new_labels, new_masks = sess.run([
new_image, new_boxes, new_labels, new_masks])
expected_boxes = np.array([
[0.0, 0.0, 0.75789469, 1.0],
[0.23157893, 0.24050637, 0.75789469, 1.0],
], dtype=np.float32)
self.assertAllEqual(new_image.shape, [190, 237, 3])
self.assertAllEqual(new_masks.shape, [2, 190, 237])
self.assertAllClose(
new_boxes.flatten(), expected_boxes.flatten())
def testStrictRandomCropImageWithKeypoints(self):
image = self.createColorfulTestImage()[0]
boxes = self.createTestBoxes()
labels = self.createTestLabels()
keypoints = self.createTestKeypoints()
with mock.patch.object(
tf.image,
'sample_distorted_bounding_box'
) as mock_sample_distorted_bounding_box:
mock_sample_distorted_bounding_box.return_value = (
tf.constant([6, 143, 0], dtype=tf.int32),
tf.constant([190, 237, -1], dtype=tf.int32),
tf.constant([[[0.03, 0.3575, 0.98, 0.95]]], dtype=tf.float32))
(new_image, new_boxes, new_labels,
new_keypoints) = preprocessor._strict_random_crop_image(
image, boxes, labels, keypoints=keypoints)
with self.test_session() as sess:
new_image, new_boxes, new_labels, new_keypoints = sess.run([
new_image, new_boxes, new_labels, new_keypoints])
expected_boxes = np.array([
[0.0, 0.0, 0.75789469, 1.0],
[0.23157893, 0.24050637, 0.75789469, 1.0],
], dtype=np.float32)
expected_keypoints = np.array([
[[np.nan, np.nan],
[np.nan, np.nan],
[np.nan, np.nan]],
[[0.38947368, 0.07173],
[0.49473682, 0.24050637],
[0.60000002, 0.40928277]]
], dtype=np.float32)
self.assertAllEqual(new_image.shape, [190, 237, 3])
self.assertAllClose(
new_boxes.flatten(), expected_boxes.flatten())
self.assertAllClose(
new_keypoints.flatten(), expected_keypoints.flatten())
def testRunRandomCropImageWithMasks(self):
image = self.createColorfulTestImage()
boxes = self.createTestBoxes()
labels = self.createTestLabels()
masks = tf.random_uniform([2, 200, 400], dtype=tf.float32)
tensor_dict = {
fields.InputDataFields.image: image,
fields.InputDataFields.groundtruth_boxes: boxes,
fields.InputDataFields.groundtruth_classes: labels,
fields.InputDataFields.groundtruth_instance_masks: masks,
}
preprocessor_arg_map = preprocessor.get_default_func_arg_map(
include_instance_masks=True)
preprocessing_options = [(preprocessor.random_crop_image, {})]
with mock.patch.object(
tf.image,
'sample_distorted_bounding_box'
) as mock_sample_distorted_bounding_box:
mock_sample_distorted_bounding_box.return_value = (
tf.constant([6, 143, 0], dtype=tf.int32),
tf.constant([190, 237, -1], dtype=tf.int32),
tf.constant([[[0.03, 0.3575, 0.98, 0.95]]], dtype=tf.float32))
distorted_tensor_dict = preprocessor.preprocess(
tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map)
distorted_image = distorted_tensor_dict[fields.InputDataFields.image]
distorted_boxes = distorted_tensor_dict[
fields.InputDataFields.groundtruth_boxes]
distorted_labels = distorted_tensor_dict[
fields.InputDataFields.groundtruth_classes]
distorted_masks = distorted_tensor_dict[
fields.InputDataFields.groundtruth_instance_masks]
with self.test_session() as sess:
(distorted_image_, distorted_boxes_, distorted_labels_,
distorted_masks_) = sess.run(
[distorted_image, distorted_boxes, distorted_labels,
distorted_masks])
expected_boxes = np.array([
[0.0, 0.0, 0.75789469, 1.0],
[0.23157893, 0.24050637, 0.75789469, 1.0],
], dtype=np.float32)
self.assertAllEqual(distorted_image_.shape, [1, 190, 237, 3])
self.assertAllEqual(distorted_masks_.shape, [2, 190, 237])
self.assertAllEqual(distorted_labels_, [1, 2])
self.assertAllClose(
distorted_boxes_.flatten(), expected_boxes.flatten())
def testRunRandomCropImageWithKeypointsInsideCrop(self):
image = self.createColorfulTestImage()
boxes = self.createTestBoxes()
labels = self.createTestLabels()
keypoints = self.createTestKeypointsInsideCrop()
tensor_dict = {
fields.InputDataFields.image: image,
fields.InputDataFields.groundtruth_boxes: boxes,
fields.InputDataFields.groundtruth_classes: labels,
fields.InputDataFields.groundtruth_keypoints: keypoints
}
preprocessor_arg_map = preprocessor.get_default_func_arg_map(
include_keypoints=True)
preprocessing_options = [(preprocessor.random_crop_image, {})]
with mock.patch.object(
tf.image,
'sample_distorted_bounding_box'
) as mock_sample_distorted_bounding_box:
mock_sample_distorted_bounding_box.return_value = (
tf.constant([6, 143, 0], dtype=tf.int32),
tf.constant([190, 237, -1], dtype=tf.int32),
tf.constant([[[0.03, 0.3575, 0.98, 0.95]]], dtype=tf.float32))
distorted_tensor_dict = preprocessor.preprocess(
tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map)
distorted_image = distorted_tensor_dict[fields.InputDataFields.image]
distorted_boxes = distorted_tensor_dict[
fields.InputDataFields.groundtruth_boxes]
distorted_labels = distorted_tensor_dict[
fields.InputDataFields.groundtruth_classes]
distorted_keypoints = distorted_tensor_dict[
fields.InputDataFields.groundtruth_keypoints]
with self.test_session() as sess:
(distorted_image_, distorted_boxes_, distorted_labels_,
distorted_keypoints_) = sess.run(
[distorted_image, distorted_boxes, distorted_labels,
distorted_keypoints])
expected_boxes = np.array([
[0.0, 0.0, 0.75789469, 1.0],
[0.23157893, 0.24050637, 0.75789469, 1.0],
], dtype=np.float32)
expected_keypoints = np.array([
[[0.38947368, 0.07173],
[0.49473682, 0.24050637],
[0.60000002, 0.40928277]],
[[0.38947368, 0.07173],
[0.49473682, 0.24050637],
[0.60000002, 0.40928277]]
])
self.assertAllEqual(distorted_image_.shape, [1, 190, 237, 3])
self.assertAllEqual(distorted_labels_, [1, 2])
self.assertAllClose(
distorted_boxes_.flatten(), expected_boxes.flatten())
self.assertAllClose(
distorted_keypoints_.flatten(), expected_keypoints.flatten())
def testRunRandomCropImageWithKeypointsOutsideCrop(self):
image = self.createColorfulTestImage()
boxes = self.createTestBoxes()
labels = self.createTestLabels()
keypoints = self.createTestKeypointsOutsideCrop()
tensor_dict = {
fields.InputDataFields.image: image,
fields.InputDataFields.groundtruth_boxes: boxes,
fields.InputDataFields.groundtruth_classes: labels,
fields.InputDataFields.groundtruth_keypoints: keypoints
}
preprocessor_arg_map = preprocessor.get_default_func_arg_map(
include_keypoints=True)
preprocessing_options = [(preprocessor.random_crop_image, {})]
with mock.patch.object(
tf.image,
'sample_distorted_bounding_box'
) as mock_sample_distorted_bounding_box:
mock_sample_distorted_bounding_box.return_value = (
tf.constant([6, 143, 0], dtype=tf.int32),
tf.constant([190, 237, -1], dtype=tf.int32),
tf.constant([[[0.03, 0.3575, 0.98, 0.95]]], dtype=tf.float32))
distorted_tensor_dict = preprocessor.preprocess(
tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map)
distorted_image = distorted_tensor_dict[fields.InputDataFields.image]
distorted_boxes = distorted_tensor_dict[
fields.InputDataFields.groundtruth_boxes]
distorted_labels = distorted_tensor_dict[
fields.InputDataFields.groundtruth_classes]
distorted_keypoints = distorted_tensor_dict[
fields.InputDataFields.groundtruth_keypoints]
with self.test_session() as sess:
(distorted_image_, distorted_boxes_, distorted_labels_,
distorted_keypoints_) = sess.run(
[distorted_image, distorted_boxes, distorted_labels,
distorted_keypoints])
expected_boxes = np.array([
[0.0, 0.0, 0.75789469, 1.0],
[0.23157893, 0.24050637, 0.75789469, 1.0],
], dtype=np.float32)
expected_keypoints = np.array([
[[np.nan, np.nan],
[np.nan, np.nan],
[np.nan, np.nan]],
[[np.nan, np.nan],
[np.nan, np.nan],
[np.nan, np.nan]],
])
self.assertAllEqual(distorted_image_.shape, [1, 190, 237, 3])
self.assertAllEqual(distorted_labels_, [1, 2])
self.assertAllClose(
distorted_boxes_.flatten(), expected_boxes.flatten())
self.assertAllClose(
distorted_keypoints_.flatten(), expected_keypoints.flatten())
def testRunRetainBoxesAboveThreshold(self):
boxes = self.createTestBoxes()
labels = self.createTestLabels()
label_scores = self.createTestLabelScores()
tensor_dict = {
fields.InputDataFields.groundtruth_boxes: boxes,
fields.InputDataFields.groundtruth_classes: labels,
fields.InputDataFields.groundtruth_label_scores: label_scores
}
preprocessing_options = [
(preprocessor.retain_boxes_above_threshold, {'threshold': 0.6})
]
retained_tensor_dict = preprocessor.preprocess(
tensor_dict, preprocessing_options)
retained_boxes = retained_tensor_dict[
fields.InputDataFields.groundtruth_boxes]
retained_labels = retained_tensor_dict[
fields.InputDataFields.groundtruth_classes]
retained_label_scores = retained_tensor_dict[
fields.InputDataFields.groundtruth_label_scores]
with self.test_session() as sess:
(retained_boxes_, retained_labels_,
retained_label_scores_, expected_retained_boxes_,
expected_retained_labels_, expected_retained_label_scores_) = sess.run(
[retained_boxes, retained_labels, retained_label_scores,
self.expectedBoxesAfterThresholding(),
self.expectedLabelsAfterThresholding(),
self.expectedLabelScoresAfterThresholding()])
self.assertAllClose(retained_boxes_, expected_retained_boxes_)
self.assertAllClose(retained_labels_, expected_retained_labels_)
self.assertAllClose(
retained_label_scores_, expected_retained_label_scores_)
def testRunRetainBoxesAboveThresholdWithMasks(self):
boxes = self.createTestBoxes()
labels = self.createTestLabels()
label_scores = self.createTestLabelScores()
masks = self.createTestMasks()
tensor_dict = {
fields.InputDataFields.groundtruth_boxes: boxes,
fields.InputDataFields.groundtruth_classes: labels,
fields.InputDataFields.groundtruth_label_scores: label_scores,
fields.InputDataFields.groundtruth_instance_masks: masks
}
preprocessor_arg_map = preprocessor.get_default_func_arg_map(
include_instance_masks=True)
preprocessing_options = [
(preprocessor.retain_boxes_above_threshold, {'threshold': 0.6})
]
retained_tensor_dict = preprocessor.preprocess(
tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map)
retained_masks = retained_tensor_dict[
fields.InputDataFields.groundtruth_instance_masks]
with self.test_session() as sess:
(retained_masks_, expected_masks_) = sess.run(
[retained_masks,
self.expectedMasksAfterThresholding()])
self.assertAllClose(retained_masks_, expected_masks_)
def testRunRetainBoxesAboveThresholdWithKeypoints(self):
boxes = self.createTestBoxes()
labels = self.createTestLabels()
label_scores = self.createTestLabelScores()
keypoints = self.createTestKeypoints()
tensor_dict = {
fields.InputDataFields.groundtruth_boxes: boxes,
fields.InputDataFields.groundtruth_classes: labels,
fields.InputDataFields.groundtruth_label_scores: label_scores,
fields.InputDataFields.groundtruth_keypoints: keypoints
}
preprocessor_arg_map = preprocessor.get_default_func_arg_map(
include_keypoints=True)
preprocessing_options = [
(preprocessor.retain_boxes_above_threshold, {'threshold': 0.6})
]
retained_tensor_dict = preprocessor.preprocess(
tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map)
retained_keypoints = retained_tensor_dict[
fields.InputDataFields.groundtruth_keypoints]
with self.test_session() as sess:
(retained_keypoints_, expected_keypoints_) = sess.run(
[retained_keypoints,
self.expectedKeypointsAfterThresholding()])
self.assertAllClose(retained_keypoints_, expected_keypoints_)
def testRunRandomCropToAspectRatioWithMasks(self):
image = self.createColorfulTestImage()
boxes = self.createTestBoxes()
labels = self.createTestLabels()
masks = tf.random_uniform([2, 200, 400], dtype=tf.float32)
tensor_dict = {
fields.InputDataFields.image: image,
fields.InputDataFields.groundtruth_boxes: boxes,
fields.InputDataFields.groundtruth_classes: labels,
fields.InputDataFields.groundtruth_instance_masks: masks
}
preprocessor_arg_map = preprocessor.get_default_func_arg_map(
include_instance_masks=True)
preprocessing_options = [(preprocessor.random_crop_to_aspect_ratio, {})]
with mock.patch.object(preprocessor,
'_random_integer') as mock_random_integer:
mock_random_integer.return_value = tf.constant(0, dtype=tf.int32)
distorted_tensor_dict = preprocessor.preprocess(
tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map)
distorted_image = distorted_tensor_dict[fields.InputDataFields.image]
distorted_boxes = distorted_tensor_dict[
fields.InputDataFields.groundtruth_boxes]
distorted_labels = distorted_tensor_dict[
fields.InputDataFields.groundtruth_classes]
distorted_masks = distorted_tensor_dict[
fields.InputDataFields.groundtruth_instance_masks]
with self.test_session() as sess:
(distorted_image_, distorted_boxes_, distorted_labels_,
distorted_masks_) = sess.run([
distorted_image, distorted_boxes, distorted_labels, distorted_masks
])
expected_boxes = np.array([0.0, 0.5, 0.75, 1.0], dtype=np.float32)
self.assertAllEqual(distorted_image_.shape, [1, 200, 200, 3])
self.assertAllEqual(distorted_labels_, [1])
self.assertAllClose(distorted_boxes_.flatten(),
expected_boxes.flatten())
self.assertAllEqual(distorted_masks_.shape, [1, 200, 200])
def testRunRandomCropToAspectRatioWithKeypoints(self):
image = self.createColorfulTestImage()
boxes = self.createTestBoxes()
labels = self.createTestLabels()
keypoints = self.createTestKeypoints()
tensor_dict = {
fields.InputDataFields.image: image,
fields.InputDataFields.groundtruth_boxes: boxes,
fields.InputDataFields.groundtruth_classes: labels,
fields.InputDataFields.groundtruth_keypoints: keypoints
}
preprocessor_arg_map = preprocessor.get_default_func_arg_map(
include_keypoints=True)
preprocessing_options = [(preprocessor.random_crop_to_aspect_ratio, {})]
with mock.patch.object(preprocessor,
'_random_integer') as mock_random_integer:
mock_random_integer.return_value = tf.constant(0, dtype=tf.int32)
distorted_tensor_dict = preprocessor.preprocess(
tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map)
distorted_image = distorted_tensor_dict[fields.InputDataFields.image]
distorted_boxes = distorted_tensor_dict[
fields.InputDataFields.groundtruth_boxes]
distorted_labels = distorted_tensor_dict[
fields.InputDataFields.groundtruth_classes]
distorted_keypoints = distorted_tensor_dict[
fields.InputDataFields.groundtruth_keypoints]
with self.test_session() as sess:
(distorted_image_, distorted_boxes_, distorted_labels_,
distorted_keypoints_) = sess.run([
distorted_image, distorted_boxes, distorted_labels,
distorted_keypoints
])
expected_boxes = np.array([0.0, 0.5, 0.75, 1.0], dtype=np.float32)
expected_keypoints = np.array(
[[0.1, 0.2], [0.2, 0.4], [0.3, 0.6]], dtype=np.float32)
self.assertAllEqual(distorted_image_.shape, [1, 200, 200, 3])
self.assertAllEqual(distorted_labels_, [1])
self.assertAllClose(distorted_boxes_.flatten(),
expected_boxes.flatten())
self.assertAllClose(distorted_keypoints_.flatten(),
expected_keypoints.flatten())
def testRandomPadImage(self):
preprocessing_options = [(preprocessor.normalize_image, {
'original_minval': 0,
'original_maxval': 255,
'target_minval': 0,
'target_maxval': 1
})]
images = self.createTestImages()
boxes = self.createTestBoxes()
labels = self.createTestLabels()
tensor_dict = {fields.InputDataFields.image: images,
fields.InputDataFields.groundtruth_boxes: boxes,
fields.InputDataFields.groundtruth_classes: labels}
tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
images = tensor_dict[fields.InputDataFields.image]
preprocessing_options = [(preprocessor.random_pad_image, {})]
padded_tensor_dict = preprocessor.preprocess(tensor_dict,
preprocessing_options)
padded_images = padded_tensor_dict[fields.InputDataFields.image]
padded_boxes = padded_tensor_dict[
fields.InputDataFields.groundtruth_boxes]
boxes_shape = tf.shape(boxes)
padded_boxes_shape = tf.shape(padded_boxes)
images_shape = tf.shape(images)
padded_images_shape = tf.shape(padded_images)
with self.test_session() as sess:
(boxes_shape_, padded_boxes_shape_, images_shape_,
padded_images_shape_, boxes_, padded_boxes_) = sess.run(
[boxes_shape, padded_boxes_shape, images_shape,
padded_images_shape, boxes, padded_boxes])
self.assertAllEqual(boxes_shape_, padded_boxes_shape_)
self.assertTrue((images_shape_[1] >= padded_images_shape_[1] * 0.5).all)
self.assertTrue((images_shape_[2] >= padded_images_shape_[2] * 0.5).all)
self.assertTrue((images_shape_[1] <= padded_images_shape_[1]).all)
self.assertTrue((images_shape_[2] <= padded_images_shape_[2]).all)
self.assertTrue(np.all((boxes_[:, 2] - boxes_[:, 0]) >= (
padded_boxes_[:, 2] - padded_boxes_[:, 0])))
self.assertTrue(np.all((boxes_[:, 3] - boxes_[:, 1]) >= (
padded_boxes_[:, 3] - padded_boxes_[:, 1])))
def testRandomCropPadImageWithRandomCoefOne(self):
preprocessing_options = [(preprocessor.normalize_image, {
'original_minval': 0,
'original_maxval': 255,
'target_minval': 0,
'target_maxval': 1
})]
images = self.createTestImages()
boxes = self.createTestBoxes()
labels = self.createTestLabels()
tensor_dict = {fields.InputDataFields.image: images,
fields.InputDataFields.groundtruth_boxes: boxes,
fields.InputDataFields.groundtruth_classes: labels}
tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
images = tensor_dict[fields.InputDataFields.image]
preprocessing_options = [(preprocessor.random_crop_pad_image, {
'random_coef': 1.0
})]
padded_tensor_dict = preprocessor.preprocess(tensor_dict,
preprocessing_options)
padded_images = padded_tensor_dict[fields.InputDataFields.image]
padded_boxes = padded_tensor_dict[
fields.InputDataFields.groundtruth_boxes]
boxes_shape = tf.shape(boxes)
padded_boxes_shape = tf.shape(padded_boxes)
images_shape = tf.shape(images)
padded_images_shape = tf.shape(padded_images)
with self.test_session() as sess:
(boxes_shape_, padded_boxes_shape_, images_shape_,
padded_images_shape_, boxes_, padded_boxes_) = sess.run(
[boxes_shape, padded_boxes_shape, images_shape,
padded_images_shape, boxes, padded_boxes])
self.assertAllEqual(boxes_shape_, padded_boxes_shape_)
self.assertTrue((images_shape_[1] >= padded_images_shape_[1] * 0.5).all)
self.assertTrue((images_shape_[2] >= padded_images_shape_[2] * 0.5).all)
self.assertTrue((images_shape_[1] <= padded_images_shape_[1]).all)
self.assertTrue((images_shape_[2] <= padded_images_shape_[2]).all)
self.assertTrue(np.all((boxes_[:, 2] - boxes_[:, 0]) >= (
padded_boxes_[:, 2] - padded_boxes_[:, 0])))
self.assertTrue(np.all((boxes_[:, 3] - boxes_[:, 1]) >= (
padded_boxes_[:, 3] - padded_boxes_[:, 1])))
def testRandomCropToAspectRatio(self):
preprocessing_options = [(preprocessor.normalize_image, {
'original_minval': 0,
'original_maxval': 255,
'target_minval': 0,
'target_maxval': 1
})]
images = self.createTestImages()
boxes = self.createTestBoxes()
labels = self.createTestLabels()
tensor_dict = {
fields.InputDataFields.image: images,
fields.InputDataFields.groundtruth_boxes: boxes,
fields.InputDataFields.groundtruth_classes: labels
}
tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
images = tensor_dict[fields.InputDataFields.image]
preprocessing_options = [(preprocessor.random_crop_to_aspect_ratio, {
'aspect_ratio': 2.0
})]
cropped_tensor_dict = preprocessor.preprocess(tensor_dict,
preprocessing_options)
cropped_images = cropped_tensor_dict[fields.InputDataFields.image]
cropped_boxes = cropped_tensor_dict[
fields.InputDataFields.groundtruth_boxes]
boxes_shape = tf.shape(boxes)
cropped_boxes_shape = tf.shape(cropped_boxes)
images_shape = tf.shape(images)
cropped_images_shape = tf.shape(cropped_images)
with self.test_session() as sess:
(boxes_shape_, cropped_boxes_shape_, images_shape_,
cropped_images_shape_) = sess.run([
boxes_shape, cropped_boxes_shape, images_shape, cropped_images_shape
])
self.assertAllEqual(boxes_shape_, cropped_boxes_shape_)
self.assertEqual(images_shape_[1], cropped_images_shape_[1] * 2)
self.assertEqual(images_shape_[2], cropped_images_shape_[2])
def testRandomBlackPatches(self):
preprocessing_options = []
preprocessing_options.append((preprocessor.normalize_image, {
'original_minval': 0,
'original_maxval': 255,
'target_minval': 0,
'target_maxval': 1
}))
preprocessing_options.append((preprocessor.random_black_patches, {
'size_to_image_ratio': 0.5
}))
images = self.createTestImages()
tensor_dict = {fields.InputDataFields.image: images}
blacked_tensor_dict = preprocessor.preprocess(tensor_dict,
preprocessing_options)
blacked_images = blacked_tensor_dict[fields.InputDataFields.image]
images_shape = tf.shape(images)
blacked_images_shape = tf.shape(blacked_images)
with self.test_session() as sess:
(images_shape_, blacked_images_shape_) = sess.run(
[images_shape, blacked_images_shape])
self.assertAllEqual(images_shape_, blacked_images_shape_)
def testRandomResizeMethod(self):
preprocessing_options = []
preprocessing_options.append((preprocessor.normalize_image, {
'original_minval': 0,
'original_maxval': 255,
'target_minval': 0,
'target_maxval': 1
}))
preprocessing_options.append((preprocessor.random_resize_method, {
'target_size': (75, 150)
}))
images = self.createTestImages()
tensor_dict = {fields.InputDataFields.image: images}
resized_tensor_dict = preprocessor.preprocess(tensor_dict,
preprocessing_options)
resized_images = resized_tensor_dict[fields.InputDataFields.image]
resized_images_shape = tf.shape(resized_images)
expected_images_shape = tf.constant([1, 75, 150, 3], dtype=tf.int32)
with self.test_session() as sess:
(expected_images_shape_, resized_images_shape_) = sess.run(
[expected_images_shape, resized_images_shape])
self.assertAllEqual(expected_images_shape_,
resized_images_shape_)
def testResizeToRange(self):
"""Tests image resizing, checking output sizes."""
in_shape_list = [[60, 40, 3], [15, 30, 3], [15, 50, 3]]
min_dim = 50
max_dim = 100
expected_shape_list = [[75, 50, 3], [50, 100, 3], [30, 100, 3]]
for in_shape, expected_shape in zip(in_shape_list, expected_shape_list):
in_image = tf.random_uniform(in_shape)
out_image = preprocessor.resize_to_range(
in_image, min_dimension=min_dim, max_dimension=max_dim)
out_image_shape = tf.shape(out_image)
with self.test_session() as sess:
out_image_shape = sess.run(out_image_shape)
self.assertAllEqual(out_image_shape, expected_shape)
def testResizeToRangeWithMasks(self):
"""Tests image resizing, checking output sizes."""
in_image_shape_list = [[60, 40, 3], [15, 30, 3]]
in_masks_shape_list = [[15, 60, 40], [10, 15, 30]]
min_dim = 50
max_dim = 100
expected_image_shape_list = [[75, 50, 3], [50, 100, 3]]
expected_masks_shape_list = [[15, 75, 50], [10, 50, 100]]
for (in_image_shape, expected_image_shape, in_masks_shape,
expected_mask_shape) in zip(in_image_shape_list,
expected_image_shape_list,
in_masks_shape_list,
expected_masks_shape_list):
in_image = tf.random_uniform(in_image_shape)
in_masks = tf.random_uniform(in_masks_shape)
out_image, out_masks = preprocessor.resize_to_range(
in_image, in_masks, min_dimension=min_dim, max_dimension=max_dim)
out_image_shape = tf.shape(out_image)
out_masks_shape = tf.shape(out_masks)
with self.test_session() as sess:
out_image_shape, out_masks_shape = sess.run(
[out_image_shape, out_masks_shape])
self.assertAllEqual(out_image_shape, expected_image_shape)
self.assertAllEqual(out_masks_shape, expected_mask_shape)
def testResizeToRangeWithNoInstanceMask(self):
"""Tests image resizing, checking output sizes."""
in_image_shape_list = [[60, 40, 3], [15, 30, 3]]
in_masks_shape_list = [[0, 60, 40], [0, 15, 30]]
min_dim = 50
max_dim = 100
expected_image_shape_list = [[75, 50, 3], [50, 100, 3]]
expected_masks_shape_list = [[0, 75, 50], [0, 50, 100]]
for (in_image_shape, expected_image_shape, in_masks_shape,
expected_mask_shape) in zip(in_image_shape_list,
expected_image_shape_list,
in_masks_shape_list,
expected_masks_shape_list):
in_image = tf.random_uniform(in_image_shape)
in_masks = tf.random_uniform(in_masks_shape)
out_image, out_masks = preprocessor.resize_to_range(
in_image, in_masks, min_dimension=min_dim, max_dimension=max_dim)
out_image_shape = tf.shape(out_image)
out_masks_shape = tf.shape(out_masks)
with self.test_session() as sess:
out_image_shape, out_masks_shape = sess.run(
[out_image_shape, out_masks_shape])
self.assertAllEqual(out_image_shape, expected_image_shape)
self.assertAllEqual(out_masks_shape, expected_mask_shape)
def testResizeImageWithMasks(self):
"""Tests image resizing, checking output sizes."""
in_image_shape_list = [[60, 40, 3], [15, 30, 3]]
in_masks_shape_list = [[15, 60, 40], [10, 15, 30]]
height = 50
width = 100
expected_image_shape_list = [[50, 100, 3], [50, 100, 3]]
expected_masks_shape_list = [[15, 50, 100], [10, 50, 100]]
for (in_image_shape, expected_image_shape, in_masks_shape,
expected_mask_shape) in zip(in_image_shape_list,
expected_image_shape_list,
in_masks_shape_list,
expected_masks_shape_list):
in_image = tf.random_uniform(in_image_shape)
in_masks = tf.random_uniform(in_masks_shape)
out_image, out_masks = preprocessor.resize_image(
in_image, in_masks, new_height=height, new_width=width)
out_image_shape = tf.shape(out_image)
out_masks_shape = tf.shape(out_masks)
with self.test_session() as sess:
out_image_shape, out_masks_shape = sess.run(
[out_image_shape, out_masks_shape])
self.assertAllEqual(out_image_shape, expected_image_shape)
self.assertAllEqual(out_masks_shape, expected_mask_shape)
def testResizeImageWithNoInstanceMask(self):
"""Tests image resizing, checking output sizes."""
in_image_shape_list = [[60, 40, 3], [15, 30, 3]]
in_masks_shape_list = [[0, 60, 40], [0, 15, 30]]
height = 50
width = 100
expected_image_shape_list = [[50, 100, 3], [50, 100, 3]]
expected_masks_shape_list = [[0, 50, 100], [0, 50, 100]]
for (in_image_shape, expected_image_shape, in_masks_shape,
expected_mask_shape) in zip(in_image_shape_list,
expected_image_shape_list,
in_masks_shape_list,
expected_masks_shape_list):
in_image = tf.random_uniform(in_image_shape)
in_masks = tf.random_uniform(in_masks_shape)
out_image, out_masks = preprocessor.resize_image(
in_image, in_masks, new_height=height, new_width=width)
out_image_shape = tf.shape(out_image)
out_masks_shape = tf.shape(out_masks)
with self.test_session() as sess:
out_image_shape, out_masks_shape = sess.run(
[out_image_shape, out_masks_shape])
self.assertAllEqual(out_image_shape, expected_image_shape)
self.assertAllEqual(out_masks_shape, expected_mask_shape)
def testResizeToRange4DImageTensor(self):
image = tf.random_uniform([1, 200, 300, 3])
with self.assertRaises(ValueError):
preprocessor.resize_to_range(image, 500, 600)
def testResizeToRangeSameMinMax(self):
"""Tests image resizing, checking output sizes."""
in_shape_list = [[312, 312, 3], [299, 299, 3]]
min_dim = 320
max_dim = 320
expected_shape_list = [[320, 320, 3], [320, 320, 3]]
for in_shape, expected_shape in zip(in_shape_list, expected_shape_list):
in_image = tf.random_uniform(in_shape)
out_image = preprocessor.resize_to_range(
in_image, min_dimension=min_dim, max_dimension=max_dim)
out_image_shape = tf.shape(out_image)
with self.test_session() as sess:
out_image_shape = sess.run(out_image_shape)
self.assertAllEqual(out_image_shape, expected_shape)
def testScaleBoxesToPixelCoordinates(self):
"""Tests box scaling, checking scaled values."""
in_shape = [60, 40, 3]
in_boxes = [[0.1, 0.2, 0.4, 0.6],
[0.5, 0.3, 0.9, 0.7]]
expected_boxes = [[6., 8., 24., 24.],
[30., 12., 54., 28.]]
in_image = tf.random_uniform(in_shape)
in_boxes = tf.constant(in_boxes)
_, out_boxes = preprocessor.scale_boxes_to_pixel_coordinates(
in_image, boxes=in_boxes)
with self.test_session() as sess:
out_boxes = sess.run(out_boxes)
self.assertAllClose(out_boxes, expected_boxes)
def testScaleBoxesToPixelCoordinatesWithKeypoints(self):
"""Tests box and keypoint scaling, checking scaled values."""
in_shape = [60, 40, 3]
in_boxes = self.createTestBoxes()
in_keypoints = self.createTestKeypoints()
expected_boxes = [[0., 10., 45., 40.],
[15., 20., 45., 40.]]
expected_keypoints = [
[[6., 4.], [12., 8.], [18., 12.]],
[[24., 16.], [30., 20.], [36., 24.]],
]
in_image = tf.random_uniform(in_shape)
_, out_boxes, out_keypoints = preprocessor.scale_boxes_to_pixel_coordinates(
in_image, boxes=in_boxes, keypoints=in_keypoints)
with self.test_session() as sess:
out_boxes_, out_keypoints_ = sess.run([out_boxes, out_keypoints])
self.assertAllClose(out_boxes_, expected_boxes)
self.assertAllClose(out_keypoints_, expected_keypoints)
def testSubtractChannelMean(self):
"""Tests whether channel means have been subtracted."""
with self.test_session():
image = tf.zeros((240, 320, 3))
means = [1, 2, 3]
actual = preprocessor.subtract_channel_mean(image, means=means)
actual = actual.eval()
self.assertTrue((actual[:, :, 0] == -1).all())
self.assertTrue((actual[:, :, 1] == -2).all())
self.assertTrue((actual[:, :, 2] == -3).all())
def testOneHotEncoding(self):
"""Tests one hot encoding of multiclass labels."""
with self.test_session():
labels = tf.constant([1, 4, 2], dtype=tf.int32)
one_hot = preprocessor.one_hot_encoding(labels, num_classes=5)
one_hot = one_hot.eval()
self.assertAllEqual([0, 1, 1, 0, 1], one_hot)
def testSSDRandomCrop(self):
preprocessing_options = [
(preprocessor.normalize_image, {
'original_minval': 0,
'original_maxval': 255,
'target_minval': 0,
'target_maxval': 1
}),
(preprocessor.ssd_random_crop, {})]
images = self.createTestImages()
boxes = self.createTestBoxes()
labels = self.createTestLabels()
tensor_dict = {fields.InputDataFields.image: images,
fields.InputDataFields.groundtruth_boxes: boxes,
fields.InputDataFields.groundtruth_classes: labels}
distorted_tensor_dict = preprocessor.preprocess(tensor_dict,
preprocessing_options)
distorted_images = distorted_tensor_dict[fields.InputDataFields.image]
distorted_boxes = distorted_tensor_dict[
fields.InputDataFields.groundtruth_boxes]
images_rank = tf.rank(images)
distorted_images_rank = tf.rank(distorted_images)
boxes_rank = tf.rank(boxes)
distorted_boxes_rank = tf.rank(distorted_boxes)
with self.test_session() as sess:
(boxes_rank_, distorted_boxes_rank_, images_rank_,
distorted_images_rank_) = sess.run(
[boxes_rank, distorted_boxes_rank, images_rank,
distorted_images_rank])
self.assertAllEqual(boxes_rank_, distorted_boxes_rank_)
self.assertAllEqual(images_rank_, distorted_images_rank_)
def testSSDRandomCropPad(self):
images = self.createTestImages()
boxes = self.createTestBoxes()
labels = self.createTestLabels()
preprocessing_options = [
(preprocessor.normalize_image, {
'original_minval': 0,
'original_maxval': 255,
'target_minval': 0,
'target_maxval': 1
}),
(preprocessor.ssd_random_crop_pad, {})]
tensor_dict = {fields.InputDataFields.image: images,
fields.InputDataFields.groundtruth_boxes: boxes,
fields.InputDataFields.groundtruth_classes: labels}
distorted_tensor_dict = preprocessor.preprocess(tensor_dict,
preprocessing_options)
distorted_images = distorted_tensor_dict[fields.InputDataFields.image]
distorted_boxes = distorted_tensor_dict[
fields.InputDataFields.groundtruth_boxes]
images_rank = tf.rank(images)
distorted_images_rank = tf.rank(distorted_images)
boxes_rank = tf.rank(boxes)
distorted_boxes_rank = tf.rank(distorted_boxes)
with self.test_session() as sess:
(boxes_rank_, distorted_boxes_rank_, images_rank_,
distorted_images_rank_) = sess.run([
boxes_rank, distorted_boxes_rank, images_rank, distorted_images_rank
])
self.assertAllEqual(boxes_rank_, distorted_boxes_rank_)
self.assertAllEqual(images_rank_, distorted_images_rank_)
def testSSDRandomCropFixedAspectRatio(self):
images = self.createTestImages()
boxes = self.createTestBoxes()
labels = self.createTestLabels()
preprocessing_options = [
(preprocessor.normalize_image, {
'original_minval': 0,
'original_maxval': 255,
'target_minval': 0,
'target_maxval': 1
}),
(preprocessor.ssd_random_crop_fixed_aspect_ratio, {})]
tensor_dict = {
fields.InputDataFields.image: images,
fields.InputDataFields.groundtruth_boxes: boxes,
fields.InputDataFields.groundtruth_classes: labels
}
distorted_tensor_dict = preprocessor.preprocess(tensor_dict,
preprocessing_options)
distorted_images = distorted_tensor_dict[fields.InputDataFields.image]
distorted_boxes = distorted_tensor_dict[
fields.InputDataFields.groundtruth_boxes]
images_rank = tf.rank(images)
distorted_images_rank = tf.rank(distorted_images)
boxes_rank = tf.rank(boxes)
distorted_boxes_rank = tf.rank(distorted_boxes)
with self.test_session() as sess:
(boxes_rank_, distorted_boxes_rank_, images_rank_,
distorted_images_rank_) = sess.run(
[boxes_rank, distorted_boxes_rank, images_rank,
distorted_images_rank])
self.assertAllEqual(boxes_rank_, distorted_boxes_rank_)
self.assertAllEqual(images_rank_, distorted_images_rank_)
def testSSDRandomCropFixedAspectRatioWithMasksAndKeypoints(self):
images = self.createTestImages()
boxes = self.createTestBoxes()
labels = self.createTestLabels()
masks = self.createTestMasks()
keypoints = self.createTestKeypoints()
preprocessing_options = [
(preprocessor.normalize_image, {
'original_minval': 0,
'original_maxval': 255,
'target_minval': 0,
'target_maxval': 1
}),
(preprocessor.ssd_random_crop_fixed_aspect_ratio, {})]
tensor_dict = {
fields.InputDataFields.image: images,
fields.InputDataFields.groundtruth_boxes: boxes,
fields.InputDataFields.groundtruth_classes: labels,
fields.InputDataFields.groundtruth_instance_masks: masks,
fields.InputDataFields.groundtruth_keypoints: keypoints,
}
preprocessor_arg_map = preprocessor.get_default_func_arg_map(
include_instance_masks=True, include_keypoints=True)
distorted_tensor_dict = preprocessor.preprocess(
tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map)
distorted_images = distorted_tensor_dict[fields.InputDataFields.image]
distorted_boxes = distorted_tensor_dict[
fields.InputDataFields.groundtruth_boxes]
images_rank = tf.rank(images)
distorted_images_rank = tf.rank(distorted_images)
boxes_rank = tf.rank(boxes)
distorted_boxes_rank = tf.rank(distorted_boxes)
with self.test_session() as sess:
(boxes_rank_, distorted_boxes_rank_, images_rank_,
distorted_images_rank_) = sess.run(
[boxes_rank, distorted_boxes_rank, images_rank,
distorted_images_rank])
self.assertAllEqual(boxes_rank_, distorted_boxes_rank_)
self.assertAllEqual(images_rank_, distorted_images_rank_)
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Region Similarity Calculators for BoxLists.
Region Similarity Calculators compare a pairwise measure of similarity
between the boxes in two BoxLists.
"""
from abc import ABCMeta
from abc import abstractmethod
import tensorflow as tf
from object_detection.core import box_list_ops
class RegionSimilarityCalculator(object):
"""Abstract base class for region similarity calculator."""
__metaclass__ = ABCMeta
def compare(self, boxlist1, boxlist2, scope=None):
"""Computes matrix of pairwise similarity between BoxLists.
This op (to be overriden) computes a measure of pairwise similarity between
the boxes in the given BoxLists. Higher values indicate more similarity.
Note that this method simply measures similarity and does not explicitly
perform a matching.
Args:
boxlist1: BoxList holding N boxes.
boxlist2: BoxList holding M boxes.
scope: Op scope name. Defaults to 'Compare' if None.
Returns:
a (float32) tensor of shape [N, M] with pairwise similarity score.
"""
with tf.name_scope(scope, 'Compare', [boxlist1, boxlist2]) as scope:
return self._compare(boxlist1, boxlist2)
@abstractmethod
def _compare(self, boxlist1, boxlist2):
pass
class IouSimilarity(RegionSimilarityCalculator):
"""Class to compute similarity based on Intersection over Union (IOU) metric.
This class computes pairwise similarity between two BoxLists based on IOU.
"""
def _compare(self, boxlist1, boxlist2):
"""Compute pairwise IOU similarity between the two BoxLists.
Args:
boxlist1: BoxList holding N boxes.
boxlist2: BoxList holding M boxes.
Returns:
A tensor with shape [N, M] representing pairwise iou scores.
"""
return box_list_ops.iou(boxlist1, boxlist2)
class NegSqDistSimilarity(RegionSimilarityCalculator):
"""Class to compute similarity based on the squared distance metric.
This class computes pairwise similarity between two BoxLists based on the
negative squared distance metric.
"""
def _compare(self, boxlist1, boxlist2):
"""Compute matrix of (negated) sq distances.
Args:
boxlist1: BoxList holding N boxes.
boxlist2: BoxList holding M boxes.
Returns:
A tensor with shape [N, M] representing negated pairwise squared distance.
"""
return -1 * box_list_ops.sq_dist(boxlist1, boxlist2)
class IoaSimilarity(RegionSimilarityCalculator):
"""Class to compute similarity based on Intersection over Area (IOA) metric.
This class computes pairwise similarity between two BoxLists based on their
pairwise intersections divided by the areas of second BoxLists.
"""
def _compare(self, boxlist1, boxlist2):
"""Compute pairwise IOA similarity between the two BoxLists.
Args:
boxlist1: BoxList holding N boxes.
boxlist2: BoxList holding M boxes.
Returns:
A tensor with shape [N, M] representing pairwise IOA scores.
"""
return box_list_ops.ioa(boxlist1, boxlist2)
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for region_similarity_calculator."""
import tensorflow as tf
from object_detection.core import box_list
from object_detection.core import region_similarity_calculator
class RegionSimilarityCalculatorTest(tf.test.TestCase):
def test_get_correct_pairwise_similarity_based_on_iou(self):
corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]])
corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
[0.0, 0.0, 20.0, 20.0]])
exp_output = [[2.0 / 16.0, 0, 6.0 / 400.0], [1.0 / 16.0, 0.0, 5.0 / 400.0]]
boxes1 = box_list.BoxList(corners1)
boxes2 = box_list.BoxList(corners2)
iou_similarity_calculator = region_similarity_calculator.IouSimilarity()
iou_similarity = iou_similarity_calculator.compare(boxes1, boxes2)
with self.test_session() as sess:
iou_output = sess.run(iou_similarity)
self.assertAllClose(iou_output, exp_output)
def test_get_correct_pairwise_similarity_based_on_squared_distances(self):
corners1 = tf.constant([[0.0, 0.0, 0.0, 0.0],
[1.0, 1.0, 0.0, 2.0]])
corners2 = tf.constant([[3.0, 4.0, 1.0, 0.0],
[-4.0, 0.0, 0.0, 3.0],
[0.0, 0.0, 0.0, 0.0]])
exp_output = [[-26, -25, 0], [-18, -27, -6]]
boxes1 = box_list.BoxList(corners1)
boxes2 = box_list.BoxList(corners2)
dist_similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
dist_similarity = dist_similarity_calc.compare(boxes1, boxes2)
with self.test_session() as sess:
dist_output = sess.run(dist_similarity)
self.assertAllClose(dist_output, exp_output)
def test_get_correct_pairwise_similarity_based_on_ioa(self):
corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]])
corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
[0.0, 0.0, 20.0, 20.0]])
exp_output_1 = [[2.0 / 12.0, 0, 6.0 / 400.0],
[1.0 / 12.0, 0.0, 5.0 / 400.0]]
exp_output_2 = [[2.0 / 6.0, 1.0 / 5.0],
[0, 0],
[6.0 / 6.0, 5.0 / 5.0]]
boxes1 = box_list.BoxList(corners1)
boxes2 = box_list.BoxList(corners2)
ioa_similarity_calculator = region_similarity_calculator.IoaSimilarity()
ioa_similarity_1 = ioa_similarity_calculator.compare(boxes1, boxes2)
ioa_similarity_2 = ioa_similarity_calculator.compare(boxes2, boxes1)
with self.test_session() as sess:
iou_output_1, iou_output_2 = sess.run(
[ioa_similarity_1, ioa_similarity_2])
self.assertAllClose(iou_output_1, exp_output_1)
self.assertAllClose(iou_output_2, exp_output_2)
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains classes specifying naming conventions used for object detection.
Specifies:
InputDataFields: standard fields used by reader/preprocessor/batcher.
BoxListFields: standard field used by BoxList
TfExampleFields: standard fields for tf-example data format (go/tf-example).
"""
class InputDataFields(object):
"""Names for the input tensors.
Holds the standard data field names to use for identifying input tensors. This
should be used by the decoder to identify keys for the returned tensor_dict
containing input tensors. And it should be used by the model to identify the
tensors it needs.
Attributes:
image: image.
original_image: image in the original input size.
key: unique key corresponding to image.
source_id: source of the original image.
filename: original filename of the dataset (without common path).
groundtruth_image_classes: image-level class labels.
groundtruth_boxes: coordinates of the ground truth boxes in the image.
groundtruth_classes: box-level class labels.
groundtruth_label_types: box-level label types (e.g. explicit negative).
groundtruth_is_crowd: is the groundtruth a single object or a crowd.
groundtruth_area: area of a groundtruth segment.
groundtruth_difficult: is a `difficult` object
proposal_boxes: coordinates of object proposal boxes.
proposal_objectness: objectness score of each proposal.
groundtruth_instance_masks: ground truth instance masks.
groundtruth_instance_classes: instance mask-level class labels.
groundtruth_keypoints: ground truth keypoints.
groundtruth_keypoint_visibilities: ground truth keypoint visibilities.
groundtruth_label_scores: groundtruth label scores.
"""
image = 'image'
original_image = 'original_image'
key = 'key'
source_id = 'source_id'
filename = 'filename'
groundtruth_image_classes = 'groundtruth_image_classes'
groundtruth_boxes = 'groundtruth_boxes'
groundtruth_classes = 'groundtruth_classes'
groundtruth_label_types = 'groundtruth_label_types'
groundtruth_is_crowd = 'groundtruth_is_crowd'
groundtruth_area = 'groundtruth_area'
groundtruth_difficult = 'groundtruth_difficult'
proposal_boxes = 'proposal_boxes'
proposal_objectness = 'proposal_objectness'
groundtruth_instance_masks = 'groundtruth_instance_masks'
groundtruth_instance_classes = 'groundtruth_instance_classes'
groundtruth_keypoints = 'groundtruth_keypoints'
groundtruth_keypoint_visibilities = 'groundtruth_keypoint_visibilities'
groundtruth_label_scores = 'groundtruth_label_scores'
class BoxListFields(object):
"""Naming conventions for BoxLists.
Attributes:
boxes: bounding box coordinates.
classes: classes per bounding box.
scores: scores per bounding box.
weights: sample weights per bounding box.
objectness: objectness score per bounding box.
masks: masks per bounding box.
keypoints: keypoints per bounding box.
keypoint_heatmaps: keypoint heatmaps per bounding box.
"""
boxes = 'boxes'
classes = 'classes'
scores = 'scores'
weights = 'weights'
objectness = 'objectness'
masks = 'masks'
keypoints = 'keypoints'
keypoint_heatmaps = 'keypoint_heatmaps'
class TfExampleFields(object):
"""TF-example proto feature names for object detection.
Holds the standard feature names to load from an Example proto for object
detection.
Attributes:
image_encoded: JPEG encoded string
image_format: image format, e.g. "JPEG"
filename: filename
channels: number of channels of image
colorspace: colorspace, e.g. "RGB"
height: height of image in pixels, e.g. 462
width: width of image in pixels, e.g. 581
source_id: original source of the image
object_class_text: labels in text format, e.g. ["person", "cat"]
object_class_text: labels in numbers, e.g. [16, 8]
object_bbox_xmin: xmin coordinates of groundtruth box, e.g. 10, 30
object_bbox_xmax: xmax coordinates of groundtruth box, e.g. 50, 40
object_bbox_ymin: ymin coordinates of groundtruth box, e.g. 40, 50
object_bbox_ymax: ymax coordinates of groundtruth box, e.g. 80, 70
object_view: viewpoint of object, e.g. ["frontal", "left"]
object_truncated: is object truncated, e.g. [true, false]
object_occluded: is object occluded, e.g. [true, false]
object_difficult: is object difficult, e.g. [true, false]
object_is_crowd: is the object a single object or a crowd
object_segment_area: the area of the segment.
instance_masks: instance segmentation masks.
instance_classes: Classes for each instance segmentation mask.
"""
image_encoded = 'image/encoded'
image_format = 'image/format' # format is reserved keyword
filename = 'image/filename'
channels = 'image/channels'
colorspace = 'image/colorspace'
height = 'image/height'
width = 'image/width'
source_id = 'image/source_id'
object_class_text = 'image/object/class/text'
object_class_label = 'image/object/class/label'
object_bbox_ymin = 'image/object/bbox/ymin'
object_bbox_xmin = 'image/object/bbox/xmin'
object_bbox_ymax = 'image/object/bbox/ymax'
object_bbox_xmax = 'image/object/bbox/xmax'
object_view = 'image/object/view'
object_truncated = 'image/object/truncated'
object_occluded = 'image/object/occluded'
object_difficult = 'image/object/difficult'
object_is_crowd = 'image/object/is_crowd'
object_segment_area = 'image/object/segment/area'
instance_masks = 'image/segmentation/object'
instance_classes = 'image/segmentation/object/class'
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Base target assigner module.
The job of a TargetAssigner is, for a given set of anchors (bounding boxes) and
groundtruth detections (bounding boxes), to assign classification and regression
targets to each anchor as well as weights to each anchor (specifying, e.g.,
which anchors should not contribute to training loss).
It assigns classification/regression targets by performing the following steps:
1) Computing pairwise similarity between anchors and groundtruth boxes using a
provided RegionSimilarity Calculator
2) Computing a matching based on the similarity matrix using a provided Matcher
3) Assigning regression targets based on the matching and a provided BoxCoder
4) Assigning classification targets based on the matching and groundtruth labels
Note that TargetAssigners only operate on detections from a single
image at a time, so any logic for applying a TargetAssigner to multiple
images must be handled externally.
"""
import tensorflow as tf
from object_detection.box_coders import faster_rcnn_box_coder
from object_detection.box_coders import mean_stddev_box_coder
from object_detection.core import box_coder as bcoder
from object_detection.core import box_list
from object_detection.core import box_list_ops
from object_detection.core import matcher as mat
from object_detection.core import region_similarity_calculator as sim_calc
from object_detection.matchers import argmax_matcher
from object_detection.matchers import bipartite_matcher
class TargetAssigner(object):
"""Target assigner to compute classification and regression targets."""
def __init__(self, similarity_calc, matcher, box_coder,
positive_class_weight=1.0, negative_class_weight=1.0,
unmatched_cls_target=None):
"""Construct Multibox Target Assigner.
Args:
similarity_calc: a RegionSimilarityCalculator
matcher: an object_detection.core.Matcher used to match groundtruth to
anchors.
box_coder: an object_detection.core.BoxCoder used to encode matching
groundtruth boxes with respect to anchors.
positive_class_weight: classification weight to be associated to positive
anchors (default: 1.0)
negative_class_weight: classification weight to be associated to negative
anchors (default: 1.0)
unmatched_cls_target: a float32 tensor with shape [d_1, d_2, ..., d_k]
which is consistent with the classification target for each
anchor (and can be empty for scalar targets). This shape must thus be
compatible with the groundtruth labels that are passed to the "assign"
function (which have shape [num_gt_boxes, d_1, d_2, ..., d_k]).
If set to None, unmatched_cls_target is set to be [0] for each anchor.
Raises:
ValueError: if similarity_calc is not a RegionSimilarityCalculator or
if matcher is not a Matcher or if box_coder is not a BoxCoder
"""
if not isinstance(similarity_calc, sim_calc.RegionSimilarityCalculator):
raise ValueError('similarity_calc must be a RegionSimilarityCalculator')
if not isinstance(matcher, mat.Matcher):
raise ValueError('matcher must be a Matcher')
if not isinstance(box_coder, bcoder.BoxCoder):
raise ValueError('box_coder must be a BoxCoder')
self._similarity_calc = similarity_calc
self._matcher = matcher
self._box_coder = box_coder
self._positive_class_weight = positive_class_weight
self._negative_class_weight = negative_class_weight
if unmatched_cls_target is None:
self._unmatched_cls_target = tf.constant([0], tf.float32)
else:
self._unmatched_cls_target = unmatched_cls_target
@property
def box_coder(self):
return self._box_coder
def assign(self, anchors, groundtruth_boxes, groundtruth_labels=None,
**params):
"""Assign classification and regression targets to each anchor.
For a given set of anchors and groundtruth detections, match anchors
to groundtruth_boxes and assign classification and regression targets to
each anchor as well as weights based on the resulting match (specifying,
e.g., which anchors should not contribute to training loss).
Anchors that are not matched to anything are given a classification target
of self._unmatched_cls_target which can be specified via the constructor.
Args:
anchors: a BoxList representing N anchors
groundtruth_boxes: a BoxList representing M groundtruth boxes
groundtruth_labels: a tensor of shape [num_gt_boxes, d_1, ... d_k]
with labels for each of the ground_truth boxes. The subshape
[d_1, ... d_k] can be empty (corresponding to scalar inputs). When set
to None, groundtruth_labels assumes a binary problem where all
ground_truth boxes get a positive label (of 1).
**params: Additional keyword arguments for specific implementations of
the Matcher.
Returns:
cls_targets: a float32 tensor with shape [num_anchors, d_1, d_2 ... d_k],
where the subshape [d_1, ..., d_k] is compatible with groundtruth_labels
which has shape [num_gt_boxes, d_1, d_2, ... d_k].
cls_weights: a float32 tensor with shape [num_anchors]
reg_targets: a float32 tensor with shape [num_anchors, box_code_dimension]
reg_weights: a float32 tensor with shape [num_anchors]
match: a matcher.Match object encoding the match between anchors and
groundtruth boxes, with rows corresponding to groundtruth boxes
and columns corresponding to anchors.
Raises:
ValueError: if anchors or groundtruth_boxes are not of type
box_list.BoxList
"""
if not isinstance(anchors, box_list.BoxList):
raise ValueError('anchors must be an BoxList')
if not isinstance(groundtruth_boxes, box_list.BoxList):
raise ValueError('groundtruth_boxes must be an BoxList')
if groundtruth_labels is None:
groundtruth_labels = tf.ones(tf.expand_dims(groundtruth_boxes.num_boxes(),
0))
groundtruth_labels = tf.expand_dims(groundtruth_labels, -1)
shape_assert = tf.assert_equal(tf.shape(groundtruth_labels)[1:],
tf.shape(self._unmatched_cls_target))
with tf.control_dependencies([shape_assert]):
match_quality_matrix = self._similarity_calc.compare(groundtruth_boxes,
anchors)
match = self._matcher.match(match_quality_matrix, **params)
reg_targets = self._create_regression_targets(anchors,
groundtruth_boxes,
match)
cls_targets = self._create_classification_targets(groundtruth_labels,
match)
reg_weights = self._create_regression_weights(match)
cls_weights = self._create_classification_weights(
match, self._positive_class_weight, self._negative_class_weight)
num_anchors = anchors.num_boxes_static()
if num_anchors is not None:
reg_targets = self._reset_target_shape(reg_targets, num_anchors)
cls_targets = self._reset_target_shape(cls_targets, num_anchors)
reg_weights = self._reset_target_shape(reg_weights, num_anchors)
cls_weights = self._reset_target_shape(cls_weights, num_anchors)
return cls_targets, cls_weights, reg_targets, reg_weights, match
def _reset_target_shape(self, target, num_anchors):
"""Sets the static shape of the target.
Args:
target: the target tensor. Its first dimension will be overwritten.
num_anchors: the number of anchors, which is used to override the target's
first dimension.
Returns:
A tensor with the shape info filled in.
"""
target_shape = target.get_shape().as_list()
target_shape[0] = num_anchors
target.set_shape(target_shape)
return target
def _create_regression_targets(self, anchors, groundtruth_boxes, match):
"""Returns a regression target for each anchor.
Args:
anchors: a BoxList representing N anchors
groundtruth_boxes: a BoxList representing M groundtruth_boxes
match: a matcher.Match object
Returns:
reg_targets: a float32 tensor with shape [N, box_code_dimension]
"""
matched_anchor_indices = match.matched_column_indices()
unmatched_ignored_anchor_indices = (match.
unmatched_or_ignored_column_indices())
matched_gt_indices = match.matched_row_indices()
matched_anchors = box_list_ops.gather(anchors,
matched_anchor_indices)
matched_gt_boxes = box_list_ops.gather(groundtruth_boxes,
matched_gt_indices)
matched_reg_targets = self._box_coder.encode(matched_gt_boxes,
matched_anchors)
unmatched_ignored_reg_targets = tf.tile(
self._default_regression_target(),
tf.stack([tf.size(unmatched_ignored_anchor_indices), 1]))
reg_targets = tf.dynamic_stitch(
[matched_anchor_indices, unmatched_ignored_anchor_indices],
[matched_reg_targets, unmatched_ignored_reg_targets])
# TODO: summarize the number of matches on average.
return reg_targets
def _default_regression_target(self):
"""Returns the default target for anchors to regress to.
Default regression targets are set to zero (though in
this implementation what these targets are set to should
not matter as the regression weight of any box set to
regress to the default target is zero).
Returns:
default_target: a float32 tensor with shape [1, box_code_dimension]
"""
return tf.constant([self._box_coder.code_size*[0]], tf.float32)
def _create_classification_targets(self, groundtruth_labels, match):
"""Create classification targets for each anchor.
Assign a classification target of for each anchor to the matching
groundtruth label that is provided by match. Anchors that are not matched
to anything are given the target self._unmatched_cls_target
Args:
groundtruth_labels: a tensor of shape [num_gt_boxes, d_1, ... d_k]
with labels for each of the ground_truth boxes. The subshape
[d_1, ... d_k] can be empty (corresponding to scalar labels).
match: a matcher.Match object that provides a matching between anchors
and groundtruth boxes.
Returns:
cls_targets: a float32 tensor with shape [num_anchors, d_1, d_2 ... d_k],
where the subshape [d_1, ..., d_k] is compatible with groundtruth_labels
which has shape [num_gt_boxes, d_1, d_2, ... d_k].
"""
matched_anchor_indices = match.matched_column_indices()
unmatched_ignored_anchor_indices = (match.
unmatched_or_ignored_column_indices())
matched_gt_indices = match.matched_row_indices()
matched_cls_targets = tf.gather(groundtruth_labels, matched_gt_indices)
ones = self._unmatched_cls_target.shape.ndims * [1]
unmatched_ignored_cls_targets = tf.tile(
tf.expand_dims(self._unmatched_cls_target, 0),
tf.stack([tf.size(unmatched_ignored_anchor_indices)] + ones))
cls_targets = tf.dynamic_stitch(
[matched_anchor_indices, unmatched_ignored_anchor_indices],
[matched_cls_targets, unmatched_ignored_cls_targets])
return cls_targets
def _create_regression_weights(self, match):
"""Set regression weight for each anchor.
Only positive anchors are set to contribute to the regression loss, so this
method returns a weight of 1 for every positive anchor and 0 for every
negative anchor.
Args:
match: a matcher.Match object that provides a matching between anchors
and groundtruth boxes.
Returns:
reg_weights: a float32 tensor with shape [num_anchors] representing
regression weights
"""
reg_weights = tf.cast(match.matched_column_indicator(), tf.float32)
return reg_weights
def _create_classification_weights(self,
match,
positive_class_weight=1.0,
negative_class_weight=1.0):
"""Create classification weights for each anchor.
Positive (matched) anchors are associated with a weight of
positive_class_weight and negative (unmatched) anchors are associated with
a weight of negative_class_weight. When anchors are ignored, weights are set
to zero. By default, both positive/negative weights are set to 1.0,
but they can be adjusted to handle class imbalance (which is almost always
the case in object detection).
Args:
match: a matcher.Match object that provides a matching between anchors
and groundtruth boxes.
positive_class_weight: weight to be associated to positive anchors
negative_class_weight: weight to be associated to negative anchors
Returns:
cls_weights: a float32 tensor with shape [num_anchors] representing
classification weights.
"""
matched_indicator = tf.cast(match.matched_column_indicator(), tf.float32)
ignore_indicator = tf.cast(match.ignored_column_indicator(), tf.float32)
unmatched_indicator = 1.0 - matched_indicator - ignore_indicator
cls_weights = (positive_class_weight * matched_indicator
+ negative_class_weight * unmatched_indicator)
return cls_weights
def get_box_coder(self):
"""Get BoxCoder of this TargetAssigner.
Returns:
BoxCoder: BoxCoder object.
"""
return self._box_coder
# TODO: This method pulls in all the implementation dependencies into core.
# Therefore its best to have this factory method outside of core.
def create_target_assigner(reference, stage=None,
positive_class_weight=1.0,
negative_class_weight=1.0,
unmatched_cls_target=None):
"""Factory function for creating standard target assigners.
Args:
reference: string referencing the type of TargetAssigner.
stage: string denoting stage: {proposal, detection}.
positive_class_weight: classification weight to be associated to positive
anchors (default: 1.0)
negative_class_weight: classification weight to be associated to negative
anchors (default: 1.0)
unmatched_cls_target: a float32 tensor with shape [d_1, d_2, ..., d_k]
which is consistent with the classification target for each
anchor (and can be empty for scalar targets). This shape must thus be
compatible with the groundtruth labels that are passed to the Assign
function (which have shape [num_gt_boxes, d_1, d_2, ..., d_k]).
If set to None, unmatched_cls_target is set to be 0 for each anchor.
Returns:
TargetAssigner: desired target assigner.
Raises:
ValueError: if combination reference+stage is invalid.
"""
if reference == 'Multibox' and stage == 'proposal':
similarity_calc = sim_calc.NegSqDistSimilarity()
matcher = bipartite_matcher.GreedyBipartiteMatcher()
box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
elif reference == 'FasterRCNN' and stage == 'proposal':
similarity_calc = sim_calc.IouSimilarity()
matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.7,
unmatched_threshold=0.3,
force_match_for_each_row=True)
box_coder = faster_rcnn_box_coder.FasterRcnnBoxCoder(
scale_factors=[10.0, 10.0, 5.0, 5.0])
elif reference == 'FasterRCNN' and stage == 'detection':
similarity_calc = sim_calc.IouSimilarity()
# Uses all proposals with IOU < 0.5 as candidate negatives.
matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
negatives_lower_than_unmatched=True)
box_coder = faster_rcnn_box_coder.FasterRcnnBoxCoder(
scale_factors=[10.0, 10.0, 5.0, 5.0])
elif reference == 'FastRCNN':
similarity_calc = sim_calc.IouSimilarity()
matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
unmatched_threshold=0.1,
force_match_for_each_row=False,
negatives_lower_than_unmatched=False)
box_coder = faster_rcnn_box_coder.FasterRcnnBoxCoder()
else:
raise ValueError('No valid combination of reference and stage.')
return TargetAssigner(similarity_calc, matcher, box_coder,
positive_class_weight=positive_class_weight,
negative_class_weight=negative_class_weight,
unmatched_cls_target=unmatched_cls_target)
def batch_assign_targets(target_assigner,
anchors_batch,
gt_box_batch,
gt_class_targets_batch):
"""Batched assignment of classification and regression targets.
Args:
target_assigner: a target assigner.
anchors_batch: BoxList representing N box anchors or list of BoxList objects
with length batch_size representing anchor sets.
gt_box_batch: a list of BoxList objects with length batch_size
representing groundtruth boxes for each image in the batch
gt_class_targets_batch: a list of tensors with length batch_size, where
each tensor has shape [num_gt_boxes_i, classification_target_size] and
num_gt_boxes_i is the number of boxes in the ith boxlist of
gt_box_batch.
Returns:
batch_cls_targets: a tensor with shape [batch_size, num_anchors,
num_classes],
batch_cls_weights: a tensor with shape [batch_size, num_anchors],
batch_reg_targets: a tensor with shape [batch_size, num_anchors,
box_code_dimension]
batch_reg_weights: a tensor with shape [batch_size, num_anchors],
match_list: a list of matcher.Match objects encoding the match between
anchors and groundtruth boxes for each image of the batch,
with rows of the Match objects corresponding to groundtruth boxes
and columns corresponding to anchors.
Raises:
ValueError: if input list lengths are inconsistent, i.e.,
batch_size == len(gt_box_batch) == len(gt_class_targets_batch)
and batch_size == len(anchors_batch) unless anchors_batch is a single
BoxList.
"""
if not isinstance(anchors_batch, list):
anchors_batch = len(gt_box_batch) * [anchors_batch]
if not all(
isinstance(anchors, box_list.BoxList) for anchors in anchors_batch):
raise ValueError('anchors_batch must be a BoxList or list of BoxLists.')
if not (len(anchors_batch)
== len(gt_box_batch)
== len(gt_class_targets_batch)):
raise ValueError('batch size incompatible with lengths of anchors_batch, '
'gt_box_batch and gt_class_targets_batch.')
cls_targets_list = []
cls_weights_list = []
reg_targets_list = []
reg_weights_list = []
match_list = []
for anchors, gt_boxes, gt_class_targets in zip(
anchors_batch, gt_box_batch, gt_class_targets_batch):
(cls_targets, cls_weights, reg_targets,
reg_weights, match) = target_assigner.assign(
anchors, gt_boxes, gt_class_targets)
cls_targets_list.append(cls_targets)
cls_weights_list.append(cls_weights)
reg_targets_list.append(reg_targets)
reg_weights_list.append(reg_weights)
match_list.append(match)
batch_cls_targets = tf.stack(cls_targets_list)
batch_cls_weights = tf.stack(cls_weights_list)
batch_reg_targets = tf.stack(reg_targets_list)
batch_reg_weights = tf.stack(reg_weights_list)
return (batch_cls_targets, batch_cls_weights, batch_reg_targets,
batch_reg_weights, match_list)
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.core.target_assigner."""
import numpy as np
import tensorflow as tf
from object_detection.box_coders import mean_stddev_box_coder
from object_detection.core import box_list
from object_detection.core import region_similarity_calculator
from object_detection.core import target_assigner as targetassigner
from object_detection.matchers import argmax_matcher
from object_detection.matchers import bipartite_matcher
class TargetAssignerTest(tf.test.TestCase):
def test_assign_agnostic(self):
similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
matcher = bipartite_matcher.GreedyBipartiteMatcher()
box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
target_assigner = targetassigner.TargetAssigner(
similarity_calc, matcher, box_coder, unmatched_cls_target=None)
prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 1.0, 0.8],
[0, 0.5, .5, 1.0]])
prior_stddevs = tf.constant(3 * [4 * [.1]])
priors = box_list.BoxList(prior_means)
priors.add_field('stddev', prior_stddevs)
box_corners = [[0.0, 0.0, 0.5, 0.5], [0.5, 0.5, 0.9, 0.9]]
boxes = box_list.BoxList(tf.constant(box_corners))
exp_cls_targets = [[1], [1], [0]]
exp_cls_weights = [1, 1, 1]
exp_reg_targets = [[0, 0, 0, 0],
[0, 0, -1, 1],
[0, 0, 0, 0]]
exp_reg_weights = [1, 1, 0]
exp_matching_anchors = [0, 1]
result = target_assigner.assign(priors, boxes, num_valid_rows=2)
(cls_targets, cls_weights, reg_targets, reg_weights, match) = result
with self.test_session() as sess:
(cls_targets_out, cls_weights_out,
reg_targets_out, reg_weights_out, matching_anchors_out) = sess.run(
[cls_targets, cls_weights, reg_targets, reg_weights,
match.matched_column_indices()])
self.assertAllClose(cls_targets_out, exp_cls_targets)
self.assertAllClose(cls_weights_out, exp_cls_weights)
self.assertAllClose(reg_targets_out, exp_reg_targets)
self.assertAllClose(reg_weights_out, exp_reg_weights)
self.assertAllClose(matching_anchors_out, exp_matching_anchors)
self.assertEquals(cls_targets_out.dtype, np.float32)
self.assertEquals(cls_weights_out.dtype, np.float32)
self.assertEquals(reg_targets_out.dtype, np.float32)
self.assertEquals(reg_weights_out.dtype, np.float32)
self.assertEquals(matching_anchors_out.dtype, np.int32)
def test_assign_with_ignored_matches(self):
# Note: test is very similar to above. The third box matched with an IOU
# of 0.35, which is between the matched and unmatched threshold. This means
# That like above the expected classification targets are [1, 1, 0].
# Unlike above, the third target is ignored and therefore expected
# classification weights are [1, 1, 0].
similarity_calc = region_similarity_calculator.IouSimilarity()
matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
unmatched_threshold=0.3)
box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
target_assigner = targetassigner.TargetAssigner(
similarity_calc, matcher, box_coder)
prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 1.0, 0.8],
[0.0, 0.5, .9, 1.0]])
prior_stddevs = tf.constant(3 * [4 * [.1]])
priors = box_list.BoxList(prior_means)
priors.add_field('stddev', prior_stddevs)
box_corners = [[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 0.9, 0.9]]
boxes = box_list.BoxList(tf.constant(box_corners))
exp_cls_targets = [[1], [1], [0]]
exp_cls_weights = [1, 1, 0]
exp_reg_targets = [[0, 0, 0, 0],
[0, 0, -1, 1],
[0, 0, 0, 0]]
exp_reg_weights = [1, 1, 0]
exp_matching_anchors = [0, 1]
result = target_assigner.assign(priors, boxes)
(cls_targets, cls_weights, reg_targets, reg_weights, match) = result
with self.test_session() as sess:
(cls_targets_out, cls_weights_out,
reg_targets_out, reg_weights_out, matching_anchors_out) = sess.run(
[cls_targets, cls_weights, reg_targets, reg_weights,
match.matched_column_indices()])
self.assertAllClose(cls_targets_out, exp_cls_targets)
self.assertAllClose(cls_weights_out, exp_cls_weights)
self.assertAllClose(reg_targets_out, exp_reg_targets)
self.assertAllClose(reg_weights_out, exp_reg_weights)
self.assertAllClose(matching_anchors_out, exp_matching_anchors)
self.assertEquals(cls_targets_out.dtype, np.float32)
self.assertEquals(cls_weights_out.dtype, np.float32)
self.assertEquals(reg_targets_out.dtype, np.float32)
self.assertEquals(reg_weights_out.dtype, np.float32)
self.assertEquals(matching_anchors_out.dtype, np.int32)
def test_assign_multiclass(self):
similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
matcher = bipartite_matcher.GreedyBipartiteMatcher()
box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
unmatched_cls_target = tf.constant([1, 0, 0, 0, 0, 0, 0], tf.float32)
target_assigner = targetassigner.TargetAssigner(
similarity_calc, matcher, box_coder,
unmatched_cls_target=unmatched_cls_target)
prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 1.0, 0.8],
[0, 0.5, .5, 1.0],
[.75, 0, 1.0, .25]])
prior_stddevs = tf.constant(4 * [4 * [.1]])
priors = box_list.BoxList(prior_means)
priors.add_field('stddev', prior_stddevs)
box_corners = [[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 0.9, 0.9],
[.75, 0, .95, .27]]
boxes = box_list.BoxList(tf.constant(box_corners))
groundtruth_labels = tf.constant([[0, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 1, 0],
[0, 0, 0, 1, 0, 0, 0]], tf.float32)
exp_cls_targets = [[0, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 1, 0],
[1, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 1, 0, 0, 0]]
exp_cls_weights = [1, 1, 1, 1]
exp_reg_targets = [[0, 0, 0, 0],
[0, 0, -1, 1],
[0, 0, 0, 0],
[0, 0, -.5, .2]]
exp_reg_weights = [1, 1, 0, 1]
exp_matching_anchors = [0, 1, 3]
result = target_assigner.assign(priors, boxes, groundtruth_labels,
num_valid_rows=3)
(cls_targets, cls_weights, reg_targets, reg_weights, match) = result
with self.test_session() as sess:
(cls_targets_out, cls_weights_out,
reg_targets_out, reg_weights_out, matching_anchors_out) = sess.run(
[cls_targets, cls_weights, reg_targets, reg_weights,
match.matched_column_indices()])
self.assertAllClose(cls_targets_out, exp_cls_targets)
self.assertAllClose(cls_weights_out, exp_cls_weights)
self.assertAllClose(reg_targets_out, exp_reg_targets)
self.assertAllClose(reg_weights_out, exp_reg_weights)
self.assertAllClose(matching_anchors_out, exp_matching_anchors)
self.assertEquals(cls_targets_out.dtype, np.float32)
self.assertEquals(cls_weights_out.dtype, np.float32)
self.assertEquals(reg_targets_out.dtype, np.float32)
self.assertEquals(reg_weights_out.dtype, np.float32)
self.assertEquals(matching_anchors_out.dtype, np.int32)
def test_assign_multiclass_unequal_class_weights(self):
similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
matcher = bipartite_matcher.GreedyBipartiteMatcher()
box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
unmatched_cls_target = tf.constant([1, 0, 0, 0, 0, 0, 0], tf.float32)
target_assigner = targetassigner.TargetAssigner(
similarity_calc, matcher, box_coder,
positive_class_weight=1.0, negative_class_weight=0.5,
unmatched_cls_target=unmatched_cls_target)
prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 1.0, 0.8],
[0, 0.5, .5, 1.0],
[.75, 0, 1.0, .25]])
prior_stddevs = tf.constant(4 * [4 * [.1]])
priors = box_list.BoxList(prior_means)
priors.add_field('stddev', prior_stddevs)
box_corners = [[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 0.9, 0.9],
[.75, 0, .95, .27]]
boxes = box_list.BoxList(tf.constant(box_corners))
groundtruth_labels = tf.constant([[0, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 1, 0],
[0, 0, 0, 1, 0, 0, 0]], tf.float32)
exp_cls_weights = [1, 1, .5, 1]
result = target_assigner.assign(priors, boxes, groundtruth_labels,
num_valid_rows=3)
(_, cls_weights, _, _, _) = result
with self.test_session() as sess:
cls_weights_out = sess.run(cls_weights)
self.assertAllClose(cls_weights_out, exp_cls_weights)
def test_assign_multidimensional_class_targets(self):
similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
matcher = bipartite_matcher.GreedyBipartiteMatcher()
box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
unmatched_cls_target = tf.constant([[0, 0], [0, 0]], tf.float32)
target_assigner = targetassigner.TargetAssigner(
similarity_calc, matcher, box_coder,
unmatched_cls_target=unmatched_cls_target)
prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 1.0, 0.8],
[0, 0.5, .5, 1.0],
[.75, 0, 1.0, .25]])
prior_stddevs = tf.constant(4 * [4 * [.1]])
priors = box_list.BoxList(prior_means)
priors.add_field('stddev', prior_stddevs)
box_corners = [[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 0.9, 0.9],
[.75, 0, .95, .27]]
boxes = box_list.BoxList(tf.constant(box_corners))
groundtruth_labels = tf.constant([[[0, 1], [1, 0]],
[[1, 0], [0, 1]],
[[0, 1], [1, .5]]], tf.float32)
exp_cls_targets = [[[0, 1], [1, 0]],
[[1, 0], [0, 1]],
[[0, 0], [0, 0]],
[[0, 1], [1, .5]]]
exp_cls_weights = [1, 1, 1, 1]
exp_reg_targets = [[0, 0, 0, 0],
[0, 0, -1, 1],
[0, 0, 0, 0],
[0, 0, -.5, .2]]
exp_reg_weights = [1, 1, 0, 1]
exp_matching_anchors = [0, 1, 3]
result = target_assigner.assign(priors, boxes, groundtruth_labels,
num_valid_rows=3)
(cls_targets, cls_weights, reg_targets, reg_weights, match) = result
with self.test_session() as sess:
(cls_targets_out, cls_weights_out,
reg_targets_out, reg_weights_out, matching_anchors_out) = sess.run(
[cls_targets, cls_weights, reg_targets, reg_weights,
match.matched_column_indices()])
self.assertAllClose(cls_targets_out, exp_cls_targets)
self.assertAllClose(cls_weights_out, exp_cls_weights)
self.assertAllClose(reg_targets_out, exp_reg_targets)
self.assertAllClose(reg_weights_out, exp_reg_weights)
self.assertAllClose(matching_anchors_out, exp_matching_anchors)
self.assertEquals(cls_targets_out.dtype, np.float32)
self.assertEquals(cls_weights_out.dtype, np.float32)
self.assertEquals(reg_targets_out.dtype, np.float32)
self.assertEquals(reg_weights_out.dtype, np.float32)
self.assertEquals(matching_anchors_out.dtype, np.int32)
def test_assign_empty_groundtruth(self):
similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
matcher = bipartite_matcher.GreedyBipartiteMatcher()
box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
unmatched_cls_target = tf.constant([0, 0, 0], tf.float32)
target_assigner = targetassigner.TargetAssigner(
similarity_calc, matcher, box_coder,
unmatched_cls_target=unmatched_cls_target)
prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 1.0, 0.8],
[0, 0.5, .5, 1.0],
[.75, 0, 1.0, .25]])
prior_stddevs = tf.constant(4 * [4 * [.1]])
priors = box_list.BoxList(prior_means)
priors.add_field('stddev', prior_stddevs)
box_corners_expanded = tf.constant([[0.0, 0.0, 0.0, 0.0]])
box_corners = tf.slice(box_corners_expanded, [0, 0], [0, 4])
boxes = box_list.BoxList(box_corners)
groundtruth_labels_expanded = tf.constant([[0, 0, 0]], tf.float32)
groundtruth_labels = tf.slice(groundtruth_labels_expanded, [0, 0], [0, 3])
exp_cls_targets = [[0, 0, 0],
[0, 0, 0],
[0, 0, 0],
[0, 0, 0]]
exp_cls_weights = [1, 1, 1, 1]
exp_reg_targets = [[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0]]
exp_reg_weights = [0, 0, 0, 0]
exp_matching_anchors = []
result = target_assigner.assign(priors, boxes, groundtruth_labels)
(cls_targets, cls_weights, reg_targets, reg_weights, match) = result
with self.test_session() as sess:
(cls_targets_out, cls_weights_out,
reg_targets_out, reg_weights_out, matching_anchors_out) = sess.run(
[cls_targets, cls_weights, reg_targets, reg_weights,
match.matched_column_indices()])
self.assertAllClose(cls_targets_out, exp_cls_targets)
self.assertAllClose(cls_weights_out, exp_cls_weights)
self.assertAllClose(reg_targets_out, exp_reg_targets)
self.assertAllClose(reg_weights_out, exp_reg_weights)
self.assertAllClose(matching_anchors_out, exp_matching_anchors)
self.assertEquals(cls_targets_out.dtype, np.float32)
self.assertEquals(cls_weights_out.dtype, np.float32)
self.assertEquals(reg_targets_out.dtype, np.float32)
self.assertEquals(reg_weights_out.dtype, np.float32)
self.assertEquals(matching_anchors_out.dtype, np.int32)
def test_raises_error_on_invalid_groundtruth_labels(self):
similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
matcher = bipartite_matcher.GreedyBipartiteMatcher()
box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
unmatched_cls_target = tf.constant([[0, 0], [0, 0], [0, 0]], tf.float32)
target_assigner = targetassigner.TargetAssigner(
similarity_calc, matcher, box_coder,
unmatched_cls_target=unmatched_cls_target)
prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5]])
prior_stddevs = tf.constant([[1.0, 1.0, 1.0, 1.0]])
priors = box_list.BoxList(prior_means)
priors.add_field('stddev', prior_stddevs)
box_corners = [[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 0.9, 0.9],
[.75, 0, .95, .27]]
boxes = box_list.BoxList(tf.constant(box_corners))
groundtruth_labels = tf.constant([[[0, 1], [1, 0]]], tf.float32)
with self.assertRaises(ValueError):
target_assigner.assign(priors, boxes, groundtruth_labels,
num_valid_rows=3)
class BatchTargetAssignerTest(tf.test.TestCase):
def _get_agnostic_target_assigner(self):
similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
matcher = bipartite_matcher.GreedyBipartiteMatcher()
box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
return targetassigner.TargetAssigner(
similarity_calc, matcher, box_coder,
positive_class_weight=1.0,
negative_class_weight=1.0,
unmatched_cls_target=None)
def _get_multi_class_target_assigner(self, num_classes):
similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
matcher = bipartite_matcher.GreedyBipartiteMatcher()
box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
unmatched_cls_target = tf.constant([1] + num_classes * [0], tf.float32)
return targetassigner.TargetAssigner(
similarity_calc, matcher, box_coder,
positive_class_weight=1.0,
negative_class_weight=1.0,
unmatched_cls_target=unmatched_cls_target)
def _get_multi_dimensional_target_assigner(self, target_dimensions):
similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
matcher = bipartite_matcher.GreedyBipartiteMatcher()
box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
unmatched_cls_target = tf.constant(np.zeros(target_dimensions),
tf.float32)
return targetassigner.TargetAssigner(
similarity_calc, matcher, box_coder,
positive_class_weight=1.0,
negative_class_weight=1.0,
unmatched_cls_target=unmatched_cls_target)
def test_batch_assign_targets(self):
box_list1 = box_list.BoxList(tf.constant([[0., 0., 0.2, 0.2]]))
box_list2 = box_list.BoxList(tf.constant(
[[0, 0.25123152, 1, 1],
[0.015789, 0.0985, 0.55789, 0.3842]]
))
gt_box_batch = [box_list1, box_list2]
gt_class_targets = [None, None]
prior_means = tf.constant([[0, 0, .25, .25],
[0, .25, 1, 1],
[0, .1, .5, .5],
[.75, .75, 1, 1]])
prior_stddevs = tf.constant([[.1, .1, .1, .1],
[.1, .1, .1, .1],
[.1, .1, .1, .1],
[.1, .1, .1, .1]])
priors = box_list.BoxList(prior_means)
priors.add_field('stddev', prior_stddevs)
exp_reg_targets = [[[0, 0, -0.5, -0.5],
[0, 0, 0, 0],
[0, 0, 0, 0,],
[0, 0, 0, 0,],],
[[0, 0, 0, 0,],
[0, 0.01231521, 0, 0],
[0.15789001, -0.01500003, 0.57889998, -1.15799987],
[0, 0, 0, 0]]]
exp_cls_weights = [[1, 1, 1, 1],
[1, 1, 1, 1]]
exp_cls_targets = [[[1], [0], [0], [0]],
[[0], [1], [1], [0]]]
exp_reg_weights = [[1, 0, 0, 0],
[0, 1, 1, 0]]
exp_match_0 = [0]
exp_match_1 = [1, 2]
agnostic_target_assigner = self._get_agnostic_target_assigner()
(cls_targets, cls_weights, reg_targets, reg_weights,
match_list) = targetassigner.batch_assign_targets(
agnostic_target_assigner, priors, gt_box_batch, gt_class_targets)
self.assertTrue(isinstance(match_list, list) and len(match_list) == 2)
with self.test_session() as sess:
(cls_targets_out, cls_weights_out, reg_targets_out, reg_weights_out,
match_out_0, match_out_1) = sess.run([
cls_targets, cls_weights, reg_targets, reg_weights] + [
match.matched_column_indices() for match in match_list])
self.assertAllClose(cls_targets_out, exp_cls_targets)
self.assertAllClose(cls_weights_out, exp_cls_weights)
self.assertAllClose(reg_targets_out, exp_reg_targets)
self.assertAllClose(reg_weights_out, exp_reg_weights)
self.assertAllClose(match_out_0, exp_match_0)
self.assertAllClose(match_out_1, exp_match_1)
def test_batch_assign_multiclass_targets(self):
box_list1 = box_list.BoxList(tf.constant([[0., 0., 0.2, 0.2]]))
box_list2 = box_list.BoxList(tf.constant(
[[0, 0.25123152, 1, 1],
[0.015789, 0.0985, 0.55789, 0.3842]]
))
gt_box_batch = [box_list1, box_list2]
class_targets1 = tf.constant([[0, 1, 0, 0]], tf.float32)
class_targets2 = tf.constant([[0, 0, 0, 1],
[0, 0, 1, 0]], tf.float32)
gt_class_targets = [class_targets1, class_targets2]
prior_means = tf.constant([[0, 0, .25, .25],
[0, .25, 1, 1],
[0, .1, .5, .5],
[.75, .75, 1, 1]])
prior_stddevs = tf.constant([[.1, .1, .1, .1],
[.1, .1, .1, .1],
[.1, .1, .1, .1],
[.1, .1, .1, .1]])
priors = box_list.BoxList(prior_means)
priors.add_field('stddev', prior_stddevs)
exp_reg_targets = [[[0, 0, -0.5, -0.5],
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0]],
[[0, 0, 0, 0],
[0, 0.01231521, 0, 0],
[0.15789001, -0.01500003, 0.57889998, -1.15799987],
[0, 0, 0, 0]]]
exp_cls_weights = [[1, 1, 1, 1],
[1, 1, 1, 1]]
exp_cls_targets = [[[0, 1, 0, 0],
[1, 0, 0, 0],
[1, 0, 0, 0],
[1, 0, 0, 0]],
[[1, 0, 0, 0],
[0, 0, 0, 1],
[0, 0, 1, 0],
[1, 0, 0, 0]]]
exp_reg_weights = [[1, 0, 0, 0],
[0, 1, 1, 0]]
exp_match_0 = [0]
exp_match_1 = [1, 2]
multiclass_target_assigner = self._get_multi_class_target_assigner(
num_classes=3)
(cls_targets, cls_weights, reg_targets, reg_weights,
match_list) = targetassigner.batch_assign_targets(
multiclass_target_assigner, priors, gt_box_batch, gt_class_targets)
self.assertTrue(isinstance(match_list, list) and len(match_list) == 2)
with self.test_session() as sess:
(cls_targets_out, cls_weights_out, reg_targets_out, reg_weights_out,
match_out_0, match_out_1) = sess.run([
cls_targets, cls_weights, reg_targets, reg_weights] + [
match.matched_column_indices() for match in match_list])
self.assertAllClose(cls_targets_out, exp_cls_targets)
self.assertAllClose(cls_weights_out, exp_cls_weights)
self.assertAllClose(reg_targets_out, exp_reg_targets)
self.assertAllClose(reg_weights_out, exp_reg_weights)
self.assertAllClose(match_out_0, exp_match_0)
self.assertAllClose(match_out_1, exp_match_1)
def test_batch_assign_multidimensional_targets(self):
box_list1 = box_list.BoxList(tf.constant([[0., 0., 0.2, 0.2]]))
box_list2 = box_list.BoxList(tf.constant(
[[0, 0.25123152, 1, 1],
[0.015789, 0.0985, 0.55789, 0.3842]]
))
gt_box_batch = [box_list1, box_list2]
class_targets1 = tf.constant([[[0, 1, 1],
[1, 1, 0]]], tf.float32)
class_targets2 = tf.constant([[[0, 1, 1],
[1, 1, 0]],
[[0, 0, 1],
[0, 0, 1]]], tf.float32)
gt_class_targets = [class_targets1, class_targets2]
prior_means = tf.constant([[0, 0, .25, .25],
[0, .25, 1, 1],
[0, .1, .5, .5],
[.75, .75, 1, 1]])
prior_stddevs = tf.constant([[.1, .1, .1, .1],
[.1, .1, .1, .1],
[.1, .1, .1, .1],
[.1, .1, .1, .1]])
priors = box_list.BoxList(prior_means)
priors.add_field('stddev', prior_stddevs)
exp_reg_targets = [[[0, 0, -0.5, -0.5],
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0]],
[[0, 0, 0, 0],
[0, 0.01231521, 0, 0],
[0.15789001, -0.01500003, 0.57889998, -1.15799987],
[0, 0, 0, 0]]]
exp_cls_weights = [[1, 1, 1, 1],
[1, 1, 1, 1]]
exp_cls_targets = [[[[0., 1., 1.],
[1., 1., 0.]],
[[0., 0., 0.],
[0., 0., 0.]],
[[0., 0., 0.],
[0., 0., 0.]],
[[0., 0., 0.],
[0., 0., 0.]]],
[[[0., 0., 0.],
[0., 0., 0.]],
[[0., 1., 1.],
[1., 1., 0.]],
[[0., 0., 1.],
[0., 0., 1.]],
[[0., 0., 0.],
[0., 0., 0.]]]]
exp_reg_weights = [[1, 0, 0, 0],
[0, 1, 1, 0]]
exp_match_0 = [0]
exp_match_1 = [1, 2]
multiclass_target_assigner = self._get_multi_dimensional_target_assigner(
target_dimensions=(2, 3))
(cls_targets, cls_weights, reg_targets, reg_weights,
match_list) = targetassigner.batch_assign_targets(
multiclass_target_assigner, priors, gt_box_batch, gt_class_targets)
self.assertTrue(isinstance(match_list, list) and len(match_list) == 2)
with self.test_session() as sess:
(cls_targets_out, cls_weights_out, reg_targets_out, reg_weights_out,
match_out_0, match_out_1) = sess.run([
cls_targets, cls_weights, reg_targets, reg_weights] + [
match.matched_column_indices() for match in match_list])
self.assertAllClose(cls_targets_out, exp_cls_targets)
self.assertAllClose(cls_weights_out, exp_cls_weights)
self.assertAllClose(reg_targets_out, exp_reg_targets)
self.assertAllClose(reg_weights_out, exp_reg_weights)
self.assertAllClose(match_out_0, exp_match_0)
self.assertAllClose(match_out_1, exp_match_1)
def test_batch_assign_empty_groundtruth(self):
box_coords_expanded = tf.zeros((1, 4), tf.float32)
box_coords = tf.slice(box_coords_expanded, [0, 0], [0, 4])
box_list1 = box_list.BoxList(box_coords)
gt_box_batch = [box_list1]
prior_means = tf.constant([[0, 0, .25, .25],
[0, .25, 1, 1]])
prior_stddevs = tf.constant([[.1, .1, .1, .1],
[.1, .1, .1, .1]])
priors = box_list.BoxList(prior_means)
priors.add_field('stddev', prior_stddevs)
exp_reg_targets = [[[0, 0, 0, 0],
[0, 0, 0, 0]]]
exp_cls_weights = [[1, 1]]
exp_cls_targets = [[[1, 0, 0, 0],
[1, 0, 0, 0]]]
exp_reg_weights = [[0, 0]]
exp_match_0 = []
num_classes = 3
pad = 1
gt_class_targets = tf.zeros((0, num_classes + pad))
gt_class_targets_batch = [gt_class_targets]
multiclass_target_assigner = self._get_multi_class_target_assigner(
num_classes=3)
(cls_targets, cls_weights, reg_targets, reg_weights,
match_list) = targetassigner.batch_assign_targets(
multiclass_target_assigner, priors,
gt_box_batch, gt_class_targets_batch)
self.assertTrue(isinstance(match_list, list) and len(match_list) == 1)
with self.test_session() as sess:
(cls_targets_out, cls_weights_out, reg_targets_out, reg_weights_out,
match_out_0) = sess.run([
cls_targets, cls_weights, reg_targets, reg_weights] + [
match.matched_column_indices() for match in match_list])
self.assertAllClose(cls_targets_out, exp_cls_targets)
self.assertAllClose(cls_weights_out, exp_cls_weights)
self.assertAllClose(reg_targets_out, exp_reg_targets)
self.assertAllClose(reg_weights_out, exp_reg_weights)
self.assertAllClose(match_out_0, exp_match_0)
class CreateTargetAssignerTest(tf.test.TestCase):
def test_create_target_assigner(self):
"""Tests that named constructor gives working target assigners.
TODO: Make this test more general.
"""
corners = [[0.0, 0.0, 1.0, 1.0]]
groundtruth = box_list.BoxList(tf.constant(corners))
priors = box_list.BoxList(tf.constant(corners))
prior_stddevs = tf.constant([[1.0, 1.0, 1.0, 1.0]])
priors.add_field('stddev', prior_stddevs)
multibox_ta = (targetassigner
.create_target_assigner('Multibox', stage='proposal'))
multibox_ta.assign(priors, groundtruth)
# No tests on output, as that may vary arbitrarily as new target assigners
# are added. As long as it is constructed correctly and runs without errors,
# tests on the individual assigners cover correctness of the assignments.
anchors = box_list.BoxList(tf.constant(corners))
faster_rcnn_proposals_ta = (targetassigner
.create_target_assigner('FasterRCNN',
stage='proposal'))
faster_rcnn_proposals_ta.assign(anchors, groundtruth)
fast_rcnn_ta = (targetassigner
.create_target_assigner('FastRCNN'))
fast_rcnn_ta.assign(anchors, groundtruth)
faster_rcnn_detection_ta = (targetassigner
.create_target_assigner('FasterRCNN',
stage='detection'))
faster_rcnn_detection_ta.assign(anchors, groundtruth)
with self.assertRaises(ValueError):
targetassigner.create_target_assigner('InvalidDetector',
stage='invalid_stage')
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Convert raw PASCAL dataset to TFRecord for object_detection.
Example usage:
./create_pascal_tf_record --data_dir=/home/user/VOCdevkit \
--year=VOC2012 \
--output_path=/home/user/pascal.record
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import hashlib
import io
import logging
import os
from lxml import etree
import PIL.Image
import tensorflow as tf
from object_detection.utils import dataset_util
from object_detection.utils import label_map_util
flags = tf.app.flags
flags.DEFINE_string('data_dir', '', 'Root directory to raw PASCAL VOC dataset.')
flags.DEFINE_string('set', 'train', 'Convert training set, validation set or '
'merged set.')
flags.DEFINE_string('annotations_dir', 'Annotations',
'(Relative) path to annotations directory.')
flags.DEFINE_string('year', 'VOC2007', 'Desired challenge year.')
flags.DEFINE_string('output_path', '', 'Path to output TFRecord')
flags.DEFINE_string('label_map_path', 'data/pascal_label_map.pbtxt',
'Path to label map proto')
flags.DEFINE_boolean('ignore_difficult_instances', False, 'Whether to ignore '
'difficult instances')
FLAGS = flags.FLAGS
SETS = ['train', 'val', 'trainval', 'test']
YEARS = ['VOC2007', 'VOC2012', 'merged']
def dict_to_tf_example(data,
dataset_directory,
label_map_dict,
ignore_difficult_instances=False,
image_subdirectory='JPEGImages'):
"""Convert XML derived dict to tf.Example proto.
Notice that this function normalizes the bounding box coordinates provided
by the raw data.
Args:
data: dict holding PASCAL XML fields for a single image (obtained by
running dataset_util.recursive_parse_xml_to_dict)
dataset_directory: Path to root directory holding PASCAL dataset
label_map_dict: A map from string label names to integers ids.
ignore_difficult_instances: Whether to skip difficult instances in the
dataset (default: False).
image_subdirectory: String specifying subdirectory within the
PASCAL dataset directory holding the actual image data.
Returns:
example: The converted tf.Example.
Raises:
ValueError: if the image pointed to by data['filename'] is not a valid JPEG
"""
img_path = os.path.join(data['folder'], image_subdirectory, data['filename'])
full_path = os.path.join(dataset_directory, img_path)
with tf.gfile.GFile(full_path, 'rb') as fid:
encoded_jpg = fid.read()
encoded_jpg_io = io.BytesIO(encoded_jpg)
image = PIL.Image.open(encoded_jpg_io)
if image.format != 'JPEG':
raise ValueError('Image format not JPEG')
key = hashlib.sha256(encoded_jpg).hexdigest()
width = int(data['size']['width'])
height = int(data['size']['height'])
xmin = []
ymin = []
xmax = []
ymax = []
classes = []
classes_text = []
truncated = []
poses = []
difficult_obj = []
for obj in data['object']:
difficult = bool(int(obj['difficult']))
if ignore_difficult_instances and difficult:
continue
difficult_obj.append(int(difficult))
xmin.append(float(obj['bndbox']['xmin']) / width)
ymin.append(float(obj['bndbox']['ymin']) / height)
xmax.append(float(obj['bndbox']['xmax']) / width)
ymax.append(float(obj['bndbox']['ymax']) / height)
classes_text.append(obj['name'].encode('utf8'))
classes.append(label_map_dict[obj['name']])
truncated.append(int(obj['truncated']))
poses.append(obj['pose'].encode('utf8'))
example = tf.train.Example(features=tf.train.Features(feature={
'image/height': dataset_util.int64_feature(height),
'image/width': dataset_util.int64_feature(width),
'image/filename': dataset_util.bytes_feature(
data['filename'].encode('utf8')),
'image/source_id': dataset_util.bytes_feature(
data['filename'].encode('utf8')),
'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
'image/encoded': dataset_util.bytes_feature(encoded_jpg),
'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
'image/object/class/label': dataset_util.int64_list_feature(classes),
'image/object/difficult': dataset_util.int64_list_feature(difficult_obj),
'image/object/truncated': dataset_util.int64_list_feature(truncated),
'image/object/view': dataset_util.bytes_list_feature(poses),
}))
return example
def main(_):
if FLAGS.set not in SETS:
raise ValueError('set must be in : {}'.format(SETS))
if FLAGS.year not in YEARS:
raise ValueError('year must be in : {}'.format(YEARS))
data_dir = FLAGS.data_dir
years = ['VOC2007', 'VOC2012']
if FLAGS.year != 'merged':
years = [FLAGS.year]
writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)
for year in years:
logging.info('Reading from PASCAL %s dataset.', year)
examples_path = os.path.join(data_dir, year, 'ImageSets', 'Main',
'aeroplane_' + FLAGS.set + '.txt')
annotations_dir = os.path.join(data_dir, year, FLAGS.annotations_dir)
examples_list = dataset_util.read_examples_list(examples_path)
for idx, example in enumerate(examples_list):
if idx % 100 == 0:
logging.info('On image %d of %d', idx, len(examples_list))
path = os.path.join(annotations_dir, example + '.xml')
with tf.gfile.GFile(path, 'r') as fid:
xml_str = fid.read()
xml = etree.fromstring(xml_str)
data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
tf_example = dict_to_tf_example(data, FLAGS.data_dir, label_map_dict,
FLAGS.ignore_difficult_instances)
writer.write(tf_example.SerializeToString())
writer.close()
if __name__ == '__main__':
tf.app.run()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Test for create_pascal_tf_record.py."""
import os
import numpy as np
import PIL.Image
import tensorflow as tf
from object_detection import create_pascal_tf_record
class DictToTFExampleTest(tf.test.TestCase):
def _assertProtoEqual(self, proto_field, expectation):
"""Helper function to assert if a proto field equals some value.
Args:
proto_field: The protobuf field to compare.
expectation: The expected value of the protobuf field.
"""
proto_list = [p for p in proto_field]
self.assertListEqual(proto_list, expectation)
def test_dict_to_tf_example(self):
image_file_name = 'tmp_image.jpg'
image_data = np.random.rand(256, 256, 3)
save_path = os.path.join(self.get_temp_dir(), image_file_name)
image = PIL.Image.fromarray(image_data, 'RGB')
image.save(save_path)
data = {
'folder': '',
'filename': image_file_name,
'size': {
'height': 256,
'width': 256,
},
'object': [
{
'difficult': 1,
'bndbox': {
'xmin': 64,
'ymin': 64,
'xmax': 192,
'ymax': 192,
},
'name': 'person',
'truncated': 0,
'pose': '',
},
],
}
label_map_dict = {
'background': 0,
'person': 1,
'notperson': 2,
}
example = create_pascal_tf_record.dict_to_tf_example(
data, self.get_temp_dir(), label_map_dict, image_subdirectory='')
self._assertProtoEqual(
example.features.feature['image/height'].int64_list.value, [256])
self._assertProtoEqual(
example.features.feature['image/width'].int64_list.value, [256])
self._assertProtoEqual(
example.features.feature['image/filename'].bytes_list.value,
[image_file_name])
self._assertProtoEqual(
example.features.feature['image/source_id'].bytes_list.value,
[image_file_name])
self._assertProtoEqual(
example.features.feature['image/format'].bytes_list.value, ['jpeg'])
self._assertProtoEqual(
example.features.feature['image/object/bbox/xmin'].float_list.value,
[0.25])
self._assertProtoEqual(
example.features.feature['image/object/bbox/ymin'].float_list.value,
[0.25])
self._assertProtoEqual(
example.features.feature['image/object/bbox/xmax'].float_list.value,
[0.75])
self._assertProtoEqual(
example.features.feature['image/object/bbox/ymax'].float_list.value,
[0.75])
self._assertProtoEqual(
example.features.feature['image/object/class/text'].bytes_list.value,
['person'])
self._assertProtoEqual(
example.features.feature['image/object/class/label'].int64_list.value,
[1])
self._assertProtoEqual(
example.features.feature['image/object/difficult'].int64_list.value,
[1])
self._assertProtoEqual(
example.features.feature['image/object/truncated'].int64_list.value,
[0])
self._assertProtoEqual(
example.features.feature['image/object/view'].bytes_list.value, [''])
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Convert the Oxford pet dataset to TFRecord for object_detection.
See: O. M. Parkhi, A. Vedaldi, A. Zisserman, C. V. Jawahar
Cats and Dogs
IEEE Conference on Computer Vision and Pattern Recognition, 2012
http://www.robots.ox.ac.uk/~vgg/data/pets/
Example usage:
./create_pet_tf_record --data_dir=/home/user/pet \
--output_dir=/home/user/pet/output
"""
import hashlib
import io
import logging
import os
import random
import re
from lxml import etree
import PIL.Image
import tensorflow as tf
from object_detection.utils import dataset_util
from object_detection.utils import label_map_util
flags = tf.app.flags
flags.DEFINE_string('data_dir', '', 'Root directory to raw pet dataset.')
flags.DEFINE_string('output_dir', '', 'Path to directory to output TFRecords.')
flags.DEFINE_string('label_map_path', 'data/pet_label_map.pbtxt',
'Path to label map proto')
FLAGS = flags.FLAGS
def get_class_name_from_filename(file_name):
"""Gets the class name from a file.
Args:
file_name: The file name to get the class name from.
ie. "american_pit_bull_terrier_105.jpg"
Returns:
example: The converted tf.Example.
"""
match = re.match(r'([A-Za-z_]+)(_[0-9]+\.jpg)', file_name, re.I)
return match.groups()[0]
def dict_to_tf_example(data,
label_map_dict,
image_subdirectory,
ignore_difficult_instances=False):
"""Convert XML derived dict to tf.Example proto.
Notice that this function normalizes the bounding box coordinates provided
by the raw data.
Args:
data: dict holding PASCAL XML fields for a single image (obtained by
running dataset_util.recursive_parse_xml_to_dict)
label_map_dict: A map from string label names to integers ids.
image_subdirectory: String specifying subdirectory within the
Pascal dataset directory holding the actual image data.
ignore_difficult_instances: Whether to skip difficult instances in the
dataset (default: False).
Returns:
example: The converted tf.Example.
Raises:
ValueError: if the image pointed to by data['filename'] is not a valid JPEG
"""
img_path = os.path.join(image_subdirectory, data['filename'])
with tf.gfile.GFile(img_path, 'rb') as fid:
encoded_jpg = fid.read()
encoded_jpg_io = io.BytesIO(encoded_jpg)
image = PIL.Image.open(encoded_jpg_io)
if image.format != 'JPEG':
raise ValueError('Image format not JPEG')
key = hashlib.sha256(encoded_jpg).hexdigest()
width = int(data['size']['width'])
height = int(data['size']['height'])
xmin = []
ymin = []
xmax = []
ymax = []
classes = []
classes_text = []
truncated = []
poses = []
difficult_obj = []
for obj in data['object']:
difficult = bool(int(obj['difficult']))
if ignore_difficult_instances and difficult:
continue
difficult_obj.append(int(difficult))
xmin.append(float(obj['bndbox']['xmin']) / width)
ymin.append(float(obj['bndbox']['ymin']) / height)
xmax.append(float(obj['bndbox']['xmax']) / width)
ymax.append(float(obj['bndbox']['ymax']) / height)
class_name = get_class_name_from_filename(data['filename'])
classes_text.append(class_name.encode('utf8'))
classes.append(label_map_dict[class_name])
truncated.append(int(obj['truncated']))
poses.append(obj['pose'].encode('utf8'))
example = tf.train.Example(features=tf.train.Features(feature={
'image/height': dataset_util.int64_feature(height),
'image/width': dataset_util.int64_feature(width),
'image/filename': dataset_util.bytes_feature(
data['filename'].encode('utf8')),
'image/source_id': dataset_util.bytes_feature(
data['filename'].encode('utf8')),
'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
'image/encoded': dataset_util.bytes_feature(encoded_jpg),
'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
'image/object/class/label': dataset_util.int64_list_feature(classes),
'image/object/difficult': dataset_util.int64_list_feature(difficult_obj),
'image/object/truncated': dataset_util.int64_list_feature(truncated),
'image/object/view': dataset_util.bytes_list_feature(poses),
}))
return example
def create_tf_record(output_filename,
label_map_dict,
annotations_dir,
image_dir,
examples):
"""Creates a TFRecord file from examples.
Args:
output_filename: Path to where output file is saved.
label_map_dict: The label map dictionary.
annotations_dir: Directory where annotation files are stored.
image_dir: Directory where image files are stored.
examples: Examples to parse and save to tf record.
"""
writer = tf.python_io.TFRecordWriter(output_filename)
for idx, example in enumerate(examples):
if idx % 100 == 0:
logging.info('On image %d of %d', idx, len(examples))
path = os.path.join(annotations_dir, 'xmls', example + '.xml')
if not os.path.exists(path):
logging.warning('Could not find %s, ignoring example.', path)
continue
with tf.gfile.GFile(path, 'r') as fid:
xml_str = fid.read()
xml = etree.fromstring(xml_str)
data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
tf_example = dict_to_tf_example(data, label_map_dict, image_dir)
writer.write(tf_example.SerializeToString())
writer.close()
# TODO: Add test for pet/PASCAL main files.
def main(_):
data_dir = FLAGS.data_dir
label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)
logging.info('Reading from Pet dataset.')
image_dir = os.path.join(data_dir, 'images')
annotations_dir = os.path.join(data_dir, 'annotations')
examples_path = os.path.join(annotations_dir, 'trainval.txt')
examples_list = dataset_util.read_examples_list(examples_path)
# Test images are not included in the downloaded data set, so we shall perform
# our own split.
random.seed(42)
random.shuffle(examples_list)
num_examples = len(examples_list)
num_train = int(0.7 * num_examples)
train_examples = examples_list[:num_train]
val_examples = examples_list[num_train:]
logging.info('%d training and %d validation examples.',
len(train_examples), len(val_examples))
train_output_path = os.path.join(FLAGS.output_dir, 'pet_train.record')
val_output_path = os.path.join(FLAGS.output_dir, 'pet_val.record')
create_tf_record(train_output_path, label_map_dict, annotations_dir,
image_dir, train_examples)
create_tf_record(val_output_path, label_map_dict, annotations_dir,
image_dir, val_examples)
if __name__ == '__main__':
tf.app.run()
item {
name: "/m/01g317"
id: 1
display_name: "person"
}
item {
name: "/m/0199g"
id: 2
display_name: "bicycle"
}
item {
name: "/m/0k4j"
id: 3
display_name: "car"
}
item {
name: "/m/04_sv"
id: 4
display_name: "motorcycle"
}
item {
name: "/m/05czz6l"
id: 5
display_name: "airplane"
}
item {
name: "/m/01bjv"
id: 6
display_name: "bus"
}
item {
name: "/m/07jdr"
id: 7
display_name: "train"
}
item {
name: "/m/07r04"
id: 8
display_name: "truck"
}
item {
name: "/m/019jd"
id: 9
display_name: "boat"
}
item {
name: "/m/015qff"
id: 10
display_name: "traffic light"
}
item {
name: "/m/01pns0"
id: 11
display_name: "fire hydrant"
}
item {
name: "/m/02pv19"
id: 13
display_name: "stop sign"
}
item {
name: "/m/015qbp"
id: 14
display_name: "parking meter"
}
item {
name: "/m/0cvnqh"
id: 15
display_name: "bench"
}
item {
name: "/m/015p6"
id: 16
display_name: "bird"
}
item {
name: "/m/01yrx"
id: 17
display_name: "cat"
}
item {
name: "/m/0bt9lr"
id: 18
display_name: "dog"
}
item {
name: "/m/03k3r"
id: 19
display_name: "horse"
}
item {
name: "/m/07bgp"
id: 20
display_name: "sheep"
}
item {
name: "/m/01xq0k1"
id: 21
display_name: "cow"
}
item {
name: "/m/0bwd_0j"
id: 22
display_name: "elephant"
}
item {
name: "/m/01dws"
id: 23
display_name: "bear"
}
item {
name: "/m/0898b"
id: 24
display_name: "zebra"
}
item {
name: "/m/03bk1"
id: 25
display_name: "giraffe"
}
item {
name: "/m/01940j"
id: 27
display_name: "backpack"
}
item {
name: "/m/0hnnb"
id: 28
display_name: "umbrella"
}
item {
name: "/m/080hkjn"
id: 31
display_name: "handbag"
}
item {
name: "/m/01rkbr"
id: 32
display_name: "tie"
}
item {
name: "/m/01s55n"
id: 33
display_name: "suitcase"
}
item {
name: "/m/02wmf"
id: 34
display_name: "frisbee"
}
item {
name: "/m/071p9"
id: 35
display_name: "skis"
}
item {
name: "/m/06__v"
id: 36
display_name: "snowboard"
}
item {
name: "/m/018xm"
id: 37
display_name: "sports ball"
}
item {
name: "/m/02zt3"
id: 38
display_name: "kite"
}
item {
name: "/m/03g8mr"
id: 39
display_name: "baseball bat"
}
item {
name: "/m/03grzl"
id: 40
display_name: "baseball glove"
}
item {
name: "/m/06_fw"
id: 41
display_name: "skateboard"
}
item {
name: "/m/019w40"
id: 42
display_name: "surfboard"
}
item {
name: "/m/0dv9c"
id: 43
display_name: "tennis racket"
}
item {
name: "/m/04dr76w"
id: 44
display_name: "bottle"
}
item {
name: "/m/09tvcd"
id: 46
display_name: "wine glass"
}
item {
name: "/m/08gqpm"
id: 47
display_name: "cup"
}
item {
name: "/m/0dt3t"
id: 48
display_name: "fork"
}
item {
name: "/m/04ctx"
id: 49
display_name: "knife"
}
item {
name: "/m/0cmx8"
id: 50
display_name: "spoon"
}
item {
name: "/m/04kkgm"
id: 51
display_name: "bowl"
}
item {
name: "/m/09qck"
id: 52
display_name: "banana"
}
item {
name: "/m/014j1m"
id: 53
display_name: "apple"
}
item {
name: "/m/0l515"
id: 54
display_name: "sandwich"
}
item {
name: "/m/0cyhj_"
id: 55
display_name: "orange"
}
item {
name: "/m/0hkxq"
id: 56
display_name: "broccoli"
}
item {
name: "/m/0fj52s"
id: 57
display_name: "carrot"
}
item {
name: "/m/01b9xk"
id: 58
display_name: "hot dog"
}
item {
name: "/m/0663v"
id: 59
display_name: "pizza"
}
item {
name: "/m/0jy4k"
id: 60
display_name: "donut"
}
item {
name: "/m/0fszt"
id: 61
display_name: "cake"
}
item {
name: "/m/01mzpv"
id: 62
display_name: "chair"
}
item {
name: "/m/02crq1"
id: 63
display_name: "couch"
}
item {
name: "/m/03fp41"
id: 64
display_name: "potted plant"
}
item {
name: "/m/03ssj5"
id: 65
display_name: "bed"
}
item {
name: "/m/04bcr3"
id: 67
display_name: "dining table"
}
item {
name: "/m/09g1w"
id: 70
display_name: "toilet"
}
item {
name: "/m/07c52"
id: 72
display_name: "tv"
}
item {
name: "/m/01c648"
id: 73
display_name: "laptop"
}
item {
name: "/m/020lf"
id: 74
display_name: "mouse"
}
item {
name: "/m/0qjjc"
id: 75
display_name: "remote"
}
item {
name: "/m/01m2v"
id: 76
display_name: "keyboard"
}
item {
name: "/m/050k8"
id: 77
display_name: "cell phone"
}
item {
name: "/m/0fx9l"
id: 78
display_name: "microwave"
}
item {
name: "/m/029bxz"
id: 79
display_name: "oven"
}
item {
name: "/m/01k6s3"
id: 80
display_name: "toaster"
}
item {
name: "/m/0130jx"
id: 81
display_name: "sink"
}
item {
name: "/m/040b_t"
id: 82
display_name: "refrigerator"
}
item {
name: "/m/0bt_c3"
id: 84
display_name: "book"
}
item {
name: "/m/01x3z"
id: 85
display_name: "clock"
}
item {
name: "/m/02s195"
id: 86
display_name: "vase"
}
item {
name: "/m/01lsmm"
id: 87
display_name: "scissors"
}
item {
name: "/m/0kmg4"
id: 88
display_name: "teddy bear"
}
item {
name: "/m/03wvsk"
id: 89
display_name: "hair drier"
}
item {
name: "/m/012xff"
id: 90
display_name: "toothbrush"
}
item {
id: 1
name: 'aeroplane'
}
item {
id: 2
name: 'bicycle'
}
item {
id: 3
name: 'bird'
}
item {
id: 4
name: 'boat'
}
item {
id: 5
name: 'bottle'
}
item {
id: 6
name: 'bus'
}
item {
id: 7
name: 'car'
}
item {
id: 8
name: 'cat'
}
item {
id: 9
name: 'chair'
}
item {
id: 10
name: 'cow'
}
item {
id: 11
name: 'diningtable'
}
item {
id: 12
name: 'dog'
}
item {
id: 13
name: 'horse'
}
item {
id: 14
name: 'motorbike'
}
item {
id: 15
name: 'person'
}
item {
id: 16
name: 'pottedplant'
}
item {
id: 17
name: 'sheep'
}
item {
id: 18
name: 'sofa'
}
item {
id: 19
name: 'train'
}
item {
id: 20
name: 'tvmonitor'
}
item {
id: 1
name: 'Abyssinian'
}
item {
id: 2
name: 'american_bulldog'
}
item {
id: 3
name: 'american_pit_bull_terrier'
}
item {
id: 4
name: 'basset_hound'
}
item {
id: 5
name: 'beagle'
}
item {
id: 6
name: 'Bengal'
}
item {
id: 7
name: 'Birman'
}
item {
id: 8
name: 'Bombay'
}
item {
id: 9
name: 'boxer'
}
item {
id: 10
name: 'British_Shorthair'
}
item {
id: 11
name: 'chihuahua'
}
item {
id: 12
name: 'Egyptian_Mau'
}
item {
id: 13
name: 'english_cocker_spaniel'
}
item {
id: 14
name: 'english_setter'
}
item {
id: 15
name: 'german_shorthaired'
}
item {
id: 16
name: 'great_pyrenees'
}
item {
id: 17
name: 'havanese'
}
item {
id: 18
name: 'japanese_chin'
}
item {
id: 19
name: 'keeshond'
}
item {
id: 20
name: 'leonberger'
}
item {
id: 21
name: 'Maine_Coon'
}
item {
id: 22
name: 'miniature_pinscher'
}
item {
id: 23
name: 'newfoundland'
}
item {
id: 24
name: 'Persian'
}
item {
id: 25
name: 'pomeranian'
}
item {
id: 26
name: 'pug'
}
item {
id: 27
name: 'Ragdoll'
}
item {
id: 28
name: 'Russian_Blue'
}
item {
id: 29
name: 'saint_bernard'
}
item {
id: 30
name: 'samoyed'
}
item {
id: 31
name: 'scottish_terrier'
}
item {
id: 32
name: 'shiba_inu'
}
item {
id: 33
name: 'Siamese'
}
item {
id: 34
name: 'Sphynx'
}
item {
id: 35
name: 'staffordshire_bull_terrier'
}
item {
id: 36
name: 'wheaten_terrier'
}
item {
id: 37
name: 'yorkshire_terrier'
}
# Tensorflow Object Detection API: data decoders.
package(
default_visibility = ["//visibility:public"],
)
licenses(["notice"])
# Apache 2.0
py_library(
name = "tf_example_decoder",
srcs = ["tf_example_decoder.py"],
deps = [
"//tensorflow",
"//tensorflow_models/object_detection/core:data_decoder",
"//tensorflow_models/object_detection/core:standard_fields",
],
)
py_test(
name = "tf_example_decoder_test",
srcs = ["tf_example_decoder_test.py"],
deps = [
":tf_example_decoder",
"//tensorflow",
"//tensorflow_models/object_detection/core:standard_fields",
],
)
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tensorflow Example proto decoder for object detection.
A decoder to decode string tensors containing serialized tensorflow.Example
protos for object detection.
"""
import tensorflow as tf
from object_detection.core import data_decoder
from object_detection.core import standard_fields as fields
slim_example_decoder = tf.contrib.slim.tfexample_decoder
class TfExampleDecoder(data_decoder.DataDecoder):
"""Tensorflow Example proto decoder."""
def __init__(self):
"""Constructor sets keys_to_features and items_to_handlers."""
self.keys_to_features = {
'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
'image/format': tf.FixedLenFeature((), tf.string, default_value='jpeg'),
'image/filename': tf.FixedLenFeature((), tf.string, default_value=''),
'image/key/sha256': tf.FixedLenFeature((), tf.string, default_value=''),
'image/source_id': tf.FixedLenFeature((), tf.string, default_value=''),
'image/height': tf.FixedLenFeature((), tf.int64, 1),
'image/width': tf.FixedLenFeature((), tf.int64, 1),
# Object boxes and classes.
'image/object/bbox/xmin': tf.VarLenFeature(tf.float32),
'image/object/bbox/xmax': tf.VarLenFeature(tf.float32),
'image/object/bbox/ymin': tf.VarLenFeature(tf.float32),
'image/object/bbox/ymax': tf.VarLenFeature(tf.float32),
'image/object/class/label': tf.VarLenFeature(tf.int64),
'image/object/area': tf.VarLenFeature(tf.float32),
'image/object/is_crowd': tf.VarLenFeature(tf.int64),
'image/object/difficult': tf.VarLenFeature(tf.int64),
# Instance masks and classes.
'image/segmentation/object': tf.VarLenFeature(tf.int64),
'image/segmentation/object/class': tf.VarLenFeature(tf.int64)
}
self.items_to_handlers = {
fields.InputDataFields.image: slim_example_decoder.Image(
image_key='image/encoded', format_key='image/format', channels=3),
fields.InputDataFields.source_id: (
slim_example_decoder.Tensor('image/source_id')),
fields.InputDataFields.key: (
slim_example_decoder.Tensor('image/key/sha256')),
fields.InputDataFields.filename: (
slim_example_decoder.Tensor('image/filename')),
# Object boxes and classes.
fields.InputDataFields.groundtruth_boxes: (
slim_example_decoder.BoundingBox(
['ymin', 'xmin', 'ymax', 'xmax'], 'image/object/bbox/')),
fields.InputDataFields.groundtruth_classes: (
slim_example_decoder.Tensor('image/object/class/label')),
fields.InputDataFields.groundtruth_area: slim_example_decoder.Tensor(
'image/object/area'),
fields.InputDataFields.groundtruth_is_crowd: (
slim_example_decoder.Tensor('image/object/is_crowd')),
fields.InputDataFields.groundtruth_difficult: (
slim_example_decoder.Tensor('image/object/difficult')),
# Instance masks and classes.
fields.InputDataFields.groundtruth_instance_masks: (
slim_example_decoder.ItemHandlerCallback(
['image/segmentation/object', 'image/height', 'image/width'],
self._reshape_instance_masks)),
fields.InputDataFields.groundtruth_instance_classes: (
slim_example_decoder.Tensor('image/segmentation/object/class')),
}
def decode(self, tf_example_string_tensor):
"""Decodes serialized tensorflow example and returns a tensor dictionary.
Args:
tf_example_string_tensor: a string tensor holding a serialized tensorflow
example proto.
Returns:
A dictionary of the following tensors.
fields.InputDataFields.image - 3D uint8 tensor of shape [None, None, 3]
containing image.
fields.InputDataFields.source_id - string tensor containing original
image id.
fields.InputDataFields.key - string tensor with unique sha256 hash key.
fields.InputDataFields.filename - string tensor with original dataset
filename.
fields.InputDataFields.groundtruth_boxes - 2D float32 tensor of shape
[None, 4] containing box corners.
fields.InputDataFields.groundtruth_classes - 1D int64 tensor of shape
[None] containing classes for the boxes.
fields.InputDataFields.groundtruth_area - 1D float32 tensor of shape
[None] containing containing object mask area in pixel squared.
fields.InputDataFields.groundtruth_is_crowd - 1D bool tensor of shape
[None] indicating if the boxes enclose a crowd.
fields.InputDataFields.groundtruth_difficult - 1D bool tensor of shape
[None] indicating if the boxes represent `difficult` instances.
fields.InputDataFields.groundtruth_instance_masks - 3D int64 tensor of
shape [None, None, None] containing instance masks.
fields.InputDataFields.groundtruth_instance_classes - 1D int64 tensor
of shape [None] containing classes for the instance masks.
"""
serialized_example = tf.reshape(tf_example_string_tensor, shape=[])
decoder = slim_example_decoder.TFExampleDecoder(self.keys_to_features,
self.items_to_handlers)
keys = decoder.list_items()
tensors = decoder.decode(serialized_example, items=keys)
tensor_dict = dict(zip(keys, tensors))
is_crowd = fields.InputDataFields.groundtruth_is_crowd
tensor_dict[is_crowd] = tf.cast(tensor_dict[is_crowd], dtype=tf.bool)
tensor_dict[fields.InputDataFields.image].set_shape([None, None, 3])
return tensor_dict
def _reshape_instance_masks(self, keys_to_tensors):
"""Reshape instance segmentation masks.
The instance segmentation masks are reshaped to [num_instances, height,
width] and cast to boolean type to save memory.
Args:
keys_to_tensors: a dictionary from keys to tensors.
Returns:
A 3-D boolean tensor of shape [num_instances, height, width].
"""
masks = keys_to_tensors['image/segmentation/object']
if isinstance(masks, tf.SparseTensor):
masks = tf.sparse_tensor_to_dense(masks)
height = keys_to_tensors['image/height']
width = keys_to_tensors['image/width']
to_shape = tf.cast(tf.stack([-1, height, width]), tf.int32)
return tf.cast(tf.reshape(masks, to_shape), tf.bool)
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.data_decoders.tf_example_decoder."""
import numpy as np
import tensorflow as tf
from object_detection.core import standard_fields as fields
from object_detection.data_decoders import tf_example_decoder
class TfExampleDecoderTest(tf.test.TestCase):
def _EncodeImage(self, image_tensor, encoding_type='jpeg'):
with self.test_session():
if encoding_type == 'jpeg':
image_encoded = tf.image.encode_jpeg(tf.constant(image_tensor)).eval()
elif encoding_type == 'png':
image_encoded = tf.image.encode_png(tf.constant(image_tensor)).eval()
else:
raise ValueError('Invalid encoding type.')
return image_encoded
def _DecodeImage(self, image_encoded, encoding_type='jpeg'):
with self.test_session():
if encoding_type == 'jpeg':
image_decoded = tf.image.decode_jpeg(tf.constant(image_encoded)).eval()
elif encoding_type == 'png':
image_decoded = tf.image.decode_png(tf.constant(image_encoded)).eval()
else:
raise ValueError('Invalid encoding type.')
return image_decoded
def _Int64Feature(self, value):
return tf.train.Feature(int64_list=tf.train.Int64List(value=value))
def _FloatFeature(self, value):
return tf.train.Feature(float_list=tf.train.FloatList(value=value))
def _BytesFeature(self, value):
return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
def testDecodeJpegImage(self):
image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor)
decoded_jpeg = self._DecodeImage(encoded_jpeg)
example = tf.train.Example(features=tf.train.Features(feature={
'image/encoded': self._BytesFeature(encoded_jpeg),
'image/format': self._BytesFeature('jpeg'),
'image/source_id': self._BytesFeature('image_id'),
})).SerializeToString()
example_decoder = tf_example_decoder.TfExampleDecoder()
tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
self.assertAllEqual((tensor_dict[fields.InputDataFields.image].
get_shape().as_list()), [None, None, 3])
with self.test_session() as sess:
tensor_dict = sess.run(tensor_dict)
self.assertAllEqual(decoded_jpeg, tensor_dict[fields.InputDataFields.image])
self.assertEqual('image_id', tensor_dict[fields.InputDataFields.source_id])
def testDecodeImageKeyAndFilename(self):
image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor)
example = tf.train.Example(features=tf.train.Features(feature={
'image/encoded': self._BytesFeature(encoded_jpeg),
'image/key/sha256': self._BytesFeature('abc'),
'image/filename': self._BytesFeature('filename')
})).SerializeToString()
example_decoder = tf_example_decoder.TfExampleDecoder()
tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
with self.test_session() as sess:
tensor_dict = sess.run(tensor_dict)
self.assertEqual('abc', tensor_dict[fields.InputDataFields.key])
self.assertEqual('filename', tensor_dict[fields.InputDataFields.filename])
def testDecodePngImage(self):
image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
encoded_png = self._EncodeImage(image_tensor, encoding_type='png')
decoded_png = self._DecodeImage(encoded_png, encoding_type='png')
example = tf.train.Example(features=tf.train.Features(feature={
'image/encoded': self._BytesFeature(encoded_png),
'image/format': self._BytesFeature('png'),
'image/source_id': self._BytesFeature('image_id')
})).SerializeToString()
example_decoder = tf_example_decoder.TfExampleDecoder()
tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
self.assertAllEqual((tensor_dict[fields.InputDataFields.image].
get_shape().as_list()), [None, None, 3])
with self.test_session() as sess:
tensor_dict = sess.run(tensor_dict)
self.assertAllEqual(decoded_png, tensor_dict[fields.InputDataFields.image])
self.assertEqual('image_id', tensor_dict[fields.InputDataFields.source_id])
def testDecodeBoundingBox(self):
image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor)
bbox_ymins = [0.0, 4.0]
bbox_xmins = [1.0, 5.0]
bbox_ymaxs = [2.0, 6.0]
bbox_xmaxs = [3.0, 7.0]
example = tf.train.Example(features=tf.train.Features(feature={
'image/encoded': self._BytesFeature(encoded_jpeg),
'image/format': self._BytesFeature('jpeg'),
'image/object/bbox/ymin': self._FloatFeature(bbox_ymins),
'image/object/bbox/xmin': self._FloatFeature(bbox_xmins),
'image/object/bbox/ymax': self._FloatFeature(bbox_ymaxs),
'image/object/bbox/xmax': self._FloatFeature(bbox_xmaxs),
})).SerializeToString()
example_decoder = tf_example_decoder.TfExampleDecoder()
tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_boxes].
get_shape().as_list()), [None, 4])
with self.test_session() as sess:
tensor_dict = sess.run(tensor_dict)
expected_boxes = np.vstack([bbox_ymins, bbox_xmins,
bbox_ymaxs, bbox_xmaxs]).transpose()
self.assertAllEqual(expected_boxes,
tensor_dict[fields.InputDataFields.groundtruth_boxes])
def testDecodeObjectLabel(self):
image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor)
bbox_classes = [0, 1]
example = tf.train.Example(features=tf.train.Features(feature={
'image/encoded': self._BytesFeature(encoded_jpeg),
'image/format': self._BytesFeature('jpeg'),
'image/object/class/label': self._Int64Feature(bbox_classes),
})).SerializeToString()
example_decoder = tf_example_decoder.TfExampleDecoder()
tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
self.assertAllEqual((tensor_dict[
fields.InputDataFields.groundtruth_classes].get_shape().as_list()),
[None])
with self.test_session() as sess:
tensor_dict = sess.run(tensor_dict)
self.assertAllEqual(bbox_classes,
tensor_dict[fields.InputDataFields.groundtruth_classes])
def testDecodeObjectArea(self):
image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor)
object_area = [100., 174.]
example = tf.train.Example(features=tf.train.Features(feature={
'image/encoded': self._BytesFeature(encoded_jpeg),
'image/format': self._BytesFeature('jpeg'),
'image/object/area': self._FloatFeature(object_area),
})).SerializeToString()
example_decoder = tf_example_decoder.TfExampleDecoder()
tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_area].
get_shape().as_list()), [None])
with self.test_session() as sess:
tensor_dict = sess.run(tensor_dict)
self.assertAllEqual(object_area,
tensor_dict[fields.InputDataFields.groundtruth_area])
def testDecodeObjectIsCrowd(self):
image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor)
object_is_crowd = [0, 1]
example = tf.train.Example(features=tf.train.Features(feature={
'image/encoded': self._BytesFeature(encoded_jpeg),
'image/format': self._BytesFeature('jpeg'),
'image/object/is_crowd': self._Int64Feature(object_is_crowd),
})).SerializeToString()
example_decoder = tf_example_decoder.TfExampleDecoder()
tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
self.assertAllEqual((tensor_dict[
fields.InputDataFields.groundtruth_is_crowd].get_shape().as_list()),
[None])
with self.test_session() as sess:
tensor_dict = sess.run(tensor_dict)
self.assertAllEqual([bool(item) for item in object_is_crowd],
tensor_dict[
fields.InputDataFields.groundtruth_is_crowd])
def testDecodeObjectDifficult(self):
image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor)
object_difficult = [0, 1]
example = tf.train.Example(features=tf.train.Features(feature={
'image/encoded': self._BytesFeature(encoded_jpeg),
'image/format': self._BytesFeature('jpeg'),
'image/object/difficult': self._Int64Feature(object_difficult),
})).SerializeToString()
example_decoder = tf_example_decoder.TfExampleDecoder()
tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
self.assertAllEqual((tensor_dict[
fields.InputDataFields.groundtruth_difficult].get_shape().as_list()),
[None])
with self.test_session() as sess:
tensor_dict = sess.run(tensor_dict)
self.assertAllEqual([bool(item) for item in object_difficult],
tensor_dict[
fields.InputDataFields.groundtruth_difficult])
def testDecodeInstanceSegmentation(self):
num_instances = 4
image_height = 5
image_width = 3
# Randomly generate image.
image_tensor = np.random.randint(255, size=(image_height,
image_width,
3)).astype(np.uint8)
encoded_jpeg = self._EncodeImage(image_tensor)
# Randomly generate instance segmentation masks.
instance_segmentation = (
np.random.randint(2, size=(num_instances,
image_height,
image_width)).astype(np.int64))
# Randomly generate class labels for each instance.
instance_segmentation_classes = np.random.randint(
100, size=(num_instances)).astype(np.int64)
example = tf.train.Example(features=tf.train.Features(feature={
'image/encoded': self._BytesFeature(encoded_jpeg),
'image/format': self._BytesFeature('jpeg'),
'image/height': self._Int64Feature([image_height]),
'image/width': self._Int64Feature([image_width]),
'image/segmentation/object': self._Int64Feature(
instance_segmentation.flatten()),
'image/segmentation/object/class': self._Int64Feature(
instance_segmentation_classes)})).SerializeToString()
example_decoder = tf_example_decoder.TfExampleDecoder()
tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
self.assertAllEqual((
tensor_dict[fields.InputDataFields.groundtruth_instance_masks].
get_shape().as_list()), [None, None, None])
self.assertAllEqual((
tensor_dict[fields.InputDataFields.groundtruth_instance_classes].
get_shape().as_list()), [None])
with self.test_session() as sess:
tensor_dict = sess.run(tensor_dict)
self.assertAllEqual(
instance_segmentation.astype(np.bool),
tensor_dict[fields.InputDataFields.groundtruth_instance_masks])
self.assertAllEqual(
instance_segmentation_classes,
tensor_dict[fields.InputDataFields.groundtruth_instance_classes])
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Evaluation executable for detection models.
This executable is used to evaluate DetectionModels. There are two ways of
configuring the eval job.
1) A single pipeline_pb2.TrainEvalPipelineConfig file maybe specified instead.
In this mode, the --eval_training_data flag may be given to force the pipeline
to evaluate on training data instead.
Example usage:
./eval \
--logtostderr \
--checkpoint_dir=path/to/checkpoint_dir \
--eval_dir=path/to/eval_dir \
--pipeline_config_path=pipeline_config.pbtxt
2) Three configuration files may be provided: a model_pb2.DetectionModel
configuration file to define what type of DetectionModel is being evaulated, an
input_reader_pb2.InputReader file to specify what data the model is evaluating
and an eval_pb2.EvalConfig file to configure evaluation parameters.
Example usage:
./eval \
--logtostderr \
--checkpoint_dir=path/to/checkpoint_dir \
--eval_dir=path/to/eval_dir \
--eval_config_path=eval_config.pbtxt \
--model_config_path=model_config.pbtxt \
--input_config_path=eval_input_config.pbtxt
"""
import functools
import tensorflow as tf
from google.protobuf import text_format
from object_detection import evaluator
from object_detection.builders import input_reader_builder
from object_detection.builders import model_builder
from object_detection.protos import eval_pb2
from object_detection.protos import input_reader_pb2
from object_detection.protos import model_pb2
from object_detection.protos import pipeline_pb2
from object_detection.utils import label_map_util
tf.logging.set_verbosity(tf.logging.INFO)
flags = tf.app.flags
flags.DEFINE_boolean('eval_training_data', False,
'If training data should be evaluated for this job.')
flags.DEFINE_string('checkpoint_dir', '',
'Directory containing checkpoints to evaluate, typically '
'set to `train_dir` used in the training job.')
flags.DEFINE_string('eval_dir', '',
'Directory to write eval summaries to.')
flags.DEFINE_string('pipeline_config_path', '',
'Path to a pipeline_pb2.TrainEvalPipelineConfig config '
'file. If provided, other configs are ignored')
flags.DEFINE_string('eval_config_path', '',
'Path to an eval_pb2.EvalConfig config file.')
flags.DEFINE_string('input_config_path', '',
'Path to an input_reader_pb2.InputReader config file.')
flags.DEFINE_string('model_config_path', '',
'Path to a model_pb2.DetectionModel config file.')
FLAGS = flags.FLAGS
def get_configs_from_pipeline_file():
"""Reads evaluation configuration from a pipeline_pb2.TrainEvalPipelineConfig.
Reads evaluation config from file specified by pipeline_config_path flag.
Returns:
model_config: a model_pb2.DetectionModel
eval_config: a eval_pb2.EvalConfig
input_config: a input_reader_pb2.InputReader
"""
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
with tf.gfile.GFile(FLAGS.pipeline_config_path, 'r') as f:
text_format.Merge(f.read(), pipeline_config)
model_config = pipeline_config.model
if FLAGS.eval_training_data:
eval_config = pipeline_config.train_config
else:
eval_config = pipeline_config.eval_config
input_config = pipeline_config.eval_input_reader
return model_config, eval_config, input_config
def get_configs_from_multiple_files():
"""Reads evaluation configuration from multiple config files.
Reads the evaluation config from the following files:
model_config: Read from --model_config_path
eval_config: Read from --eval_config_path
input_config: Read from --input_config_path
Returns:
model_config: a model_pb2.DetectionModel
eval_config: a eval_pb2.EvalConfig
input_config: a input_reader_pb2.InputReader
"""
eval_config = eval_pb2.EvalConfig()
with tf.gfile.GFile(FLAGS.eval_config_path, 'r') as f:
text_format.Merge(f.read(), eval_config)
model_config = model_pb2.DetectionModel()
with tf.gfile.GFile(FLAGS.model_config_path, 'r') as f:
text_format.Merge(f.read(), model_config)
input_config = input_reader_pb2.InputReader()
with tf.gfile.GFile(FLAGS.input_config_path, 'r') as f:
text_format.Merge(f.read(), input_config)
return model_config, eval_config, input_config
def main(unused_argv):
assert FLAGS.checkpoint_dir, '`checkpoint_dir` is missing.'
assert FLAGS.eval_dir, '`eval_dir` is missing.'
if FLAGS.pipeline_config_path:
model_config, eval_config, input_config = get_configs_from_pipeline_file()
else:
model_config, eval_config, input_config = get_configs_from_multiple_files()
model_fn = functools.partial(
model_builder.build,
model_config=model_config,
is_training=False)
create_input_dict_fn = functools.partial(
input_reader_builder.build,
input_config)
label_map = label_map_util.load_labelmap(input_config.label_map_path)
max_num_classes = max([item.id for item in label_map.item])
categories = label_map_util.convert_label_map_to_categories(
label_map, max_num_classes)
evaluator.evaluate(create_input_dict_fn, model_fn, eval_config, categories,
FLAGS.checkpoint_dir, FLAGS.eval_dir)
if __name__ == '__main__':
tf.app.run()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Common functions for repeatedly evaluating a checkpoint.
"""
import copy
import logging
import os
import time
import numpy as np
import tensorflow as tf
from object_detection.utils import label_map_util
from object_detection.utils import object_detection_evaluation
from object_detection.utils import visualization_utils as vis_utils
slim = tf.contrib.slim
def write_metrics(metrics, global_step, summary_dir):
"""Write metrics to a summary directory.
Args:
metrics: A dictionary containing metric names and values.
global_step: Global step at which the metrics are computed.
summary_dir: Directory to write tensorflow summaries to.
"""
logging.info('Writing metrics to tf summary.')
summary_writer = tf.summary.FileWriter(summary_dir)
for key in sorted(metrics):
summary = tf.Summary(value=[
tf.Summary.Value(tag=key, simple_value=metrics[key]),
])
summary_writer.add_summary(summary, global_step)
logging.info('%s: %f', key, metrics[key])
summary_writer.close()
logging.info('Metrics written to tf summary.')
def evaluate_detection_results_pascal_voc(result_lists,
categories,
label_id_offset=0,
iou_thres=0.5,
corloc_summary=False):
"""Computes Pascal VOC detection metrics given groundtruth and detections.
This function computes Pascal VOC metrics. This function by default
takes detections and groundtruth boxes encoded in result_lists and writes
evaluation results to tf summaries which can be viewed on tensorboard.
Args:
result_lists: a dictionary holding lists of groundtruth and detection
data corresponding to each image being evaluated. The following keys
are required:
'image_id': a list of string ids
'detection_boxes': a list of float32 numpy arrays of shape [N, 4]
'detection_scores': a list of float32 numpy arrays of shape [N]
'detection_classes': a list of int32 numpy arrays of shape [N]
'groundtruth_boxes': a list of float32 numpy arrays of shape [M, 4]
'groundtruth_classes': a list of int32 numpy arrays of shape [M]
and the remaining fields below are optional:
'difficult': a list of boolean arrays of shape [M] indicating the
difficulty of groundtruth boxes. Some datasets like PASCAL VOC provide
this information and it is used to remove difficult examples from eval
in order to not penalize the models on them.
Note that it is okay to have additional fields in result_lists --- they
are simply ignored.
categories: a list of dictionaries representing all possible categories.
Each dict in this list has the following keys:
'id': (required) an integer id uniquely identifying this category
'name': (required) string representing category name
e.g., 'cat', 'dog', 'pizza'
label_id_offset: an integer offset for the label space.
iou_thres: float determining the IoU threshold at which a box is considered
correct. Defaults to the standard 0.5.
corloc_summary: boolean. If True, also outputs CorLoc metrics.
Returns:
A dictionary of metric names to scalar values.
Raises:
ValueError: if the set of keys in result_lists is not a superset of the
expected list of keys. Unexpected keys are ignored.
ValueError: if the lists in result_lists have inconsistent sizes.
"""
# check for expected keys in result_lists
expected_keys = [
'detection_boxes', 'detection_scores', 'detection_classes', 'image_id'
]
expected_keys += ['groundtruth_boxes', 'groundtruth_classes']
if not set(expected_keys).issubset(set(result_lists.keys())):
raise ValueError('result_lists does not have expected key set.')
num_results = len(result_lists[expected_keys[0]])
for key in expected_keys:
if len(result_lists[key]) != num_results:
raise ValueError('Inconsistent list sizes in result_lists')
# Pascal VOC evaluator assumes foreground index starts from zero.
categories = copy.deepcopy(categories)
for idx in range(len(categories)):
categories[idx]['id'] -= label_id_offset
# num_classes (maybe encoded as categories)
num_classes = max([cat['id'] for cat in categories]) + 1
logging.info('Computing Pascal VOC metrics on results.')
if all(image_id.isdigit() for image_id in result_lists['image_id']):
image_ids = [int(image_id) for image_id in result_lists['image_id']]
else:
image_ids = range(num_results)
evaluator = object_detection_evaluation.ObjectDetectionEvaluation(
num_classes, matching_iou_threshold=iou_thres)
difficult_lists = None
if 'difficult' in result_lists and result_lists['difficult']:
difficult_lists = result_lists['difficult']
for idx, image_id in enumerate(image_ids):
difficult = None
if difficult_lists is not None and difficult_lists[idx].size:
difficult = difficult_lists[idx].astype(np.bool)
evaluator.add_single_ground_truth_image_info(
image_id, result_lists['groundtruth_boxes'][idx],
result_lists['groundtruth_classes'][idx] - label_id_offset,
difficult)
evaluator.add_single_detected_image_info(
image_id, result_lists['detection_boxes'][idx],
result_lists['detection_scores'][idx],
result_lists['detection_classes'][idx] - label_id_offset)
per_class_ap, mean_ap, _, _, per_class_corloc, mean_corloc = (
evaluator.evaluate())
metrics = {'Precision/mAP@{}IOU'.format(iou_thres): mean_ap}
category_index = label_map_util.create_category_index(categories)
for idx in range(per_class_ap.size):
if idx in category_index:
display_name = ('PerformanceByCategory/mAP@{}IOU/{}'
.format(iou_thres, category_index[idx]['name']))
metrics[display_name] = per_class_ap[idx]
if corloc_summary:
metrics['CorLoc/CorLoc@{}IOU'.format(iou_thres)] = mean_corloc
for idx in range(per_class_corloc.size):
if idx in category_index:
display_name = (
'PerformanceByCategory/CorLoc@{}IOU/{}'.format(
iou_thres, category_index[idx]['name']))
metrics[display_name] = per_class_corloc[idx]
return metrics
# TODO: Add tests.
def visualize_detection_results(result_dict,
tag,
global_step,
categories,
summary_dir='',
export_dir='',
agnostic_mode=False,
show_groundtruth=False,
min_score_thresh=.5,
max_num_predictions=20):
"""Visualizes detection results and writes visualizations to image summaries.
This function visualizes an image with its detected bounding boxes and writes
to image summaries which can be viewed on tensorboard. It optionally also
writes images to a directory. In the case of missing entry in the label map,
unknown class name in the visualization is shown as "N/A".
Args:
result_dict: a dictionary holding groundtruth and detection
data corresponding to each image being evaluated. The following keys
are required:
'original_image': a numpy array representing the image with shape
[1, height, width, 3]
'detection_boxes': a numpy array of shape [N, 4]
'detection_scores': a numpy array of shape [N]
'detection_classes': a numpy array of shape [N]
The following keys are optional:
'groundtruth_boxes': a numpy array of shape [N, 4]
'groundtruth_keypoints': a numpy array of shape [N, num_keypoints, 2]
Detections are assumed to be provided in decreasing order of score and for
display, and we assume that scores are probabilities between 0 and 1.
tag: tensorboard tag (string) to associate with image.
global_step: global step at which the visualization are generated.
categories: a list of dictionaries representing all possible categories.
Each dict in this list has the following keys:
'id': (required) an integer id uniquely identifying this category
'name': (required) string representing category name
e.g., 'cat', 'dog', 'pizza'
'supercategory': (optional) string representing the supercategory
e.g., 'animal', 'vehicle', 'food', etc
summary_dir: the output directory to which the image summaries are written.
export_dir: the output directory to which images are written. If this is
empty (default), then images are not exported.
agnostic_mode: boolean (default: False) controlling whether to evaluate in
class-agnostic mode or not.
show_groundtruth: boolean (default: False) controlling whether to show
groundtruth boxes in addition to detected boxes
min_score_thresh: minimum score threshold for a box to be visualized
max_num_predictions: maximum number of detections to visualize
Raises:
ValueError: if result_dict does not contain the expected keys (i.e.,
'original_image', 'detection_boxes', 'detection_scores',
'detection_classes')
"""
if not set([
'original_image', 'detection_boxes', 'detection_scores',
'detection_classes'
]).issubset(set(result_dict.keys())):
raise ValueError('result_dict does not contain all expected keys.')
if show_groundtruth and 'groundtruth_boxes' not in result_dict:
raise ValueError('If show_groundtruth is enabled, result_dict must contain '
'groundtruth_boxes.')
logging.info('Creating detection visualizations.')
category_index = label_map_util.create_category_index(categories)
image = np.squeeze(result_dict['original_image'], axis=0)
detection_boxes = result_dict['detection_boxes']
detection_scores = result_dict['detection_scores']
detection_classes = np.int32((result_dict['detection_classes']))
detection_keypoints = result_dict.get('detection_keypoints', None)
detection_masks = result_dict.get('detection_masks', None)
# Plot groundtruth underneath detections
if show_groundtruth:
groundtruth_boxes = result_dict['groundtruth_boxes']
groundtruth_keypoints = result_dict.get('groundtruth_keypoints', None)
vis_utils.visualize_boxes_and_labels_on_image_array(
image,
groundtruth_boxes,
None,
None,
category_index,
keypoints=groundtruth_keypoints,
use_normalized_coordinates=False,
max_boxes_to_draw=None)
vis_utils.visualize_boxes_and_labels_on_image_array(
image,
detection_boxes,
detection_classes,
detection_scores,
category_index,
instance_masks=detection_masks,
keypoints=detection_keypoints,
use_normalized_coordinates=False,
max_boxes_to_draw=max_num_predictions,
min_score_thresh=min_score_thresh,
agnostic_mode=agnostic_mode)
if export_dir:
export_path = os.path.join(export_dir, 'export-{}.png'.format(tag))
vis_utils.save_image_array_as_png(image, export_path)
summary = tf.Summary(value=[
tf.Summary.Value(tag=tag, image=tf.Summary.Image(
encoded_image_string=vis_utils.encode_image_array_as_png_str(
image)))
])
summary_writer = tf.summary.FileWriter(summary_dir)
summary_writer.add_summary(summary, global_step)
summary_writer.close()
logging.info('Detection visualizations written to summary with tag %s.', tag)
# TODO: Add tests.
# TODO: Have an argument called `aggregated_processor_tensor_keys` that contains
# a whitelist of tensors used by the `aggregated_result_processor` instead of a
# blacklist. This will prevent us from inadvertently adding any evaluated
# tensors into the `results_list` data structure that are not needed by
# `aggregated_result_preprocessor`.
def run_checkpoint_once(tensor_dict,
update_op,
summary_dir,
aggregated_result_processor=None,
batch_processor=None,
checkpoint_dirs=None,
variables_to_restore=None,
restore_fn=None,
num_batches=1,
master='',
save_graph=False,
save_graph_dir='',
metric_names_to_values=None,
keys_to_exclude_from_results=()):
"""Evaluates both python metrics and tensorflow slim metrics.
Python metrics are processed in batch by the aggregated_result_processor,
while tensorflow slim metrics statistics are computed by running
metric_names_to_updates tensors and aggregated using metric_names_to_values
tensor.
Args:
tensor_dict: a dictionary holding tensors representing a batch of detections
and corresponding groundtruth annotations.
update_op: a tensorflow update op that will run for each batch along with
the tensors in tensor_dict..
summary_dir: a directory to write metrics summaries.
aggregated_result_processor: a function taking one arguments:
1. result_lists: a dictionary with keys matching those in tensor_dict
and corresponding values being the list of results for each tensor
in tensor_dict. The length of each such list is num_batches.
batch_processor: a function taking four arguments:
1. tensor_dict: the same tensor_dict that is passed in as the first
argument to this function.
2. sess: a tensorflow session
3. batch_index: an integer representing the index of the batch amongst
all batches
4. update_op: a tensorflow update op that will run for each batch.
and returns result_dict, a dictionary of results for that batch.
By default, batch_processor is None, which defaults to running:
return sess.run(tensor_dict)
To skip an image, it suffices to return an empty dictionary in place of
result_dict.
checkpoint_dirs: list of directories to load into an EnsembleModel. If it
has only one directory, EnsembleModel will not be used -- a DetectionModel
will be instantiated directly. Not used if restore_fn is set.
variables_to_restore: None, or a dictionary mapping variable names found in
a checkpoint to model variables. The dictionary would normally be
generated by creating a tf.train.ExponentialMovingAverage object and
calling its variables_to_restore() method. Not used if restore_fn is set.
restore_fn: None, or a function that takes a tf.Session object and correctly
restores all necessary variables from the correct checkpoint file. If
None, attempts to restore from the first directory in checkpoint_dirs.
num_batches: the number of batches to use for evaluation.
master: the location of the Tensorflow session.
save_graph: whether or not the Tensorflow graph is stored as a pbtxt file.
save_graph_dir: where to store the Tensorflow graph on disk. If save_graph
is True this must be non-empty.
metric_names_to_values: A dictionary containing metric names to tensors
which will be evaluated after processing all batches
of [tensor_dict, update_op]. If any metrics depend on statistics computed
during each batch ensure that `update_op` tensor has a control dependency
on the update ops that compute the statistics.
keys_to_exclude_from_results: keys in tensor_dict that will be excluded
from results_list. Note that the tensors corresponding to these keys will
still be evaluated for each batch, but won't be added to results_list.
Raises:
ValueError: if restore_fn is None and checkpoint_dirs doesn't have at least
one element.
ValueError: if save_graph is True and save_graph_dir is not defined.
"""
if save_graph and not save_graph_dir:
raise ValueError('`save_graph_dir` must be defined.')
sess = tf.Session(master, graph=tf.get_default_graph())
sess.run(tf.global_variables_initializer())
sess.run(tf.local_variables_initializer())
if restore_fn:
restore_fn(sess)
else:
if not checkpoint_dirs:
raise ValueError('`checkpoint_dirs` must have at least one entry.')
checkpoint_file = tf.train.latest_checkpoint(checkpoint_dirs[0])
saver = tf.train.Saver(variables_to_restore)
saver.restore(sess, checkpoint_file)
if save_graph:
tf.train.write_graph(sess.graph_def, save_graph_dir, 'eval.pbtxt')
valid_keys = list(set(tensor_dict.keys()) - set(keys_to_exclude_from_results))
result_lists = {key: [] for key in valid_keys}
counters = {'skipped': 0, 'success': 0}
other_metrics = None
with tf.contrib.slim.queues.QueueRunners(sess):
try:
for batch in range(int(num_batches)):
if (batch + 1) % 100 == 0:
logging.info('Running eval ops batch %d/%d', batch + 1, num_batches)
if not batch_processor:
try:
(result_dict, _) = sess.run([tensor_dict, update_op])
counters['success'] += 1
except tf.errors.InvalidArgumentError:
logging.info('Skipping image')
counters['skipped'] += 1
result_dict = {}
else:
result_dict = batch_processor(
tensor_dict, sess, batch, counters, update_op)
for key in result_dict:
if key in valid_keys:
result_lists[key].append(result_dict[key])
if metric_names_to_values is not None:
other_metrics = sess.run(metric_names_to_values)
logging.info('Running eval batches done.')
except tf.errors.OutOfRangeError:
logging.info('Done evaluating -- epoch limit reached')
finally:
# When done, ask the threads to stop.
metrics = aggregated_result_processor(result_lists)
if other_metrics is not None:
metrics.update(other_metrics)
global_step = tf.train.global_step(sess, slim.get_global_step())
write_metrics(metrics, global_step, summary_dir)
logging.info('# success: %d', counters['success'])
logging.info('# skipped: %d', counters['skipped'])
sess.close()
# TODO: Add tests.
def repeated_checkpoint_run(tensor_dict,
update_op,
summary_dir,
aggregated_result_processor=None,
batch_processor=None,
checkpoint_dirs=None,
variables_to_restore=None,
restore_fn=None,
num_batches=1,
eval_interval_secs=120,
max_number_of_evaluations=None,
master='',
save_graph=False,
save_graph_dir='',
metric_names_to_values=None,
keys_to_exclude_from_results=()):
"""Periodically evaluates desired tensors using checkpoint_dirs or restore_fn.
This function repeatedly loads a checkpoint and evaluates a desired
set of tensors (provided by tensor_dict) and hands the resulting numpy
arrays to a function result_processor which can be used to further
process/save/visualize the results.
Args:
tensor_dict: a dictionary holding tensors representing a batch of detections
and corresponding groundtruth annotations.
update_op: a tensorflow update op that will run for each batch along with
the tensors in tensor_dict.
summary_dir: a directory to write metrics summaries.
aggregated_result_processor: a function taking one argument:
1. result_lists: a dictionary with keys matching those in tensor_dict
and corresponding values being the list of results for each tensor
in tensor_dict. The length of each such list is num_batches.
batch_processor: a function taking three arguments:
1. tensor_dict: the same tensor_dict that is passed in as the first
argument to this function.
2. sess: a tensorflow session
3. batch_index: an integer representing the index of the batch amongst
all batches
4. update_op: a tensorflow update op that will run for each batch.
and returns result_dict, a dictionary of results for that batch.
By default, batch_processor is None, which defaults to running:
return sess.run(tensor_dict)
checkpoint_dirs: list of directories to load into a DetectionModel or an
EnsembleModel if restore_fn isn't set. Also used to determine when to run
next evaluation. Must have at least one element.
variables_to_restore: None, or a dictionary mapping variable names found in
a checkpoint to model variables. The dictionary would normally be
generated by creating a tf.train.ExponentialMovingAverage object and
calling its variables_to_restore() method. Not used if restore_fn is set.
restore_fn: a function that takes a tf.Session object and correctly restores
all necessary variables from the correct checkpoint file.
num_batches: the number of batches to use for evaluation.
eval_interval_secs: the number of seconds between each evaluation run.
max_number_of_evaluations: the max number of iterations of the evaluation.
If the value is left as None the evaluation continues indefinitely.
master: the location of the Tensorflow session.
save_graph: whether or not the Tensorflow graph is saved as a pbtxt file.
save_graph_dir: where to save on disk the Tensorflow graph. If store_graph
is True this must be non-empty.
metric_names_to_values: A dictionary containing metric names to tensors
which will be evaluated after processing all batches
of [tensor_dict, update_op]. If any metrics depend on statistics computed
during each batch ensure that `update_op` tensor has a control dependency
on the update ops that compute the statistics.
keys_to_exclude_from_results: keys in tensor_dict that will be excluded
from results_list. Note that the tensors corresponding to these keys will
still be evaluated for each batch, but won't be added to results_list.
Raises:
ValueError: if max_num_of_evaluations is not None or a positive number.
ValueError: if checkpoint_dirs doesn't have at least one element.
"""
if max_number_of_evaluations and max_number_of_evaluations <= 0:
raise ValueError(
'`number_of_steps` must be either None or a positive number.')
if not checkpoint_dirs:
raise ValueError('`checkpoint_dirs` must have at least one entry.')
last_evaluated_model_path = None
number_of_evaluations = 0
while True:
start = time.time()
logging.info('Starting evaluation at ' + time.strftime('%Y-%m-%d-%H:%M:%S',
time.gmtime()))
model_path = tf.train.latest_checkpoint(checkpoint_dirs[0])
if not model_path:
logging.info('No model found in %s. Will try again in %d seconds',
checkpoint_dirs[0], eval_interval_secs)
elif model_path == last_evaluated_model_path:
logging.info('Found already evaluated checkpoint. Will try again in %d '
'seconds', eval_interval_secs)
else:
last_evaluated_model_path = model_path
run_checkpoint_once(tensor_dict, update_op, summary_dir,
aggregated_result_processor,
batch_processor, checkpoint_dirs,
variables_to_restore, restore_fn, num_batches, master,
save_graph, save_graph_dir, metric_names_to_values,
keys_to_exclude_from_results)
number_of_evaluations += 1
if (max_number_of_evaluations and
number_of_evaluations >= max_number_of_evaluations):
logging.info('Finished evaluation!')
break
time_to_next_eval = start + eval_interval_secs - time.time()
if time_to_next_eval > 0:
time.sleep(time_to_next_eval)
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Detection model evaluator.
This file provides a generic evaluation method that can be used to evaluate a
DetectionModel.
"""
import logging
import tensorflow as tf
from object_detection import eval_util
from object_detection.core import box_list
from object_detection.core import box_list_ops
from object_detection.core import prefetcher
from object_detection.core import standard_fields as fields
from object_detection.utils import ops
slim = tf.contrib.slim
EVAL_METRICS_FN_DICT = {
'pascal_voc_metrics': eval_util.evaluate_detection_results_pascal_voc
}
def _extract_prediction_tensors(model,
create_input_dict_fn,
ignore_groundtruth=False):
"""Restores the model in a tensorflow session.
Args:
model: model to perform predictions with.
create_input_dict_fn: function to create input tensor dictionaries.
ignore_groundtruth: whether groundtruth should be ignored.
Returns:
tensor_dict: A tensor dictionary with evaluations.
"""
input_dict = create_input_dict_fn()
prefetch_queue = prefetcher.prefetch(input_dict, capacity=500)
input_dict = prefetch_queue.dequeue()
original_image = tf.expand_dims(input_dict[fields.InputDataFields.image], 0)
preprocessed_image = model.preprocess(tf.to_float(original_image))
prediction_dict = model.predict(preprocessed_image)
detections = model.postprocess(prediction_dict)
original_image_shape = tf.shape(original_image)
absolute_detection_boxlist = box_list_ops.to_absolute_coordinates(
box_list.BoxList(tf.squeeze(detections['detection_boxes'], axis=0)),
original_image_shape[1], original_image_shape[2])
label_id_offset = 1
tensor_dict = {
'original_image': original_image,
'image_id': input_dict[fields.InputDataFields.source_id],
'detection_boxes': absolute_detection_boxlist.get(),
'detection_scores': tf.squeeze(detections['detection_scores'], axis=0),
'detection_classes': (
tf.squeeze(detections['detection_classes'], axis=0) +
label_id_offset),
}
if 'detection_masks' in detections:
detection_masks = tf.squeeze(detections['detection_masks'],
axis=0)
detection_boxes = tf.squeeze(detections['detection_boxes'],
axis=0)
# TODO: This should be done in model's postprocess function ideally.
detection_masks_reframed = ops.reframe_box_masks_to_image_masks(
detection_masks,
detection_boxes,
original_image_shape[1], original_image_shape[2])
detection_masks_reframed = tf.to_float(tf.greater(detection_masks_reframed,
0.5))
tensor_dict['detection_masks'] = detection_masks_reframed
# load groundtruth fields into tensor_dict
if not ignore_groundtruth:
normalized_gt_boxlist = box_list.BoxList(
input_dict[fields.InputDataFields.groundtruth_boxes])
gt_boxlist = box_list_ops.scale(normalized_gt_boxlist,
tf.shape(original_image)[1],
tf.shape(original_image)[2])
groundtruth_boxes = gt_boxlist.get()
groundtruth_classes = input_dict[fields.InputDataFields.groundtruth_classes]
tensor_dict['groundtruth_boxes'] = groundtruth_boxes
tensor_dict['groundtruth_classes'] = groundtruth_classes
tensor_dict['area'] = input_dict[fields.InputDataFields.groundtruth_area]
tensor_dict['is_crowd'] = input_dict[
fields.InputDataFields.groundtruth_is_crowd]
tensor_dict['difficult'] = input_dict[
fields.InputDataFields.groundtruth_difficult]
if 'detection_masks' in tensor_dict:
tensor_dict['groundtruth_instance_masks'] = input_dict[
fields.InputDataFields.groundtruth_instance_masks]
return tensor_dict
def evaluate(create_input_dict_fn, create_model_fn, eval_config, categories,
checkpoint_dir, eval_dir):
"""Evaluation function for detection models.
Args:
create_input_dict_fn: a function to create a tensor input dictionary.
create_model_fn: a function that creates a DetectionModel.
eval_config: a eval_pb2.EvalConfig protobuf.
categories: a list of category dictionaries. Each dict in the list should
have an integer 'id' field and string 'name' field.
checkpoint_dir: directory to load the checkpoints to evaluate from.
eval_dir: directory to write evaluation metrics summary to.
"""
model = create_model_fn()
if eval_config.ignore_groundtruth and not eval_config.export_path:
logging.fatal('If ignore_groundtruth=True then an export_path is '
'required. Aborting!!!')
tensor_dict = _extract_prediction_tensors(
model=model,
create_input_dict_fn=create_input_dict_fn,
ignore_groundtruth=eval_config.ignore_groundtruth)
def _process_batch(tensor_dict, sess, batch_index, counters, update_op):
"""Evaluates tensors in tensor_dict, visualizing the first K examples.
This function calls sess.run on tensor_dict, evaluating the original_image
tensor only on the first K examples and visualizing detections overlaid
on this original_image.
Args:
tensor_dict: a dictionary of tensors
sess: tensorflow session
batch_index: the index of the batch amongst all batches in the run.
counters: a dictionary holding 'success' and 'skipped' fields which can
be updated to keep track of number of successful and failed runs,
respectively. If these fields are not updated, then the success/skipped
counter values shown at the end of evaluation will be incorrect.
update_op: An update op that has to be run along with output tensors. For
example this could be an op to compute statistics for slim metrics.
Returns:
result_dict: a dictionary of numpy arrays
"""
if batch_index >= eval_config.num_visualizations:
if 'original_image' in tensor_dict:
tensor_dict = {k: v for (k, v) in tensor_dict.items()
if k != 'original_image'}
try:
(result_dict, _) = sess.run([tensor_dict, update_op])
counters['success'] += 1
except tf.errors.InvalidArgumentError:
logging.info('Skipping image')
counters['skipped'] += 1
return {}
global_step = tf.train.global_step(sess, slim.get_global_step())
if batch_index < eval_config.num_visualizations:
tag = 'image-{}'.format(batch_index)
eval_util.visualize_detection_results(
result_dict, tag, global_step, categories=categories,
summary_dir=eval_dir,
export_dir=eval_config.visualization_export_dir,
show_groundtruth=eval_config.visualization_export_dir)
return result_dict
def _process_aggregated_results(result_lists):
eval_metric_fn_key = eval_config.metrics_set
if eval_metric_fn_key not in EVAL_METRICS_FN_DICT:
raise ValueError('Metric not found: {}'.format(eval_metric_fn_key))
return EVAL_METRICS_FN_DICT[eval_metric_fn_key](result_lists,
categories=categories)
variables_to_restore = tf.global_variables()
global_step = slim.get_or_create_global_step()
variables_to_restore.append(global_step)
if eval_config.use_moving_averages:
variable_averages = tf.train.ExponentialMovingAverage(0.0)
variables_to_restore = variable_averages.variables_to_restore()
saver = tf.train.Saver(variables_to_restore)
def _restore_latest_checkpoint(sess):
latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
saver.restore(sess, latest_checkpoint)
eval_util.repeated_checkpoint_run(
tensor_dict=tensor_dict,
update_op=tf.no_op(),
summary_dir=eval_dir,
aggregated_result_processor=_process_aggregated_results,
batch_processor=_process_batch,
checkpoint_dirs=[checkpoint_dir],
variables_to_restore=None,
restore_fn=_restore_latest_checkpoint,
num_batches=eval_config.num_examples,
eval_interval_secs=eval_config.eval_interval_secs,
max_number_of_evaluations=(
1 if eval_config.ignore_groundtruth else
eval_config.max_evals if eval_config.max_evals else
None),
master=eval_config.eval_master,
save_graph=eval_config.save_graph,
save_graph_dir=(eval_dir if eval_config.save_graph else ''))
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment