Commit 88253ce5 authored by Hongkun Yu, committed by A. Unique TensorFlower

Internal change

PiperOrigin-RevId: 326286926
parent 52371ffe
......@@ -52,15 +52,15 @@ class RetinanetModel(base_model.Model):
# Predict function.
self._generate_detections_fn = postprocess_ops.MultilevelDetectionGenerator(
params.architecture.min_level,
params.architecture.max_level,
params.architecture.min_level, params.architecture.max_level,
params.postprocess)
self._transpose_input = params.train.transpose_input
assert not self._transpose_input, 'Transpose input is not supported.'
# Input layer.
self._input_layer = tf.keras.layers.Input(
shape=(None, None, params.retinanet_parser.num_channels), name='',
shape=(None, None, params.retinanet_parser.num_channels),
name='',
dtype=tf.bfloat16 if self._use_bfloat16 else tf.float32)
def build_outputs(self, inputs, mode):
......@@ -141,8 +141,8 @@ class RetinanetModel(base_model.Model):
raise ValueError('"%s" is missing in outputs, requried %s found %s',
field, required_label_fields, labels.keys())
boxes, scores, classes, valid_detections = self._generate_detections_fn(
outputs['box_outputs'], outputs['cls_outputs'],
labels['anchor_boxes'], labels['image_info'][:, 1:2, :])
outputs['box_outputs'], outputs['cls_outputs'], labels['anchor_boxes'],
labels['image_info'][:, 1:2, :])
# Discards the old output tensors to save memory. The `cls_outputs` and
# `box_outputs` are pretty big and could potentially lead to memory issues.
outputs = {
......
......@@ -61,13 +61,11 @@ class ShapeMaskModel(base_model.Model):
params.shapemask_loss.shape_prior_loss_weight)
self._coarse_mask_loss_weight = (
params.shapemask_loss.coarse_mask_loss_weight)
self._fine_mask_loss_weight = (
params.shapemask_loss.fine_mask_loss_weight)
self._fine_mask_loss_weight = (params.shapemask_loss.fine_mask_loss_weight)
# Predict function.
self._generate_detections_fn = postprocess_ops.MultilevelDetectionGenerator(
params.architecture.min_level,
params.architecture.max_level,
params.architecture.min_level, params.architecture.max_level,
params.postprocess)
def build_outputs(self, inputs, mode):
......@@ -79,10 +77,8 @@ class ShapeMaskModel(base_model.Model):
else:
anchor_boxes = anchor.Anchor(
self._params.architecture.min_level,
self._params.architecture.max_level,
self._params.anchor.num_scales,
self._params.anchor.aspect_ratios,
self._params.anchor.anchor_size,
self._params.architecture.max_level, self._params.anchor.num_scales,
self._params.anchor.aspect_ratios, self._params.anchor.anchor_size,
images.get_shape().as_list()[1:3]).multilevel_boxes
batch_size = tf.shape(images)[0]
......@@ -96,8 +92,7 @@ class ShapeMaskModel(base_model.Model):
fpn_features, is_training=is_training)
valid_boxes, valid_scores, valid_classes, valid_detections = (
self._generate_detections_fn(box_outputs, cls_outputs,
anchor_boxes,
self._generate_detections_fn(box_outputs, cls_outputs, anchor_boxes,
inputs['image_info'][:, 1:2, :]))
image_size = images.get_shape().as_list()[1:3]
......@@ -124,22 +119,18 @@ class ShapeMaskModel(base_model.Model):
return boxes, classes, outer_boxes
boxes, classes, outer_boxes = SampledBoxesLayer()(
inputs, valid_boxes, valid_classes,
valid_outer_boxes, training=is_training)
instance_features, prior_masks = self._shape_prior_head_fn(fpn_features,
boxes,
outer_boxes,
classes,
is_training)
coarse_mask_logits = self._coarse_mask_fn(instance_features,
prior_masks,
classes,
is_training)
fine_mask_logits = self._fine_mask_fn(instance_features,
coarse_mask_logits,
classes,
is_training)
inputs,
valid_boxes,
valid_classes,
valid_outer_boxes,
training=is_training)
instance_features, prior_masks = self._shape_prior_head_fn(
fpn_features, boxes, outer_boxes, classes, is_training)
coarse_mask_logits = self._coarse_mask_fn(instance_features, prior_masks,
classes, is_training)
fine_mask_logits = self._fine_mask_fn(instance_features, coarse_mask_logits,
classes, is_training)
model_outputs = {
'cls_outputs': cls_outputs,
......@@ -177,18 +168,15 @@ class ShapeMaskModel(base_model.Model):
labels['num_positives'])
# Adds Shapemask model losses.
shape_prior_loss = self._shapemask_prior_loss_fn(
outputs['prior_masks'],
labels['mask_targets'],
labels['mask_is_valid'])
coarse_mask_loss = self._shapemask_loss_fn(
outputs['coarse_mask_logits'],
labels['mask_targets'],
labels['mask_is_valid'])
fine_mask_loss = self._shapemask_loss_fn(
outputs['fine_mask_logits'],
labels['fine_mask_targets'],
labels['mask_is_valid'])
shape_prior_loss = self._shapemask_prior_loss_fn(outputs['prior_masks'],
labels['mask_targets'],
labels['mask_is_valid'])
coarse_mask_loss = self._shapemask_loss_fn(outputs['coarse_mask_logits'],
labels['mask_targets'],
labels['mask_is_valid'])
fine_mask_loss = self._shapemask_loss_fn(outputs['fine_mask_logits'],
labels['fine_mask_targets'],
labels['mask_is_valid'])
model_loss = (
cls_loss + self._box_loss_weight * box_loss +
......@@ -222,43 +210,46 @@ class ShapeMaskModel(base_model.Model):
if is_training:
batch_size = params.train.batch_size
input_layer = {
'image': tf.keras.layers.Input(
shape=input_shape,
batch_size=batch_size,
name='image',
dtype=tf.bfloat16 if self._use_bfloat16 else tf.float32),
'image_info': tf.keras.layers.Input(
shape=[4, 2],
batch_size=batch_size,
name='image_info'),
'mask_classes': tf.keras.layers.Input(
shape=[params.shapemask_parser.num_sampled_masks],
batch_size=batch_size,
name='mask_classes',
dtype=tf.int64),
'mask_outer_boxes': tf.keras.layers.Input(
shape=[params.shapemask_parser.num_sampled_masks, 4],
batch_size=batch_size,
name='mask_outer_boxes',
dtype=tf.float32),
'mask_boxes': tf.keras.layers.Input(
shape=[params.shapemask_parser.num_sampled_masks, 4],
batch_size=batch_size,
name='mask_boxes',
dtype=tf.float32),
'image':
tf.keras.layers.Input(
shape=input_shape,
batch_size=batch_size,
name='image',
dtype=tf.bfloat16 if self._use_bfloat16 else tf.float32),
'image_info':
tf.keras.layers.Input(
shape=[4, 2], batch_size=batch_size, name='image_info'),
'mask_classes':
tf.keras.layers.Input(
shape=[params.shapemask_parser.num_sampled_masks],
batch_size=batch_size,
name='mask_classes',
dtype=tf.int64),
'mask_outer_boxes':
tf.keras.layers.Input(
shape=[params.shapemask_parser.num_sampled_masks, 4],
batch_size=batch_size,
name='mask_outer_boxes',
dtype=tf.float32),
'mask_boxes':
tf.keras.layers.Input(
shape=[params.shapemask_parser.num_sampled_masks, 4],
batch_size=batch_size,
name='mask_boxes',
dtype=tf.float32),
}
else:
batch_size = params.eval.batch_size
input_layer = {
'image': tf.keras.layers.Input(
shape=input_shape,
batch_size=batch_size,
name='image',
dtype=tf.bfloat16 if self._use_bfloat16 else tf.float32),
'image_info': tf.keras.layers.Input(
shape=[4, 2],
batch_size=batch_size,
name='image_info'),
'image':
tf.keras.layers.Input(
shape=input_shape,
batch_size=batch_size,
name='image',
dtype=tf.bfloat16 if self._use_bfloat16 else tf.float32),
'image_info':
tf.keras.layers.Input(
shape=[4, 2], batch_size=batch_size, name='image_info'),
}
return input_layer
......@@ -277,9 +268,10 @@ class ShapeMaskModel(base_model.Model):
return self._keras_model
def post_processing(self, labels, outputs):
required_output_fields = ['num_detections', 'detection_boxes',
'detection_classes', 'detection_masks',
'detection_scores']
required_output_fields = [
'num_detections', 'detection_boxes', 'detection_classes',
'detection_masks', 'detection_scores'
]
for field in required_output_fields:
if field not in outputs:
......
......@@ -22,7 +22,6 @@ import tensorflow as tf
from official.vision.detection.utils import box_utils
NMS_TILE_SIZE = 512
......@@ -106,9 +105,7 @@ def _suppression_loop_body(boxes, iou_threshold, output_size, idx):
return boxes, iou_threshold, output_size, idx + 1
def sorted_non_max_suppression_padded(scores,
boxes,
max_output_size,
def sorted_non_max_suppression_padded(scores, boxes, max_output_size,
iou_threshold):
"""A wrapper that handles non-maximum suppression.
......@@ -177,19 +174,18 @@ def sorted_non_max_suppression_padded(scores,
idx < num_boxes // NMS_TILE_SIZE)
selected_boxes, _, output_size, _ = tf.while_loop(
_loop_cond, _suppression_loop_body, [
boxes, iou_threshold,
tf.zeros([batch_size], tf.int32),
tf.constant(0)
])
_loop_cond, _suppression_loop_body,
[boxes, iou_threshold,
tf.zeros([batch_size], tf.int32),
tf.constant(0)])
idx = num_boxes - tf.cast(
tf.nn.top_k(
tf.cast(tf.reduce_any(selected_boxes > 0, [2]), tf.int32) *
tf.expand_dims(tf.range(num_boxes, 0, -1), 0), max_output_size)[0],
tf.int32)
idx = tf.minimum(idx, num_boxes - 1)
idx = tf.reshape(
idx + tf.reshape(tf.range(batch_size) * num_boxes, [-1, 1]), [-1])
idx = tf.reshape(idx + tf.reshape(tf.range(batch_size) * num_boxes, [-1, 1]),
[-1])
boxes = tf.reshape(
tf.gather(tf.reshape(boxes, [-1, 4]), idx),
[batch_size, max_output_size, 4])
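The index recovery after the while loop is the subtle step: surviving boxes are marked with ones and multiplied by a descending range, so tf.nn.top_k returns the earliest (highest-scoring, since the input is pre-sorted) survivors, and subtracting from num_boxes turns ranks back into indices. A minimal sketch of the trick, with illustrative values:

import tensorflow as tf

num_boxes, max_output_size = 8, 3
# 1 where a box survived tiled NMS, 0 otherwise (values illustrative).
survived = tf.constant([[0, 1, 1, 0, 0, 1, 0, 0]], tf.int32)
# Descending ranks favor earlier boxes, which carry higher scores.
ranks = survived * tf.expand_dims(tf.range(num_boxes, 0, -1), 0)
idx = num_boxes - tf.nn.top_k(ranks, max_output_size)[0]
# idx -> [[1, 2, 5]]: positions of the surviving boxes, best first.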
......
......@@ -19,6 +19,7 @@ from __future__ import division
from __future__ import print_function
import functools
import tensorflow as tf
from official.vision.detection.ops import nms
......@@ -202,15 +203,14 @@ def _generate_detections_per_image(boxes,
scores_i, k=tf.minimum(tf.shape(input=scores_i)[-1], pre_nms_num_boxes))
boxes_i = tf.gather(boxes_i, indices)
(nmsed_indices_i,
nmsed_num_valid_i) = tf.image.non_max_suppression_padded(
tf.cast(boxes_i, tf.float32),
tf.cast(scores_i, tf.float32),
max_total_size,
iou_threshold=nms_iou_threshold,
score_threshold=score_threshold,
pad_to_max_output_size=True,
name='nms_detections_' + str(i))
(nmsed_indices_i, nmsed_num_valid_i) = tf.image.non_max_suppression_padded(
tf.cast(boxes_i, tf.float32),
tf.cast(scores_i, tf.float32),
max_total_size,
iou_threshold=nms_iou_threshold,
score_threshold=score_threshold,
pad_to_max_output_size=True,
name='nms_detections_' + str(i))
nmsed_boxes_i = tf.gather(boxes_i, nmsed_indices_i)
nmsed_scores_i = tf.gather(scores_i, nmsed_indices_i)
# Sets scores of invalid boxes to -1.
......@@ -235,11 +235,8 @@ def _generate_detections_per_image(boxes,
return nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections
def _generate_detections_batched(boxes,
scores,
max_total_size,
nms_iou_threshold,
score_threshold):
def _generate_detections_batched(boxes, scores, max_total_size,
nms_iou_threshold, score_threshold):
"""Generates detected boxes with scores and classes for one-stage detector.
The function takes output of multi-level ConvNets and anchor boxes and
......@@ -247,19 +244,20 @@ def _generate_detections_batched(boxes,
supported on TPU currently.
Args:
boxes: a tensor with shape [batch_size, N, num_classes, 4] or
[batch_size, N, 1, 4], which stacks box predictions on all feature levels.
The N is the number of total anchors on all levels.
scores: a tensor with shape [batch_size, N, num_classes], which
stacks class probability on all feature levels. The N is the number of
total anchors on all levels. The num_classes is the number of classes
predicted by the model. Note that the class_outputs here are the raw scores.
boxes: a tensor with shape [batch_size, N, num_classes, 4] or [batch_size,
N, 1, 4], which stacks box predictions on all feature levels. The N is the
number of total anchors on all levels.
scores: a tensor with shape [batch_size, N, num_classes], which stacks class
probability on all feature levels. The N is the number of total anchors on
all levels. The num_classes is the number of classes predicted by the
model. Note that the class_outputs here are the raw scores.
max_total_size: a scalar representing maximum number of boxes retained over
all classes.
nms_iou_threshold: a float representing the threshold for deciding whether
boxes overlap too much with respect to IOU.
score_threshold: a float representing the threshold for deciding when to
remove boxes based on score.
Returns:
nms_boxes: `float` Tensor of shape [batch_size, max_total_size, 4]
representing top detected boxes in [y1, x1, y2, x2].
......@@ -285,7 +283,8 @@ def _generate_detections_batched(boxes,
max_total_size=max_total_size,
iou_threshold=nms_iou_threshold,
score_threshold=score_threshold,
pad_per_class=False,)
pad_per_class=False,
)
# De-normalizes box coordinates.
nmsed_boxes *= normalizer
nmsed_classes = tf.cast(nmsed_classes, tf.int32)
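The keyword arguments above match TF's batched NMS op, presumably tf.image.combined_non_max_suppression; the callee name itself is elided in this hunk. A minimal standalone call for reference (boxes assumed normalized to [0, 1]; sizes illustrative):

import tensorflow as tf

boxes = tf.random.uniform([1, 10, 1, 4])  # [batch, anchors, q, 4]; q=1 shares boxes
scores = tf.random.uniform([1, 10, 3])    # [batch, anchors, num_classes]
nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections = (
    tf.image.combined_non_max_suppression(
        boxes,
        scores,
        max_output_size_per_class=5,
        max_total_size=5,
        iou_threshold=0.5,
        score_threshold=0.05,
        pad_per_class=False))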
......@@ -382,16 +381,13 @@ class GenericDetectionGenerator(object):
box_outputs = tf.reshape(
box_outputs,
tf.stack([batch_size, num_locations, num_classes, 4], axis=-1))
box_outputs = tf.slice(
box_outputs, [0, 0, 1, 0], [-1, -1, -1, -1])
box_outputs = tf.slice(box_outputs, [0, 0, 1, 0], [-1, -1, -1, -1])
anchor_boxes = tf.tile(
tf.expand_dims(anchor_boxes, axis=2), [1, 1, num_classes - 1, 1])
box_outputs = tf.reshape(
box_outputs,
tf.stack([batch_size, num_detections, 4], axis=-1))
box_outputs = tf.reshape(box_outputs,
tf.stack([batch_size, num_detections, 4], axis=-1))
anchor_boxes = tf.reshape(
anchor_boxes,
tf.stack([batch_size, num_detections, 4], axis=-1))
anchor_boxes, tf.stack([batch_size, num_detections, 4], axis=-1))
# Box decoding.
decoded_boxes = box_utils.decode_boxes(
......
......@@ -56,8 +56,8 @@ def multilevel_propose_rois(rpn_boxes,
rpn_scores: a dict with keys representing FPN levels and values representing
logit tensors of shape [batch_size, feature_h, feature_w, num_anchors].
anchor_boxes: a dict with keys representing FPN levels and values
representing anchor box tensors of shape
[batch_size, feature_h, feature_w, num_anchors * 4].
representing anchor box tensors of shape [batch_size, feature_h,
feature_w, num_anchors * 4].
image_shape: a tensor of shape [batch_size, 2] where the last dimension is
[height, width] of the scaled image.
rpn_pre_nms_top_k: an integer of top scoring RPN proposals *per level* to
......@@ -112,17 +112,14 @@ def multilevel_propose_rois(rpn_boxes,
this_level_scores = tf.sigmoid(this_level_scores)
if decode_boxes:
this_level_boxes = box_utils.decode_boxes(
this_level_boxes, this_level_anchors)
this_level_boxes = box_utils.decode_boxes(this_level_boxes,
this_level_anchors)
if clip_boxes:
this_level_boxes = box_utils.clip_boxes(
this_level_boxes, image_shape)
this_level_boxes = box_utils.clip_boxes(this_level_boxes, image_shape)
if rpn_min_size_threshold > 0.0:
this_level_boxes, this_level_scores = box_utils.filter_boxes(
this_level_boxes,
this_level_scores,
image_shape,
this_level_boxes, this_level_scores, image_shape,
rpn_min_size_threshold)
this_level_pre_nms_top_k = min(num_boxes, rpn_pre_nms_top_k)
......@@ -142,8 +139,9 @@ def multilevel_propose_rois(rpn_boxes,
else:
if rpn_score_threshold > 0.0:
this_level_boxes, this_level_scores = (
box_utils.filter_boxes_by_scores(
this_level_boxes, this_level_scores, rpn_score_threshold))
box_utils.filter_boxes_by_scores(this_level_boxes,
this_level_scores,
rpn_score_threshold))
this_level_boxes, this_level_scores = box_utils.top_k_boxes(
this_level_boxes, this_level_scores, k=this_level_pre_nms_top_k)
this_level_roi_scores, this_level_rois = (
......@@ -154,9 +152,7 @@ def multilevel_propose_rois(rpn_boxes,
iou_threshold=rpn_nms_threshold))
else:
this_level_rois, this_level_roi_scores = box_utils.top_k_boxes(
this_level_rois,
this_level_scores,
k=this_level_post_nms_top_k)
this_level_rois, this_level_scores, k=this_level_post_nms_top_k)
rois.append(this_level_rois)
roi_scores.append(this_level_roi_scores)
......@@ -199,8 +195,8 @@ class ROIGenerator(object):
scores: a dict with keys representing FPN levels and values representing
logit tensors of shape [batch_size, feature_h, feature_w, num_anchors].
anchor_boxes: a dict with keys representing FPN levels and values
representing anchor box tensors of shape
[batch_size, feature_h, feature_w, num_anchors * 4].
representing anchor box tensors of shape [batch_size, feature_h,
feature_w, num_anchors * 4].
image_shape: a tensor of shape [batch_size, 2] where the last dimension
is [height, width] of the scaled image.
is_training: a bool indicating whether it is in training or inference
......@@ -220,16 +216,16 @@ class ROIGenerator(object):
scores,
anchor_boxes,
image_shape,
rpn_pre_nms_top_k=(self._rpn_pre_nms_top_k if is_training
else self._test_rpn_pre_nms_top_k),
rpn_post_nms_top_k=(self._rpn_post_nms_top_k if is_training
else self._test_rpn_post_nms_top_k),
rpn_nms_threshold=(self._rpn_nms_threshold if is_training
else self._test_rpn_nms_threshold),
rpn_score_threshold=(self._rpn_score_threshold if is_training
else self._test_rpn_score_threshold),
rpn_min_size_threshold=(self._rpn_min_size_threshold if is_training
else self._test_rpn_min_size_threshold),
rpn_pre_nms_top_k=(self._rpn_pre_nms_top_k
if is_training else self._test_rpn_pre_nms_top_k),
rpn_post_nms_top_k=(self._rpn_post_nms_top_k
if is_training else self._test_rpn_post_nms_top_k),
rpn_nms_threshold=(self._rpn_nms_threshold
if is_training else self._test_rpn_nms_threshold),
rpn_score_threshold=(self._rpn_score_threshold if is_training else
self._test_rpn_score_threshold),
rpn_min_size_threshold=(self._rpn_min_size_threshold if is_training else
self._test_rpn_min_size_threshold),
decode_boxes=True,
clip_boxes=True,
use_batched_nms=self._use_batched_nms,
......
......@@ -20,7 +20,6 @@ from __future__ import print_function
import tensorflow as tf
_EPSILON = 1e-8
......@@ -30,6 +29,7 @@ def nearest_upsampling(data, scale):
Args:
data: A tensor with a shape of [batch, height_in, width_in, channels].
scale: An integer multiple to scale resolution of input data.
Returns:
data_up: A tensor with a shape of
[batch, height_in*scale, width_in*scale, channels]. Same dtype as input
......@@ -382,8 +382,7 @@ def multilevel_crop_and_resize(features, boxes, output_size=7):
areas_sqrt = tf.sqrt(box_height * box_width)
levels = tf.cast(
tf.math.floordiv(
tf.math.log(tf.divide(areas_sqrt, 224.0)), tf.math.log(2.0)) +
4.0,
tf.math.log(tf.divide(areas_sqrt, 224.0)), tf.math.log(2.0)) + 4.0,
dtype=tf.int32)
# Maps levels between [min_level, max_level].
levels = tf.minimum(max_level, tf.maximum(levels, min_level))
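The expression above is the FPN level heuristic k = floor(log2(sqrt(area) / 224)) + 4, clamped to [min_level, max_level]. A plain-Python restatement with worked values (the level bounds here are illustrative):

import math

def assigned_level(box_h, box_w, min_level=2, max_level=6):
    # Mirrors the tf ops above: floordiv(log(sqrt(area) / 224), log(2)) + 4.
    level = math.floor(math.log2(math.sqrt(box_h * box_w) / 224.0)) + 4
    return min(max_level, max(level, min_level))

assert assigned_level(112, 112) == 3  # log2(112 / 224) = -1 -> level 3
assert assigned_level(448, 448) == 5  # log2(448 / 224) = +1 -> level 5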
......@@ -395,9 +394,12 @@ def multilevel_crop_and_resize(features, boxes, output_size=7):
boxes /= tf.expand_dims(scale_to_level, axis=2)
box_width /= scale_to_level
box_height /= scale_to_level
boxes = tf.concat([boxes[:, :, 0:2],
tf.expand_dims(box_height, -1),
tf.expand_dims(box_width, -1)], axis=-1)
boxes = tf.concat([
boxes[:, :, 0:2],
tf.expand_dims(box_height, -1),
tf.expand_dims(box_width, -1)
],
axis=-1)
# Maps levels to [0, max_level-min_level].
levels -= min_level
......@@ -464,12 +466,12 @@ def single_level_feature_crop(features, level_boxes, detection_prior_levels,
Args:
features: a float tensor of shape [batch_size, num_levels,
max_feature_size, max_feature_size, num_downsample_channels].
level_boxes: a float Tensor of the level boxes to crop from.
[batch_size, num_instances, 4].
features: a float tensor of shape [batch_size, num_levels, max_feature_size,
max_feature_size, num_downsample_channels].
level_boxes: a float Tensor of the level boxes to crop from. [batch_size,
num_instances, 4].
detection_prior_levels: an int Tensor of instance assigned level of shape
[batch_size, num_instances].
[batch_size, num_instances].
min_mask_level: minimum FPN level to crop mask feature from.
mask_crop_size: an int of mask crop size.
......@@ -478,8 +480,8 @@ def single_level_feature_crop(features, level_boxes, detection_prior_levels,
mask_crop_size, mask_crop_size, num_downsample_channels]. This is the
instance feature crop.
"""
(batch_size, num_levels, max_feature_size,
_, num_downsample_channels) = features.get_shape().as_list()
(batch_size, num_levels, max_feature_size, _,
num_downsample_channels) = features.get_shape().as_list()
_, num_of_instances, _ = level_boxes.get_shape().as_list()
level_boxes = tf.cast(level_boxes, tf.int32)
assert num_of_instances == detection_prior_levels.get_shape().as_list()[1]
......@@ -503,32 +505,25 @@ def single_level_feature_crop(features, level_boxes, detection_prior_levels,
indices = tf.reshape(
tf.tile(
tf.reshape(
tf.range(batch_size) * batch_dim_size,
[batch_size, 1, 1, 1]),
[1, num_of_instances,
mask_crop_size, mask_crop_size]) +
tf.tile(
tf.reshape(levels * level_dim_size,
[batch_size, num_of_instances, 1, 1]),
[1, 1, mask_crop_size, mask_crop_size]) +
tf.tile(
tf.reshape(y_indices * height_dim_size,
[batch_size, num_of_instances,
mask_crop_size, 1]),
[1, 1, 1, mask_crop_size]) +
tf.range(batch_size) * batch_dim_size, [batch_size, 1, 1, 1]),
[1, num_of_instances, mask_crop_size, mask_crop_size]) + tf.tile(
tf.reshape(levels * level_dim_size,
[batch_size, num_of_instances, 1, 1]),
[1, 1, mask_crop_size, mask_crop_size]) + tf.tile(
tf.reshape(y_indices * height_dim_size,
[batch_size, num_of_instances, mask_crop_size, 1]),
[1, 1, 1, mask_crop_size]) +
tf.tile(
tf.reshape(x_indices,
[batch_size, num_of_instances,
1, mask_crop_size]),
[batch_size, num_of_instances, 1, mask_crop_size]),
[1, 1, mask_crop_size, 1]), [-1])
features_r2 = tf.reshape(features,
[-1, num_downsample_channels])
features_r2 = tf.reshape(features, [-1, num_downsample_channels])
crop_features = tf.reshape(
tf.gather(features_r2, indices),
[batch_size * num_of_instances,
mask_crop_size, mask_crop_size,
num_downsample_channels])
tf.gather(features_r2, indices), [
batch_size * num_of_instances, mask_crop_size, mask_crop_size,
num_downsample_channels
])
return crop_features
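The long index expression above composes one flat offset per sampled pixel: for features of shape [B, L, H, W, C] reshaped to [-1, C], element (b, l, y, x) lives at flat index b*L*H*W + l*H*W + y*W + x, which is exactly what the batch, level, and height dim sizes encode. A quick NumPy sanity check (shapes illustrative):

import numpy as np

B, L, H, W, C = 2, 3, 4, 4, 5
features = np.random.rand(B, L, H, W, C).astype(np.float32)
flat = features.reshape(-1, C)

b, l, y, x = 1, 2, 3, 0
flat_idx = b * L * H * W + l * H * W + y * W + x
assert np.allclose(flat[flat_idx], features[b, l, y, x])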
......@@ -546,9 +541,9 @@ def crop_mask_in_target_box(masks,
boxes: a float tensor representing box coordinates that tightly enclose
masks with a shape of [batch_size, num_masks, 4] in un-normalized
coordinates. A box is represented by [ymin, xmin, ymax, xmax].
target_boxes: a float tensor representing target box coordinates for
masks with a shape of [batch_size, num_masks, 4] in un-normalized
coordinates. A box is represented by [ymin, xmin, ymax, xmax].
target_boxes: a float tensor representing target box coordinates for masks
with a shape of [batch_size, num_masks, 4] in un-normalized coordinates. A
box is represented by [ymin, xmin, ymax, xmax].
output_size: A scalar to indicate the output crop size. It currently only
supports square outputs.
sample_offset: a float number in [0, 1] indicating the subpixel sample offset
......@@ -561,10 +556,10 @@ def crop_mask_in_target_box(masks,
"""
with tf.name_scope('crop_mask_in_target_box'):
batch_size, num_masks, height, width = masks.get_shape().as_list()
masks = tf.reshape(masks, [batch_size*num_masks, height, width, 1])
masks = tf.reshape(masks, [batch_size * num_masks, height, width, 1])
# Pad zeros on the boundary of masks.
masks = tf.image.pad_to_bounding_box(masks, 2, 2, height + 4, width + 4)
masks = tf.reshape(masks, [batch_size, num_masks, height+4, width+4, 1])
masks = tf.reshape(masks, [batch_size, num_masks, height + 4, width + 4, 1])
# Projects target box locations and sizes to corresponding cropped
# mask coordinates.
......@@ -572,10 +567,10 @@ def crop_mask_in_target_box(masks,
value=boxes, num_or_size_splits=4, axis=2)
bb_y_min, bb_x_min, bb_y_max, bb_x_max = tf.split(
value=target_boxes, num_or_size_splits=4, axis=2)
y_transform = (bb_y_min - gt_y_min) * height / (
gt_y_max - gt_y_min + _EPSILON) + 2
x_transform = (bb_x_min - gt_x_min) * height / (
gt_x_max - gt_x_min + _EPSILON) + 2
y_transform = (bb_y_min - gt_y_min) * height / (gt_y_max - gt_y_min +
_EPSILON) + 2
x_transform = (bb_x_min - gt_x_min) * height / (gt_x_max - gt_x_min +
_EPSILON) + 2
h_transform = (bb_y_max - bb_y_min) * width / (
gt_y_max - gt_y_min + _EPSILON)
w_transform = (bb_x_max - bb_x_min) * width / (
......@@ -592,8 +587,8 @@ def crop_mask_in_target_box(masks,
# Reshape tensors to have the right shape for selective_crop_and_resize.
trasnformed_boxes = tf.concat(
[y_transform, x_transform, h_transform, w_transform], -1)
levels = tf.tile(tf.reshape(tf.range(num_masks), [1, num_masks]),
[batch_size, 1])
levels = tf.tile(
tf.reshape(tf.range(num_masks), [1, num_masks]), [batch_size, 1])
cropped_masks = selective_crop_and_resize(
masks,
......
......@@ -87,18 +87,16 @@ def box_matching(boxes, gt_boxes, gt_classes):
matched_gt_boxes)
matched_gt_classes = tf.gather_nd(gt_classes, gather_nd_indices)
matched_gt_classes = tf.where(
background_box_mask,
tf.zeros_like(matched_gt_classes),
matched_gt_classes)
matched_gt_classes = tf.where(background_box_mask,
tf.zeros_like(matched_gt_classes),
matched_gt_classes)
matched_gt_indices = tf.where(
background_box_mask,
-tf.ones_like(argmax_iou_indices),
argmax_iou_indices)
matched_gt_indices = tf.where(background_box_mask,
-tf.ones_like(argmax_iou_indices),
argmax_iou_indices)
return (matched_gt_boxes, matched_gt_classes, matched_gt_indices,
matched_iou, iou)
return (matched_gt_boxes, matched_gt_classes, matched_gt_indices, matched_iou,
iou)
def assign_and_sample_proposals(proposed_boxes,
......@@ -121,22 +119,21 @@ def assign_and_sample_proposals(proposed_boxes,
returns box_targets, class_targets, and RoIs.
Args:
proposed_boxes: a tensor of shape of [batch_size, N, 4]. N is the number
of proposals before groundtruth assignment. The last dimension is the
box coordinates w.r.t. the scaled images in [ymin, xmin, ymax, xmax]
format.
gt_boxes: a tensor of shape of [batch_size, MAX_NUM_INSTANCES, 4].
The coordinates of gt_boxes are in the pixel coordinates of the scaled
image. This tensor might have padding of values -1 indicating the invalid
box coordinates.
proposed_boxes: a tensor of shape of [batch_size, N, 4]. N is the number of
proposals before groundtruth assignment. The last dimension is the box
coordinates w.r.t. the scaled images in [ymin, xmin, ymax, xmax] format.
gt_boxes: a tensor of shape of [batch_size, MAX_NUM_INSTANCES, 4]. The
coordinates of gt_boxes are in the pixel coordinates of the scaled image.
This tensor might have padding of values -1 indicating the invalid box
coordinates.
gt_classes: a tensor with a shape of [batch_size, MAX_NUM_INSTANCES]. This
tensor might have paddings with values of -1 indicating the invalid
classes.
num_samples_per_image: an integer representing the RoI minibatch size per image.
mix_gt_boxes: a bool indicating whether to mix the groundtruth boxes before
sampling proposals.
fg_fraction: a float representing the target fraction of RoI minibatch that
is labeled foreground (i.e., class > 0).
fg_fraction: a float representing the target fraction of RoI minibatch that is
labeled foreground (i.e., class > 0).
fg_iou_thresh: a float representing the IoU overlap threshold for an RoI to be
considered foreground (if >= fg_iou_thresh).
bg_iou_thresh_hi: a float representing the IoU overlap threshold for an RoI to
......@@ -163,8 +160,8 @@ def assign_and_sample_proposals(proposed_boxes,
else:
boxes = proposed_boxes
(matched_gt_boxes, matched_gt_classes, matched_gt_indices,
matched_iou, _) = box_matching(boxes, gt_boxes, gt_classes)
(matched_gt_boxes, matched_gt_classes, matched_gt_indices, matched_iou,
_) = box_matching(boxes, gt_boxes, gt_classes)
positive_match = tf.greater(matched_iou, fg_iou_thresh)
negative_match = tf.logical_and(
......@@ -173,10 +170,12 @@ def assign_and_sample_proposals(proposed_boxes,
ignored_match = tf.less(matched_iou, 0.0)
# re-assign negatively matched boxes to the background class.
matched_gt_classes = tf.where(
negative_match, tf.zeros_like(matched_gt_classes), matched_gt_classes)
matched_gt_indices = tf.where(
negative_match, tf.zeros_like(matched_gt_indices), matched_gt_indices)
matched_gt_classes = tf.where(negative_match,
tf.zeros_like(matched_gt_classes),
matched_gt_classes)
matched_gt_indices = tf.where(negative_match,
tf.zeros_like(matched_gt_indices),
matched_gt_indices)
sample_candidates = tf.logical_and(
tf.logical_or(positive_match, negative_match),
......@@ -189,8 +188,9 @@ def assign_and_sample_proposals(proposed_boxes,
batch_size, _ = sample_candidates.get_shape().as_list()
sampled_indicators = []
for i in range(batch_size):
sampled_indicator = sampler.subsample(
sample_candidates[i], num_samples_per_image, positive_match[i])
sampled_indicator = sampler.subsample(sample_candidates[i],
num_samples_per_image,
positive_match[i])
sampled_indicators.append(sampled_indicator)
sampled_indicators = tf.stack(sampled_indicators)
_, sampled_indices = tf.nn.top_k(
......@@ -206,10 +206,8 @@ def assign_and_sample_proposals(proposed_boxes,
sampled_rois = tf.gather_nd(boxes, gather_nd_indices)
sampled_gt_boxes = tf.gather_nd(matched_gt_boxes, gather_nd_indices)
sampled_gt_classes = tf.gather_nd(
matched_gt_classes, gather_nd_indices)
sampled_gt_indices = tf.gather_nd(
matched_gt_indices, gather_nd_indices)
sampled_gt_classes = tf.gather_nd(matched_gt_classes, gather_nd_indices)
sampled_gt_indices = tf.gather_nd(matched_gt_indices, gather_nd_indices)
return (sampled_rois, sampled_gt_boxes, sampled_gt_classes,
sampled_gt_indices)
......@@ -237,8 +235,8 @@ def sample_and_crop_foreground_masks(candidate_rois,
candidate_gt_indices: a tensor of shape [batch_size, N], storing the
corresponding groundtruth instance indices to the `candidate_gt_boxes`,
i.e. gt_boxes[candidate_gt_indices[:, i]] = candidate_gt_boxes[:, i] and
gt_boxes, which is of shape [batch_size, MAX_INSTANCES, 4] with
MAX_INSTANCES >= N, is the superset of candidate_gt_boxes.
gt_boxes, which is of shape [batch_size, MAX_INSTANCES, 4] with MAX_INSTANCES
>= N, is the superset of candidate_gt_boxes.
gt_masks: a tensor of [batch_size, MAX_INSTANCES, mask_height, mask_width]
containing all the groundtruth masks from which sampled masks are drawn.
num_mask_samples_per_image: an integer which specifies the number of masks
......@@ -266,27 +264,29 @@ def sample_and_crop_foreground_masks(candidate_rois,
tf.expand_dims(tf.range(fg_instance_indices_shape[0]), axis=-1) *
tf.ones([1, fg_instance_indices_shape[-1]], dtype=tf.int32))
gather_nd_instance_indices = tf.stack(
[batch_indices, fg_instance_indices], axis=-1)
foreground_rois = tf.gather_nd(
candidate_rois, gather_nd_instance_indices)
foreground_boxes = tf.gather_nd(
candidate_gt_boxes, gather_nd_instance_indices)
foreground_classes = tf.gather_nd(
candidate_gt_classes, gather_nd_instance_indices)
foreground_gt_indices = tf.gather_nd(
candidate_gt_indices, gather_nd_instance_indices)
gather_nd_instance_indices = tf.stack([batch_indices, fg_instance_indices],
axis=-1)
foreground_rois = tf.gather_nd(candidate_rois, gather_nd_instance_indices)
foreground_boxes = tf.gather_nd(candidate_gt_boxes,
gather_nd_instance_indices)
foreground_classes = tf.gather_nd(candidate_gt_classes,
gather_nd_instance_indices)
foreground_gt_indices = tf.gather_nd(candidate_gt_indices,
gather_nd_instance_indices)
foreground_gt_indices_shape = tf.shape(foreground_gt_indices)
batch_indices = (
tf.expand_dims(tf.range(foreground_gt_indices_shape[0]), axis=-1) *
tf.ones([1, foreground_gt_indices_shape[-1]], dtype=tf.int32))
gather_nd_gt_indices = tf.stack(
[batch_indices, foreground_gt_indices], axis=-1)
gather_nd_gt_indices = tf.stack([batch_indices, foreground_gt_indices],
axis=-1)
foreground_masks = tf.gather_nd(gt_masks, gather_nd_gt_indices)
cropped_foreground_masks = spatial_transform_ops.crop_mask_in_target_box(
foreground_masks, foreground_boxes, foreground_rois, mask_target_size,
foreground_masks,
foreground_boxes,
foreground_rois,
mask_target_size,
sample_offset=0.5)
return foreground_rois, foreground_classes, cropped_foreground_masks
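The batch_indices / tf.stack / tf.gather_nd pattern repeated above is a batched gather: each per-image index is paired with its batch row before the lookup. A small sketch with illustrative shapes:

import tensorflow as tf

values = tf.reshape(tf.range(2 * 4), [2, 4])  # [batch=2, N=4]
fg_indices = tf.constant([[0, 2], [1, 3]])    # per-image picks
batch_indices = tf.expand_dims(tf.range(2), -1) * tf.ones([1, 2], tf.int32)
nd_indices = tf.stack([batch_indices, fg_indices], axis=-1)  # [2, 2, 2]
picked = tf.gather_nd(values, nd_indices)     # -> [[0, 2], [5, 7]]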
......@@ -307,12 +307,11 @@ class ROISampler(object):
"""Sample and assign RoIs for training.
Args:
rois: a tensor of shape of [batch_size, N, 4]. N is the number
of proposals before groundtruth assignment. The last dimension is the
box coordinates w.r.t. the scaled images in [ymin, xmin, ymax, xmax]
format.
gt_boxes: a tensor of shape of [batch_size, MAX_NUM_INSTANCES, 4].
The coordinates of gt_boxes are in the pixel coordinates of the scaled
rois: a tensor of shape of [batch_size, N, 4]. N is the number of
proposals before groundtruth assignment. The last dimension is the box
coordinates w.r.t. the scaled images in [ymin, xmin, ymax, xmax] format.
gt_boxes: a tensor of shape of [batch_size, MAX_NUM_INSTANCES, 4]. The
coordinates of gt_boxes are in the pixel coordinates of the scaled
image. This tensor might have padding of values -1 indicating the
invalid box coordinates.
gt_classes: a tensor with a shape of [batch_size, MAX_NUM_INSTANCES]. This
......@@ -350,12 +349,8 @@ class MaskSampler(object):
self._mask_target_size = mask_target_size
self._num_mask_samples_per_image = num_mask_samples_per_image
def __call__(self,
candidate_rois,
candidate_gt_boxes,
candidate_gt_classes,
candidate_gt_indices,
gt_masks):
def __call__(self, candidate_rois, candidate_gt_boxes, candidate_gt_classes,
candidate_gt_indices, gt_masks):
"""Sample and create mask targets for training.
Args:
......@@ -371,8 +366,8 @@ class MaskSampler(object):
candidate_gt_indices: a tensor of shape [batch_size, N], storing the
corresponding groundtruth instance indices to the `candidate_gt_boxes`,
i.e. gt_boxes[candidate_gt_indices[:, i]] = candidate_gt_boxes[:, i],
where gt_boxes, which is of shape [batch_size, MAX_INSTANCES, 4] with
MAX_INSTANCES >= N, is the superset of candidate_gt_boxes.
where gt_boxes, which is of shape [batch_size, MAX_INSTANCES, 4] with
MAX_INSTANCES >= N, is the superset of candidate_gt_boxes.
gt_masks: a tensor of [batch_size, MAX_INSTANCES, mask_height, mask_width]
containing all the groundtruth masks from which sampled masks are drawn.
The output masks are resized w.r.t. the sampled RoIs.
......@@ -388,12 +383,9 @@ class MaskSampler(object):
cropped foreground masks used for training.
"""
foreground_rois, foreground_classes, cropped_foreground_masks = (
sample_and_crop_foreground_masks(
candidate_rois,
candidate_gt_boxes,
candidate_gt_classes,
candidate_gt_indices,
gt_masks,
self._num_mask_samples_per_image,
self._mask_target_size))
sample_and_crop_foreground_masks(candidate_rois, candidate_gt_boxes,
candidate_gt_classes,
candidate_gt_indices, gt_masks,
self._num_mask_samples_per_image,
self._mask_target_size))
return foreground_rois, foreground_classes, cropped_foreground_masks
......@@ -115,8 +115,8 @@ def normalize_boxes(boxes, image_shape):
"""Converts boxes to the normalized coordinates.
Args:
boxes: a tensor whose last dimension is 4 representing the coordinates
of boxes in ymin, xmin, ymax, xmax order.
boxes: a tensor whose last dimension is 4 representing the coordinates of
boxes in ymin, xmin, ymax, xmax order.
image_shape: a list of two integers, a two-element vector or a tensor such
that all but the last dimensions are `broadcastable` to `boxes`. The last
dimension is 2, which represents [height, width].
......@@ -153,8 +153,8 @@ def denormalize_boxes(boxes, image_shape):
"""Converts boxes normalized by [height, width] to pixel coordinates.
Args:
boxes: a tensor whose last dimension is 4 representing the coordinates
of boxes in ymin, xmin, ymax, xmax order.
boxes: a tensor whose last dimension is 4 representing the coordinates of
boxes in ymin, xmin, ymax, xmax order.
image_shape: a list of two integers, a two-element vector or a tensor such
that all but the last dimensions are `broadcastable` to `boxes`. The last
dimension is 2, which represents [height, width].
......@@ -187,8 +187,8 @@ def clip_boxes(boxes, image_shape):
"""Clips boxes to image boundaries.
Args:
boxes: a tensor whose last dimension is 4 representing the coordinates
of boxes in ymin, xmin, ymax, xmax order.
boxes: a tensor whose last dimension is 4 representing the coordinates of
boxes in ymin, xmin, ymax, xmax order.
image_shape: a list of two integers, a two-element vector or a tensor such
that all but the last dimensions are `broadcastable` to `boxes`. The last
dimension is 2, which represents [height, width].
......@@ -255,8 +255,8 @@ def encode_boxes(boxes, anchors, weights=None):
"""Encode boxes to targets.
Args:
boxes: a tensor whose last dimension is 4 representing the coordinates
of boxes in ymin, xmin, ymax, xmax order.
boxes: a tensor whose last dimension is 4 representing the coordinates of
boxes in ymin, xmin, ymax, xmax order.
anchors: a tensor whose shape is the same as, or `broadcastable` to `boxes`,
representing the coordinates of anchors in ymin, xmin, ymax, xmax order.
weights: None or a list of four float numbers used to scale coordinates.
......@@ -302,9 +302,8 @@ def encode_boxes(boxes, anchors, weights=None):
encoded_dh *= weights[2]
encoded_dw *= weights[3]
encoded_boxes = tf.concat(
[encoded_dy, encoded_dx, encoded_dh, encoded_dw],
axis=-1)
encoded_boxes = tf.concat([encoded_dy, encoded_dx, encoded_dh, encoded_dw],
axis=-1)
return encoded_boxes
......@@ -359,10 +358,11 @@ def decode_boxes(encoded_boxes, anchors, weights=None):
decoded_boxes_ymax = decoded_boxes_ymin + decoded_boxes_h - 1.0
decoded_boxes_xmax = decoded_boxes_xmin + decoded_boxes_w - 1.0
decoded_boxes = tf.concat(
[decoded_boxes_ymin, decoded_boxes_xmin,
decoded_boxes_ymax, decoded_boxes_xmax],
axis=-1)
decoded_boxes = tf.concat([
decoded_boxes_ymin, decoded_boxes_xmin, decoded_boxes_ymax,
decoded_boxes_xmax
],
axis=-1)
return decoded_boxes
......@@ -546,6 +546,6 @@ def get_non_empty_box_indices(boxes):
# Selects indices where box height and width are both greater than 0.
height = boxes[:, 2] - boxes[:, 0]
width = boxes[:, 3] - boxes[:, 1]
indices = tf.where(tf.logical_and(tf.greater(height, 0),
tf.greater(width, 0)))
indices = tf.where(
tf.logical_and(tf.greater(height, 0), tf.greater(width, 0)))
return indices[:, 0]
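A quick usage sketch of the predicate above, assuming [N, 4] boxes in [ymin, xmin, ymax, xmax] order:

import tensorflow as tf

boxes = tf.constant([[0., 0., 10., 10.],  # kept
                     [5., 5., 5., 9.],    # zero height, dropped
                     [1., 1., 2., 1.]])   # zero width, dropped
height = boxes[:, 2] - boxes[:, 0]
width = boxes[:, 3] - boxes[:, 1]
indices = tf.where(tf.logical_and(tf.greater(height, 0), tf.greater(width, 0)))
# indices[:, 0] -> [0]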
......@@ -15,6 +15,7 @@
"""Utility functions for input processing."""
import math
import tensorflow as tf
from official.vision.detection.utils import box_utils
......@@ -91,12 +92,12 @@ def compute_padded_size(desired_size, stride):
[height, width] of the padded output image size.
"""
if isinstance(desired_size, list) or isinstance(desired_size, tuple):
padded_size = [int(math.ceil(d * 1.0 / stride) * stride)
for d in desired_size]
padded_size = [
int(math.ceil(d * 1.0 / stride) * stride) for d in desired_size
]
else:
padded_size = tf.cast(
tf.math.ceil(
tf.cast(desired_size, dtype=tf.float32) / stride) * stride,
tf.math.ceil(tf.cast(desired_size, dtype=tf.float32) / stride) * stride,
tf.int32)
return padded_size
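A worked example of the list branch above: with stride 128, a desired size of [640, 853] pads up to [640, 896]:

import math

desired_size, stride = [640, 853], 128
padded_size = [int(math.ceil(d * 1.0 / stride) * stride) for d in desired_size]
assert padded_size == [640, 896]  # ceil(640/128)*128 = 640, ceil(853/128)*128 = 896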
......@@ -158,8 +159,8 @@ def resize_and_crop_image(image,
else:
scaled_size = desired_size
scale = tf.minimum(
scaled_size[0] / image_size[0], scaled_size[1] / image_size[1])
scale = tf.minimum(scaled_size[0] / image_size[0],
scaled_size[1] / image_size[1])
scaled_size = tf.round(image_size * scale)
# Computes 2D image_scale.
......@@ -169,9 +170,8 @@ def resize_and_crop_image(image,
# desired_size.
if random_jittering:
max_offset = scaled_size - desired_size
max_offset = tf.where(tf.less(max_offset, 0),
tf.zeros_like(max_offset),
max_offset)
max_offset = tf.where(
tf.less(max_offset, 0), tf.zeros_like(max_offset), max_offset)
offset = max_offset * tf.random.uniform([
2,
], 0, 1, seed=seed)
......@@ -191,9 +191,9 @@ def resize_and_crop_image(image,
image_info = tf.stack([
image_size,
tf.cast(desired_size, dtype=tf.float32),
image_scale,
tf.cast(offset, tf.float32)])
tf.cast(desired_size, dtype=tf.float32), image_scale,
tf.cast(offset, tf.float32)
])
return output_image, image_info
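The image_info packed above stacks four [height, width]-shaped rows per image, which is presumably why callers earlier in this change slice image_info[:, 1:2, :] to recover the resized, pre-padding image size. A sketch of the layout with illustrative numbers:

import tensorflow as tf

# Rows: 0 = original size, 1 = desired (resized) size, 2 = y/x scale, 3 = y/x offset.
image_info = tf.constant([[[1024., 768.],
                           [640., 480.],
                           [0.625, 0.625],
                           [0., 0.]]])  # [batch=1, 4, 2]
scaled_size = image_info[:, 1:2, :]     # -> [[[640., 480.]]]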
......@@ -288,25 +288,21 @@ def resize_and_crop_image_v2(image,
image, tf.cast(scaled_size, tf.int32), method=method)
if random_jittering:
scaled_image = scaled_image[
offset[0]:offset[0] + desired_size[0],
offset[1]:offset[1] + desired_size[1], :]
scaled_image = scaled_image[offset[0]:offset[0] + desired_size[0],
offset[1]:offset[1] + desired_size[1], :]
output_image = tf.image.pad_to_bounding_box(
scaled_image, 0, 0, padded_size[0], padded_size[1])
output_image = tf.image.pad_to_bounding_box(scaled_image, 0, 0,
padded_size[0], padded_size[1])
image_info = tf.stack([
image_size,
tf.cast(desired_size, dtype=tf.float32),
image_scale,
tf.cast(offset, tf.float32)])
tf.cast(desired_size, dtype=tf.float32), image_scale,
tf.cast(offset, tf.float32)
])
return output_image, image_info
def resize_and_crop_boxes(boxes,
image_scale,
output_size,
offset):
def resize_and_crop_boxes(boxes, image_scale, output_size, offset):
"""Resizes boxes to output size with scale and offset.
Args:
......@@ -329,10 +325,7 @@ def resize_and_crop_boxes(boxes,
return boxes
def resize_and_crop_masks(masks,
image_scale,
output_size,
offset):
def resize_and_crop_masks(masks, image_scale, output_size, offset):
"""Resizes boxes to output size with scale and offset.
Args:
......
......@@ -18,14 +18,12 @@ from __future__ import division
from __future__ import print_function
import math
import numpy as np
import cv2
def paste_instance_masks(masks,
detected_boxes,
image_height,
image_width):
def paste_instance_masks(masks, detected_boxes, image_height, image_width):
"""Paste instance masks to generate the image segmentation results.
Args:
......@@ -95,10 +93,8 @@ def paste_instance_masks(masks,
y_0 = min(max(ref_box[1], 0), image_height)
y_1 = min(max(ref_box[3] + 1, 0), image_height)
im_mask[y_0:y_1, x_0:x_1] = mask[
(y_0 - ref_box[1]):(y_1 - ref_box[1]),
(x_0 - ref_box[0]):(x_1 - ref_box[0])
]
im_mask[y_0:y_1, x_0:x_1] = mask[(y_0 - ref_box[1]):(y_1 - ref_box[1]),
(x_0 - ref_box[0]):(x_1 - ref_box[0])]
segms.append(im_mask)
segms = np.array(segms)
......@@ -106,10 +102,7 @@ def paste_instance_masks(masks,
return segms
def paste_instance_masks_v2(masks,
detected_boxes,
image_height,
image_width):
def paste_instance_masks_v2(masks, detected_boxes, image_height, image_width):
"""Paste instance masks to generate the image segmentation (v2).
Args:
......@@ -146,34 +139,22 @@ def paste_instance_masks_v2(masks,
beta = box[3] / (1.0 * mask_height)
# pylint: disable=invalid-name
# Transformation from mask pixel indices to image coordinate.
M_mask_to_image = np.array(
[[alpha, 0, xmin],
[0, beta, ymin],
[0, 0, 1]],
dtype=np.float32)
M_mask_to_image = np.array([[alpha, 0, xmin], [0, beta, ymin], [0, 0, 1]],
dtype=np.float32)
# Transformation from image to cropped mask coordinate.
M_image_to_crop = np.array(
[[1, 0, -xmin_int],
[0, 1, -ymin_int],
[0, 0, 1]],
dtype=np.float32)
[[1, 0, -xmin_int], [0, 1, -ymin_int], [0, 0, 1]], dtype=np.float32)
M = np.dot(M_image_to_crop, M_mask_to_image)
# Compensate the half pixel offset that OpenCV has in the
# warpPerspective implementation: the top-left pixel is sampled
# at (0,0), but we want it to be at (0.5, 0.5).
M = np.dot(
np.dot(
np.array([[1, 0, -0.5],
[0, 1, -0.5],
[0, 0, 1]], np.float32),
M),
np.array([[1, 0, 0.5],
[0, 1, 0.5],
[0, 0, 1]], np.float32))
np.array([[1, 0, -0.5], [0, 1, -0.5], [0, 0, 1]], np.float32), M),
np.array([[1, 0, 0.5], [0, 1, 0.5], [0, 0, 1]], np.float32))
# pylint: enable=invalid-name
cropped_mask = cv2.warpPerspective(
mask.astype(np.float32), M,
(xmax_int - xmin_int, ymax_int - ymin_int))
mask.astype(np.float32), M, (xmax_int - xmin_int, ymax_int - ymin_int))
cropped_mask = np.array(cropped_mask > 0.5, dtype=np.uint8)
img_mask = np.zeros((image_height, image_width))
......@@ -181,12 +162,10 @@ def paste_instance_masks_v2(masks,
x1 = max(min(xmax_int, image_width), 0)
y0 = max(min(ymin_int, image_height), 0)
y1 = max(min(ymax_int, image_height), 0)
img_mask[y0:y1, x0:x1] = cropped_mask[
(y0 - ymin_int):(y1 - ymin_int),
(x0 - xmin_int):(x1 - xmin_int)]
img_mask[y0:y1, x0:x1] = cropped_mask[(y0 - ymin_int):(y1 - ymin_int),
(x0 - xmin_int):(x1 - xmin_int)]
segms.append(img_mask)
segms = np.array(segms)
return segms
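The matrix algebra in paste_instance_masks_v2 composes a mask-to-image and an image-to-crop affine transform, then conjugates with translations of minus and plus 0.5 to compensate for OpenCV's half-pixel sampling convention. A NumPy sketch with illustrative values:

import numpy as np

alpha, beta = 2.0, 2.0       # mask pixel -> image scale (illustrative)
xmin, ymin = 10.0, 20.0      # box origin in image coordinates
xmin_int, ymin_int = 10, 20  # integer crop origin

M_mask_to_image = np.array([[alpha, 0, xmin], [0, beta, ymin], [0, 0, 1]], np.float32)
M_image_to_crop = np.array([[1, 0, -xmin_int], [0, 1, -ymin_int], [0, 0, 1]], np.float32)
M = M_image_to_crop @ M_mask_to_image
M = (np.array([[1, 0, -0.5], [0, 1, -0.5], [0, 0, 1]], np.float32) @ M
     @ np.array([[1, 0, 0.5], [0, 1, 0.5], [0, 0, 1]], np.float32))
# The conjugated M maps p to M_old(p + 0.5) - 0.5, i.e. pixel centers to pixel centers.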
......@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Class to subsample minibatches by balancing positives and negatives.
Subsamples minibatches based on a pre-specified positive fraction in range
......@@ -92,10 +91,10 @@ class BalancedPositiveNegativeSampler(minibatch_sampler.MinibatchSampler):
Args:
input_tensor: An int32 tensor of shape [N] to be sliced.
num_start_samples: Number of examples to be sliced from the beginning
of the input tensor.
num_end_samples: Number of examples to be sliced from the end of the
input tensor.
num_start_samples: Number of examples to be sliced from the beginning of
the input tensor.
num_end_samples: Number of examples to be sliced from the end of the input
tensor.
total_num_samples: Sum of num_start_samples and num_end_samples. This
should be a scalar.
......@@ -110,13 +109,16 @@ class BalancedPositiveNegativeSampler(minibatch_sampler.MinibatchSampler):
tf.range(input_length), input_length - num_end_samples)
selected_positions = tf.logical_or(start_positions, end_positions)
selected_positions = tf.cast(selected_positions, tf.float32)
indexed_positions = tf.multiply(tf.cumsum(selected_positions),
selected_positions)
one_hot_selector = tf.one_hot(tf.cast(indexed_positions, tf.int32) - 1,
total_num_samples,
dtype=tf.float32)
return tf.cast(tf.tensordot(tf.cast(input_tensor, tf.float32),
one_hot_selector, axes=[0, 0]), tf.int32)
indexed_positions = tf.multiply(
tf.cumsum(selected_positions), selected_positions)
one_hot_selector = tf.one_hot(
tf.cast(indexed_positions, tf.int32) - 1,
total_num_samples,
dtype=tf.float32)
return tf.cast(
tf.tensordot(
tf.cast(input_tensor, tf.float32), one_hot_selector, axes=[0, 0]),
tf.int32)
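The tensordot against a one-hot selector above is a static-shape-friendly gather: it picks the first num_start_samples and last num_end_samples entries without dynamic slicing. A small sketch (sizes illustrative):

import tensorflow as tf

input_tensor = tf.constant([10, 11, 12, 13, 14], tf.int32)
num_start, num_end, total = 2, 1, 3
n = tf.size(input_tensor)
selected = tf.cast(
    tf.logical_or(tf.less(tf.range(n), num_start),
                  tf.greater_equal(tf.range(n), n - num_end)), tf.float32)
indexed = tf.cumsum(selected) * selected
one_hot = tf.one_hot(tf.cast(indexed, tf.int32) - 1, total, dtype=tf.float32)
picked = tf.cast(
    tf.tensordot(tf.cast(input_tensor, tf.float32), one_hot, axes=[0, 0]),
    tf.int32)
# picked -> [10, 11, 14]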
def _static_subsample(self, indicator, batch_size, labels):
"""Returns subsampled minibatch.
......@@ -182,13 +184,12 @@ class BalancedPositiveNegativeSampler(minibatch_sampler.MinibatchSampler):
sorted_signed_indicator_idx = tf.nn.top_k(
signed_indicator_idx, input_length, sorted=True).values
[num_positive_samples,
num_negative_samples] = self._get_num_pos_neg_samples(
sorted_signed_indicator_idx, batch_size)
[num_positive_samples, num_negative_samples
] = self._get_num_pos_neg_samples(sorted_signed_indicator_idx, batch_size)
sampled_idx = self._get_values_from_start_and_end(
sorted_signed_indicator_idx, num_positive_samples,
num_negative_samples, batch_size)
sorted_signed_indicator_idx, num_positive_samples, num_negative_samples,
batch_size)
# Shift the indices to start from 0 and remove any samples that are set as
# False.
......@@ -203,11 +204,13 @@ class BalancedPositiveNegativeSampler(minibatch_sampler.MinibatchSampler):
tf.bool)
# project back the order based on stored permutations
reprojections = tf.one_hot(permutation, depth=input_length,
dtype=tf.float32)
return tf.cast(tf.tensordot(
tf.cast(sampled_idx_indicator, tf.float32),
reprojections, axes=[0, 0]), tf.bool)
reprojections = tf.one_hot(
permutation, depth=input_length, dtype=tf.float32)
return tf.cast(
tf.tensordot(
tf.cast(sampled_idx_indicator, tf.float32),
reprojections,
axes=[0, 0]), tf.bool)
def subsample(self, indicator, batch_size, labels, scope=None):
"""Returns subsampled minibatch.
......@@ -218,7 +221,7 @@ class BalancedPositiveNegativeSampler(minibatch_sampler.MinibatchSampler):
randomly selects negative samples so that the positive sample fraction
matches self._positive_fraction. It cannot be None if is_static is True.
labels: boolean tensor of shape [N] denoting positive(=True) and negative
(=False) examples.
(=False) examples.
scope: name scope.
Returns:
......
......@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Base box coder.
Box coders convert between coordinate frames, namely image-centric
......@@ -32,7 +31,6 @@ from abc import abstractproperty
import tensorflow as tf
# Box coder types.
FASTER_RCNN = 'faster_rcnn'
KEYPOINT = 'keypoint'
......@@ -138,11 +136,11 @@ def batch_decode(encoded_boxes, box_coder, anchors):
"""
encoded_boxes.get_shape().assert_has_rank(3)
if encoded_boxes.get_shape()[1].value != anchors.num_boxes_static():
raise ValueError('The number of anchors inferred from encoded_boxes'
' and anchors are inconsistent: shape[1] of encoded_boxes'
' %s should be equal to the number of anchors: %s.' %
(encoded_boxes.get_shape()[1].value,
anchors.num_boxes_static()))
raise ValueError(
'The number of anchors inferred from encoded_boxes'
' and anchors are inconsistent: shape[1] of encoded_boxes'
' %s should be equal to the number of anchors: %s.' %
(encoded_boxes.get_shape()[1].value, anchors.num_boxes_static()))
decoded_boxes = tf.stack([
box_coder.decode(boxes, anchors).get()
......
......@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Bounding Box List definition.
BoxList represents a list of bounding boxes as tensorflow
......@@ -126,8 +125,8 @@ class BoxList(object):
it returns the box coordinates.
Args:
field: this optional string parameter can be used to specify
a related field to be accessed.
field: this optional string parameter can be used to specify a related
field to be accessed.
Returns:
a tensor representing the box collection or an associated field.
......@@ -192,8 +191,8 @@ class BoxList(object):
"""Retrieves specified fields as a dictionary of tensors.
Args:
fields: (optional) list of fields to return in the dictionary.
If None (default), all fields are returned.
fields: (optional) list of fields to return in the dictionary. If None
(default), all fields are returned.
Returns:
tensor_dict: A dictionary of tensors specified by fields.
......
......@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Bounding Box List operations.
Example box operations that are supported:
......@@ -152,8 +151,8 @@ def prune_outside_window(boxlist, window, scope=None):
Args:
boxlist: a BoxList holding M_in boxes.
window: a float tensor of shape [4] representing [ymin, xmin, ymax, xmax]
of the window
window: a float tensor of shape [4] representing [ymin, xmin, ymax, xmax] of
the window
scope: name scope.
Returns:
......@@ -166,8 +165,10 @@ def prune_outside_window(boxlist, window, scope=None):
value=boxlist.get(), num_or_size_splits=4, axis=1)
win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window)
coordinate_violations = tf.concat([
tf.less(y_min, win_y_min), tf.less(x_min, win_x_min),
tf.greater(y_max, win_y_max), tf.greater(x_max, win_x_max)
tf.less(y_min, win_y_min),
tf.less(x_min, win_x_min),
tf.greater(y_max, win_y_max),
tf.greater(x_max, win_x_max)
], 1)
valid_indices = tf.reshape(
tf.where(tf.logical_not(tf.reduce_any(coordinate_violations, 1))), [-1])
......@@ -183,8 +184,8 @@ def prune_completely_outside_window(boxlist, window, scope=None):
Args:
boxlist: a BoxList holding M_in boxes.
window: a float tensor of shape [4] representing [ymin, xmin, ymax, xmax]
of the window
window: a float tensor of shape [4] representing [ymin, xmin, ymax, xmax] of
the window
scope: name scope.
Returns:
......@@ -198,8 +199,10 @@ def prune_completely_outside_window(boxlist, window, scope=None):
value=boxlist.get(), num_or_size_splits=4, axis=1)
win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window)
coordinate_violations = tf.concat([
tf.greater_equal(y_min, win_y_max), tf.greater_equal(x_min, win_x_max),
tf.less_equal(y_max, win_y_min), tf.less_equal(x_max, win_x_min)
tf.greater_equal(y_min, win_y_max),
tf.greater_equal(x_min, win_x_max),
tf.less_equal(y_max, win_y_min),
tf.less_equal(x_max, win_x_min)
], 1)
valid_indices = tf.reshape(
tf.where(tf.logical_not(tf.reduce_any(coordinate_violations, 1))), [-1])
......@@ -274,8 +277,8 @@ def iou(boxlist1, boxlist2, scope=None):
unions = (
tf.expand_dims(areas1, 1) + tf.expand_dims(areas2, 0) - intersections)
return tf.where(
tf.equal(intersections, 0.0),
tf.zeros_like(intersections), tf.truediv(intersections, unions))
tf.equal(intersections, 0.0), tf.zeros_like(intersections),
tf.truediv(intersections, unions))
def matched_iou(boxlist1, boxlist2, scope=None):
......@@ -295,8 +298,8 @@ def matched_iou(boxlist1, boxlist2, scope=None):
areas2 = area(boxlist2)
unions = areas1 + areas2 - intersections
return tf.where(
tf.equal(intersections, 0.0),
tf.zeros_like(intersections), tf.truediv(intersections, unions))
tf.equal(intersections, 0.0), tf.zeros_like(intersections),
tf.truediv(intersections, unions))
def ioa(boxlist1, boxlist2, scope=None):
......@@ -320,8 +323,10 @@ def ioa(boxlist1, boxlist2, scope=None):
return tf.truediv(intersections, areas)
def prune_non_overlapping_boxes(
boxlist1, boxlist2, min_overlap=0.0, scope=None):
def prune_non_overlapping_boxes(boxlist1,
boxlist2,
min_overlap=0.0,
scope=None):
"""Prunes the boxes in boxlist1 that overlap less than thresh with boxlist2.
For each box in boxlist1, we want its IOA to be more than min_overlap with
......@@ -331,7 +336,7 @@ def prune_non_overlapping_boxes(
boxlist1: BoxList holding N boxes.
boxlist2: BoxList holding M boxes.
min_overlap: Minimum required overlap between boxes, to count them as
overlapping.
overlapping.
scope: name scope.
Returns:
......@@ -361,8 +366,8 @@ def prune_small_boxes(boxlist, min_side, scope=None):
"""
with tf.name_scope(scope, 'PruneSmallBoxes'):
height, width = height_width(boxlist)
is_valid = tf.logical_and(tf.greater_equal(width, min_side),
tf.greater_equal(height, min_side))
is_valid = tf.logical_and(
tf.greater_equal(width, min_side), tf.greater_equal(height, min_side))
return gather(boxlist, tf.reshape(tf.where(is_valid), [-1]))
......@@ -389,9 +394,10 @@ def change_coordinate_frame(boxlist, window, scope=None):
with tf.name_scope(scope, 'ChangeCoordinateFrame'):
win_height = window[2] - window[0]
win_width = window[3] - window[1]
boxlist_new = scale(box_list.BoxList(
boxlist.get() - [window[0], window[1], window[0], window[1]]),
1.0 / win_height, 1.0 / win_width)
boxlist_new = scale(
box_list.BoxList(boxlist.get() -
[window[0], window[1], window[0], window[1]]),
1.0 / win_height, 1.0 / win_width)
boxlist_new = _copy_extra_fields(boxlist_new, boxlist)
return boxlist_new
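A worked example of the re-framing above: relative to the window [2, 2, 6, 10] (height 4, width 8), the box [3, 4, 5, 8] becomes [0.25, 0.25, 0.75, 0.75]:

import numpy as np

window = np.array([2., 2., 6., 10.])
box = np.array([3., 4., 5., 8.])
win_h, win_w = window[2] - window[0], window[3] - window[1]
shifted = box - np.array([window[0], window[1], window[0], window[1]])
relative = shifted / np.array([win_h, win_w, win_h, win_w])
# relative -> [0.25, 0.25, 0.75, 0.75]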
......@@ -420,13 +426,17 @@ def sq_dist(boxlist1, boxlist2, scope=None):
with tf.name_scope(scope, 'SqDist'):
sqnorm1 = tf.reduce_sum(tf.square(boxlist1.get()), 1, keep_dims=True)
sqnorm2 = tf.reduce_sum(tf.square(boxlist2.get()), 1, keep_dims=True)
innerprod = tf.matmul(boxlist1.get(), boxlist2.get(),
transpose_a=False, transpose_b=True)
innerprod = tf.matmul(
boxlist1.get(), boxlist2.get(), transpose_a=False, transpose_b=True)
return sqnorm1 + tf.transpose(sqnorm2) - 2.0 * innerprod
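The return above expands the pairwise squared distance as ||a - b||^2 = ||a||^2 + ||b||^2 - 2 a.b across all N x M box pairs. A NumPy check of the identity:

import numpy as np

a = np.random.rand(3, 4)  # boxlist1: N=3 boxes, 4 coordinates each
b = np.random.rand(5, 4)  # boxlist2: M=5 boxes
expanded = (a**2).sum(1, keepdims=True) + (b**2).sum(1) - 2.0 * a @ b.T
brute = ((a[:, None, :] - b[None, :, :])**2).sum(-1)
assert np.allclose(expanded, brute)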
def boolean_mask(boxlist, indicator, fields=None, scope=None,
use_static_shapes=False, indicator_sum=None):
def boolean_mask(boxlist,
indicator,
fields=None,
scope=None,
use_static_shapes=False,
indicator_sum=None):
"""Select boxes from BoxList according to indicator and return new BoxList.
`boolean_mask` returns the subset of boxes that are marked as "True" by the
......@@ -463,8 +473,7 @@ def boolean_mask(boxlist, indicator, fields=None, scope=None,
raise ValueError('`indicator_sum` must be a of type int')
selected_positions = tf.cast(indicator, dtype=tf.float32)
indexed_positions = tf.cast(
tf.multiply(
tf.cumsum(selected_positions), selected_positions),
tf.multiply(tf.cumsum(selected_positions), selected_positions),
dtype=tf.int32)
one_hot_selector = tf.one_hot(
indexed_positions - 1, indicator_sum, dtype=tf.float32)
......@@ -541,9 +550,8 @@ def concatenate(boxlists, fields=None, scope=None):
Args:
boxlists: list of BoxList objects
fields: optional list of fields to also concatenate. By default, all
fields from the first BoxList in the list are included in the
concatenation.
fields: optional list of fields to also concatenate. By default, all fields
from the first BoxList in the list are included in the concatenation.
scope: name scope.
Returns:
......@@ -637,8 +645,8 @@ def visualize_boxes_in_image(image, boxlist, normalized=False, scope=None):
Args:
image: an image tensor with shape [height, width, 3]
boxlist: a BoxList
normalized: (boolean) specify whether corners are to be interpreted
as absolute coordinates in image space or normalized with respect to the
normalized: (boolean) specify whether corners are to be interpreted as
absolute coordinates in image space or normalized with respect to the
image size.
scope: name scope.
......@@ -648,8 +656,7 @@ def visualize_boxes_in_image(image, boxlist, normalized=False, scope=None):
with tf.name_scope(scope, 'VisualizeBoxesInImage'):
if not normalized:
height, width, _ = tf.unstack(tf.shape(image))
boxlist = scale(boxlist,
1.0 / tf.cast(height, tf.float32),
boxlist = scale(boxlist, 1.0 / tf.cast(height, tf.float32),
1.0 / tf.cast(width, tf.float32))
corners = tf.expand_dims(boxlist.get(), 0)
image = tf.expand_dims(image, 0)
......@@ -714,9 +721,8 @@ def filter_greater_than(boxlist, thresh, scope=None):
if len(scores.shape.as_list()) == 2 and scores.shape.as_list()[1] != 1:
raise ValueError('Scores should have rank 1 or have shape '
'consistent with [None, 1]')
high_score_indices = tf.cast(tf.reshape(
tf.where(tf.greater(scores, thresh)),
[-1]), tf.int32)
high_score_indices = tf.cast(
tf.reshape(tf.where(tf.greater(scores, thresh)), [-1]), tf.int32)
return gather(boxlist, high_score_indices)
......@@ -748,8 +754,10 @@ def non_max_suppression(boxlist, thresh, max_output_size, scope=None):
if not boxlist.has_field('scores'):
raise ValueError('input boxlist must have \'scores\' field')
selected_indices = tf.image.non_max_suppression(
boxlist.get(), boxlist.get_field('scores'),
max_output_size, iou_threshold=thresh)
boxlist.get(),
boxlist.get_field('scores'),
max_output_size,
iou_threshold=thresh)
return gather(boxlist, selected_indices)
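Note that `thresh` here is the IOU threshold handed to tf.image.non_max_suppression, and the input must carry a 'scores' field. Continuing the sketch:

selected = box_list_ops.non_max_suppression(
    boxes, thresh=0.5, max_output_size=100)
# A BoxList with at most 100 boxes, greedily kept in descending score order.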
......@@ -768,8 +776,11 @@ def _copy_extra_fields(boxlist_to_copy_to, boxlist_to_copy_from):
return boxlist_to_copy_to
def to_normalized_coordinates(boxlist, height, width,
check_range=True, scope=None):
def to_normalized_coordinates(boxlist,
height,
width,
check_range=True,
scope=None):
"""Converts absolute box coordinates to normalized coordinates in [0, 1].
Usually one uses the dynamic shape of the image or conv-layer tensor:
......@@ -797,8 +808,9 @@ def to_normalized_coordinates(boxlist, height, width,
if check_range:
max_val = tf.reduce_max(boxlist.get())
max_assert = tf.Assert(tf.greater(max_val, 1.01),
['max value is lower than 1.01: ', max_val])
max_assert = tf.Assert(
tf.greater(max_val, 1.01),
['max value is lower than 1.01: ', max_val])
with tf.control_dependencies([max_assert]):
width = tf.identity(width)
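The assert above is the check_range guard: it fires when the incoming coordinates already look normalized (max value <= 1.01). Typical usage divides by the dynamic image shape, e.g. (same hypothetical imports as before):

image = tf.zeros([480, 640, 3])
abs_boxes = box_list.BoxList(tf.constant([[10., 20., 200., 300.]]))
norm_boxes = box_list_ops.to_normalized_coordinates(
    abs_boxes, tf.shape(image)[0], tf.shape(image)[1])
# Corners divided by (height, width): [[10/480, 20/640, 200/480, 300/640]].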
......@@ -822,8 +834,8 @@ def to_absolute_coordinates(boxlist,
height: Maximum value for height of absolute box coordinates.
width: Maximum value for width of absolute box coordinates.
check_range: If True, checks if the coordinates are normalized or not.
maximum_normalized_coordinate: Maximum coordinate value to be considered
as normalized, default to 1.1.
maximum_normalized_coordinate: Maximum coordinate value to be considered as
normalized, default to 1.1.
scope: name scope.
Returns:
......@@ -838,9 +850,10 @@ def to_absolute_coordinates(boxlist,
if check_range:
box_maximum = tf.reduce_max(boxlist.get())
max_assert = tf.Assert(
tf.greater_equal(maximum_normalized_coordinate, box_maximum),
['maximum box coordinate value is larger '
'than %f: ' % maximum_normalized_coordinate, box_maximum])
tf.greater_equal(maximum_normalized_coordinate, box_maximum), [
'maximum box coordinate value is larger '
'than %f: ' % maximum_normalized_coordinate, box_maximum
])
with tf.control_dependencies([max_assert]):
width = tf.identity(width)
......@@ -924,13 +937,15 @@ def refine_boxes(pool_boxes,
if not pool_boxes.has_field('scores'):
raise ValueError('pool_boxes must have a \'scores\' field')
nms_boxes = non_max_suppression(
pool_boxes, nms_iou_thresh, nms_max_detections)
nms_boxes = non_max_suppression(pool_boxes, nms_iou_thresh,
nms_max_detections)
return box_voting(nms_boxes, pool_boxes, voting_iou_thresh)
def box_voting(selected_boxes, pool_boxes, iou_thresh=0.5):
"""Performs box voting as described in S. Gidaris and N. Komodakis, ICCV 2015.
"""Performs box voting as described in S. Gidaris and N.
Komodakis, ICCV 2015.
Performs box voting as described in 'Object detection via a multi-region &
semantic segmentation-aware CNN model', Gidaris and Komodakis, ICCV 2015. For
......@@ -972,9 +987,10 @@ def box_voting(selected_boxes, pool_boxes, iou_thresh=0.5):
# match to any boxes in pool_boxes. For such boxes without any matches, we
# should return the original boxes without voting.
match_assert = tf.Assert(
tf.reduce_all(tf.greater(num_matches, 0)),
['Each box in selected_boxes must match with at least one box '
'in pool_boxes.'])
tf.reduce_all(tf.greater(num_matches, 0)), [
'Each box in selected_boxes must match with at least one box '
'in pool_boxes.'
])
scores = tf.expand_dims(pool_boxes.get_field('scores'), 1)
scores_assert = tf.Assert(
......@@ -993,9 +1009,7 @@ def box_voting(selected_boxes, pool_boxes, iou_thresh=0.5):
return averaged_boxes
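box_voting replaces each selected box's coordinates with the score-weighted average of every pool box overlapping it at IOU >= iou_thresh; the assert above guarantees each selected box has at least one match. refine_boxes composes NMS with this voting step. A hedged sketch, with keyword names inferred from the signatures in this diff:

pool = box_list.BoxList(all_candidate_boxes)    # hypothetical [N, 4] tensor
pool.add_field('scores', all_candidate_scores)  # required, non-negative
refined = box_list_ops.refine_boxes(
    pool, nms_iou_thresh=0.5, nms_max_detections=100, voting_iou_thresh=0.5)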
def get_minimal_coverage_box(boxlist,
default_box=None,
scope=None):
def get_minimal_coverage_box(boxlist, default_box=None, scope=None):
"""Creates a single bounding box which covers all boxes in the boxlist.
Args:
......@@ -1045,9 +1059,9 @@ def sample_boxes_by_jittering(boxlist,
boxlist: A boxlist containing N boxes in normalized coordinates.
num_boxes_to_sample: A positive integer containing the number of boxes to
sample.
stddev: Standard deviation. This is used to draw random offsets for the
box corners from a normal distribution. The offset is multiplied by the
box size so will be larger in terms of pixels for larger boxes.
stddev: Standard deviation. This is used to draw random offsets for the box
corners from a normal distribution. The offset is multiplied by the box
size so will be larger in terms of pixels for larger boxes.
scope: Name scope.
Returns:
......@@ -1056,11 +1070,10 @@ def sample_boxes_by_jittering(boxlist,
"""
with tf.name_scope(scope, 'SampleBoxesByJittering'):
num_boxes = boxlist.num_boxes()
box_indices = tf.random_uniform(
[num_boxes_to_sample],
minval=0,
maxval=num_boxes,
dtype=tf.int32)
box_indices = tf.random_uniform([num_boxes_to_sample],
minval=0,
maxval=num_boxes,
dtype=tf.int32)
sampled_boxes = tf.gather(boxlist.get(), box_indices)
sampled_boxes_height = sampled_boxes[:, 2] - sampled_boxes[:, 0]
sampled_boxes_width = sampled_boxes[:, 3] - sampled_boxes[:, 1]
......
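Each sampled box is drawn uniformly with replacement via the tf.random_uniform call above, and its corners are then jittered by normal noise scaled by the box height/width. Sketch:

jittered = box_list_ops.sample_boxes_by_jittering(
    boxes, num_boxes_to_sample=10, stddev=0.1)
# A BoxList of exactly 10 jittered boxes in normalized coordinates.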
......@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Faster RCNN box coder.
Faster RCNN box coder follows the coding schema described below:
......@@ -43,9 +42,9 @@ class FasterRcnnBoxCoder(box_coder.BoxCoder):
"""Constructor for FasterRcnnBoxCoder.
Args:
scale_factors: List of 4 positive scalars to scale ty, tx, th and tw.
If set to None, does not perform scaling. For Faster RCNN,
the open-source implementation recommends using [10.0, 10.0, 5.0, 5.0].
scale_factors: List of 4 positive scalars to scale ty, tx, th and tw. If
set to None, does not perform scaling. For Faster RCNN, the open-source
implementation recommends using [10.0, 10.0, 5.0, 5.0].
"""
if scale_factors:
assert len(scale_factors) == 4
......
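For orientation: the Faster R-CNN schema encodes a box against an anchor as ty = (y - ya) / ha, tx = (x - xa) / wa, th = log(h / ha), tw = log(w / wa), with each component then multiplied by its scale factor. A usage sketch (encode/decode are the standard BoxCoder interface methods; the import path is assumed):

coder = FasterRcnnBoxCoder(scale_factors=[10.0, 10.0, 5.0, 5.0])
rel_codes = coder.encode(gt_boxes, anchors)  # both arguments are BoxLists
decoded = coder.decode(rel_codes, anchors)   # round-trips gt_boxes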
......@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Matcher interface and Match class.
This module defines the Matcher interface and the Match object. The job of the
......@@ -49,9 +48,9 @@ class Match(object):
Args:
match_results: Integer tensor of shape [N] with (1) match_results[i]>=0,
meaning that column i is matched with row match_results[i].
(2) match_results[i]=-1, meaning that column i is not matched.
(3) match_results[i]=-2, meaning that column i is ignored.
meaning that column i is matched with row match_results[i]. (2)
match_results[i]=-1, meaning that column i is not matched. (3)
match_results[i]=-2, meaning that column i is ignored.
Raises:
ValueError: if match_results does not have rank 1 or is not an
......@@ -168,8 +167,7 @@ class Match(object):
def _reshape_and_cast(self, t):
return tf.cast(tf.reshape(t, [-1]), tf.int32)
def gather_based_on_match(self, input_tensor, unmatched_value,
ignored_value):
def gather_based_on_match(self, input_tensor, unmatched_value, ignored_value):
"""Gathers elements from `input_tensor` based on match results.
For columns that are matched to a row, gathered_tensor[col] is set to
......@@ -190,16 +188,15 @@ class Match(object):
The shape of the gathered tensor is [match_results.shape[0]] +
input_tensor.shape[1:].
"""
input_tensor = tf.concat([tf.stack([ignored_value, unmatched_value]),
input_tensor], axis=0)
input_tensor = tf.concat(
[tf.stack([ignored_value, unmatched_value]), input_tensor], axis=0)
gather_indices = tf.maximum(self.match_results + 2, 0)
gathered_tensor = tf.gather(input_tensor, gather_indices)
return gathered_tensor
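The concat-and-offset trick above maps ignored columns (-2) to slot 0, unmatched columns (-1) to slot 1, and matched columns to their row index shifted by 2. A worked sketch:

match = Match(tf.constant([1, -1, 0, -2]))
out = match.gather_based_on_match(
    tf.constant([10., 20.]),  # one value per row
    unmatched_value=tf.constant(0.),
    ignored_value=tf.constant(0.))
# out == [20., 0., 10., 0.]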
class Matcher(object):
"""Abstract base class for matcher.
"""
"""Abstract base class for matcher."""
__metaclass__ = ABCMeta
def match(self, similarity_matrix, scope=None, **params):
......@@ -212,8 +209,8 @@ class Matcher(object):
similarity_matrix: Float tensor of shape [N, M] with pairwise similarity
where higher value means more similar.
scope: Op scope name. Defaults to 'Match' if None.
**params: Additional keyword arguments for specific implementations of
the Matcher.
**params: Additional keyword arguments for specific implementations of the
Matcher.
Returns:
A Match object with the results of matching.
......@@ -230,8 +227,8 @@ class Matcher(object):
Args:
similarity_matrix: Float tensor of shape [N, M] with pairwise similarity
where higher value means more similar.
**params: Additional keyword arguments for specific implementations of
the Matcher.
**params: Additional keyword arguments for specific implementations of the
Matcher.
Returns:
match_results: Integer tensor of shape [M]: match_results[i]>=0 means
......
......@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Base minibatch sampler module.
The job of the minibatch_sampler is to subsample a minibatch based on some
......@@ -53,8 +52,8 @@ class MinibatchSampler(object):
Args:
indicator: boolean tensor of shape [N] whose True entries can be sampled.
batch_size: desired batch size.
**params: additional keyword arguments for specific implementations of
the MinibatchSampler.
**params: additional keyword arguments for specific implementations of the
MinibatchSampler.
Returns:
sample_indicator: boolean tensor of shape [N] whose True entries have been
......@@ -72,8 +71,8 @@ class MinibatchSampler(object):
is returned.
Args:
indicator: a 1-dimensional boolean tensor indicating which elements
are allowed to be sampled and which are not.
indicator: a 1-dimensional boolean tensor indicating which elements are
allowed to be sampled and which are not.
num_samples: int32 scalar tensor
Returns:
......
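subsample_indicator randomly keeps at most num_samples of the True entries and zeroes out the rest. A sketch, assuming it is exposed as a static method as in the Object Detection API:

indicator = tf.constant([True, True, False, True, True])
sampled = MinibatchSampler.subsample_indicator(indicator, 2)
# A boolean [5] tensor with at most two True entries, all drawn
# from the originally-True positions.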
......@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""A module for helper tensorflow ops.
This is originally implemented in TensorFlow Object Detection API.
......@@ -37,7 +36,7 @@ def indices_to_dense_vector(indices,
Args:
indices: 1d Tensor with integer indices which are to be set to
indices_value.
size: scalar with size (integer) of output Tensor.
indices_value: values of elements specified by indices in the output vector
default_value: values of other elements in the output vector.
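With the defaults (indices_value=1, default_value=0, assumed from the Object Detection API version of this helper), the call is a scatter into a dense float vector:

dense = indices_to_dense_vector(tf.constant([1, 3]), size=5)
# -> [0., 1., 0., 1., 0.]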
......@@ -61,10 +60,10 @@ def matmul_gather_on_zeroth_axis(params, indices, scope=None):
TODO(rathodv, jonathanhuang): enable sparse matmul option.
Args:
params: A float32 Tensor. The tensor from which to gather values.
Must be at least rank 1.
indices: A Tensor. Must be one of the following types: int32, int64.
Must be in range [0, params.shape[0])
params: A float32 Tensor. The tensor from which to gather values. Must be at
least rank 1.
indices: A Tensor. Must be one of the following types: int32, int64. Must be
in range [0, params.shape[0])
scope: A name for the operation (optional).
Returns:
......
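Functionally this matches tf.gather(params, indices) on axis 0; the one-hot-matmul formulation just tends to lower better on TPUs. Sketch:

params = tf.constant([[1., 2.], [3., 4.], [5., 6.]])
gathered = matmul_gather_on_zeroth_axis(params, tf.constant([2, 0]))
# == tf.gather(params, [2, 0]) -> [[5., 6.], [1., 2.]]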
......@@ -50,10 +50,9 @@ def _flip_boxes_left_right(boxes):
"""Left-right flip the boxes.
Args:
boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
Boxes are in normalized form meaning their coordinates vary
between [0, 1].
Each row is in the form of [ymin, xmin, ymax, xmax].
boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4]. Boxes
are in normalized form meaning their coordinates vary between [0, 1]. Each
row is in the form of [ymin, xmin, ymax, xmax].
Returns:
Flipped boxes.
......@@ -69,8 +68,8 @@ def _flip_masks_left_right(masks):
"""Left-right flip masks.
Args:
masks: rank 3 float32 tensor with shape
[num_instances, height, width] representing instance masks.
masks: rank 3 float32 tensor with shape [num_instances, height, width]
representing instance masks.
Returns:
flipped masks: rank 3 float32 tensor with shape
......@@ -79,7 +78,9 @@ def _flip_masks_left_right(masks):
return masks[:, :, ::-1]
def keypoint_flip_horizontal(keypoints, flip_point, flip_permutation,
def keypoint_flip_horizontal(keypoints,
flip_point,
flip_permutation,
scope=None):
"""Flips the keypoints horizontally around the flip_point.
......@@ -91,9 +92,9 @@ def keypoint_flip_horizontal(keypoints, flip_point, flip_permutation,
flip_point: (float) scalar tensor representing the x coordinate to flip the
keypoints around.
flip_permutation: rank 1 int32 tensor containing the keypoint flip
permutation. This specifies the mapping from original keypoint indices
to the flipped keypoint indices. This is used primarily for keypoints
that are not reflection invariant. E.g. Suppose there are 3 keypoints
permutation. This specifies the mapping from original keypoint indices to
the flipped keypoint indices. This is used primarily for keypoints that
are not reflection invariant. E.g. Suppose there are 3 keypoints
representing ['head', 'right_eye', 'left_eye'], then a logical choice for
flip_permutation might be [0, 2, 1] since we want to swap the 'left_eye'
and 'right_eye' after a horizontal flip.
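A sketch using the docstring's ['head', 'right_eye', 'left_eye'] example; the x-arithmetic (x -> 2 * flip_point - x) is an assumption consistent with reflecting around flip_point:

keypoints = tf.constant([[[0.1, 0.5],    # head
                          [0.2, 0.3],    # right_eye
                          [0.2, 0.7]]])  # left_eye, in y-x order
flipped = keypoint_flip_horizontal(
    keypoints, flip_point=0.5, flip_permutation=[0, 2, 1])
# Eye rows swap and x reflects: 0.3 -> 0.7 and 0.7 -> 0.3.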
......@@ -190,19 +191,16 @@ def random_horizontal_flip(image,
Args:
image: rank 3 float32 tensor with shape [height, width, channels].
boxes: (optional) rank 2 float32 tensor with shape [N, 4]
containing the bounding boxes.
Boxes are in normalized form meaning their coordinates vary
between [0, 1].
Each row is in the form of [ymin, xmin, ymax, xmax].
masks: (optional) rank 3 float32 tensor with shape
[num_instances, height, width] containing instance masks. The masks
are of the same height, width as the input `image`.
keypoints: (optional) rank 3 float32 tensor with shape
[num_instances, num_keypoints, 2]. The keypoints are in y-x
normalized coordinates.
boxes: (optional) rank 2 float32 tensor with shape [N, 4] containing the
bounding boxes. Boxes are in normalized form meaning their coordinates
vary between [0, 1]. Each row is in the form of [ymin, xmin, ymax, xmax].
masks: (optional) rank 3 float32 tensor with shape [num_instances, height,
width] containing instance masks. The masks are of the same height, width
as the input `image`.
keypoints: (optional) rank 3 float32 tensor with shape [num_instances,
num_keypoints, 2]. The keypoints are in y-x normalized coordinates.
keypoint_flip_permutation: rank 1 int32 tensor containing the keypoint flip
permutation.
permutation.
seed: random seed
Returns:
......@@ -369,20 +367,19 @@ def resize_to_range(image,
Args:
image: A 3D tensor of shape [height, width, channels]
masks: (optional) rank 3 float32 tensor with shape
[num_instances, height, width] containing instance masks.
masks: (optional) rank 3 float32 tensor with shape [num_instances, height,
width] containing instance masks.
min_dimension: (optional) (scalar) desired size of the smaller image
dimension.
max_dimension: (optional) (scalar) maximum allowed size
of the larger image dimension.
dimension.
max_dimension: (optional) (scalar) maximum allowed size of the larger image
dimension.
method: (optional) interpolation method used in resizing. Defaults to
BILINEAR.
align_corners: bool. If true, exactly align all 4 corners of the input
and output. Defaults to False.
pad_to_max_dimension: Whether to resize the image and pad it with zeros
so the resulting image is of the spatial size
[max_dimension, max_dimension]. If masks are included they are padded
similarly.
BILINEAR.
align_corners: bool. If true, exactly align all 4 corners of the input and
output. Defaults to False.
pad_to_max_dimension: Whether to resize the image and pad it with zeros so
the resulting image is of the spatial size [max_dimension, max_dimension].
If masks are included they are padded similarly.
Returns:
Note that the position of the resized_image_shape changes based on whether
......@@ -410,8 +407,8 @@ def resize_to_range(image,
new_image = tf.image.resize(image, new_size[:-1], method=method)
if pad_to_max_dimension:
new_image = tf.image.pad_to_bounding_box(
new_image, 0, 0, max_dimension, max_dimension)
new_image = tf.image.pad_to_bounding_box(new_image, 0, 0, max_dimension,
max_dimension)
result = [new_image]
if masks is not None:
......@@ -422,8 +419,8 @@ def resize_to_range(image,
method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
new_masks = tf.squeeze(new_masks, 3)
if pad_to_max_dimension:
new_masks = tf.image.pad_to_bounding_box(
new_masks, 0, 0, max_dimension, max_dimension)
new_masks = tf.image.pad_to_bounding_box(new_masks, 0, 0, max_dimension,
max_dimension)
result.append(new_masks)
result.append(new_size)
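resize_to_range scales so the short side reaches min_dimension unless that would push the long side past max_dimension, in which case the long side is capped instead; with pad_to_max_dimension the result is zero-padded to a square, as the pad_to_bounding_box calls above show. Sketch:

image = tf.zeros([600, 1200, 3])
result = resize_to_range(
    image, min_dimension=800, max_dimension=1333, pad_to_max_dimension=True)
new_image = result[0]   # resized to roughly [666, 1333, 3], then
                        # zero-padded to [1333, 1333, 3]
new_size = result[-1]   # shape of the resized image prior to padding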
......@@ -500,11 +497,10 @@ def scale_boxes_to_pixel_coordinates(image, boxes, keypoints=None):
Args:
image: A 3D float32 tensor of shape [height, width, channels].
boxes: A 2D float32 tensor of shape [num_boxes, 4] containing the bounding
boxes in normalized coordinates. Each row is of the form
[ymin, xmin, ymax, xmax].
keypoints: (optional) rank 3 float32 tensor with shape
[num_instances, num_keypoints, 2]. The keypoints are in y-x normalized
coordinates.
boxes in normalized coordinates. Each row is of the form [ymin, xmin,
ymax, xmax].
keypoints: (optional) rank 3 float32 tensor with shape [num_instances,
num_keypoints, 2]. The keypoints are in y-x normalized coordinates.
Returns:
image: unchanged input image.
......
......@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Region Similarity Calculators for BoxLists.
Region Similarity Calculators compare a pairwise measure of similarity
......