Commit 999fae62 authored by Hongkun Yu, committed by A. Unique TensorFlower

Internal change

PiperOrigin-RevId: 326286926
parent 94561082
@@ -52,15 +52,15 @@ class RetinanetModel(base_model.Model):
     # Predict function.
     self._generate_detections_fn = postprocess_ops.MultilevelDetectionGenerator(
-        params.architecture.min_level,
-        params.architecture.max_level,
+        params.architecture.min_level, params.architecture.max_level,
         params.postprocess)
     self._transpose_input = params.train.transpose_input
     assert not self._transpose_input, 'Transpose input is not supported.'

     # Input layer.
     self._input_layer = tf.keras.layers.Input(
-        shape=(None, None, params.retinanet_parser.num_channels), name='',
+        shape=(None, None, params.retinanet_parser.num_channels),
+        name='',
         dtype=tf.bfloat16 if self._use_bfloat16 else tf.float32)

   def build_outputs(self, inputs, mode):
@@ -141,8 +141,8 @@ class RetinanetModel(base_model.Model):
         raise ValueError('"%s" is missing in outputs, requried %s found %s',
                          field, required_label_fields, labels.keys())

     boxes, scores, classes, valid_detections = self._generate_detections_fn(
-        outputs['box_outputs'], outputs['cls_outputs'],
-        labels['anchor_boxes'], labels['image_info'][:, 1:2, :])
+        outputs['box_outputs'], outputs['cls_outputs'], labels['anchor_boxes'],
+        labels['image_info'][:, 1:2, :])

     # Discards the old output tensors to save memory. The `cls_outputs` and
     # `box_outputs` are pretty big and could potentiall lead to memory issue.
     outputs = {
...
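For context, a minimal sketch of how the reformatted call sites above fit together. The constructor arguments and the indexing of `image_info` are taken from the diff itself; the surrounding variables (`params`, `outputs`, `labels`) are assumed context, not part of this commit.

    from official.vision.detection.ops import postprocess_ops

    # Built once from config, as in __init__ above.
    generate_detections_fn = postprocess_ops.MultilevelDetectionGenerator(
        params.architecture.min_level, params.architecture.max_level,
        params.postprocess)

    # labels['image_info'][:, 1:2, :] selects the scaled [height, width] row
    # of image_info, used to clip and de-normalize the decoded boxes.
    boxes, scores, classes, valid_detections = generate_detections_fn(
        outputs['box_outputs'], outputs['cls_outputs'], labels['anchor_boxes'],
        labels['image_info'][:, 1:2, :])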
@@ -61,13 +61,11 @@ class ShapeMaskModel(base_model.Model):
         params.shapemask_loss.shape_prior_loss_weight)
     self._coarse_mask_loss_weight = (
         params.shapemask_loss.coarse_mask_loss_weight)
-    self._fine_mask_loss_weight = (
-        params.shapemask_loss.fine_mask_loss_weight)
+    self._fine_mask_loss_weight = (params.shapemask_loss.fine_mask_loss_weight)

     # Predict function.
     self._generate_detections_fn = postprocess_ops.MultilevelDetectionGenerator(
-        params.architecture.min_level,
-        params.architecture.max_level,
+        params.architecture.min_level, params.architecture.max_level,
         params.postprocess)

   def build_outputs(self, inputs, mode):
@@ -79,10 +77,8 @@ class ShapeMaskModel(base_model.Model):
     else:
       anchor_boxes = anchor.Anchor(
           self._params.architecture.min_level,
-          self._params.architecture.max_level,
-          self._params.anchor.num_scales,
-          self._params.anchor.aspect_ratios,
-          self._params.anchor.anchor_size,
+          self._params.architecture.max_level, self._params.anchor.num_scales,
+          self._params.anchor.aspect_ratios, self._params.anchor.anchor_size,
           images.get_shape().as_list()[1:3]).multilevel_boxes

     batch_size = tf.shape(images)[0]
@@ -96,8 +92,7 @@ class ShapeMaskModel(base_model.Model):
         fpn_features, is_training=is_training)

     valid_boxes, valid_scores, valid_classes, valid_detections = (
-        self._generate_detections_fn(box_outputs, cls_outputs,
-                                     anchor_boxes,
+        self._generate_detections_fn(box_outputs, cls_outputs, anchor_boxes,
                                      inputs['image_info'][:, 1:2, :]))

     image_size = images.get_shape().as_list()[1:3]
@@ -124,22 +119,18 @@ class ShapeMaskModel(base_model.Model):
         return boxes, classes, outer_boxes

     boxes, classes, outer_boxes = SampledBoxesLayer()(
-        inputs, valid_boxes, valid_classes,
-        valid_outer_boxes, training=is_training)
-
-    instance_features, prior_masks = self._shape_prior_head_fn(fpn_features,
-                                                               boxes,
-                                                               outer_boxes,
-                                                               classes,
-                                                               is_training)
-    coarse_mask_logits = self._coarse_mask_fn(instance_features,
-                                              prior_masks,
-                                              classes,
-                                              is_training)
-    fine_mask_logits = self._fine_mask_fn(instance_features,
-                                          coarse_mask_logits,
-                                          classes,
-                                          is_training)
+        inputs,
+        valid_boxes,
+        valid_classes,
+        valid_outer_boxes,
+        training=is_training)
+
+    instance_features, prior_masks = self._shape_prior_head_fn(
+        fpn_features, boxes, outer_boxes, classes, is_training)
+    coarse_mask_logits = self._coarse_mask_fn(instance_features, prior_masks,
+                                              classes, is_training)
+    fine_mask_logits = self._fine_mask_fn(instance_features, coarse_mask_logits,
+                                          classes, is_training)

     model_outputs = {
         'cls_outputs': cls_outputs,
@@ -177,18 +168,15 @@ class ShapeMaskModel(base_model.Model):
                                         labels['num_positives'])

     # Adds Shapemask model losses.
-    shape_prior_loss = self._shapemask_prior_loss_fn(
-        outputs['prior_masks'],
-        labels['mask_targets'],
-        labels['mask_is_valid'])
-    coarse_mask_loss = self._shapemask_loss_fn(
-        outputs['coarse_mask_logits'],
-        labels['mask_targets'],
-        labels['mask_is_valid'])
-    fine_mask_loss = self._shapemask_loss_fn(
-        outputs['fine_mask_logits'],
-        labels['fine_mask_targets'],
-        labels['mask_is_valid'])
+    shape_prior_loss = self._shapemask_prior_loss_fn(outputs['prior_masks'],
+                                                     labels['mask_targets'],
+                                                     labels['mask_is_valid'])
+    coarse_mask_loss = self._shapemask_loss_fn(outputs['coarse_mask_logits'],
+                                               labels['mask_targets'],
+                                               labels['mask_is_valid'])
+    fine_mask_loss = self._shapemask_loss_fn(outputs['fine_mask_logits'],
+                                             labels['fine_mask_targets'],
+                                             labels['mask_is_valid'])

     model_loss = (
         cls_loss + self._box_loss_weight * box_loss +
@@ -222,43 +210,46 @@ class ShapeMaskModel(base_model.Model):
     if is_training:
       batch_size = params.train.batch_size
       input_layer = {
-          'image': tf.keras.layers.Input(
-              shape=input_shape,
-              batch_size=batch_size,
-              name='image',
-              dtype=tf.bfloat16 if self._use_bfloat16 else tf.float32),
-          'image_info': tf.keras.layers.Input(
-              shape=[4, 2],
-              batch_size=batch_size,
-              name='image_info'),
-          'mask_classes': tf.keras.layers.Input(
-              shape=[params.shapemask_parser.num_sampled_masks],
-              batch_size=batch_size,
-              name='mask_classes',
-              dtype=tf.int64),
-          'mask_outer_boxes': tf.keras.layers.Input(
-              shape=[params.shapemask_parser.num_sampled_masks, 4],
-              batch_size=batch_size,
-              name='mask_outer_boxes',
-              dtype=tf.float32),
-          'mask_boxes': tf.keras.layers.Input(
-              shape=[params.shapemask_parser.num_sampled_masks, 4],
-              batch_size=batch_size,
-              name='mask_boxes',
-              dtype=tf.float32),
+          'image':
+              tf.keras.layers.Input(
+                  shape=input_shape,
+                  batch_size=batch_size,
+                  name='image',
+                  dtype=tf.bfloat16 if self._use_bfloat16 else tf.float32),
+          'image_info':
+              tf.keras.layers.Input(
+                  shape=[4, 2], batch_size=batch_size, name='image_info'),
+          'mask_classes':
+              tf.keras.layers.Input(
+                  shape=[params.shapemask_parser.num_sampled_masks],
+                  batch_size=batch_size,
+                  name='mask_classes',
+                  dtype=tf.int64),
+          'mask_outer_boxes':
+              tf.keras.layers.Input(
+                  shape=[params.shapemask_parser.num_sampled_masks, 4],
+                  batch_size=batch_size,
+                  name='mask_outer_boxes',
+                  dtype=tf.float32),
+          'mask_boxes':
+              tf.keras.layers.Input(
+                  shape=[params.shapemask_parser.num_sampled_masks, 4],
+                  batch_size=batch_size,
+                  name='mask_boxes',
+                  dtype=tf.float32),
       }
     else:
       batch_size = params.eval.batch_size
       input_layer = {
-          'image': tf.keras.layers.Input(
-              shape=input_shape,
-              batch_size=batch_size,
-              name='image',
-              dtype=tf.bfloat16 if self._use_bfloat16 else tf.float32),
-          'image_info': tf.keras.layers.Input(
-              shape=[4, 2],
-              batch_size=batch_size,
-              name='image_info'),
+          'image':
+              tf.keras.layers.Input(
+                  shape=input_shape,
+                  batch_size=batch_size,
+                  name='image',
+                  dtype=tf.bfloat16 if self._use_bfloat16 else tf.float32),
+          'image_info':
+              tf.keras.layers.Input(
+                  shape=[4, 2], batch_size=batch_size, name='image_info'),
       }
     return input_layer
@@ -277,9 +268,10 @@ class ShapeMaskModel(base_model.Model):
     return self._keras_model

   def post_processing(self, labels, outputs):
-    required_output_fields = ['num_detections', 'detection_boxes',
-                              'detection_classes', 'detection_masks',
-                              'detection_scores']
+    required_output_fields = [
+        'num_detections', 'detection_boxes', 'detection_classes',
+        'detection_masks', 'detection_scores'
+    ]
     for field in required_output_fields:
       if field not in outputs:
...
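The `input_layer` dict above drives a multi-input Keras functional model. A self-contained toy version of the eval-mode inputs, assuming a placeholder batch size and image shape; the `[4, 2]` rows of `image_info` are original size, desired size, scale, and offset, each a [height, width] pair, matching the `input_utils` changes later in this commit.

    import tensorflow as tf

    batch_size = 8  # placeholder
    input_layer = {
        'image':
            tf.keras.layers.Input(
                shape=(640, 640, 3), batch_size=batch_size, name='image'),
        'image_info':
            tf.keras.layers.Input(
                shape=[4, 2], batch_size=batch_size, name='image_info'),
    }
    # A functional model can then be built from these named inputs.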
@@ -22,7 +22,6 @@ import tensorflow as tf
-
 from official.vision.detection.utils import box_utils

 NMS_TILE_SIZE = 512
@@ -106,9 +105,7 @@ def _suppression_loop_body(boxes, iou_threshold, output_size, idx):
   return boxes, iou_threshold, output_size, idx + 1


-def sorted_non_max_suppression_padded(scores,
-                                      boxes,
-                                      max_output_size,
+def sorted_non_max_suppression_padded(scores, boxes, max_output_size,
                                       iou_threshold):
   """A wrapper that handles non-maximum suppression.
@@ -177,19 +174,18 @@ def sorted_non_max_suppression_padded(scores,
       idx < num_boxes // NMS_TILE_SIZE)

   selected_boxes, _, output_size, _ = tf.while_loop(
-      _loop_cond, _suppression_loop_body, [
-          boxes, iou_threshold,
-          tf.zeros([batch_size], tf.int32),
-          tf.constant(0)
-      ])
+      _loop_cond, _suppression_loop_body,
+      [boxes, iou_threshold,
+       tf.zeros([batch_size], tf.int32),
+       tf.constant(0)])
   idx = num_boxes - tf.cast(
       tf.nn.top_k(
           tf.cast(tf.reduce_any(selected_boxes > 0, [2]), tf.int32) *
           tf.expand_dims(tf.range(num_boxes, 0, -1), 0), max_output_size)[0],
       tf.int32)
   idx = tf.minimum(idx, num_boxes - 1)
-  idx = tf.reshape(
-      idx + tf.reshape(tf.range(batch_size) * num_boxes, [-1, 1]), [-1])
+  idx = tf.reshape(idx + tf.reshape(tf.range(batch_size) * num_boxes, [-1, 1]),
+                   [-1])
   boxes = tf.reshape(
       tf.gather(tf.reshape(boxes, [-1, 4]), idx),
       [batch_size, max_output_size, 4])
...
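The `tf.while_loop` above runs one `_suppression_loop_body` step per tile of `NMS_TILE_SIZE` boxes (the loop condition is `idx < num_boxes // NMS_TILE_SIZE`), so callers pad the box count up to a tile multiple. A small arithmetic sketch of that invariant; the box count is made up.

    import math

    NMS_TILE_SIZE = 512

    num_boxes = 1300  # hypothetical pre-NMS box count
    padded_num_boxes = int(math.ceil(num_boxes / NMS_TILE_SIZE)) * NMS_TILE_SIZE
    # padded_num_boxes == 1536, so the suppression loop runs
    # padded_num_boxes // NMS_TILE_SIZE == 3 tile-vs-tile iterations.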
@@ -19,6 +19,7 @@ from __future__ import division
 from __future__ import print_function

 import functools
+
 import tensorflow as tf

 from official.vision.detection.ops import nms
@@ -202,15 +203,14 @@ def _generate_detections_per_image(boxes,
         scores_i, k=tf.minimum(tf.shape(input=scores_i)[-1], pre_nms_num_boxes))
     boxes_i = tf.gather(boxes_i, indices)

-    (nmsed_indices_i,
-     nmsed_num_valid_i) = tf.image.non_max_suppression_padded(
-         tf.cast(boxes_i, tf.float32),
-         tf.cast(scores_i, tf.float32),
-         max_total_size,
-         iou_threshold=nms_iou_threshold,
-         score_threshold=score_threshold,
-         pad_to_max_output_size=True,
-         name='nms_detections_' + str(i))
+    (nmsed_indices_i, nmsed_num_valid_i) = tf.image.non_max_suppression_padded(
+        tf.cast(boxes_i, tf.float32),
+        tf.cast(scores_i, tf.float32),
+        max_total_size,
+        iou_threshold=nms_iou_threshold,
+        score_threshold=score_threshold,
+        pad_to_max_output_size=True,
+        name='nms_detections_' + str(i))
     nmsed_boxes_i = tf.gather(boxes_i, nmsed_indices_i)
     nmsed_scores_i = tf.gather(scores_i, nmsed_indices_i)
     # Sets scores of invalid boxes to -1.
@@ -235,11 +235,8 @@ def _generate_detections_per_image(boxes,
   return nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections


-def _generate_detections_batched(boxes,
-                                 scores,
-                                 max_total_size,
-                                 nms_iou_threshold,
-                                 score_threshold):
+def _generate_detections_batched(boxes, scores, max_total_size,
+                                 nms_iou_threshold, score_threshold):
   """Generates detected boxes with scores and classes for one-stage detector.

   The function takes output of multi-level ConvNets and anchor boxes and
@@ -247,19 +244,20 @@ def _generate_detections_batched(boxes,
   supported on TPU currently.

   Args:
-    boxes: a tensor with shape [batch_size, N, num_classes, 4] or
-      [batch_size, N, 1, 4], which box predictions on all feature levels. The N
-      is the number of total anchors on all levels.
-    scores: a tensor with shape [batch_size, N, num_classes], which
-      stacks class probability on all feature levels. The N is the number of
-      total anchors on all levels. The num_classes is the number of classes
-      predicted by the model. Note that the class_outputs here is the raw score.
+    boxes: a tensor with shape [batch_size, N, num_classes, 4] or [batch_size,
+      N, 1, 4], which box predictions on all feature levels. The N is the number
+      of total anchors on all levels.
+    scores: a tensor with shape [batch_size, N, num_classes], which stacks class
+      probability on all feature levels. The N is the number of total anchors on
+      all levels. The num_classes is the number of classes predicted by the
+      model. Note that the class_outputs here is the raw score.
     max_total_size: a scalar representing maximum number of boxes retained over
       all classes.
     nms_iou_threshold: a float representing the threshold for deciding whether
       boxes overlap too much with respect to IOU.
     score_threshold: a float representing the threshold for deciding when to
       remove boxes based on score.
+
   Returns:
     nms_boxes: `float` Tensor of shape [batch_size, max_total_size, 4]
       representing top detected boxes in [y1, x1, y2, x2].
@@ -285,7 +283,8 @@ def _generate_detections_batched(boxes,
       max_total_size=max_total_size,
       iou_threshold=nms_iou_threshold,
       score_threshold=score_threshold,
-      pad_per_class=False,)
+      pad_per_class=False,
+  )
   # De-normalizes box cooridinates.
   nmsed_boxes *= normalizer
   nmsed_classes = tf.cast(nmsed_classes, tf.int32)
@@ -382,16 +381,13 @@ class GenericDetectionGenerator(object):
     box_outputs = tf.reshape(
         box_outputs,
         tf.stack([batch_size, num_locations, num_classes, 4], axis=-1))
-    box_outputs = tf.slice(
-        box_outputs, [0, 0, 1, 0], [-1, -1, -1, -1])
+    box_outputs = tf.slice(box_outputs, [0, 0, 1, 0], [-1, -1, -1, -1])
     anchor_boxes = tf.tile(
         tf.expand_dims(anchor_boxes, axis=2), [1, 1, num_classes - 1, 1])
-    box_outputs = tf.reshape(
-        box_outputs,
-        tf.stack([batch_size, num_detections, 4], axis=-1))
+    box_outputs = tf.reshape(box_outputs,
+                             tf.stack([batch_size, num_detections, 4], axis=-1))
     anchor_boxes = tf.reshape(
-        anchor_boxes,
-        tf.stack([batch_size, num_detections, 4], axis=-1))
+        anchor_boxes, tf.stack([batch_size, num_detections, 4], axis=-1))

     # Box decoding.
     decoded_boxes = box_utils.decode_boxes(
...
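A self-contained sketch of the padded per-image NMS call reformatted above, using `tf.image.non_max_suppression_padded` with the same argument pattern as `_generate_detections_per_image`; the tensors and thresholds here are placeholders.

    import tensorflow as tf

    boxes_i = tf.random.uniform([1000, 4])   # one image, one class slice
    scores_i = tf.random.uniform([1000])
    nmsed_indices_i, nmsed_num_valid_i = tf.image.non_max_suppression_padded(
        tf.cast(boxes_i, tf.float32),
        tf.cast(scores_i, tf.float32),
        100,                                 # max_total_size
        iou_threshold=0.5,
        score_threshold=0.05,
        pad_to_max_output_size=True,
        name='nms_detections_0')
    nmsed_boxes_i = tf.gather(boxes_i, nmsed_indices_i)
    # Scores of the padded slots beyond nmsed_num_valid_i are then masked to
    # -1, as the surrounding code notes.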
@@ -56,8 +56,8 @@ def multilevel_propose_rois(rpn_boxes,
     rpn_scores: a dict with keys representing FPN levels and values representing
       logit tensors of shape [batch_size, feature_h, feature_w, num_anchors].
     anchor_boxes: a dict with keys representing FPN levels and values
-      representing anchor box tensors of shape
-      [batch_size, feature_h, feature_w, num_anchors * 4].
+      representing anchor box tensors of shape [batch_size, feature_h,
+      feature_w, num_anchors * 4].
     image_shape: a tensor of shape [batch_size, 2] where the last dimension are
       [height, width] of the scaled image.
     rpn_pre_nms_top_k: an integer of top scoring RPN proposals *per level* to
@@ -112,17 +112,14 @@ def multilevel_propose_rois(rpn_boxes,
       this_level_scores = tf.sigmoid(this_level_scores)

       if decode_boxes:
-        this_level_boxes = box_utils.decode_boxes(
-            this_level_boxes, this_level_anchors)
+        this_level_boxes = box_utils.decode_boxes(this_level_boxes,
+                                                  this_level_anchors)
       if clip_boxes:
-        this_level_boxes = box_utils.clip_boxes(
-            this_level_boxes, image_shape)
+        this_level_boxes = box_utils.clip_boxes(this_level_boxes, image_shape)

       if rpn_min_size_threshold > 0.0:
         this_level_boxes, this_level_scores = box_utils.filter_boxes(
-            this_level_boxes,
-            this_level_scores,
-            image_shape,
+            this_level_boxes, this_level_scores, image_shape,
             rpn_min_size_threshold)

       this_level_pre_nms_top_k = min(num_boxes, rpn_pre_nms_top_k)
@@ -142,8 +139,9 @@ def multilevel_propose_rois(rpn_boxes,
       else:
         if rpn_score_threshold > 0.0:
           this_level_boxes, this_level_scores = (
-              box_utils.filter_boxes_by_scores(
-                  this_level_boxes, this_level_scores, rpn_score_threshold))
+              box_utils.filter_boxes_by_scores(this_level_boxes,
+                                               this_level_scores,
+                                               rpn_score_threshold))
         this_level_boxes, this_level_scores = box_utils.top_k_boxes(
             this_level_boxes, this_level_scores, k=this_level_pre_nms_top_k)
         this_level_roi_scores, this_level_rois = (
@@ -154,9 +152,7 @@ def multilevel_propose_rois(rpn_boxes,
               iou_threshold=rpn_nms_threshold))
       else:
         this_level_rois, this_level_roi_scores = box_utils.top_k_boxes(
-            this_level_rois,
-            this_level_scores,
-            k=this_level_post_nms_top_k)
+            this_level_rois, this_level_scores, k=this_level_post_nms_top_k)

       rois.append(this_level_rois)
       roi_scores.append(this_level_roi_scores)
@@ -199,8 +195,8 @@ class ROIGenerator(object):
       scores: a dict with keys representing FPN levels and values representing
         logit tensors of shape [batch_size, feature_h, feature_w, num_anchors].
       anchor_boxes: a dict with keys representing FPN levels and values
-        representing anchor box tensors of shape
-        [batch_size, feature_h, feature_w, num_anchors * 4].
+        representing anchor box tensors of shape [batch_size, feature_h,
+        feature_w, num_anchors * 4].
       image_shape: a tensor of shape [batch_size, 2] where the last dimension
         are [height, width] of the scaled image.
       is_training: a bool indicating whether it is in training or inference
@@ -220,16 +216,16 @@ class ROIGenerator(object):
         scores,
         anchor_boxes,
         image_shape,
-        rpn_pre_nms_top_k=(self._rpn_pre_nms_top_k if is_training
-                           else self._test_rpn_pre_nms_top_k),
-        rpn_post_nms_top_k=(self._rpn_post_nms_top_k if is_training
-                            else self._test_rpn_post_nms_top_k),
-        rpn_nms_threshold=(self._rpn_nms_threshold if is_training
-                           else self._test_rpn_nms_threshold),
-        rpn_score_threshold=(self._rpn_score_threshold if is_training
-                             else self._test_rpn_score_threshold),
-        rpn_min_size_threshold=(self._rpn_min_size_threshold if is_training
-                                else self._test_rpn_min_size_threshold),
+        rpn_pre_nms_top_k=(self._rpn_pre_nms_top_k
+                           if is_training else self._test_rpn_pre_nms_top_k),
+        rpn_post_nms_top_k=(self._rpn_post_nms_top_k
+                            if is_training else self._test_rpn_post_nms_top_k),
+        rpn_nms_threshold=(self._rpn_nms_threshold
+                           if is_training else self._test_rpn_nms_threshold),
+        rpn_score_threshold=(self._rpn_score_threshold if is_training else
+                             self._test_rpn_score_threshold),
+        rpn_min_size_threshold=(self._rpn_min_size_threshold if is_training else
+                                self._test_rpn_min_size_threshold),
         decode_boxes=True,
         clip_boxes=True,
         use_batched_nms=self._use_batched_nms,
...
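For orientation, a hypothetical single-level condensation of the proposal flow touched above (sigmoid, decode, clip, top-k), reusing the `box_utils` helpers whose call sites appear in the diff. The function name and top-k sizes are stand-ins for the train/test settings that `ROIGenerator` switches on `is_training`; this is not part of the commit.

    import tensorflow as tf

    from official.vision.detection.utils import box_utils


    def propose_rois_one_level(raw_scores, raw_boxes, anchors, image_shape,
                               is_training, train_top_k=2000, test_top_k=1000):
      """Hypothetical per-level proposal step, sketched from the diff."""
      scores = tf.sigmoid(raw_scores)
      boxes = box_utils.decode_boxes(raw_boxes, anchors)
      boxes = box_utils.clip_boxes(boxes, image_shape)
      k = train_top_k if is_training else test_top_k
      return box_utils.top_k_boxes(boxes, scores, k=k)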
@@ -20,7 +20,6 @@ from __future__ import print_function
-
 import tensorflow as tf

 _EPSILON = 1e-8
@@ -30,6 +29,7 @@ def nearest_upsampling(data, scale):
   Args:
     data: A tensor with a shape of [batch, height_in, width_in, channels].
     scale: An integer multiple to scale resolution of input data.
+
   Returns:
     data_up: A tensor with a shape of
       [batch, height_in*scale, width_in*scale, channels]. Same dtype as input
@@ -382,8 +382,7 @@ def multilevel_crop_and_resize(features, boxes, output_size=7):
     areas_sqrt = tf.sqrt(box_height * box_width)
     levels = tf.cast(
         tf.math.floordiv(
-            tf.math.log(tf.divide(areas_sqrt, 224.0)), tf.math.log(2.0)) +
-        4.0,
+            tf.math.log(tf.divide(areas_sqrt, 224.0)), tf.math.log(2.0)) + 4.0,
         dtype=tf.int32)
     # Maps levels between [min_level, max_level].
     levels = tf.minimum(max_level, tf.maximum(levels, min_level))
@@ -395,9 +394,12 @@ def multilevel_crop_and_resize(features, boxes, output_size=7):
     boxes /= tf.expand_dims(scale_to_level, axis=2)
     box_width /= scale_to_level
     box_height /= scale_to_level
-    boxes = tf.concat([boxes[:, :, 0:2],
-                       tf.expand_dims(box_height, -1),
-                       tf.expand_dims(box_width, -1)], axis=-1)
+    boxes = tf.concat([
+        boxes[:, :, 0:2],
+        tf.expand_dims(box_height, -1),
+        tf.expand_dims(box_width, -1)
+    ],
+                      axis=-1)

     # Maps levels to [0, max_level-min_level].
     levels -= min_level
@@ -464,12 +466,12 @@ def single_level_feature_crop(features, level_boxes, detection_prior_levels,
   Args:
-    features: a float tensor of shape [batch_size, num_levels,
-      max_feature_size, max_feature_size, num_downsample_channels].
-    level_boxes: a float Tensor of the level boxes to crop from.
-      [batch_size, num_instances, 4].
+    features: a float tensor of shape [batch_size, num_levels, max_feature_size,
+      max_feature_size, num_downsample_channels].
+    level_boxes: a float Tensor of the level boxes to crop from. [batch_size,
+      num_instances, 4].
     detection_prior_levels: an int Tensor of instance assigned level of shape
       [batch_size, num_instances].
     min_mask_level: minimum FPN level to crop mask feature from.
     mask_crop_size: an int of mask crop size.
@@ -478,8 +480,8 @@ def single_level_feature_crop(features, level_boxes, detection_prior_levels,
       mask_crop_size, mask_crop_size, num_downsample_channels]. This is the
       instance feature crop.
   """
-  (batch_size, num_levels, max_feature_size,
-   _, num_downsample_channels) = features.get_shape().as_list()
+  (batch_size, num_levels, max_feature_size, _,
+   num_downsample_channels) = features.get_shape().as_list()
   _, num_of_instances, _ = level_boxes.get_shape().as_list()
   level_boxes = tf.cast(level_boxes, tf.int32)
   assert num_of_instances == detection_prior_levels.get_shape().as_list()[1]
@@ -503,32 +505,25 @@ def single_level_feature_crop(features, level_boxes, detection_prior_levels,
   indices = tf.reshape(
       tf.tile(
           tf.reshape(
-              tf.range(batch_size) * batch_dim_size,
-              [batch_size, 1, 1, 1]),
-          [1, num_of_instances,
-           mask_crop_size, mask_crop_size]) +
-      tf.tile(
-          tf.reshape(levels * level_dim_size,
-                     [batch_size, num_of_instances, 1, 1]),
-          [1, 1, mask_crop_size, mask_crop_size]) +
-      tf.tile(
-          tf.reshape(y_indices * height_dim_size,
-                     [batch_size, num_of_instances,
-                      mask_crop_size, 1]),
-          [1, 1, 1, mask_crop_size]) +
+              tf.range(batch_size) * batch_dim_size, [batch_size, 1, 1, 1]),
+          [1, num_of_instances, mask_crop_size, mask_crop_size]) + tf.tile(
+              tf.reshape(levels * level_dim_size,
+                         [batch_size, num_of_instances, 1, 1]),
+              [1, 1, mask_crop_size, mask_crop_size]) + tf.tile(
+                  tf.reshape(y_indices * height_dim_size,
+                             [batch_size, num_of_instances, mask_crop_size, 1]),
+                  [1, 1, 1, mask_crop_size]) +
       tf.tile(
           tf.reshape(x_indices,
-                     [batch_size, num_of_instances,
-                      1, mask_crop_size]),
+                     [batch_size, num_of_instances, 1, mask_crop_size]),
          [1, 1, mask_crop_size, 1]), [-1])

-  features_r2 = tf.reshape(features,
-                           [-1, num_downsample_channels])
+  features_r2 = tf.reshape(features, [-1, num_downsample_channels])
   crop_features = tf.reshape(
-      tf.gather(features_r2, indices),
-      [batch_size * num_of_instances,
-       mask_crop_size, mask_crop_size,
-       num_downsample_channels])
+      tf.gather(features_r2, indices), [
+          batch_size * num_of_instances, mask_crop_size, mask_crop_size,
+          num_downsample_channels
+      ])

   return crop_features
@@ -546,9 +541,9 @@ def crop_mask_in_target_box(masks,
     boxes: a float tensor representing box cooridnates that tightly enclose
       masks with a shape of [batch_size, num_masks, 4] in un-normalized
       coordinates. A box is represented by [ymin, xmin, ymax, xmax].
-    target_boxes: a float tensor representing target box cooridnates for
-      masks with a shape of [batch_size, num_masks, 4] in un-normalized
-      coordinates. A box is represented by [ymin, xmin, ymax, xmax].
+    target_boxes: a float tensor representing target box cooridnates for masks
+      with a shape of [batch_size, num_masks, 4] in un-normalized coordinates. A
+      box is represented by [ymin, xmin, ymax, xmax].
     output_size: A scalar to indicate the output crop size. It currently only
       supports to output a square shape outputs.
     sample_offset: a float number in [0, 1] indicates the subpixel sample offset
@@ -561,10 +556,10 @@ def crop_mask_in_target_box(masks,
   """
   with tf.name_scope('crop_mask_in_target_box'):
     batch_size, num_masks, height, width = masks.get_shape().as_list()
-    masks = tf.reshape(masks, [batch_size*num_masks, height, width, 1])
+    masks = tf.reshape(masks, [batch_size * num_masks, height, width, 1])
     # Pad zeros on the boundary of masks.
     masks = tf.image.pad_to_bounding_box(masks, 2, 2, height + 4, width + 4)
-    masks = tf.reshape(masks, [batch_size, num_masks, height+4, width+4, 1])
+    masks = tf.reshape(masks, [batch_size, num_masks, height + 4, width + 4, 1])

     # Projects target box locations and sizes to corresponding cropped
     # mask coordinates.
@@ -572,10 +567,10 @@ def crop_mask_in_target_box(masks,
         value=boxes, num_or_size_splits=4, axis=2)
     bb_y_min, bb_x_min, bb_y_max, bb_x_max = tf.split(
         value=target_boxes, num_or_size_splits=4, axis=2)
-    y_transform = (bb_y_min - gt_y_min) * height / (
-        gt_y_max - gt_y_min + _EPSILON) + 2
-    x_transform = (bb_x_min - gt_x_min) * height / (
-        gt_x_max - gt_x_min + _EPSILON) + 2
+    y_transform = (bb_y_min - gt_y_min) * height / (gt_y_max - gt_y_min +
+                                                    _EPSILON) + 2
+    x_transform = (bb_x_min - gt_x_min) * height / (gt_x_max - gt_x_min +
+                                                    _EPSILON) + 2
     h_transform = (bb_y_max - bb_y_min) * width / (
         gt_y_max - gt_y_min + _EPSILON)
     w_transform = (bb_x_max - bb_x_min) * width / (
@@ -592,8 +587,8 @@ def crop_mask_in_target_box(masks,
     # Reshape tensors to have the right shape for selective_crop_and_resize.
     trasnformed_boxes = tf.concat(
         [y_transform, x_transform, h_transform, w_transform], -1)
-    levels = tf.tile(tf.reshape(tf.range(num_masks), [1, num_masks]),
-                     [batch_size, 1])
+    levels = tf.tile(
+        tf.reshape(tf.range(num_masks), [1, num_masks]), [batch_size, 1])

     cropped_masks = selective_crop_and_resize(
         masks,
...
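The level computation in `multilevel_crop_and_resize` above is the standard FPN assignment, level = floor(log2(sqrt(box_area) / 224)) + 4, clipped to [min_level, max_level]. A hypothetical NumPy check; the level bounds here are placeholders, since the real code derives them from the feature dict.

    import numpy as np


    def assign_fpn_level(box_height, box_width, min_level=2, max_level=5):
      areas_sqrt = np.sqrt(box_height * box_width)
      level = np.floor(np.log2(areas_sqrt / 224.0)) + 4.0
      return int(np.clip(level, min_level, max_level))

    assign_fpn_level(224.0, 224.0)  # -> 4: a 224x224 box reads from level 4
    assign_fpn_level(112.0, 112.0)  # -> 3: halving the scale drops one level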
@@ -87,18 +87,16 @@ def box_matching(boxes, gt_boxes, gt_classes):
                                matched_gt_boxes)
   matched_gt_classes = tf.gather_nd(gt_classes, gather_nd_indices)
-  matched_gt_classes = tf.where(
-      background_box_mask,
-      tf.zeros_like(matched_gt_classes),
-      matched_gt_classes)
+  matched_gt_classes = tf.where(background_box_mask,
+                                tf.zeros_like(matched_gt_classes),
+                                matched_gt_classes)

-  matched_gt_indices = tf.where(
-      background_box_mask,
-      -tf.ones_like(argmax_iou_indices),
-      argmax_iou_indices)
+  matched_gt_indices = tf.where(background_box_mask,
+                                -tf.ones_like(argmax_iou_indices),
+                                argmax_iou_indices)

-  return (matched_gt_boxes, matched_gt_classes, matched_gt_indices,
-          matched_iou, iou)
+  return (matched_gt_boxes, matched_gt_classes, matched_gt_indices, matched_iou,
+          iou)


 def assign_and_sample_proposals(proposed_boxes,
@@ -121,22 +119,21 @@ def assign_and_sample_proposals(proposed_boxes,
   returns box_targets, class_targets, and RoIs.

   Args:
-    proposed_boxes: a tensor of shape of [batch_size, N, 4]. N is the number
-      of proposals before groundtruth assignment. The last dimension is the
-      box coordinates w.r.t. the scaled images in [ymin, xmin, ymax, xmax]
-      format.
-    gt_boxes: a tensor of shape of [batch_size, MAX_NUM_INSTANCES, 4].
-      The coordinates of gt_boxes are in the pixel coordinates of the scaled
-      image. This tensor might have padding of values -1 indicating the invalid
-      box coordinates.
+    proposed_boxes: a tensor of shape of [batch_size, N, 4]. N is the number of
+      proposals before groundtruth assignment. The last dimension is the box
+      coordinates w.r.t. the scaled images in [ymin, xmin, ymax, xmax] format.
+    gt_boxes: a tensor of shape of [batch_size, MAX_NUM_INSTANCES, 4]. The
+      coordinates of gt_boxes are in the pixel coordinates of the scaled image.
+      This tensor might have padding of values -1 indicating the invalid box
+      coordinates.
     gt_classes: a tensor with a shape of [batch_size, MAX_NUM_INSTANCES]. This
       tensor might have paddings with values of -1 indicating the invalid
       classes.
     num_samples_per_image: a integer represents RoI minibatch size per image.
     mix_gt_boxes: a bool indicating whether to mix the groundtruth boxes before
       sampling proposals.
-    fg_fraction: a float represents the target fraction of RoI minibatch that
-      is labeled foreground (i.e., class > 0).
+    fg_fraction: a float represents the target fraction of RoI minibatch that is
+      labeled foreground (i.e., class > 0).
     fg_iou_thresh: a float represents the IoU overlap threshold for an RoI to be
       considered foreground (if >= fg_iou_thresh).
     bg_iou_thresh_hi: a float represents the IoU overlap threshold for an RoI to
@@ -163,8 +160,8 @@ def assign_and_sample_proposals(proposed_boxes,
   else:
     boxes = proposed_boxes

-  (matched_gt_boxes, matched_gt_classes, matched_gt_indices,
-   matched_iou, _) = box_matching(boxes, gt_boxes, gt_classes)
+  (matched_gt_boxes, matched_gt_classes, matched_gt_indices, matched_iou,
+   _) = box_matching(boxes, gt_boxes, gt_classes)

   positive_match = tf.greater(matched_iou, fg_iou_thresh)
   negative_match = tf.logical_and(
@@ -173,10 +170,12 @@ def assign_and_sample_proposals(proposed_boxes,
   ignored_match = tf.less(matched_iou, 0.0)

   # re-assign negatively matched boxes to the background class.
-  matched_gt_classes = tf.where(
-      negative_match, tf.zeros_like(matched_gt_classes), matched_gt_classes)
-  matched_gt_indices = tf.where(
-      negative_match, tf.zeros_like(matched_gt_indices), matched_gt_indices)
+  matched_gt_classes = tf.where(negative_match,
+                                tf.zeros_like(matched_gt_classes),
+                                matched_gt_classes)
+  matched_gt_indices = tf.where(negative_match,
+                                tf.zeros_like(matched_gt_indices),
+                                matched_gt_indices)

   sample_candidates = tf.logical_and(
       tf.logical_or(positive_match, negative_match),
@@ -189,8 +188,9 @@ def assign_and_sample_proposals(proposed_boxes,
   batch_size, _ = sample_candidates.get_shape().as_list()
   sampled_indicators = []
   for i in range(batch_size):
-    sampled_indicator = sampler.subsample(
-        sample_candidates[i], num_samples_per_image, positive_match[i])
+    sampled_indicator = sampler.subsample(sample_candidates[i],
+                                          num_samples_per_image,
+                                          positive_match[i])
     sampled_indicators.append(sampled_indicator)
   sampled_indicators = tf.stack(sampled_indicators)
   _, sampled_indices = tf.nn.top_k(
@@ -206,10 +206,8 @@ def assign_and_sample_proposals(proposed_boxes,
   sampled_rois = tf.gather_nd(boxes, gather_nd_indices)
   sampled_gt_boxes = tf.gather_nd(matched_gt_boxes, gather_nd_indices)
-  sampled_gt_classes = tf.gather_nd(
-      matched_gt_classes, gather_nd_indices)
-  sampled_gt_indices = tf.gather_nd(
-      matched_gt_indices, gather_nd_indices)
+  sampled_gt_classes = tf.gather_nd(matched_gt_classes, gather_nd_indices)
+  sampled_gt_indices = tf.gather_nd(matched_gt_indices, gather_nd_indices)

   return (sampled_rois, sampled_gt_boxes, sampled_gt_classes,
           sampled_gt_indices)
@@ -237,8 +235,8 @@ def sample_and_crop_foreground_masks(candidate_rois,
     candidate_gt_indices: a tensor of shape [batch_size, N], storing the
       corresponding groundtruth instance indices to the `candidate_gt_boxes`,
       i.e. gt_boxes[candidate_gt_indices[:, i]] = candidate_gt_boxes[:, i] and
-      gt_boxes which is of shape [batch_size, MAX_INSTANCES, 4], M >= N, is the
-      superset of candidate_gt_boxes.
+      gt_boxes which is of shape [batch_size, MAX_INSTANCES, 4], M >= N, is
+      the superset of candidate_gt_boxes.
     gt_masks: a tensor of [batch_size, MAX_INSTANCES, mask_height, mask_width]
       containing all the groundtruth masks which sample masks are drawn from.
     num_mask_samples_per_image: an integer which specifies the number of masks
@@ -266,27 +264,29 @@ def sample_and_crop_foreground_masks(candidate_rois,
       tf.expand_dims(tf.range(fg_instance_indices_shape[0]), axis=-1) *
       tf.ones([1, fg_instance_indices_shape[-1]], dtype=tf.int32))
-  gather_nd_instance_indices = tf.stack(
-      [batch_indices, fg_instance_indices], axis=-1)
-  foreground_rois = tf.gather_nd(
-      candidate_rois, gather_nd_instance_indices)
-  foreground_boxes = tf.gather_nd(
-      candidate_gt_boxes, gather_nd_instance_indices)
-  foreground_classes = tf.gather_nd(
-      candidate_gt_classes, gather_nd_instance_indices)
-  foreground_gt_indices = tf.gather_nd(
-      candidate_gt_indices, gather_nd_instance_indices)
+  gather_nd_instance_indices = tf.stack([batch_indices, fg_instance_indices],
+                                        axis=-1)
+  foreground_rois = tf.gather_nd(candidate_rois, gather_nd_instance_indices)
+  foreground_boxes = tf.gather_nd(candidate_gt_boxes,
+                                  gather_nd_instance_indices)
+  foreground_classes = tf.gather_nd(candidate_gt_classes,
+                                    gather_nd_instance_indices)
+  foreground_gt_indices = tf.gather_nd(candidate_gt_indices,
+                                       gather_nd_instance_indices)

   foreground_gt_indices_shape = tf.shape(foreground_gt_indices)
   batch_indices = (
       tf.expand_dims(tf.range(foreground_gt_indices_shape[0]), axis=-1) *
       tf.ones([1, foreground_gt_indices_shape[-1]], dtype=tf.int32))
-  gather_nd_gt_indices = tf.stack(
-      [batch_indices, foreground_gt_indices], axis=-1)
+  gather_nd_gt_indices = tf.stack([batch_indices, foreground_gt_indices],
+                                  axis=-1)
   foreground_masks = tf.gather_nd(gt_masks, gather_nd_gt_indices)

   cropped_foreground_masks = spatial_transform_ops.crop_mask_in_target_box(
-      foreground_masks, foreground_boxes, foreground_rois, mask_target_size,
+      foreground_masks,
+      foreground_boxes,
+      foreground_rois,
+      mask_target_size,
       sample_offset=0.5)

   return foreground_rois, foreground_classes, cropped_foreground_masks
@@ -307,12 +307,11 @@ class ROISampler(object):
     """Sample and assign RoIs for training.

     Args:
-      rois: a tensor of shape of [batch_size, N, 4]. N is the number
-        of proposals before groundtruth assignment. The last dimension is the
-        box coordinates w.r.t. the scaled images in [ymin, xmin, ymax, xmax]
-        format.
-      gt_boxes: a tensor of shape of [batch_size, MAX_NUM_INSTANCES, 4].
-        The coordinates of gt_boxes are in the pixel coordinates of the scaled
+      rois: a tensor of shape of [batch_size, N, 4]. N is the number of
+        proposals before groundtruth assignment. The last dimension is the box
+        coordinates w.r.t. the scaled images in [ymin, xmin, ymax, xmax] format.
+      gt_boxes: a tensor of shape of [batch_size, MAX_NUM_INSTANCES, 4]. The
+        coordinates of gt_boxes are in the pixel coordinates of the scaled
         image. This tensor might have padding of values -1 indicating the
         invalid box coordinates.
       gt_classes: a tensor with a shape of [batch_size, MAX_NUM_INSTANCES]. This
@@ -350,12 +349,8 @@ class MaskSampler(object):
     self._mask_target_size = mask_target_size
     self._num_mask_samples_per_image = num_mask_samples_per_image

-  def __call__(self,
-               candidate_rois,
-               candidate_gt_boxes,
-               candidate_gt_classes,
-               candidate_gt_indices,
-               gt_masks):
+  def __call__(self, candidate_rois, candidate_gt_boxes, candidate_gt_classes,
+               candidate_gt_indices, gt_masks):
     """Sample and create mask targets for training.

     Args:
@@ -371,8 +366,8 @@ class MaskSampler(object):
       candidate_gt_indices: a tensor of shape [batch_size, N], storing the
         corresponding groundtruth instance indices to the `candidate_gt_boxes`,
         i.e. gt_boxes[candidate_gt_indices[:, i]] = candidate_gt_boxes[:, i],
-        where gt_boxes which is of shape [batch_size, MAX_INSTANCES, 4], M >= N,
-        is the superset of candidate_gt_boxes.
+        where gt_boxes which is of shape [batch_size, MAX_INSTANCES, 4], M >=
+        N, is the superset of candidate_gt_boxes.
       gt_masks: a tensor of [batch_size, MAX_INSTANCES, mask_height, mask_width]
         containing all the groundtruth masks which sample masks are drawn from.
         after sampling. The output masks are resized w.r.t the sampled RoIs.
@@ -388,12 +383,9 @@ class MaskSampler(object):
       cropped foreground masks used for training.
     """
     foreground_rois, foreground_classes, cropped_foreground_masks = (
-        sample_and_crop_foreground_masks(
-            candidate_rois,
-            candidate_gt_boxes,
-            candidate_gt_classes,
-            candidate_gt_indices,
-            gt_masks,
-            self._num_mask_samples_per_image,
-            self._mask_target_size))
+        sample_and_crop_foreground_masks(candidate_rois, candidate_gt_boxes,
+                                         candidate_gt_classes,
+                                         candidate_gt_indices, gt_masks,
+                                         self._num_mask_samples_per_image,
+                                         self._mask_target_size))
     return foreground_rois, foreground_classes, cropped_foreground_masks
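The recurring pattern in this file is elementwise re-assignment with `tf.where`. A self-contained toy version of the background re-assignment above; the threshold and values are made up, and the real code derives `negative_match` from `bg_iou_thresh_lo`/`bg_iou_thresh_hi` rather than the fixed 0.5 used here.

    import tensorflow as tf

    matched_iou = tf.constant([0.8, 0.2, -1.0])
    matched_gt_classes = tf.constant([3, 5, 7])

    # Proposals with low-but-valid IoU become background (class 0).
    negative_match = tf.logical_and(
        tf.greater_equal(matched_iou, 0.0), tf.less(matched_iou, 0.5))
    matched_gt_classes = tf.where(negative_match,
                                  tf.zeros_like(matched_gt_classes),
                                  matched_gt_classes)
    # -> [3, 0, 7]; the -1.0 entry is an ignored match and keeps its class.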
@@ -115,8 +115,8 @@ def normalize_boxes(boxes, image_shape):
   """Converts boxes to the normalized coordinates.

   Args:
-    boxes: a tensor whose last dimension is 4 representing the coordinates
-      of boxes in ymin, xmin, ymax, xmax order.
+    boxes: a tensor whose last dimension is 4 representing the coordinates of
+      boxes in ymin, xmin, ymax, xmax order.
     image_shape: a list of two integers, a two-element vector or a tensor such
       that all but the last dimensions are `broadcastable` to `boxes`. The last
       dimension is 2, which represents [height, width].
@@ -153,8 +153,8 @@ def denormalize_boxes(boxes, image_shape):
   """Converts boxes normalized by [height, width] to pixel coordinates.

   Args:
-    boxes: a tensor whose last dimension is 4 representing the coordinates
-      of boxes in ymin, xmin, ymax, xmax order.
+    boxes: a tensor whose last dimension is 4 representing the coordinates of
+      boxes in ymin, xmin, ymax, xmax order.
     image_shape: a list of two integers, a two-element vector or a tensor such
       that all but the last dimensions are `broadcastable` to `boxes`. The last
       dimension is 2, which represents [height, width].
@@ -187,8 +187,8 @@ def clip_boxes(boxes, image_shape):
   """Clips boxes to image boundaries.

   Args:
-    boxes: a tensor whose last dimension is 4 representing the coordinates
-      of boxes in ymin, xmin, ymax, xmax order.
+    boxes: a tensor whose last dimension is 4 representing the coordinates of
+      boxes in ymin, xmin, ymax, xmax order.
     image_shape: a list of two integers, a two-element vector or a tensor such
       that all but the last dimensions are `broadcastable` to `boxes`. The last
       dimension is 2, which represents [height, width].
@@ -255,8 +255,8 @@ def encode_boxes(boxes, anchors, weights=None):
   """Encode boxes to targets.

   Args:
-    boxes: a tensor whose last dimension is 4 representing the coordinates
-      of boxes in ymin, xmin, ymax, xmax order.
+    boxes: a tensor whose last dimension is 4 representing the coordinates of
+      boxes in ymin, xmin, ymax, xmax order.
     anchors: a tensor whose shape is the same as, or `broadcastable` to `boxes`,
       representing the coordinates of anchors in ymin, xmin, ymax, xmax order.
     weights: None or a list of four float numbers used to scale coordinates.
@@ -302,9 +302,8 @@ def encode_boxes(boxes, anchors, weights=None):
     encoded_dh *= weights[2]
     encoded_dw *= weights[3]

-  encoded_boxes = tf.concat(
-      [encoded_dy, encoded_dx, encoded_dh, encoded_dw],
-      axis=-1)
+  encoded_boxes = tf.concat([encoded_dy, encoded_dx, encoded_dh, encoded_dw],
+                            axis=-1)
   return encoded_boxes
@@ -359,10 +358,11 @@ def decode_boxes(encoded_boxes, anchors, weights=None):
   decoded_boxes_ymax = decoded_boxes_ymin + decoded_boxes_h - 1.0
   decoded_boxes_xmax = decoded_boxes_xmin + decoded_boxes_w - 1.0
-  decoded_boxes = tf.concat(
-      [decoded_boxes_ymin, decoded_boxes_xmin,
-       decoded_boxes_ymax, decoded_boxes_xmax],
-      axis=-1)
+  decoded_boxes = tf.concat([
+      decoded_boxes_ymin, decoded_boxes_xmin, decoded_boxes_ymax,
+      decoded_boxes_xmax
+  ],
+                            axis=-1)
   return decoded_boxes
@@ -546,6 +546,6 @@ def get_non_empty_box_indices(boxes):
   # Selects indices if box height or width is 0.
   height = boxes[:, 2] - boxes[:, 0]
   width = boxes[:, 3] - boxes[:, 1]
-  indices = tf.where(tf.logical_and(tf.greater(height, 0),
-                                    tf.greater(width, 0)))
+  indices = tf.where(
+      tf.logical_and(tf.greater(height, 0), tf.greater(width, 0)))
   return indices[:, 0]
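A hypothetical round-trip check for the `encode_boxes`/`decode_boxes` pair reformatted above: encoding a box against an anchor and then decoding the result should recover the box. Both signatures appear in the diff; the coordinates below are arbitrary.

    import tensorflow as tf

    from official.vision.detection.utils import box_utils

    anchors = tf.constant([[[10.0, 10.0, 50.0, 50.0]]])  # [1, 1, 4]
    boxes = tf.constant([[[12.0, 14.0, 48.0, 52.0]]])
    encoded = box_utils.encode_boxes(boxes, anchors)
    decoded = box_utils.decode_boxes(encoded, anchors)
    # decoded should be numerically close to the original boxes.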
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
"""Utility functions for input processing.""" """Utility functions for input processing."""
import math import math
import tensorflow as tf import tensorflow as tf
from official.vision.detection.utils import box_utils from official.vision.detection.utils import box_utils
...@@ -91,12 +92,12 @@ def compute_padded_size(desired_size, stride): ...@@ -91,12 +92,12 @@ def compute_padded_size(desired_size, stride):
[height, width] of the padded output image size. [height, width] of the padded output image size.
""" """
if isinstance(desired_size, list) or isinstance(desired_size, tuple): if isinstance(desired_size, list) or isinstance(desired_size, tuple):
padded_size = [int(math.ceil(d * 1.0 / stride) * stride) padded_size = [
for d in desired_size] int(math.ceil(d * 1.0 / stride) * stride) for d in desired_size
]
else: else:
padded_size = tf.cast( padded_size = tf.cast(
tf.math.ceil( tf.math.ceil(tf.cast(desired_size, dtype=tf.float32) / stride) * stride,
tf.cast(desired_size, dtype=tf.float32) / stride) * stride,
tf.int32) tf.int32)
return padded_size return padded_size
...@@ -158,8 +159,8 @@ def resize_and_crop_image(image, ...@@ -158,8 +159,8 @@ def resize_and_crop_image(image,
else: else:
scaled_size = desired_size scaled_size = desired_size
scale = tf.minimum( scale = tf.minimum(scaled_size[0] / image_size[0],
scaled_size[0] / image_size[0], scaled_size[1] / image_size[1]) scaled_size[1] / image_size[1])
scaled_size = tf.round(image_size * scale) scaled_size = tf.round(image_size * scale)
# Computes 2D image_scale. # Computes 2D image_scale.
...@@ -169,9 +170,8 @@ def resize_and_crop_image(image, ...@@ -169,9 +170,8 @@ def resize_and_crop_image(image,
# desired_size. # desired_size.
if random_jittering: if random_jittering:
max_offset = scaled_size - desired_size max_offset = scaled_size - desired_size
max_offset = tf.where(tf.less(max_offset, 0), max_offset = tf.where(
tf.zeros_like(max_offset), tf.less(max_offset, 0), tf.zeros_like(max_offset), max_offset)
max_offset)
offset = max_offset * tf.random.uniform([ offset = max_offset * tf.random.uniform([
2, 2,
], 0, 1, seed=seed) ], 0, 1, seed=seed)
...@@ -191,9 +191,9 @@ def resize_and_crop_image(image, ...@@ -191,9 +191,9 @@ def resize_and_crop_image(image,
image_info = tf.stack([ image_info = tf.stack([
image_size, image_size,
tf.cast(desired_size, dtype=tf.float32), tf.cast(desired_size, dtype=tf.float32), image_scale,
image_scale, tf.cast(offset, tf.float32)
tf.cast(offset, tf.float32)]) ])
return output_image, image_info return output_image, image_info
...@@ -288,25 +288,21 @@ def resize_and_crop_image_v2(image, ...@@ -288,25 +288,21 @@ def resize_and_crop_image_v2(image,
image, tf.cast(scaled_size, tf.int32), method=method) image, tf.cast(scaled_size, tf.int32), method=method)
if random_jittering: if random_jittering:
scaled_image = scaled_image[ scaled_image = scaled_image[offset[0]:offset[0] + desired_size[0],
offset[0]:offset[0] + desired_size[0], offset[1]:offset[1] + desired_size[1], :]
offset[1]:offset[1] + desired_size[1], :]
output_image = tf.image.pad_to_bounding_box( output_image = tf.image.pad_to_bounding_box(scaled_image, 0, 0,
scaled_image, 0, 0, padded_size[0], padded_size[1]) padded_size[0], padded_size[1])
image_info = tf.stack([ image_info = tf.stack([
image_size, image_size,
tf.cast(desired_size, dtype=tf.float32), tf.cast(desired_size, dtype=tf.float32), image_scale,
image_scale, tf.cast(offset, tf.float32)
tf.cast(offset, tf.float32)]) ])
return output_image, image_info return output_image, image_info
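For context, the `image_info` tensor stacked in both functions is `[4, 2]`. A hypothetical example of unpacking it, with the row order taken from the `tf.stack` call above:

```python
import tensorflow as tf

# Hypothetical image_info; rows follow the stack order in the code above.
image_info = tf.constant([[480., 640.],   # original [height, width]
                          [640., 640.],   # desired output size
                          [1., 1.],       # y/x image scale
                          [0., 0.]])      # y/x crop offset
original_size, desired_size, image_scale, offset = tf.unstack(image_info)
```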
def resize_and_crop_boxes(boxes, def resize_and_crop_boxes(boxes, image_scale, output_size, offset):
image_scale,
output_size,
offset):
"""Resizes boxes to output size with scale and offset. """Resizes boxes to output size with scale and offset.
Args: Args:
...@@ -329,10 +325,7 @@ def resize_and_crop_boxes(boxes, ...@@ -329,10 +325,7 @@ def resize_and_crop_boxes(boxes,
return boxes return boxes
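A standalone sketch of the scale-offset-clip transform that `resize_and_crop_boxes` performs; names and values are illustrative, and the actual clipping lives in `box_utils`:

```python
import tensorflow as tf

boxes = tf.constant([[10., 20., 110., 220.]])   # [ymin, xmin, ymax, xmax]
image_scale = tf.constant([0.5, 0.5])           # y/x scale
offset = tf.constant([0., 0.])                  # y/x crop offset
output_size = tf.constant([64., 128.])          # [height, width]

# Scale coordinates, subtract the crop offset, clip to the output window.
boxes = boxes * tf.tile(image_scale, [2]) - tf.tile(offset, [2])
boxes = tf.clip_by_value(boxes, 0., tf.tile(output_size, [2]))
# -> [[5., 10., 55., 110.]]
```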
def resize_and_crop_masks(masks, def resize_and_crop_masks(masks, image_scale, output_size, offset):
image_scale,
output_size,
offset):
"""Resizes boxes to output size with scale and offset. """Resizes boxes to output size with scale and offset.
Args: Args:
......
...@@ -18,14 +18,12 @@ from __future__ import division ...@@ -18,14 +18,12 @@ from __future__ import division
from __future__ import print_function from __future__ import print_function
import math import math
import numpy as np import numpy as np
import cv2 import cv2
def paste_instance_masks(masks, def paste_instance_masks(masks, detected_boxes, image_height, image_width):
detected_boxes,
image_height,
image_width):
"""Paste instance masks to generate the image segmentation results. """Paste instance masks to generate the image segmentation results.
Args: Args:
...@@ -95,10 +93,8 @@ def paste_instance_masks(masks, ...@@ -95,10 +93,8 @@ def paste_instance_masks(masks,
y_0 = min(max(ref_box[1], 0), image_height) y_0 = min(max(ref_box[1], 0), image_height)
y_1 = min(max(ref_box[3] + 1, 0), image_height) y_1 = min(max(ref_box[3] + 1, 0), image_height)
im_mask[y_0:y_1, x_0:x_1] = mask[ im_mask[y_0:y_1, x_0:x_1] = mask[(y_0 - ref_box[1]):(y_1 - ref_box[1]),
(y_0 - ref_box[1]):(y_1 - ref_box[1]), (x_0 - ref_box[0]):(x_1 - ref_box[0])]
(x_0 - ref_box[0]):(x_1 - ref_box[0])
]
segms.append(im_mask) segms.append(im_mask)
segms = np.array(segms) segms = np.array(segms)
...@@ -106,10 +102,7 @@ def paste_instance_masks(masks, ...@@ -106,10 +102,7 @@ def paste_instance_masks(masks,
return segms return segms
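A toy NumPy sketch of the clipped paste in the hunk above, assuming an 8x8 image and a box that overhangs the border: the destination window is clipped to the image, and the source indices are shifted by the box origin accordingly.

```python
import numpy as np

image = np.zeros((8, 8), dtype=np.uint8)
mask = np.ones((4, 4), dtype=np.uint8)
ref_box = [6, 6, 9, 9]                      # [x0, y0, x1, y1], overhangs image

# Clip the destination window to the image bounds.
x_0, x_1 = max(ref_box[0], 0), min(ref_box[2] + 1, 8)
y_0, y_1 = max(ref_box[1], 0), min(ref_box[3] + 1, 8)
# Shift source indices by the box origin so they line up with the window.
image[y_0:y_1, x_0:x_1] = mask[(y_0 - ref_box[1]):(y_1 - ref_box[1]),
                               (x_0 - ref_box[0]):(x_1 - ref_box[0])]
```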
def paste_instance_masks_v2(masks, def paste_instance_masks_v2(masks, detected_boxes, image_height, image_width):
detected_boxes,
image_height,
image_width):
"""Paste instance masks to generate the image segmentation (v2). """Paste instance masks to generate the image segmentation (v2).
Args: Args:
...@@ -146,34 +139,22 @@ def paste_instance_masks_v2(masks, ...@@ -146,34 +139,22 @@ def paste_instance_masks_v2(masks,
beta = box[3] / (1.0 * mask_height) beta = box[3] / (1.0 * mask_height)
# pylint: disable=invalid-name # pylint: disable=invalid-name
# Transformation from mask pixel indices to image coordinate. # Transformation from mask pixel indices to image coordinate.
M_mask_to_image = np.array( M_mask_to_image = np.array([[alpha, 0, xmin], [0, beta, ymin], [0, 0, 1]],
[[alpha, 0, xmin], dtype=np.float32)
[0, beta, ymin],
[0, 0, 1]],
dtype=np.float32)
# Transformation from image to cropped mask coordinate. # Transformation from image to cropped mask coordinate.
M_image_to_crop = np.array( M_image_to_crop = np.array(
[[1, 0, -xmin_int], [[1, 0, -xmin_int], [0, 1, -ymin_int], [0, 0, 1]], dtype=np.float32)
[0, 1, -ymin_int],
[0, 0, 1]],
dtype=np.float32)
M = np.dot(M_image_to_crop, M_mask_to_image) M = np.dot(M_image_to_crop, M_mask_to_image)
# Compensate the half pixel offset that OpenCV has in the # Compensate the half pixel offset that OpenCV has in the
# warpPerspective implementation: the top-left pixel is sampled # warpPerspective implementation: the top-left pixel is sampled
# at (0,0), but we want it to be at (0.5, 0.5). # at (0,0), but we want it to be at (0.5, 0.5).
M = np.dot( M = np.dot(
np.dot( np.dot(
np.array([[1, 0, -0.5], np.array([[1, 0, -0.5], [0, 1, -0.5], [0, 0, 1]], np.float32), M),
[0, 1, -0.5], np.array([[1, 0, 0.5], [0, 1, 0.5], [0, 0, 1]], np.float32))
[0, 0, 1]], np.float32),
M),
np.array([[1, 0, 0.5],
[0, 1, 0.5],
[0, 0, 1]], np.float32))
# pylint: enable=invalid-name # pylint: enable=invalid-name
cropped_mask = cv2.warpPerspective( cropped_mask = cv2.warpPerspective(
mask.astype(np.float32), M, mask.astype(np.float32), M, (xmax_int - xmin_int, ymax_int - ymin_int))
(xmax_int - xmin_int, ymax_int - ymin_int))
cropped_mask = np.array(cropped_mask > 0.5, dtype=np.uint8) cropped_mask = np.array(cropped_mask > 0.5, dtype=np.uint8)
img_mask = np.zeros((image_height, image_width)) img_mask = np.zeros((image_height, image_width))
...@@ -181,12 +162,10 @@ def paste_instance_masks_v2(masks, ...@@ -181,12 +162,10 @@ def paste_instance_masks_v2(masks,
x1 = max(min(xmax_int, image_width), 0) x1 = max(min(xmax_int, image_width), 0)
y0 = max(min(ymin_int, image_height), 0) y0 = max(min(ymin_int, image_height), 0)
y1 = max(min(ymax_int, image_height), 0) y1 = max(min(ymax_int, image_height), 0)
img_mask[y0:y1, x0:x1] = cropped_mask[ img_mask[y0:y1, x0:x1] = cropped_mask[(y0 - ymin_int):(y1 - ymin_int),
(y0 - ymin_int):(y1 - ymin_int), (x0 - xmin_int):(x1 - xmin_int)]
(x0 - xmin_int):(x1 - xmin_int)]
segms.append(img_mask) segms.append(img_mask)
segms = np.array(segms) segms = np.array(segms)
return segms return segms
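The half-pixel compensation being reflowed above composes three 3x3 transforms. A sketch with an identity stand-in for `M` (in the function, `M` is `M_image_to_crop @ M_mask_to_image`): pre- and post-multiplying by -0.5/+0.5 pixel shifts makes `cv2.warpPerspective` sample pixel centers at (0.5, 0.5) rather than (0, 0).

```python
import numpy as np

shift_neg = np.array([[1, 0, -0.5], [0, 1, -0.5], [0, 0, 1]], np.float32)
shift_pos = np.array([[1, 0, 0.5], [0, 1, 0.5], [0, 0, 1]], np.float32)
M = np.eye(3, dtype=np.float32)   # stand-in for M_image_to_crop @ M_mask_to_image
M_compensated = shift_neg @ M @ shift_pos
```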
...@@ -12,7 +12,6 @@ ...@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# ============================================================================== # ==============================================================================
"""Class to subsample minibatches by balancing positives and negatives. """Class to subsample minibatches by balancing positives and negatives.
Subsamples minibatches based on a pre-specified positive fraction in range Subsamples minibatches based on a pre-specified positive fraction in range
...@@ -92,10 +91,10 @@ class BalancedPositiveNegativeSampler(minibatch_sampler.MinibatchSampler): ...@@ -92,10 +91,10 @@ class BalancedPositiveNegativeSampler(minibatch_sampler.MinibatchSampler):
Args: Args:
input_tensor: An int32 tensor of shape [N] to be sliced. input_tensor: An int32 tensor of shape [N] to be sliced.
num_start_samples: Number of examples to be sliced from the beginning num_start_samples: Number of examples to be sliced from the beginning of
of the input tensor. the input tensor.
num_end_samples: Number of examples to be sliced from the end of the num_end_samples: Number of examples to be sliced from the end of the input
input tensor. tensor.
      total_num_samples: Sum of num_start_samples and num_end_samples. This       total_num_samples: Sum of num_start_samples and num_end_samples. This
should be a scalar. should be a scalar.
...@@ -110,13 +109,16 @@ class BalancedPositiveNegativeSampler(minibatch_sampler.MinibatchSampler): ...@@ -110,13 +109,16 @@ class BalancedPositiveNegativeSampler(minibatch_sampler.MinibatchSampler):
tf.range(input_length), input_length - num_end_samples) tf.range(input_length), input_length - num_end_samples)
selected_positions = tf.logical_or(start_positions, end_positions) selected_positions = tf.logical_or(start_positions, end_positions)
selected_positions = tf.cast(selected_positions, tf.float32) selected_positions = tf.cast(selected_positions, tf.float32)
indexed_positions = tf.multiply(tf.cumsum(selected_positions), indexed_positions = tf.multiply(
selected_positions) tf.cumsum(selected_positions), selected_positions)
one_hot_selector = tf.one_hot(tf.cast(indexed_positions, tf.int32) - 1, one_hot_selector = tf.one_hot(
total_num_samples, tf.cast(indexed_positions, tf.int32) - 1,
dtype=tf.float32) total_num_samples,
return tf.cast(tf.tensordot(tf.cast(input_tensor, tf.float32), dtype=tf.float32)
one_hot_selector, axes=[0, 0]), tf.int32) return tf.cast(
tf.tensordot(
tf.cast(input_tensor, tf.float32), one_hot_selector, axes=[0, 0]),
tf.int32)
def _static_subsample(self, indicator, batch_size, labels): def _static_subsample(self, indicator, batch_size, labels):
"""Returns subsampled minibatch. """Returns subsampled minibatch.
...@@ -182,13 +184,12 @@ class BalancedPositiveNegativeSampler(minibatch_sampler.MinibatchSampler): ...@@ -182,13 +184,12 @@ class BalancedPositiveNegativeSampler(minibatch_sampler.MinibatchSampler):
sorted_signed_indicator_idx = tf.nn.top_k( sorted_signed_indicator_idx = tf.nn.top_k(
signed_indicator_idx, input_length, sorted=True).values signed_indicator_idx, input_length, sorted=True).values
[num_positive_samples, [num_positive_samples, num_negative_samples
num_negative_samples] = self._get_num_pos_neg_samples( ] = self._get_num_pos_neg_samples(sorted_signed_indicator_idx, batch_size)
sorted_signed_indicator_idx, batch_size)
sampled_idx = self._get_values_from_start_and_end( sampled_idx = self._get_values_from_start_and_end(
sorted_signed_indicator_idx, num_positive_samples, sorted_signed_indicator_idx, num_positive_samples, num_negative_samples,
num_negative_samples, batch_size) batch_size)
# Shift the indices to start from 0 and remove any samples that are set as # Shift the indices to start from 0 and remove any samples that are set as
# False. # False.
...@@ -203,11 +204,13 @@ class BalancedPositiveNegativeSampler(minibatch_sampler.MinibatchSampler): ...@@ -203,11 +204,13 @@ class BalancedPositiveNegativeSampler(minibatch_sampler.MinibatchSampler):
tf.bool) tf.bool)
# project back the order based on stored permutations # project back the order based on stored permutations
reprojections = tf.one_hot(permutation, depth=input_length, reprojections = tf.one_hot(
dtype=tf.float32) permutation, depth=input_length, dtype=tf.float32)
return tf.cast(tf.tensordot( return tf.cast(
tf.cast(sampled_idx_indicator, tf.float32), tf.tensordot(
reprojections, axes=[0, 0]), tf.bool) tf.cast(sampled_idx_indicator, tf.float32),
reprojections,
axes=[0, 0]), tf.bool)
def subsample(self, indicator, batch_size, labels, scope=None): def subsample(self, indicator, batch_size, labels, scope=None):
"""Returns subsampled minibatch. """Returns subsampled minibatch.
...@@ -218,7 +221,7 @@ class BalancedPositiveNegativeSampler(minibatch_sampler.MinibatchSampler): ...@@ -218,7 +221,7 @@ class BalancedPositiveNegativeSampler(minibatch_sampler.MinibatchSampler):
randomly selects negative samples so that the positive sample fraction randomly selects negative samples so that the positive sample fraction
        matches self._positive_fraction. It cannot be None if is_static is True.         matches self._positive_fraction. It cannot be None if is_static is True.
labels: boolean tensor of shape [N] denoting positive(=True) and negative labels: boolean tensor of shape [N] denoting positive(=True) and negative
(=False) examples. (=False) examples.
scope: name scope. scope: name scope.
Returns: Returns:
......
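The cumsum/one-hot gather used by `_get_values_from_start_and_end` is compact but non-obvious. A small illustrative sketch: positions marked `1.` get a 1-based index via cumsum, the one-hot selector turns those indices into a gather matrix, and `tensordot` pulls the marked values out with static shapes throughout.

```python
import tensorflow as tf

input_tensor = tf.constant([7, 8, 9, 10], tf.int32)
selected = tf.constant([1., 0., 1., 0.])              # keep positions 0 and 2
indexed = tf.cumsum(selected) * selected              # [1., 0., 2., 0.]
one_hot = tf.one_hot(tf.cast(indexed, tf.int32) - 1, depth=2)   # [4, 2]
gathered = tf.cast(
    tf.tensordot(tf.cast(input_tensor, tf.float32), one_hot, axes=[0, 0]),
    tf.int32)                                          # -> [7, 9]
```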
...@@ -12,7 +12,6 @@ ...@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# ============================================================================== # ==============================================================================
"""Base box coder. """Base box coder.
Box coders convert between coordinate frames, namely image-centric Box coders convert between coordinate frames, namely image-centric
...@@ -32,7 +31,6 @@ from abc import abstractproperty ...@@ -32,7 +31,6 @@ from abc import abstractproperty
import tensorflow as tf import tensorflow as tf
# Box coder types. # Box coder types.
FASTER_RCNN = 'faster_rcnn' FASTER_RCNN = 'faster_rcnn'
KEYPOINT = 'keypoint' KEYPOINT = 'keypoint'
...@@ -138,11 +136,11 @@ def batch_decode(encoded_boxes, box_coder, anchors): ...@@ -138,11 +136,11 @@ def batch_decode(encoded_boxes, box_coder, anchors):
""" """
encoded_boxes.get_shape().assert_has_rank(3) encoded_boxes.get_shape().assert_has_rank(3)
if encoded_boxes.get_shape()[1].value != anchors.num_boxes_static(): if encoded_boxes.get_shape()[1].value != anchors.num_boxes_static():
raise ValueError('The number of anchors inferred from encoded_boxes' raise ValueError(
' and anchors are inconsistent: shape[1] of encoded_boxes' 'The number of anchors inferred from encoded_boxes'
' %s should be equal to the number of anchors: %s.' % ' and anchors are inconsistent: shape[1] of encoded_boxes'
(encoded_boxes.get_shape()[1].value, ' %s should be equal to the number of anchors: %s.' %
anchors.num_boxes_static())) (encoded_boxes.get_shape()[1].value, anchors.num_boxes_static()))
decoded_boxes = tf.stack([ decoded_boxes = tf.stack([
box_coder.decode(boxes, anchors).get() box_coder.decode(boxes, anchors).get()
......
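For reference, a sketch of the per-batch decode loop above, with `decode_fn` standing in for `box_coder.decode(...).get()`: each `[N, 4]` batch slice is decoded against the shared anchors and the results re-stacked.

```python
import tensorflow as tf

def batch_decode_sketch(encoded_boxes, anchors, decode_fn):
    # Unstack along the batch axis, decode each slice, and re-stack.
    return tf.stack(
        [decode_fn(boxes, anchors) for boxes in tf.unstack(encoded_boxes)])
```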
...@@ -12,7 +12,6 @@ ...@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# ============================================================================== # ==============================================================================
"""Bounding Box List definition. """Bounding Box List definition.
BoxList represents a list of bounding boxes as tensorflow BoxList represents a list of bounding boxes as tensorflow
...@@ -126,8 +125,8 @@ class BoxList(object): ...@@ -126,8 +125,8 @@ class BoxList(object):
it returns the box coordinates. it returns the box coordinates.
Args: Args:
field: this optional string parameter can be used to specify field: this optional string parameter can be used to specify a related
a related field to be accessed. field to be accessed.
Returns: Returns:
a tensor representing the box collection or an associated field. a tensor representing the box collection or an associated field.
...@@ -192,8 +191,8 @@ class BoxList(object): ...@@ -192,8 +191,8 @@ class BoxList(object):
"""Retrieves specified fields as a dictionary of tensors. """Retrieves specified fields as a dictionary of tensors.
Args: Args:
fields: (optional) list of fields to return in the dictionary. fields: (optional) list of fields to return in the dictionary. If None
If None (default), all fields are returned. (default), all fields are returned.
Returns: Returns:
tensor_dict: A dictionary of tensors specified by fields. tensor_dict: A dictionary of tensors specified by fields.
......
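A hypothetical usage of the `BoxList` interface documented above; this assumes `BoxList` and its field accessors are in scope:

```python
import tensorflow as tf

boxes = tf.constant([[0., 0., 1., 1.], [.2, .2, .8, .8]])
boxlist = BoxList(boxes)                       # wrap an [N, 4] box tensor
boxlist.add_field('scores', tf.constant([0.9, 0.4]))
coords = boxlist.get()                         # the [N, 4] box tensor
scores = boxlist.get_field('scores')           # an associated per-box field
```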
...@@ -12,7 +12,6 @@ ...@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# ============================================================================== # ==============================================================================
"""Bounding Box List operations. """Bounding Box List operations.
Example box operations that are supported: Example box operations that are supported:
...@@ -152,8 +151,8 @@ def prune_outside_window(boxlist, window, scope=None): ...@@ -152,8 +151,8 @@ def prune_outside_window(boxlist, window, scope=None):
Args: Args:
boxlist: a BoxList holding M_in boxes. boxlist: a BoxList holding M_in boxes.
window: a float tensor of shape [4] representing [ymin, xmin, ymax, xmax] window: a float tensor of shape [4] representing [ymin, xmin, ymax, xmax] of
of the window the window
scope: name scope. scope: name scope.
Returns: Returns:
...@@ -166,8 +165,10 @@ def prune_outside_window(boxlist, window, scope=None): ...@@ -166,8 +165,10 @@ def prune_outside_window(boxlist, window, scope=None):
value=boxlist.get(), num_or_size_splits=4, axis=1) value=boxlist.get(), num_or_size_splits=4, axis=1)
win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window) win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window)
coordinate_violations = tf.concat([ coordinate_violations = tf.concat([
tf.less(y_min, win_y_min), tf.less(x_min, win_x_min), tf.less(y_min, win_y_min),
tf.greater(y_max, win_y_max), tf.greater(x_max, win_x_max) tf.less(x_min, win_x_min),
tf.greater(y_max, win_y_max),
tf.greater(x_max, win_x_max)
], 1) ], 1)
valid_indices = tf.reshape( valid_indices = tf.reshape(
tf.where(tf.logical_not(tf.reduce_any(coordinate_violations, 1))), [-1]) tf.where(tf.logical_not(tf.reduce_any(coordinate_violations, 1))), [-1])
...@@ -183,8 +184,8 @@ def prune_completely_outside_window(boxlist, window, scope=None): ...@@ -183,8 +184,8 @@ def prune_completely_outside_window(boxlist, window, scope=None):
Args: Args:
boxlist: a BoxList holding M_in boxes. boxlist: a BoxList holding M_in boxes.
window: a float tensor of shape [4] representing [ymin, xmin, ymax, xmax] window: a float tensor of shape [4] representing [ymin, xmin, ymax, xmax] of
of the window the window
scope: name scope. scope: name scope.
Returns: Returns:
...@@ -198,8 +199,10 @@ def prune_completely_outside_window(boxlist, window, scope=None): ...@@ -198,8 +199,10 @@ def prune_completely_outside_window(boxlist, window, scope=None):
value=boxlist.get(), num_or_size_splits=4, axis=1) value=boxlist.get(), num_or_size_splits=4, axis=1)
win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window) win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window)
coordinate_violations = tf.concat([ coordinate_violations = tf.concat([
tf.greater_equal(y_min, win_y_max), tf.greater_equal(x_min, win_x_max), tf.greater_equal(y_min, win_y_max),
tf.less_equal(y_max, win_y_min), tf.less_equal(x_max, win_x_min) tf.greater_equal(x_min, win_x_max),
tf.less_equal(y_max, win_y_min),
tf.less_equal(x_max, win_x_min)
], 1) ], 1)
valid_indices = tf.reshape( valid_indices = tf.reshape(
tf.where(tf.logical_not(tf.reduce_any(coordinate_violations, 1))), [-1]) tf.where(tf.logical_not(tf.reduce_any(coordinate_violations, 1))), [-1])
...@@ -274,8 +277,8 @@ def iou(boxlist1, boxlist2, scope=None): ...@@ -274,8 +277,8 @@ def iou(boxlist1, boxlist2, scope=None):
unions = ( unions = (
tf.expand_dims(areas1, 1) + tf.expand_dims(areas2, 0) - intersections) tf.expand_dims(areas1, 1) + tf.expand_dims(areas2, 0) - intersections)
return tf.where( return tf.where(
tf.equal(intersections, 0.0), tf.equal(intersections, 0.0), tf.zeros_like(intersections),
tf.zeros_like(intersections), tf.truediv(intersections, unions)) tf.truediv(intersections, unions))
def matched_iou(boxlist1, boxlist2, scope=None): def matched_iou(boxlist1, boxlist2, scope=None):
...@@ -295,8 +298,8 @@ def matched_iou(boxlist1, boxlist2, scope=None): ...@@ -295,8 +298,8 @@ def matched_iou(boxlist1, boxlist2, scope=None):
areas2 = area(boxlist2) areas2 = area(boxlist2)
unions = areas1 + areas2 - intersections unions = areas1 + areas2 - intersections
return tf.where( return tf.where(
tf.equal(intersections, 0.0), tf.equal(intersections, 0.0), tf.zeros_like(intersections),
tf.zeros_like(intersections), tf.truediv(intersections, unions)) tf.truediv(intersections, unions))
def ioa(boxlist1, boxlist2, scope=None): def ioa(boxlist1, boxlist2, scope=None):
...@@ -320,8 +323,10 @@ def ioa(boxlist1, boxlist2, scope=None): ...@@ -320,8 +323,10 @@ def ioa(boxlist1, boxlist2, scope=None):
return tf.truediv(intersections, areas) return tf.truediv(intersections, areas)
def prune_non_overlapping_boxes( def prune_non_overlapping_boxes(boxlist1,
boxlist1, boxlist2, min_overlap=0.0, scope=None): boxlist2,
min_overlap=0.0,
scope=None):
"""Prunes the boxes in boxlist1 that overlap less than thresh with boxlist2. """Prunes the boxes in boxlist1 that overlap less than thresh with boxlist2.
  For each box in boxlist1, we want its IOA to be more than min_overlap with   For each box in boxlist1, we want its IOA to be more than min_overlap with
...@@ -331,7 +336,7 @@ def prune_non_overlapping_boxes( ...@@ -331,7 +336,7 @@ def prune_non_overlapping_boxes(
boxlist1: BoxList holding N boxes. boxlist1: BoxList holding N boxes.
boxlist2: BoxList holding M boxes. boxlist2: BoxList holding M boxes.
min_overlap: Minimum required overlap between boxes, to count them as min_overlap: Minimum required overlap between boxes, to count them as
overlapping. overlapping.
scope: name scope. scope: name scope.
Returns: Returns:
...@@ -361,8 +366,8 @@ def prune_small_boxes(boxlist, min_side, scope=None): ...@@ -361,8 +366,8 @@ def prune_small_boxes(boxlist, min_side, scope=None):
""" """
with tf.name_scope(scope, 'PruneSmallBoxes'): with tf.name_scope(scope, 'PruneSmallBoxes'):
height, width = height_width(boxlist) height, width = height_width(boxlist)
is_valid = tf.logical_and(tf.greater_equal(width, min_side), is_valid = tf.logical_and(
tf.greater_equal(height, min_side)) tf.greater_equal(width, min_side), tf.greater_equal(height, min_side))
return gather(boxlist, tf.reshape(tf.where(is_valid), [-1])) return gather(boxlist, tf.reshape(tf.where(is_valid), [-1]))
...@@ -389,9 +394,10 @@ def change_coordinate_frame(boxlist, window, scope=None): ...@@ -389,9 +394,10 @@ def change_coordinate_frame(boxlist, window, scope=None):
with tf.name_scope(scope, 'ChangeCoordinateFrame'): with tf.name_scope(scope, 'ChangeCoordinateFrame'):
win_height = window[2] - window[0] win_height = window[2] - window[0]
win_width = window[3] - window[1] win_width = window[3] - window[1]
boxlist_new = scale(box_list.BoxList( boxlist_new = scale(
boxlist.get() - [window[0], window[1], window[0], window[1]]), box_list.BoxList(boxlist.get() -
1.0 / win_height, 1.0 / win_width) [window[0], window[1], window[0], window[1]]),
1.0 / win_height, 1.0 / win_width)
boxlist_new = _copy_extra_fields(boxlist_new, boxlist) boxlist_new = _copy_extra_fields(boxlist_new, boxlist)
return boxlist_new return boxlist_new
...@@ -420,13 +426,17 @@ def sq_dist(boxlist1, boxlist2, scope=None): ...@@ -420,13 +426,17 @@ def sq_dist(boxlist1, boxlist2, scope=None):
with tf.name_scope(scope, 'SqDist'): with tf.name_scope(scope, 'SqDist'):
sqnorm1 = tf.reduce_sum(tf.square(boxlist1.get()), 1, keep_dims=True) sqnorm1 = tf.reduce_sum(tf.square(boxlist1.get()), 1, keep_dims=True)
sqnorm2 = tf.reduce_sum(tf.square(boxlist2.get()), 1, keep_dims=True) sqnorm2 = tf.reduce_sum(tf.square(boxlist2.get()), 1, keep_dims=True)
innerprod = tf.matmul(boxlist1.get(), boxlist2.get(), innerprod = tf.matmul(
transpose_a=False, transpose_b=True) boxlist1.get(), boxlist2.get(), transpose_a=False, transpose_b=True)
return sqnorm1 + tf.transpose(sqnorm2) - 2.0 * innerprod return sqnorm1 + tf.transpose(sqnorm2) - 2.0 * innerprod
def boolean_mask(boxlist, indicator, fields=None, scope=None, def boolean_mask(boxlist,
use_static_shapes=False, indicator_sum=None): indicator,
fields=None,
scope=None,
use_static_shapes=False,
indicator_sum=None):
"""Select boxes from BoxList according to indicator and return new BoxList. """Select boxes from BoxList according to indicator and return new BoxList.
`boolean_mask` returns the subset of boxes that are marked as "True" by the `boolean_mask` returns the subset of boxes that are marked as "True" by the
...@@ -463,8 +473,7 @@ def boolean_mask(boxlist, indicator, fields=None, scope=None, ...@@ -463,8 +473,7 @@ def boolean_mask(boxlist, indicator, fields=None, scope=None,
        raise ValueError('`indicator_sum` must be of type int')         raise ValueError('`indicator_sum` must be of type int')
selected_positions = tf.cast(indicator, dtype=tf.float32) selected_positions = tf.cast(indicator, dtype=tf.float32)
indexed_positions = tf.cast( indexed_positions = tf.cast(
tf.multiply( tf.multiply(tf.cumsum(selected_positions), selected_positions),
tf.cumsum(selected_positions), selected_positions),
dtype=tf.int32) dtype=tf.int32)
one_hot_selector = tf.one_hot( one_hot_selector = tf.one_hot(
indexed_positions - 1, indicator_sum, dtype=tf.float32) indexed_positions - 1, indicator_sum, dtype=tf.float32)
...@@ -541,9 +550,8 @@ def concatenate(boxlists, fields=None, scope=None): ...@@ -541,9 +550,8 @@ def concatenate(boxlists, fields=None, scope=None):
Args: Args:
boxlists: list of BoxList objects boxlists: list of BoxList objects
fields: optional list of fields to also concatenate. By default, all fields: optional list of fields to also concatenate. By default, all fields
fields from the first BoxList in the list are included in the from the first BoxList in the list are included in the concatenation.
concatenation.
scope: name scope. scope: name scope.
Returns: Returns:
...@@ -637,8 +645,8 @@ def visualize_boxes_in_image(image, boxlist, normalized=False, scope=None): ...@@ -637,8 +645,8 @@ def visualize_boxes_in_image(image, boxlist, normalized=False, scope=None):
Args: Args:
image: an image tensor with shape [height, width, 3] image: an image tensor with shape [height, width, 3]
boxlist: a BoxList boxlist: a BoxList
normalized: (boolean) specify whether corners are to be interpreted normalized: (boolean) specify whether corners are to be interpreted as
as absolute coordinates in image space or normalized with respect to the absolute coordinates in image space or normalized with respect to the
image size. image size.
scope: name scope. scope: name scope.
...@@ -648,8 +656,7 @@ def visualize_boxes_in_image(image, boxlist, normalized=False, scope=None): ...@@ -648,8 +656,7 @@ def visualize_boxes_in_image(image, boxlist, normalized=False, scope=None):
with tf.name_scope(scope, 'VisualizeBoxesInImage'): with tf.name_scope(scope, 'VisualizeBoxesInImage'):
if not normalized: if not normalized:
height, width, _ = tf.unstack(tf.shape(image)) height, width, _ = tf.unstack(tf.shape(image))
boxlist = scale(boxlist, boxlist = scale(boxlist, 1.0 / tf.cast(height, tf.float32),
1.0 / tf.cast(height, tf.float32),
1.0 / tf.cast(width, tf.float32)) 1.0 / tf.cast(width, tf.float32))
corners = tf.expand_dims(boxlist.get(), 0) corners = tf.expand_dims(boxlist.get(), 0)
image = tf.expand_dims(image, 0) image = tf.expand_dims(image, 0)
...@@ -714,9 +721,8 @@ def filter_greater_than(boxlist, thresh, scope=None): ...@@ -714,9 +721,8 @@ def filter_greater_than(boxlist, thresh, scope=None):
if len(scores.shape.as_list()) == 2 and scores.shape.as_list()[1] != 1: if len(scores.shape.as_list()) == 2 and scores.shape.as_list()[1] != 1:
raise ValueError('Scores should have rank 1 or have shape ' raise ValueError('Scores should have rank 1 or have shape '
'consistent with [None, 1]') 'consistent with [None, 1]')
high_score_indices = tf.cast(tf.reshape( high_score_indices = tf.cast(
tf.where(tf.greater(scores, thresh)), tf.reshape(tf.where(tf.greater(scores, thresh)), [-1]), tf.int32)
[-1]), tf.int32)
return gather(boxlist, high_score_indices) return gather(boxlist, high_score_indices)
...@@ -748,8 +754,10 @@ def non_max_suppression(boxlist, thresh, max_output_size, scope=None): ...@@ -748,8 +754,10 @@ def non_max_suppression(boxlist, thresh, max_output_size, scope=None):
if not boxlist.has_field('scores'): if not boxlist.has_field('scores'):
raise ValueError('input boxlist must have \'scores\' field') raise ValueError('input boxlist must have \'scores\' field')
selected_indices = tf.image.non_max_suppression( selected_indices = tf.image.non_max_suppression(
boxlist.get(), boxlist.get_field('scores'), boxlist.get(),
max_output_size, iou_threshold=thresh) boxlist.get_field('scores'),
max_output_size,
iou_threshold=thresh)
return gather(boxlist, selected_indices) return gather(boxlist, selected_indices)
...@@ -768,8 +776,11 @@ def _copy_extra_fields(boxlist_to_copy_to, boxlist_to_copy_from): ...@@ -768,8 +776,11 @@ def _copy_extra_fields(boxlist_to_copy_to, boxlist_to_copy_from):
return boxlist_to_copy_to return boxlist_to_copy_to
def to_normalized_coordinates(boxlist, height, width, def to_normalized_coordinates(boxlist,
check_range=True, scope=None): height,
width,
check_range=True,
scope=None):
"""Converts absolute box coordinates to normalized coordinates in [0, 1]. """Converts absolute box coordinates to normalized coordinates in [0, 1].
Usually one uses the dynamic shape of the image or conv-layer tensor: Usually one uses the dynamic shape of the image or conv-layer tensor:
...@@ -797,8 +808,9 @@ def to_normalized_coordinates(boxlist, height, width, ...@@ -797,8 +808,9 @@ def to_normalized_coordinates(boxlist, height, width,
if check_range: if check_range:
max_val = tf.reduce_max(boxlist.get()) max_val = tf.reduce_max(boxlist.get())
max_assert = tf.Assert(tf.greater(max_val, 1.01), max_assert = tf.Assert(
['max value is lower than 1.01: ', max_val]) tf.greater(max_val, 1.01),
['max value is lower than 1.01: ', max_val])
with tf.control_dependencies([max_assert]): with tf.control_dependencies([max_assert]):
width = tf.identity(width) width = tf.identity(width)
...@@ -822,8 +834,8 @@ def to_absolute_coordinates(boxlist, ...@@ -822,8 +834,8 @@ def to_absolute_coordinates(boxlist,
height: Maximum value for height of absolute box coordinates. height: Maximum value for height of absolute box coordinates.
width: Maximum value for width of absolute box coordinates. width: Maximum value for width of absolute box coordinates.
check_range: If True, checks if the coordinates are normalized or not. check_range: If True, checks if the coordinates are normalized or not.
maximum_normalized_coordinate: Maximum coordinate value to be considered maximum_normalized_coordinate: Maximum coordinate value to be considered as
as normalized, default to 1.1. normalized, default to 1.1.
scope: name scope. scope: name scope.
Returns: Returns:
...@@ -838,9 +850,10 @@ def to_absolute_coordinates(boxlist, ...@@ -838,9 +850,10 @@ def to_absolute_coordinates(boxlist,
if check_range: if check_range:
box_maximum = tf.reduce_max(boxlist.get()) box_maximum = tf.reduce_max(boxlist.get())
max_assert = tf.Assert( max_assert = tf.Assert(
tf.greater_equal(maximum_normalized_coordinate, box_maximum), tf.greater_equal(maximum_normalized_coordinate, box_maximum), [
['maximum box coordinate value is larger ' 'maximum box coordinate value is larger '
'than %f: ' % maximum_normalized_coordinate, box_maximum]) 'than %f: ' % maximum_normalized_coordinate, box_maximum
])
with tf.control_dependencies([max_assert]): with tf.control_dependencies([max_assert]):
width = tf.identity(width) width = tf.identity(width)
...@@ -924,13 +937,15 @@ def refine_boxes(pool_boxes, ...@@ -924,13 +937,15 @@ def refine_boxes(pool_boxes,
if not pool_boxes.has_field('scores'): if not pool_boxes.has_field('scores'):
raise ValueError('pool_boxes must have a \'scores\' field') raise ValueError('pool_boxes must have a \'scores\' field')
nms_boxes = non_max_suppression( nms_boxes = non_max_suppression(pool_boxes, nms_iou_thresh,
pool_boxes, nms_iou_thresh, nms_max_detections) nms_max_detections)
return box_voting(nms_boxes, pool_boxes, voting_iou_thresh) return box_voting(nms_boxes, pool_boxes, voting_iou_thresh)
def box_voting(selected_boxes, pool_boxes, iou_thresh=0.5): def box_voting(selected_boxes, pool_boxes, iou_thresh=0.5):
"""Performs box voting as described in S. Gidaris and N. Komodakis, ICCV 2015. """Performs box voting as described in S. Gidaris and N.
Komodakis, ICCV 2015.
Performs box voting as described in 'Object detection via a multi-region & Performs box voting as described in 'Object detection via a multi-region &
semantic segmentation-aware CNN model', Gidaris and Komodakis, ICCV 2015. For semantic segmentation-aware CNN model', Gidaris and Komodakis, ICCV 2015. For
...@@ -972,9 +987,10 @@ def box_voting(selected_boxes, pool_boxes, iou_thresh=0.5): ...@@ -972,9 +987,10 @@ def box_voting(selected_boxes, pool_boxes, iou_thresh=0.5):
# match to any boxes in pool_boxes. For such boxes without any matches, we # match to any boxes in pool_boxes. For such boxes without any matches, we
# should return the original boxes without voting. # should return the original boxes without voting.
match_assert = tf.Assert( match_assert = tf.Assert(
tf.reduce_all(tf.greater(num_matches, 0)), tf.reduce_all(tf.greater(num_matches, 0)), [
['Each box in selected_boxes must match with at least one box ' 'Each box in selected_boxes must match with at least one box '
'in pool_boxes.']) 'in pool_boxes.'
])
scores = tf.expand_dims(pool_boxes.get_field('scores'), 1) scores = tf.expand_dims(pool_boxes.get_field('scores'), 1)
scores_assert = tf.Assert( scores_assert = tf.Assert(
...@@ -993,9 +1009,7 @@ def box_voting(selected_boxes, pool_boxes, iou_thresh=0.5): ...@@ -993,9 +1009,7 @@ def box_voting(selected_boxes, pool_boxes, iou_thresh=0.5):
return averaged_boxes return averaged_boxes
def get_minimal_coverage_box(boxlist, def get_minimal_coverage_box(boxlist, default_box=None, scope=None):
default_box=None,
scope=None):
"""Creates a single bounding box which covers all boxes in the boxlist. """Creates a single bounding box which covers all boxes in the boxlist.
Args: Args:
...@@ -1045,9 +1059,9 @@ def sample_boxes_by_jittering(boxlist, ...@@ -1045,9 +1059,9 @@ def sample_boxes_by_jittering(boxlist,
boxlist: A boxlist containing N boxes in normalized coordinates. boxlist: A boxlist containing N boxes in normalized coordinates.
num_boxes_to_sample: A positive integer containing the number of boxes to num_boxes_to_sample: A positive integer containing the number of boxes to
sample. sample.
stddev: Standard deviation. This is used to draw random offsets for the stddev: Standard deviation. This is used to draw random offsets for the box
box corners from a normal distribution. The offset is multiplied by the corners from a normal distribution. The offset is multiplied by the box
box size so will be larger in terms of pixels for larger boxes. size so will be larger in terms of pixels for larger boxes.
scope: Name scope. scope: Name scope.
Returns: Returns:
...@@ -1056,11 +1070,10 @@ def sample_boxes_by_jittering(boxlist, ...@@ -1056,11 +1070,10 @@ def sample_boxes_by_jittering(boxlist,
""" """
with tf.name_scope(scope, 'SampleBoxesByJittering'): with tf.name_scope(scope, 'SampleBoxesByJittering'):
num_boxes = boxlist.num_boxes() num_boxes = boxlist.num_boxes()
box_indices = tf.random_uniform( box_indices = tf.random_uniform([num_boxes_to_sample],
[num_boxes_to_sample], minval=0,
minval=0, maxval=num_boxes,
maxval=num_boxes, dtype=tf.int32)
dtype=tf.int32)
sampled_boxes = tf.gather(boxlist.get(), box_indices) sampled_boxes = tf.gather(boxlist.get(), box_indices)
sampled_boxes_height = sampled_boxes[:, 2] - sampled_boxes[:, 0] sampled_boxes_height = sampled_boxes[:, 2] - sampled_boxes[:, 0]
sampled_boxes_width = sampled_boxes[:, 3] - sampled_boxes[:, 1] sampled_boxes_width = sampled_boxes[:, 3] - sampled_boxes[:, 1]
......
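Several of the reflowed ops above guard against dividing by a zero intersection. A self-contained pairwise-IoU sketch using the same guard (illustrative, not the module's exact implementation):

```python
import tensorflow as tf

def pairwise_iou(boxes1, boxes2):
    # boxes*: [N, 4] / [M, 4] in [ymin, xmin, ymax, xmax] order.
    ymin1, xmin1, ymax1, xmax1 = tf.split(boxes1, 4, axis=1)
    ymin2, xmin2, ymax2, xmax2 = tf.split(boxes2, 4, axis=1)
    # Pairwise intersection extents, floored at zero.
    inter_h = tf.maximum(
        0., tf.minimum(ymax1, tf.transpose(ymax2)) -
        tf.maximum(ymin1, tf.transpose(ymin2)))
    inter_w = tf.maximum(
        0., tf.minimum(xmax1, tf.transpose(xmax2)) -
        tf.maximum(xmin1, tf.transpose(xmin2)))
    inter = inter_h * inter_w
    area1 = (ymax1 - ymin1) * (xmax1 - xmin1)
    area2 = (ymax2 - ymin2) * (xmax2 - xmin2)
    union = area1 + tf.transpose(area2) - inter
    # Same guard as `iou` above: return 0 where the intersection is empty.
    return tf.where(tf.equal(inter, 0.), tf.zeros_like(inter),
                    tf.truediv(inter, union))
```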
...@@ -12,7 +12,6 @@ ...@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# ============================================================================== # ==============================================================================
"""Faster RCNN box coder. """Faster RCNN box coder.
Faster RCNN box coder follows the coding schema described below: Faster RCNN box coder follows the coding schema described below:
...@@ -43,9 +42,9 @@ class FasterRcnnBoxCoder(box_coder.BoxCoder): ...@@ -43,9 +42,9 @@ class FasterRcnnBoxCoder(box_coder.BoxCoder):
"""Constructor for FasterRcnnBoxCoder. """Constructor for FasterRcnnBoxCoder.
Args: Args:
scale_factors: List of 4 positive scalars to scale ty, tx, th and tw. scale_factors: List of 4 positive scalars to scale ty, tx, th and tw. If
If set to None, does not perform scaling. For Faster RCNN, set to None, does not perform scaling. For Faster RCNN, the open-source
the open-source implementation recommends using [10.0, 10.0, 5.0, 5.0]. implementation recommends using [10.0, 10.0, 5.0, 5.0].
""" """
if scale_factors: if scale_factors:
assert len(scale_factors) == 4 assert len(scale_factors) == 4
......
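The coding schema referenced by the docstring is the standard Faster R-CNN one. A NumPy sketch of the encode direction under that assumption, using the recommended scale factors; box layout is `[ymin, xmin, ymax, xmax]`:

```python
import numpy as np

def encode(box, anchor, scales=(10., 10., 5., 5.)):
    # Anchor center and size.
    ya, xa = (anchor[0] + anchor[2]) / 2, (anchor[1] + anchor[3]) / 2
    ha, wa = anchor[2] - anchor[0], anchor[3] - anchor[1]
    # Ground-truth center and size.
    y, x = (box[0] + box[2]) / 2, (box[1] + box[3]) / 2
    h, w = box[2] - box[0], box[3] - box[1]
    # Standard Faster R-CNN targets: normalized offsets and log size ratios.
    ty, tx = (y - ya) / ha, (x - xa) / wa
    th, tw = np.log(h / ha), np.log(w / wa)
    return np.array([ty, tx, th, tw]) * np.array(scales)
```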
...@@ -12,7 +12,6 @@ ...@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# ============================================================================== # ==============================================================================
"""Matcher interface and Match class. """Matcher interface and Match class.
This module defines the Matcher interface and the Match object. The job of the This module defines the Matcher interface and the Match object. The job of the
...@@ -49,9 +48,9 @@ class Match(object): ...@@ -49,9 +48,9 @@ class Match(object):
Args: Args:
match_results: Integer tensor of shape [N] with (1) match_results[i]>=0, match_results: Integer tensor of shape [N] with (1) match_results[i]>=0,
meaning that column i is matched with row match_results[i]. meaning that column i is matched with row match_results[i]. (2)
(2) match_results[i]=-1, meaning that column i is not matched. match_results[i]=-1, meaning that column i is not matched. (3)
(3) match_results[i]=-2, meaning that column i is ignored. match_results[i]=-2, meaning that column i is ignored.
Raises: Raises:
ValueError: if match_results does not have rank 1 or is not an ValueError: if match_results does not have rank 1 or is not an
...@@ -168,8 +167,7 @@ class Match(object): ...@@ -168,8 +167,7 @@ class Match(object):
def _reshape_and_cast(self, t): def _reshape_and_cast(self, t):
return tf.cast(tf.reshape(t, [-1]), tf.int32) return tf.cast(tf.reshape(t, [-1]), tf.int32)
def gather_based_on_match(self, input_tensor, unmatched_value, def gather_based_on_match(self, input_tensor, unmatched_value, ignored_value):
ignored_value):
"""Gathers elements from `input_tensor` based on match results. """Gathers elements from `input_tensor` based on match results.
For columns that are matched to a row, gathered_tensor[col] is set to For columns that are matched to a row, gathered_tensor[col] is set to
...@@ -190,16 +188,15 @@ class Match(object): ...@@ -190,16 +188,15 @@ class Match(object):
The shape of the gathered tensor is [match_results.shape[0]] + The shape of the gathered tensor is [match_results.shape[0]] +
input_tensor.shape[1:]. input_tensor.shape[1:].
""" """
input_tensor = tf.concat([tf.stack([ignored_value, unmatched_value]), input_tensor = tf.concat(
input_tensor], axis=0) [tf.stack([ignored_value, unmatched_value]), input_tensor], axis=0)
gather_indices = tf.maximum(self.match_results + 2, 0) gather_indices = tf.maximum(self.match_results + 2, 0)
gathered_tensor = tf.gather(input_tensor, gather_indices) gathered_tensor = tf.gather(input_tensor, gather_indices)
return gathered_tensor return gathered_tensor
class Matcher(object): class Matcher(object):
"""Abstract base class for matcher. """Abstract base class for matcher."""
"""
__metaclass__ = ABCMeta __metaclass__ = ABCMeta
def match(self, similarity_matrix, scope=None, **params): def match(self, similarity_matrix, scope=None, **params):
...@@ -212,8 +209,8 @@ class Matcher(object): ...@@ -212,8 +209,8 @@ class Matcher(object):
similarity_matrix: Float tensor of shape [N, M] with pairwise similarity similarity_matrix: Float tensor of shape [N, M] with pairwise similarity
where higher value means more similar. where higher value means more similar.
scope: Op scope name. Defaults to 'Match' if None. scope: Op scope name. Defaults to 'Match' if None.
**params: Additional keyword arguments for specific implementations of **params: Additional keyword arguments for specific implementations of the
the Matcher. Matcher.
Returns: Returns:
A Match object with the results of matching. A Match object with the results of matching.
...@@ -230,8 +227,8 @@ class Matcher(object): ...@@ -230,8 +227,8 @@ class Matcher(object):
Args: Args:
similarity_matrix: Float tensor of shape [N, M] with pairwise similarity similarity_matrix: Float tensor of shape [N, M] with pairwise similarity
where higher value means more similar. where higher value means more similar.
**params: Additional keyword arguments for specific implementations of **params: Additional keyword arguments for specific implementations of the
the Matcher. Matcher.
Returns: Returns:
match_results: Integer tensor of shape [M]: match_results[i]>=0 means match_results: Integer tensor of shape [M]: match_results[i]>=0 means
......
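The `+2` index shift in `gather_based_on_match` maps the `-2`/`-1` sentinels onto the two prepended values. An illustrative sketch:

```python
import tensorflow as tf

match_results = tf.constant([-2, -1, 0, 1])   # ignored, unmatched, rows 0 and 1
input_tensor = tf.constant([10., 20.])
ignored_value, unmatched_value = 0., -1.

# Prepend the sentinels, then shift: -2 -> index 0, -1 -> index 1, k -> k + 2.
padded = tf.concat([tf.stack([ignored_value, unmatched_value]), input_tensor],
                   axis=0)                    # [0., -1., 10., 20.]
gathered = tf.gather(padded, tf.maximum(match_results + 2, 0))
# -> [0., -1., 10., 20.]
```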
...@@ -12,7 +12,6 @@ ...@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# ============================================================================== # ==============================================================================
"""Base minibatch sampler module. """Base minibatch sampler module.
The job of the minibatch_sampler is to subsample a minibatch based on some The job of the minibatch_sampler is to subsample a minibatch based on some
...@@ -53,8 +52,8 @@ class MinibatchSampler(object): ...@@ -53,8 +52,8 @@ class MinibatchSampler(object):
Args: Args:
indicator: boolean tensor of shape [N] whose True entries can be sampled. indicator: boolean tensor of shape [N] whose True entries can be sampled.
batch_size: desired batch size. batch_size: desired batch size.
**params: additional keyword arguments for specific implementations of **params: additional keyword arguments for specific implementations of the
the MinibatchSampler. MinibatchSampler.
Returns: Returns:
sample_indicator: boolean tensor of shape [N] whose True entries have been sample_indicator: boolean tensor of shape [N] whose True entries have been
...@@ -72,8 +71,8 @@ class MinibatchSampler(object): ...@@ -72,8 +71,8 @@ class MinibatchSampler(object):
is returned. is returned.
Args: Args:
indicator: a 1-dimensional boolean tensor indicating which elements indicator: a 1-dimensional boolean tensor indicating which elements are
are allowed to be sampled and which are not. allowed to be sampled and which are not.
num_samples: int32 scalar tensor num_samples: int32 scalar tensor
Returns: Returns:
......
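A standalone sketch of indicator subsampling as described above (not the class's exact implementation): randomly keep at most `num_samples` of the True entries and return a boolean mask of the same shape.

```python
import tensorflow as tf

def subsample_indicator(indicator, num_samples):
    indices = tf.reshape(tf.where(indicator), [-1])      # positions of True
    indices = tf.random.shuffle(indices)[:num_samples]   # random subset
    # Scatter ones back into a dense vector and cast to a boolean mask.
    hits = tf.scatter_nd(
        tf.expand_dims(indices, -1),
        tf.ones_like(indices, dtype=tf.int32),
        shape=tf.shape(indicator, out_type=tf.int64))
    return tf.cast(hits, tf.bool)
```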
...@@ -12,7 +12,6 @@ ...@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# ============================================================================== # ==============================================================================
"""A module for helper tensorflow ops. """A module for helper tensorflow ops.
This is originally implemented in TensorFlow Object Detection API. This is originally implemented in TensorFlow Object Detection API.
...@@ -37,7 +36,7 @@ def indices_to_dense_vector(indices, ...@@ -37,7 +36,7 @@ def indices_to_dense_vector(indices,
Args: Args:
indices: 1d Tensor with integer indices which are to be set to indices: 1d Tensor with integer indices which are to be set to
      indices_value.       indices_value.
size: scalar with size (integer) of output Tensor. size: scalar with size (integer) of output Tensor.
indices_value: values of elements specified by indices in the output vector indices_value: values of elements specified by indices in the output vector
default_value: values of other elements in the output vector. default_value: values of other elements in the output vector.
...@@ -61,10 +60,10 @@ def matmul_gather_on_zeroth_axis(params, indices, scope=None): ...@@ -61,10 +60,10 @@ def matmul_gather_on_zeroth_axis(params, indices, scope=None):
TODO(rathodv, jonathanhuang): enable sparse matmul option. TODO(rathodv, jonathanhuang): enable sparse matmul option.
Args: Args:
params: A float32 Tensor. The tensor from which to gather values. params: A float32 Tensor. The tensor from which to gather values. Must be at
Must be at least rank 1. least rank 1.
indices: A Tensor. Must be one of the following types: int32, int64. indices: A Tensor. Must be one of the following types: int32, int64. Must be
Must be in range [0, params.shape[0]) in range [0, params.shape[0])
scope: A name for the operation (optional). scope: A name for the operation (optional).
Returns: Returns:
......
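A sketch of `indices_to_dense_vector`'s documented behavior using `tf.tensor_scatter_nd_update` (an assumption for illustration; the module's own implementation may differ): fill a length-`size` vector with `default_value`, then set the given indices to `indices_value`.

```python
import tensorflow as tf

indices = tf.constant([1, 3], tf.int32)
size, indices_value, default_value = 5, 1., 0.

dense = tf.ones([size]) * default_value
dense = tf.tensor_scatter_nd_update(
    dense, tf.expand_dims(indices, -1),
    tf.fill(tf.shape(indices), indices_value))
# -> [0., 1., 0., 1., 0.]
```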
...@@ -50,10 +50,9 @@ def _flip_boxes_left_right(boxes): ...@@ -50,10 +50,9 @@ def _flip_boxes_left_right(boxes):
"""Left-right flip the boxes. """Left-right flip the boxes.
Args: Args:
boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4]. boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4]. Boxes
Boxes are in normalized form meaning their coordinates vary are in normalized form meaning their coordinates vary between [0, 1]. Each
between [0, 1]. row is in the form of [ymin, xmin, ymax, xmax].
Each row is in the form of [ymin, xmin, ymax, xmax].
Returns: Returns:
Flipped boxes. Flipped boxes.
...@@ -69,8 +68,8 @@ def _flip_masks_left_right(masks): ...@@ -69,8 +68,8 @@ def _flip_masks_left_right(masks):
"""Left-right flip masks. """Left-right flip masks.
Args: Args:
masks: rank 3 float32 tensor with shape masks: rank 3 float32 tensor with shape [num_instances, height, width]
[num_instances, height, width] representing instance masks. representing instance masks.
Returns: Returns:
flipped masks: rank 3 float32 tensor with shape flipped masks: rank 3 float32 tensor with shape
...@@ -79,7 +78,9 @@ def _flip_masks_left_right(masks): ...@@ -79,7 +78,9 @@ def _flip_masks_left_right(masks):
return masks[:, :, ::-1] return masks[:, :, ::-1]
def keypoint_flip_horizontal(keypoints, flip_point, flip_permutation, def keypoint_flip_horizontal(keypoints,
flip_point,
flip_permutation,
scope=None): scope=None):
"""Flips the keypoints horizontally around the flip_point. """Flips the keypoints horizontally around the flip_point.
...@@ -91,9 +92,9 @@ def keypoint_flip_horizontal(keypoints, flip_point, flip_permutation, ...@@ -91,9 +92,9 @@ def keypoint_flip_horizontal(keypoints, flip_point, flip_permutation,
flip_point: (float) scalar tensor representing the x coordinate to flip the flip_point: (float) scalar tensor representing the x coordinate to flip the
keypoints around. keypoints around.
flip_permutation: rank 1 int32 tensor containing the keypoint flip flip_permutation: rank 1 int32 tensor containing the keypoint flip
permutation. This specifies the mapping from original keypoint indices permutation. This specifies the mapping from original keypoint indices to
to the flipped keypoint indices. This is used primarily for keypoints the flipped keypoint indices. This is used primarily for keypoints that
that are not reflection invariant. E.g. Suppose there are 3 keypoints are not reflection invariant. E.g. Suppose there are 3 keypoints
representing ['head', 'right_eye', 'left_eye'], then a logical choice for representing ['head', 'right_eye', 'left_eye'], then a logical choice for
flip_permutation might be [0, 2, 1] since we want to swap the 'left_eye' flip_permutation might be [0, 2, 1] since we want to swap the 'left_eye'
and 'right_eye' after a horizontal flip. and 'right_eye' after a horizontal flip.
...@@ -190,19 +191,16 @@ def random_horizontal_flip(image, ...@@ -190,19 +191,16 @@ def random_horizontal_flip(image,
Args: Args:
image: rank 3 float32 tensor with shape [height, width, channels]. image: rank 3 float32 tensor with shape [height, width, channels].
boxes: (optional) rank 2 float32 tensor with shape [N, 4] boxes: (optional) rank 2 float32 tensor with shape [N, 4] containing the
containing the bounding boxes. bounding boxes. Boxes are in normalized form meaning their coordinates
Boxes are in normalized form meaning their coordinates vary vary between [0, 1]. Each row is in the form of [ymin, xmin, ymax, xmax].
between [0, 1]. masks: (optional) rank 3 float32 tensor with shape [num_instances, height,
Each row is in the form of [ymin, xmin, ymax, xmax]. width] containing instance masks. The masks are of the same height, width
masks: (optional) rank 3 float32 tensor with shape as the input `image`.
[num_instances, height, width] containing instance masks. The masks keypoints: (optional) rank 3 float32 tensor with shape [num_instances,
are of the same height, width as the input `image`. num_keypoints, 2]. The keypoints are in y-x normalized coordinates.
keypoints: (optional) rank 3 float32 tensor with shape
[num_instances, num_keypoints, 2]. The keypoints are in y-x
normalized coordinates.
keypoint_flip_permutation: rank 1 int32 tensor containing the keypoint flip keypoint_flip_permutation: rank 1 int32 tensor containing the keypoint flip
permutation. permutation.
seed: random seed seed: random seed
Returns: Returns:
...@@ -369,20 +367,19 @@ def resize_to_range(image, ...@@ -369,20 +367,19 @@ def resize_to_range(image,
Args: Args:
image: A 3D tensor of shape [height, width, channels] image: A 3D tensor of shape [height, width, channels]
masks: (optional) rank 3 float32 tensor with shape masks: (optional) rank 3 float32 tensor with shape [num_instances, height,
[num_instances, height, width] containing instance masks. width] containing instance masks.
min_dimension: (optional) (scalar) desired size of the smaller image min_dimension: (optional) (scalar) desired size of the smaller image
dimension. dimension.
max_dimension: (optional) (scalar) maximum allowed size max_dimension: (optional) (scalar) maximum allowed size of the larger image
of the larger image dimension. dimension.
method: (optional) interpolation method used in resizing. Defaults to method: (optional) interpolation method used in resizing. Defaults to
BILINEAR. BILINEAR.
align_corners: bool. If true, exactly align all 4 corners of the input align_corners: bool. If true, exactly align all 4 corners of the input and
and output. Defaults to False. output. Defaults to False.
pad_to_max_dimension: Whether to resize the image and pad it with zeros pad_to_max_dimension: Whether to resize the image and pad it with zeros so
so the resulting image is of the spatial size the resulting image is of the spatial size [max_dimension, max_dimension].
[max_dimension, max_dimension]. If masks are included they are padded If masks are included they are padded similarly.
similarly.
Returns: Returns:
Note that the position of the resized_image_shape changes based on whether Note that the position of the resized_image_shape changes based on whether
...@@ -410,8 +407,8 @@ def resize_to_range(image, ...@@ -410,8 +407,8 @@ def resize_to_range(image,
new_image = tf.image.resize(image, new_size[:-1], method=method) new_image = tf.image.resize(image, new_size[:-1], method=method)
if pad_to_max_dimension: if pad_to_max_dimension:
new_image = tf.image.pad_to_bounding_box( new_image = tf.image.pad_to_bounding_box(new_image, 0, 0, max_dimension,
new_image, 0, 0, max_dimension, max_dimension) max_dimension)
result = [new_image] result = [new_image]
if masks is not None: if masks is not None:
...@@ -422,8 +419,8 @@ def resize_to_range(image, ...@@ -422,8 +419,8 @@ def resize_to_range(image,
method=tf.image.ResizeMethod.NEAREST_NEIGHBOR) method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
new_masks = tf.squeeze(new_masks, 3) new_masks = tf.squeeze(new_masks, 3)
if pad_to_max_dimension: if pad_to_max_dimension:
new_masks = tf.image.pad_to_bounding_box( new_masks = tf.image.pad_to_bounding_box(new_masks, 0, 0, max_dimension,
new_masks, 0, 0, max_dimension, max_dimension) max_dimension)
result.append(new_masks) result.append(new_masks)
result.append(new_size) result.append(new_size)
...@@ -500,11 +497,10 @@ def scale_boxes_to_pixel_coordinates(image, boxes, keypoints=None): ...@@ -500,11 +497,10 @@ def scale_boxes_to_pixel_coordinates(image, boxes, keypoints=None):
Args: Args:
image: A 3D float32 tensor of shape [height, width, channels]. image: A 3D float32 tensor of shape [height, width, channels].
boxes: A 2D float32 tensor of shape [num_boxes, 4] containing the bounding boxes: A 2D float32 tensor of shape [num_boxes, 4] containing the bounding
boxes in normalized coordinates. Each row is of the form boxes in normalized coordinates. Each row is of the form [ymin, xmin,
[ymin, xmin, ymax, xmax]. ymax, xmax].
keypoints: (optional) rank 3 float32 tensor with shape keypoints: (optional) rank 3 float32 tensor with shape [num_instances,
[num_instances, num_keypoints, 2]. The keypoints are in y-x normalized num_keypoints, 2]. The keypoints are in y-x normalized coordinates.
coordinates.
Returns: Returns:
image: unchanged input image. image: unchanged input image.
......
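The left-right box flip documented at the top of this file maps `xmin` to `1 - xmax` and `xmax` to `1 - xmin` for normalized boxes; a minimal sketch:

```python
import tensorflow as tf

def flip_boxes_left_right(boxes):
    # boxes: [N, 4] normalized [ymin, xmin, ymax, xmax].
    ymin, xmin, ymax, xmax = tf.split(boxes, 4, axis=1)
    return tf.concat([ymin, 1. - xmax, ymax, 1. - xmin], axis=1)
```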
...@@ -12,7 +12,6 @@ ...@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# ============================================================================== # ==============================================================================
"""Region Similarity Calculators for BoxLists. """Region Similarity Calculators for BoxLists.
Region Similarity Calculators compare a pairwise measure of similarity Region Similarity Calculators compare a pairwise measure of similarity
......