Commit fd67924c authored by Xianzhi Du, committed by A. Unique TensorFlower

Internal change

PiperOrigin-RevId: 373004044
parent 6f50b49a
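This commit threads optional per-box attributes through the RetinaNet input parser, the groundtruth padding utility, the anchor labeler, and the model's output dict, and renames the raw output key 'att_outputs' to 'attribute_outputs'. A minimal sketch of the attribute format the parser assumes below; `data` is a hand-built stand-in for a decoded example, and the 'depth' attribute name is taken from the tests in this diff:

import tensorflow as tf

data = {
    'groundtruth_classes': tf.constant([1, 3]),
    'groundtruth_boxes': tf.constant([[0.1, 0.1, 0.5, 0.5],
                                      [0.2, 0.2, 0.9, 0.9]]),
    # One (name, ground_truth) pair per attribute; each value is shaped
    # [N, attribute_size], matching the comment added in the parser.
    'groundtruth_attributes': {'depth': tf.constant([[3.0], [7.5]])},
}
attributes = data.get('groundtruth_attributes', {})  # {} when absent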
@@ -119,7 +119,12 @@ class Parser(parser.Parser):
"""Parses data for training and evaluation."""
classes = data['groundtruth_classes']
boxes = data['groundtruth_boxes']
# If not empty, `attributes` is a dict of (name, ground_truth) pairs.
# `ground_truth` of attributes is assumed to have shape [N, attribute_size].
# TODO(xianzhi): support parsing attributes weights.
attributes = data.get('groundtruth_attributes', {})
is_crowds = data['groundtruth_is_crowd']
# Skips annotations with `is_crowd` = True.
if self._skip_crowd_during_training:
num_groundtruths = tf.shape(input=classes)[0]
@@ -130,6 +135,8 @@ class Parser(parser.Parser):
false_fn=lambda: tf.cast(tf.range(num_groundtruths), tf.int64))
classes = tf.gather(classes, indices)
boxes = tf.gather(boxes, indices)
for k, v in attributes.items():
attributes[k] = tf.gather(v, indices)
# Gets original image and its size.
image = data['image']
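The hunks above apply the same index-based row filtering to every attribute tensor, so attributes stay row-aligned with `boxes` and `classes` after crowd annotations are skipped. A toy, self-contained illustration of that pattern (all values made up):

import tensorflow as tf

classes = tf.constant([1, 2, 3])
boxes = tf.constant([[0.0, 0.0, 0.5, 0.5],
                     [0.0, 0.0, 0.6, 0.6],
                     [0.0, 0.0, 0.7, 0.7]])
attributes = {'depth': tf.constant([[1.0], [2.0], [3.0]])}
indices = tf.constant([0, 2], dtype=tf.int64)  # e.g. rows with is_crowd=False

classes = tf.gather(classes, indices)
boxes = tf.gather(boxes, indices)
for k, v in attributes.items():
  attributes[k] = tf.gather(v, indices)  # same rows kept in every tensor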
@@ -165,6 +172,8 @@ class Parser(parser.Parser):
indices = box_ops.get_non_empty_box_indices(boxes)
boxes = tf.gather(boxes, indices)
classes = tf.gather(classes, indices)
for k, v in attributes.items():
attributes[k] = tf.gather(v, indices)
# Assigns anchors.
input_anchor = anchor.build_anchor_generator(
@@ -176,9 +185,9 @@ class Parser(parser.Parser):
anchor_boxes = input_anchor(image_size=(image_height, image_width))
anchor_labeler = anchor.AnchorLabeler(self._match_threshold,
self._unmatched_threshold)
-(cls_targets, box_targets, cls_weights,
+(cls_targets, box_targets, att_targets, cls_weights,
 box_weights) = anchor_labeler.label_anchors(
-    anchor_boxes, boxes, tf.expand_dims(classes, axis=1))
+    anchor_boxes, boxes, tf.expand_dims(classes, axis=1), attributes)
# Casts input image to desired data type.
image = tf.cast(image, dtype=self._dtype)
@@ -192,6 +201,8 @@ class Parser(parser.Parser):
'box_weights': box_weights,
'image_info': image_info,
}
if att_targets:
labels['attribute_targets'] = att_targets
return image, labels
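With this change the training labels optionally carry an 'attribute_targets' entry: a dict mapping each attribute name to a per-level dict of target tensors. A hedged sketch of how a consumer might walk that structure; the helper name is mine, not part of the commit:

def iter_attribute_targets(labels):
  """Yields (name, level, target) for each attribute target, if any."""
  for name, per_level in labels.get('attribute_targets', {}).items():
    for level, target in per_level.items():
      # target: [height_l, width_l, num_anchors_per_location * attribute_size]
      yield name, level, target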
def _parse_eval_data(self, data):
@@ -199,6 +210,10 @@ class Parser(parser.Parser):
groundtruths = {}
classes = data['groundtruth_classes']
boxes = data['groundtruth_boxes']
# If not empty, `attributes` is a dict of (name, ground_truth) pairs.
# `ground_truth` of attributes is assumed to have shape [N, attribute_size].
# TODO(xianzhi): support parsing attributes weights.
attributes = data.get('groundtruth_attributes', {})
# Gets original image and its size.
image = data['image']
@@ -229,6 +244,8 @@ class Parser(parser.Parser):
indices = box_ops.get_non_empty_box_indices(boxes)
boxes = tf.gather(boxes, indices)
classes = tf.gather(classes, indices)
for k, v in attributes.items():
attributes[k] = tf.gather(v, indices)
# Assigns anchors.
input_anchor = anchor.build_anchor_generator(
@@ -240,9 +257,9 @@ class Parser(parser.Parser):
anchor_boxes = input_anchor(image_size=(image_height, image_width))
anchor_labeler = anchor.AnchorLabeler(self._match_threshold,
self._unmatched_threshold)
-(cls_targets, box_targets, cls_weights,
+(cls_targets, box_targets, att_targets, cls_weights,
 box_weights) = anchor_labeler.label_anchors(
-    anchor_boxes, boxes, tf.expand_dims(classes, axis=1))
+    anchor_boxes, boxes, tf.expand_dims(classes, axis=1), attributes)
# Casts input image to desired data type.
image = tf.cast(image, dtype=self._dtype)
@@ -260,6 +277,8 @@ class Parser(parser.Parser):
'areas': data['groundtruth_area'],
'is_crowds': tf.cast(data['groundtruth_is_crowd'], tf.int32),
}
if 'groundtruth_attributes' in data:
groundtruths['attributes'] = data['groundtruth_attributes']
groundtruths['source_id'] = utils.process_source_id(
groundtruths['source_id'])
groundtruths = utils.pad_groundtruths_to_fixed_size(
@@ -275,4 +294,6 @@ class Parser(parser.Parser):
'image_info': image_info,
'groundtruths': groundtruths,
}
if att_targets:
labels['attribute_targets'] = att_targets
return image, labels
@@ -62,4 +62,8 @@ def pad_groundtruths_to_fixed_size(groundtruths: Dict[str, tf.Tensor],
groundtruths['areas'], size, -1)
groundtruths['classes'] = preprocess_ops.clip_or_pad_to_fixed_size(
groundtruths['classes'], size, -1)
if 'attributes' in groundtruths:
  for k, v in groundtruths['attributes'].items():
    groundtruths['attributes'][k] = preprocess_ops.clip_or_pad_to_fixed_size(
        v, size, -1)
return groundtruths
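pad_groundtruths_to_fixed_size now also clips or pads each attribute tensor along its first dimension, with -1 as the fill value. A toy rank-2 stand-in for what preprocess_ops.clip_or_pad_to_fixed_size does here, for intuition only (the real op is more general):

import tensorflow as tf

def clip_or_pad_rows(t, size, pad_value=-1):
  """Toy stand-in: force a [N, d] tensor to exactly `size` rows."""
  t = t[:size]  # clip any extra rows
  paddings = [[0, size - tf.shape(t)[0]], [0, 0]]
  return tf.pad(t, paddings, constant_values=pad_value)

depth = tf.constant([[1.0], [2.0]])
print(clip_or_pad_rows(depth, 4))  # [[1.], [2.], [-1.], [-1.]]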
@@ -40,21 +40,29 @@ class UtilsTest(tf.test.TestCase, parameterized.TestCase):
utils.process_source_id(source_id=source_id))
@parameterized.parameters(
-    ([[10, 20, 30, 40]], [[100]], [[0]], 10),
-    ([[0.1, 0.2, 0.5, 0.6]], [[0.5]], [[1]], 2),
+    ([[10, 20, 30, 40]], [[100]], [[0]], 10, None),
+    ([[0.1, 0.2, 0.5, 0.6]], [[0.5]], [[1]], 2, [[1.0, 2.0]]),
)
-def test_pad_groundtruths_to_fixed_size(self, boxes, area, classes, size):
+def test_pad_groundtruths_to_fixed_size(self, boxes, area, classes, size,
+                                        attributes):
groundtruths = {}
groundtruths['boxes'] = tf.constant(boxes)
groundtruths['is_crowds'] = tf.constant([[0]])
groundtruths['areas'] = tf.constant(area)
groundtruths['classes'] = tf.constant(classes)
if attributes:
groundtruths['attributes'] = {'depth': tf.constant(attributes)}
actual_result = utils.pad_groundtruths_to_fixed_size(
groundtruths=groundtruths, size=size)
# Check that the first dimension is padded to the expected size.
for key in actual_result:
  if key == 'attributes':
    for _, v in actual_result[key].items():
      pad_shape = v.shape[0]
      self.assertEqual(size, pad_shape)
  else:
    pad_shape = actual_result[key].shape[0]
    self.assertEqual(size, pad_shape)
@@ -126,7 +126,7 @@ class RetinaNetModel(tf.keras.Model):
'box_outputs': raw_boxes,
}
if raw_attributes:
-outputs.update({'att_outputs': raw_attributes})
+outputs.update({'attribute_outputs': raw_attributes})
return outputs
else:
# Generate anchor boxes for this batch if not provided.
@@ -166,7 +166,7 @@ class RetinaNetModel(tf.keras.Model):
if raw_attributes:
outputs.update({
-'att_outputs': raw_attributes,
+'attribute_outputs': raw_attributes,
'detection_attributes': final_results['detection_attributes'],
})
return outputs
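Raw attribute predictions are now exported under 'attribute_outputs' (previously 'att_outputs'), matching the test update below. A small sketch of the expected nesting and per-level shape, using made-up sizes (batch of 2, 9 anchors per location, a 1-d attribute):

import tensorflow as tf

batch, level, image_size = 2, 3, (256, 256)
anchors_per_location, attribute_size = 9, 1
raw_depth = tf.zeros([batch,
                      image_size[0] // 2**level,
                      image_size[1] // 2**level,
                      anchors_per_location * attribute_size])
outputs = {'attribute_outputs': {'depth': {str(level): raw_depth}}}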
@@ -223,7 +223,7 @@ class RetinaNetTest(parameterized.TestCase, tf.test.TestCase):
4 * num_anchors_per_location
], box_outputs[str(level)].numpy().shape)
if has_att_heads:
-att_outputs = model_outputs['att_outputs']
+att_outputs = model_outputs['attribute_outputs']
for att in att_outputs.values():
self.assertAllEqual([
2, image_size[0] // 2**level, image_size[1] // 2**level,
@@ -140,7 +140,11 @@ class AnchorLabeler(object):
force_match_for_each_col=True)
self.box_coder = faster_rcnn_box_coder.FasterRcnnBoxCoder()
-def label_anchors(self, anchor_boxes, gt_boxes, gt_labels):
+def label_anchors(self,
+                  anchor_boxes,
+                  gt_boxes,
+                  gt_labels,
+                  gt_attributes=None):
"""Labels anchors with ground truth inputs.
Args:
@@ -150,6 +154,9 @@ class AnchorLabeler(object):
For each row, it stores [y0, x0, y1, x1] for four corners of a box.
gt_labels: An integer tensor with shape [N, 1] representing groundtruth
classes.
gt_attributes: If not None, a dict of (name, gt_attribute) pairs.
`gt_attribute` is a float tensor with shape [N, attribute_size]
representing groundtruth attributes.
Returns:
cls_targets_dict: An ordered dictionary with keys
[min_level, min_level+1, ..., max_level]. The values are tensors with
@@ -160,6 +167,12 @@ class AnchorLabeler(object):
shape [height_l, width_l, num_anchors_per_location * 4]. The height_l
and width_l represent the dimensions of the bounding box regression output
at the l-th level.
attribute_targets_dict: A dict with (name, attribute_targets) pairs. Each
`attribute_targets` is an ordered dictionary with keys
[min_level, min_level+1, ..., max_level]. The values are tensors with
shape [height_l, width_l, num_anchors_per_location * attribute_size].
The height_l and width_l represent the dimensions of the attribute
prediction output at the l-th level.
cls_weights: A flattened Tensor with shape [batch_size, num_anchors] that
serves as masking / sample weight for classification loss. Its value
is 1.0 for positive and negative matched anchors, and 0.0 for ignored
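To make the documented shapes concrete, a quick check using the formula from the docstring with the anchor test's configuration (512x512 input, levels 3..6, num_scales=2 with aspect_ratios=[1.0] giving 2 anchors per location, and a 1-d 'depth' attribute):

min_level, max_level = 3, 6
anchors_per_location = 2   # num_scales * len(aspect_ratios)
attribute_size = 1
for level in range(min_level, max_level + 1):
  height_l = width_l = 512 // 2**level
  print(level, (height_l, width_l, anchors_per_location * attribute_size))
  # 3 -> (64, 64, 2), 4 -> (32, 32, 2), 5 -> (16, 16, 2), 6 -> (8, 8, 2)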
@@ -175,11 +188,19 @@ class AnchorLabeler(object):
flattened_anchor_boxes = tf.concat(flattened_anchor_boxes, axis=0)
similarity_matrix = self.similarity_calc(flattened_anchor_boxes, gt_boxes)
match_indices, match_indicators = self.matcher(similarity_matrix)
mask = tf.less_equal(match_indicators, 0)
cls_mask = tf.expand_dims(mask, -1)
cls_targets = self.target_gather(gt_labels, match_indices, cls_mask, -1)
box_mask = tf.tile(cls_mask, [1, 4])
box_targets = self.target_gather(gt_boxes, match_indices, box_mask)
att_targets = {}
if gt_attributes:
  for k, v in gt_attributes.items():
    att_size = v.get_shape().as_list()[-1]
    att_mask = tf.tile(cls_mask, [1, att_size])
    att_targets[k] = self.target_gather(v, match_indices, att_mask, -1)
weights = tf.squeeze(tf.ones_like(gt_labels, dtype=tf.float32), -1)
box_weights = self.target_gather(weights, match_indices, mask)
ignore_mask = tf.equal(match_indicators, -2)
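The new attribute branch mirrors the class-target logic: tile the negative-match mask out to the attribute width, then gather the matched groundtruth rows with -1 written at unmatched anchors. A simplified, self-contained re-creation with toy values; tf.where stands in for the mask handling inside self.target_gather:

import tensorflow as tf

match_indicators = tf.constant([1, -1, 2])  # <= 0 means no positive match
match_indices = tf.constant([0, 0, 1])      # gt row each anchor points at
gt_depth = tf.constant([[3.0], [7.5]])      # one attribute, shape [N, 1]

mask = tf.less_equal(match_indicators, 0)
cls_mask = tf.expand_dims(mask, -1)
att_size = gt_depth.get_shape().as_list()[-1]
att_mask = tf.tile(cls_mask, [1, att_size])
gathered = tf.gather(gt_depth, match_indices)
att_target = tf.where(att_mask, -tf.ones_like(gathered), gathered)
print(att_target)  # [[3.], [-1.], [7.5]]: unmatched anchor filled with -1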
@@ -191,8 +212,11 @@ class AnchorLabeler(object):
# Unpacks labels into multi-level representations.
cls_targets_dict = unpack_targets(cls_targets, anchor_boxes)
box_targets_dict = unpack_targets(box_targets, anchor_boxes)
attribute_targets_dict = {}
for k, v in att_targets.items():
attribute_targets_dict[k] = unpack_targets(v, anchor_boxes)
-return cls_targets_dict, box_targets_dict, cls_weights, box_weights
+return cls_targets_dict, box_targets_dict, attribute_targets_dict, cls_weights, box_weights
class RpnAnchorLabeler(AnchorLabeler):
@@ -107,12 +107,15 @@ class AnchorTest(parameterized.TestCase, tf.test.TestCase):
self.assertEqual(expected_boxes, boxes.tolist())
@parameterized.parameters(
-    (3, 6, 2, [1.0], 2.0),
+    (3, 6, 2, [1.0], 2.0, False),
+    (3, 6, 2, [1.0], 2.0, True),
)
-def testLabelAnchors(self, min_level, max_level, num_scales,
-                     aspect_ratios, anchor_size):
+def testLabelAnchors(self, min_level, max_level, num_scales, aspect_ratios,
+                     anchor_size, has_attribute):
input_size = [512, 512]
ground_truth_class_id = 2
attribute_name = 'depth'
ground_truth_depth = 3.0
# The matched anchors are the anchors used as ground truth and the anchors
# at the next octave scale at the same location.
@@ -126,9 +129,13 @@ class AnchorTest(parameterized.TestCase, tf.test.TestCase):
# two anchors with two intermediate scales at the same location.
gt_boxes = anchor_boxes['3'][0:1, 0, 0:4]
gt_classes = tf.constant([[ground_truth_class_id]], dtype=tf.float32)
-(cls_targets, box_targets, _,
- box_weights) = anchor_labeler.label_anchors(
-    anchor_boxes, gt_boxes, gt_classes)
+gt_attributes = {
+    attribute_name: tf.constant([[ground_truth_depth]], dtype=tf.float32)
+} if has_attribute else {}
+(cls_targets, box_targets, att_targets, _,
+ box_weights) = anchor_labeler.label_anchors(anchor_boxes, gt_boxes,
+                                             gt_classes, gt_attributes)
for k, v in cls_targets.items():
cls_targets[k] = v.numpy()
@@ -142,6 +149,17 @@ class AnchorTest(parameterized.TestCase, tf.test.TestCase):
# Two anchor boxes on min_level got matched to the gt_boxes.
self.assertAllClose(tf.reduce_sum(box_weights), 2)
if has_attribute:
  self.assertIn(attribute_name, att_targets)
  for k, v in att_targets[attribute_name].items():
    att_targets[attribute_name][k] = v.numpy()
  anchor_locations = np.vstack(
      np.where(
          att_targets[attribute_name][str(min_level)] > -1)).transpose()
  self.assertAllClose(expected_anchor_locations, anchor_locations)
else:
  self.assertEmpty(att_targets)
@parameterized.parameters(
(3, 7, [.5, 1., 2.], 2, 8, (256, 256)),
(3, 8, [1.], 3, 32, (512, 512)),