Commit 06b2d7d7 authored by A. Unique TensorFlower's avatar A. Unique TensorFlower
Browse files

Refactor Mask RCNN export module and enable image+box model for DeepMARC.

PiperOrigin-RevId: 400811710
parent c67aad59
...@@ -209,8 +209,10 @@ class DeepMaskHead(tf.keras.layers.Layer): ...@@ -209,8 +209,10 @@ class DeepMaskHead(tf.keras.layers.Layer):
roi_width * upsample_factor], representing the mask predictions. roi_width * upsample_factor], representing the mask predictions.
""" """
roi_features, roi_classes = inputs roi_features, roi_classes = inputs
features_shape = tf.shape(roi_features)
batch_size, num_rois, height, width, filters = ( batch_size, num_rois, height, width, filters = (
roi_features.get_shape().as_list()) features_shape[0], features_shape[1], features_shape[2],
features_shape[3], features_shape[4])
if batch_size is None: if batch_size is None:
batch_size = tf.shape(roi_features)[0] batch_size = tf.shape(roi_features)[0]
......
...@@ -143,6 +143,20 @@ class DeepMaskRCNNModel(maskrcnn_model.MaskRCNNModel): ...@@ -143,6 +143,20 @@ class DeepMaskRCNNModel(maskrcnn_model.MaskRCNNModel):
model_outputs.update(model_mask_outputs) model_outputs.update(model_mask_outputs)
return model_outputs return model_outputs
def call_images_and_boxes(self, images, boxes):
"""Predict masks given an image and bounding boxes."""
_, decoder_features = self._get_backbone_and_decoder_features(images)
boxes_shape = tf.shape(boxes)
batch_size, num_boxes = boxes_shape[0], boxes_shape[1]
classes = tf.zeros((batch_size, num_boxes), dtype=tf.int32)
_, mask_probs = self._features_to_mask_outputs(
decoder_features, boxes, classes)
return {
'detection_masks': mask_probs
}
def _call_mask_outputs( def _call_mask_outputs(
self, self,
model_box_outputs: Mapping[str, tf.Tensor], model_box_outputs: Mapping[str, tf.Tensor],
...@@ -187,20 +201,22 @@ class DeepMaskRCNNModel(maskrcnn_model.MaskRCNNModel): ...@@ -187,20 +201,22 @@ class DeepMaskRCNNModel(maskrcnn_model.MaskRCNNModel):
# Mask RoI align. # Mask RoI align.
if training and self._config_dict['use_gt_boxes_for_masks']: if training and self._config_dict['use_gt_boxes_for_masks']:
logging.info('Using GT mask roi features.') logging.info('Using GT mask roi features.')
mask_roi_features = self.mask_roi_aligner(features, gt_boxes) roi_aligner_boxes = gt_boxes
raw_masks = self.mask_head([mask_roi_features, gt_classes]) mask_head_classes = gt_classes
else: else:
mask_roi_features = self.mask_roi_aligner(features, rois) roi_aligner_boxes = rois
raw_masks = self.mask_head([mask_roi_features, roi_classes]) mask_head_classes = roi_classes
mask_logits, mask_probs = self._features_to_mask_outputs(
features, roi_aligner_boxes, mask_head_classes)
# Mask head.
if training: if training:
model_outputs.update({ model_outputs.update({
'mask_outputs': raw_masks, 'mask_outputs': mask_logits,
}) })
else: else:
model_outputs.update({ model_outputs.update({
'detection_masks': tf.math.sigmoid(raw_masks), 'detection_masks': mask_probs,
}) })
return model_outputs return model_outputs
...@@ -35,6 +35,61 @@ from official.vision.beta.projects.deepmac_maskrcnn.modeling import maskrcnn_mod ...@@ -35,6 +35,61 @@ from official.vision.beta.projects.deepmac_maskrcnn.modeling import maskrcnn_mod
from official.vision.beta.projects.deepmac_maskrcnn.modeling.heads import instance_heads as deep_instance_heads from official.vision.beta.projects.deepmac_maskrcnn.modeling.heads import instance_heads as deep_instance_heads
def construct_model_and_anchors(image_size, use_gt_boxes_for_masks):
num_classes = 3
min_level = 3
max_level = 4
num_scales = 3
aspect_ratios = [1.0]
anchor_boxes = anchor.Anchor(
min_level=min_level,
max_level=max_level,
num_scales=num_scales,
aspect_ratios=aspect_ratios,
anchor_size=3,
image_size=image_size).multilevel_boxes
num_anchors_per_location = len(aspect_ratios) * num_scales
input_specs = tf.keras.layers.InputSpec(shape=[None, None, None, 3])
backbone = resnet.ResNet(model_id=50, input_specs=input_specs)
decoder = fpn.FPN(
min_level=min_level,
max_level=max_level,
input_specs=backbone.output_specs)
rpn_head = dense_prediction_heads.RPNHead(
min_level=min_level,
max_level=max_level,
num_anchors_per_location=num_anchors_per_location)
detection_head = instance_heads.DetectionHead(
num_classes=num_classes)
roi_generator_obj = roi_generator.MultilevelROIGenerator()
roi_sampler_obj = roi_sampler.ROISampler()
roi_aligner_obj = roi_aligner.MultilevelROIAligner()
detection_generator_obj = detection_generator.DetectionGenerator()
mask_head = deep_instance_heads.DeepMaskHead(
num_classes=num_classes, upsample_factor=2)
mask_sampler_obj = mask_sampler.MaskSampler(
mask_target_size=28, num_sampled_masks=1)
mask_roi_aligner_obj = roi_aligner.MultilevelROIAligner(crop_size=14)
model = maskrcnn_model.DeepMaskRCNNModel(
backbone,
decoder,
rpn_head,
detection_head,
roi_generator_obj,
roi_sampler_obj,
roi_aligner_obj,
detection_generator_obj,
mask_head,
mask_sampler_obj,
mask_roi_aligner_obj,
use_gt_boxes_for_masks=use_gt_boxes_for_masks)
return model, anchor_boxes
class MaskRCNNModelTest(parameterized.TestCase, tf.test.TestCase): class MaskRCNNModelTest(parameterized.TestCase, tf.test.TestCase):
@parameterized.parameters( @parameterized.parameters(
...@@ -44,64 +99,16 @@ class MaskRCNNModelTest(parameterized.TestCase, tf.test.TestCase): ...@@ -44,64 +99,16 @@ class MaskRCNNModelTest(parameterized.TestCase, tf.test.TestCase):
(True, True,), (True, True,),
) )
def test_forward(self, use_gt_boxes_for_masks, training): def test_forward(self, use_gt_boxes_for_masks, training):
num_classes = 3
min_level = 3
max_level = 4
num_scales = 3
aspect_ratios = [1.0]
image_size = (256, 256) image_size = (256, 256)
images = np.random.rand(2, image_size[0], image_size[1], 3) images = np.random.rand(2, image_size[0], image_size[1], 3)
image_shape = np.array([[224, 100], [100, 224]]) image_shape = np.array([[224, 100], [100, 224]])
anchor_boxes = anchor.Anchor( model, anchor_boxes = construct_model_and_anchors(
min_level=min_level, image_size, use_gt_boxes_for_masks)
max_level=max_level,
num_scales=num_scales,
aspect_ratios=aspect_ratios,
anchor_size=3,
image_size=image_size).multilevel_boxes
num_anchors_per_location = len(aspect_ratios) * num_scales
input_specs = tf.keras.layers.InputSpec(shape=[None, None, None, 3])
backbone = resnet.ResNet(model_id=50, input_specs=input_specs)
decoder = fpn.FPN(
min_level=min_level,
max_level=max_level,
input_specs=backbone.output_specs)
rpn_head = dense_prediction_heads.RPNHead(
min_level=min_level,
max_level=max_level,
num_anchors_per_location=num_anchors_per_location)
detection_head = instance_heads.DetectionHead(
num_classes=num_classes)
roi_generator_obj = roi_generator.MultilevelROIGenerator()
roi_sampler_obj = roi_sampler.ROISampler()
roi_aligner_obj = roi_aligner.MultilevelROIAligner()
detection_generator_obj = detection_generator.DetectionGenerator()
mask_head = deep_instance_heads.DeepMaskHead(
num_classes=num_classes, upsample_factor=2)
mask_sampler_obj = mask_sampler.MaskSampler(
mask_target_size=28, num_sampled_masks=1)
mask_roi_aligner_obj = roi_aligner.MultilevelROIAligner(crop_size=14)
model = maskrcnn_model.DeepMaskRCNNModel(
backbone,
decoder,
rpn_head,
detection_head,
roi_generator_obj,
roi_sampler_obj,
roi_aligner_obj,
detection_generator_obj,
mask_head,
mask_sampler_obj,
mask_roi_aligner_obj,
use_gt_boxes_for_masks=use_gt_boxes_for_masks)
gt_boxes = tf.zeros((2, 16, 4), dtype=tf.float32) gt_boxes = tf.zeros((2, 16, 4), dtype=tf.float32)
gt_masks = tf.zeros((2, 16, 32, 32)) gt_masks = tf.zeros((2, 16, 32, 32))
gt_classes = tf.zeros((2, 16), dtype=tf.int32) gt_classes = tf.zeros((2, 16), dtype=tf.int32)
results = model(images, results = model(images.astype(np.uint8),
image_shape, image_shape,
anchor_boxes, anchor_boxes,
gt_boxes, gt_boxes,
...@@ -126,6 +133,22 @@ class MaskRCNNModelTest(parameterized.TestCase, tf.test.TestCase): ...@@ -126,6 +133,22 @@ class MaskRCNNModelTest(parameterized.TestCase, tf.test.TestCase):
self.assertIn('num_detections', results) self.assertIn('num_detections', results)
self.assertIn('detection_masks', results) self.assertIn('detection_masks', results)
@parameterized.parameters(
[(1, 5), (1, 10), (1, 15), (2, 5), (2, 10), (2, 15)]
)
def test_image_and_boxes(self, batch_size, num_boxes):
image_size = (640, 640)
images = np.random.rand(1, image_size[0], image_size[1], 3).astype(
np.float32)
model, _ = construct_model_and_anchors(
image_size, use_gt_boxes_for_masks=True)
boxes = np.zeros((1, num_boxes, 4), dtype=np.float32)
boxes[:, :, [2, 3]] = 1.0
boxes = tf.constant(boxes)
results = model.call_images_and_boxes(images, boxes)
self.assertIn('detection_masks', results)
if __name__ == '__main__': if __name__ == '__main__':
tf.test.main() tf.test.main()
...@@ -12,15 +12,46 @@ ...@@ -12,15 +12,46 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# Lint as: python3
"""Detection input and model functions for serving/inference.""" """Detection input and model functions for serving/inference."""
from typing import Dict, Mapping, Text
import tensorflow as tf import tensorflow as tf
from official.vision.beta.ops import box_ops
from official.vision.beta.projects.deepmac_maskrcnn.configs import deep_mask_head_rcnn as cfg from official.vision.beta.projects.deepmac_maskrcnn.configs import deep_mask_head_rcnn as cfg
from official.vision.beta.projects.deepmac_maskrcnn.modeling import maskrcnn_model
from official.vision.beta.projects.deepmac_maskrcnn.tasks import deep_mask_head_rcnn from official.vision.beta.projects.deepmac_maskrcnn.tasks import deep_mask_head_rcnn
from official.vision.beta.serving import detection from official.vision.beta.serving import detection
def reverse_input_box_transformation(boxes, image_info):
"""Reverse the Mask R-CNN model's input boxes tranformation.
Args:
boxes: A [batch_size, num_boxes, 4] float tensor of boxes in normalized
coordinates.
image_info: a 2D `Tensor` that encodes the information of the image and the
applied preprocessing. It is in the format of
[[original_height, original_width], [desired_height, desired_width],
[y_scale, x_scale], [y_offset, x_offset]], where [desired_height,
desired_width] is the actual scaled image size, and [y_scale, x_scale] is
the scaling factor, which is the ratio of
scaled dimension / original dimension.
Returns:
boxes: Same shape as input `boxes` but in the absolute coordinate space of
the preprocessed image.
"""
# Reversing sequence from Detection_module.serve when
# output_normalized_coordinates=true
scale = image_info[:, 2:3, :]
scale = tf.tile(scale, [1, 1, 2])
boxes = boxes * scale
height_width = image_info[:, 0:1, :]
return box_ops.denormalize_boxes(boxes, height_width)
class DetectionModule(detection.DetectionModule): class DetectionModule(detection.DetectionModule):
"""Detection Module.""" """Detection Module."""
...@@ -41,3 +72,68 @@ class DetectionModule(detection.DetectionModule): ...@@ -41,3 +72,68 @@ class DetectionModule(detection.DetectionModule):
type(self.params.task.model))) type(self.params.task.model)))
return model return model
@tf.function
def inference_for_tflite_image_and_boxes(
self, images: tf.Tensor, boxes: tf.Tensor) -> Mapping[str, tf.Tensor]:
"""A tf-function for serve_image_and_boxes.
Args:
images: A [batch_size, height, width, channels] float tensor.
boxes: A [batch_size, num_boxes, 4] float tensor containing boxes
normalized to the input image.
Returns:
result: A dict containing:
'detection_masks': A [batch_size, num_boxes, mask_height, mask_width]
float tensor containing per-pixel mask probabilities.
"""
if not isinstance(self.model, maskrcnn_model.DeepMaskRCNNModel):
raise ValueError(
('Can only use image and boxes input for DeepMaskRCNNModel, '
'Found {}'.format(type(self.model))))
return self.serve_image_and_boxes(images, boxes)
def serve_image_and_boxes(self, images: tf.Tensor, boxes: tf.Tensor):
"""Function used to export a model that consumes and image and boxes.
The model predicts the class-agnostic masks at the given box locations.
Args:
images: A [batch_size, height, width, channels] float tensor.
boxes: A [batch_size, num_boxes, 4] float tensor containing boxes
normalized to the input image.
Returns:
result: A dict containing:
'detection_masks': A [batch_size, num_boxes, mask_height, mask_width]
float tensor containing per-pixel mask probabilities.
"""
images, _, image_info = self.preprocess(images)
boxes = reverse_input_box_transformation(boxes, image_info)
result = self.model.call_images_and_boxes(images, boxes)
return result
def get_inference_signatures(self, function_keys: Dict[Text, Text]):
signatures = {}
if 'image_and_boxes_tensor' in function_keys:
def_name = function_keys['image_and_boxes_tensor']
image_signature = tf.TensorSpec(
shape=[self._batch_size] + [None] * len(self._input_image_size) +
[self._num_channels],
dtype=tf.uint8)
boxes_signature = tf.TensorSpec(shape=[self._batch_size, None, 4],
dtype=tf.float32)
tf_function = self.inference_for_tflite_image_and_boxes
signatures[def_name] = tf_function.get_concrete_function(
image_signature, boxes_signature)
function_keys.pop('image_and_boxes_tensor', None)
parent_signatures = super(DetectionModule, self).get_inference_signatures(
function_keys)
signatures.update(parent_signatures)
return signatures
...@@ -29,12 +29,12 @@ from official.vision.beta.projects.deepmac_maskrcnn.serving import detection ...@@ -29,12 +29,12 @@ from official.vision.beta.projects.deepmac_maskrcnn.serving import detection
class DetectionExportTest(tf.test.TestCase, parameterized.TestCase): class DetectionExportTest(tf.test.TestCase, parameterized.TestCase):
def _get_detection_module(self, experiment_name): def _get_detection_module(self, experiment_name, image_size=(640, 640)):
params = exp_factory.get_exp_config(experiment_name) params = exp_factory.get_exp_config(experiment_name)
params.task.model.backbone.resnet.model_id = 18 params.task.model.backbone.resnet.model_id = 18
params.task.model.detection_generator.use_batched_nms = True params.task.model.detection_generator.use_batched_nms = True
detection_module = detection.DetectionModule( detection_module = detection.DetectionModule(
params, batch_size=1, input_image_size=[640, 640]) params, batch_size=1, input_image_size=list(image_size))
return detection_module return detection_module
def _export_from_module(self, module, input_type, save_directory): def _export_from_module(self, module, input_type, save_directory):
...@@ -71,8 +71,9 @@ class DetectionExportTest(tf.test.TestCase, parameterized.TestCase): ...@@ -71,8 +71,9 @@ class DetectionExportTest(tf.test.TestCase, parameterized.TestCase):
('tf_example', 'deep_mask_head_rcnn_resnetfpn_coco', [640, 640]), ('tf_example', 'deep_mask_head_rcnn_resnetfpn_coco', [640, 640]),
) )
def test_export(self, input_type, experiment_name, image_size): def test_export(self, input_type, experiment_name, image_size):
self.skipTest('a')
tmp_dir = self.get_temp_dir() tmp_dir = self.get_temp_dir()
module = self._get_detection_module(experiment_name) module = self._get_detection_module(experiment_name, image_size)
self._export_from_module(module, input_type, tmp_dir) self._export_from_module(module, input_type, tmp_dir)
...@@ -108,6 +109,57 @@ class DetectionExportTest(tf.test.TestCase, parameterized.TestCase): ...@@ -108,6 +109,57 @@ class DetectionExportTest(tf.test.TestCase, parameterized.TestCase):
self.assertAllClose(outputs['num_detections'].numpy(), self.assertAllClose(outputs['num_detections'].numpy(),
expected_outputs['num_detections'].numpy()) expected_outputs['num_detections'].numpy())
@parameterized.parameters(
('deep_mask_head_rcnn_resnetfpn_coco', [640, 640], 1),
('deep_mask_head_rcnn_resnetfpn_coco', [640, 640], 5),
('deep_mask_head_rcnn_spinenet_coco', [640, 384], 3),
('deep_mask_head_rcnn_spinenet_coco', [640, 384], 9),
)
def test_export_image_and_boxes(self, experiment_name, image_size, num_boxes):
tmp_dir = self.get_temp_dir()
module = self._get_detection_module(experiment_name)
self._export_from_module(module, 'image_and_boxes_tensor', tmp_dir)
self.assertTrue(os.path.exists(os.path.join(tmp_dir, 'saved_model.pb')))
self.assertTrue(
os.path.exists(os.path.join(tmp_dir, 'variables', 'variables.index')))
self.assertTrue(
os.path.exists(
os.path.join(tmp_dir, 'variables',
'variables.data-00000-of-00001')))
imported = tf.saved_model.load(tmp_dir)
detection_fn = imported.signatures['serving_default']
images = self._get_dummy_input(
'image_tensor', batch_size=1, image_size=image_size)
processed_images, anchor_boxes, image_info = module._build_inputs(
tf.zeros(image_size + [3], dtype=tf.uint8))
image_shape = image_info[1, :]
image_shape = image_shape[tf.newaxis]
processed_images = processed_images[tf.newaxis]
image_info = image_info[tf.newaxis]
for l, l_boxes in anchor_boxes.items():
anchor_boxes[l] = tf.expand_dims(l_boxes, 0)
boxes = np.zeros((1, num_boxes, 4), dtype=np.float32)
boxes[:, :, [2, 3]] = 1.0
boxes = tf.constant(boxes)
denormalized_boxes = detection.reverse_input_box_transformation(
boxes, image_info)
expected_outputs = module.model.call_images_and_boxes(
images=processed_images, boxes=denormalized_boxes)
outputs = detection_fn(images=tf.constant(images), boxes=boxes)
self.assertAllClose(outputs['detection_masks'].numpy(),
expected_outputs['detection_masks'].numpy(),
rtol=1e-3, atol=1e-3)
if __name__ == '__main__': if __name__ == '__main__':
tf.test.main() tf.test.main()
...@@ -63,7 +63,8 @@ flags.DEFINE_string( ...@@ -63,7 +63,8 @@ flags.DEFINE_string(
' on top of `config_file` template.') ' on top of `config_file` template.')
flags.DEFINE_integer('batch_size', None, 'The batch size.') flags.DEFINE_integer('batch_size', None, 'The batch size.')
flags.DEFINE_string('input_type', 'image_tensor', flags.DEFINE_string('input_type', 'image_tensor',
'One of `image_tensor`, `image_bytes`, `tf_example`.') ('One of `image_tensor`, `image_bytes`, `tf_example` '
'or `image_and_boxes_tensor`.'))
flags.DEFINE_string( flags.DEFINE_string(
'input_image_size', '224,224', 'input_image_size', '224,224',
'The comma-separated string of two integers representing the height,width ' 'The comma-separated string of two integers representing the height,width '
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment