# Copyright 2022 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # Lint as: python3 """Detection input and model functions for serving/inference.""" from typing import Dict, Mapping, Text import tensorflow as tf from official.projects.deepmac_maskrcnn.configs import deep_mask_head_rcnn as cfg from official.projects.deepmac_maskrcnn.modeling import maskrcnn_model from official.projects.deepmac_maskrcnn.tasks import deep_mask_head_rcnn from official.vision.ops import box_ops from official.vision.serving import detection def reverse_input_box_transformation(boxes, image_info): """Reverse the Mask R-CNN model's input boxes tranformation. Args: boxes: A [batch_size, num_boxes, 4] float tensor of boxes in normalized coordinates. image_info: a 2D `Tensor` that encodes the information of the image and the applied preprocessing. It is in the format of [[original_height, original_width], [desired_height, desired_width], [y_scale, x_scale], [y_offset, x_offset]], where [desired_height, desired_width] is the actual scaled image size, and [y_scale, x_scale] is the scaling factor, which is the ratio of scaled dimension / original dimension. Returns: boxes: Same shape as input `boxes` but in the absolute coordinate space of the preprocessed image. """ # Reversing sequence from Detection_module.serve when # output_normalized_coordinates=true scale = image_info[:, 2:3, :] scale = tf.tile(scale, [1, 1, 2]) boxes = boxes * scale height_width = image_info[:, 0:1, :] return box_ops.denormalize_boxes(boxes, height_width) class DetectionModule(detection.DetectionModule): """Detection Module.""" def _build_model(self): if self._batch_size is None: ValueError("batch_size can't be None for detection models") if self.params.task.model.detection_generator.nms_version != 'batched': ValueError('Only batched_nms is supported.') input_specs = tf.keras.layers.InputSpec(shape=[self._batch_size] + self._input_image_size + [3]) if isinstance(self.params.task.model, cfg.DeepMaskHeadRCNN): model = deep_mask_head_rcnn.build_maskrcnn( input_specs=input_specs, model_config=self.params.task.model) else: raise ValueError('Detection module not implemented for {} model.'.format( type(self.params.task.model))) return model @tf.function def inference_for_tflite_image_and_boxes( self, images: tf.Tensor, boxes: tf.Tensor) -> Mapping[str, tf.Tensor]: """A tf-function for serve_image_and_boxes. Args: images: A [batch_size, height, width, channels] float tensor. boxes: A [batch_size, num_boxes, 4] float tensor containing boxes normalized to the input image. Returns: result: A dict containing: 'detection_masks': A [batch_size, num_boxes, mask_height, mask_width] float tensor containing per-pixel mask probabilities. """ if not isinstance(self.model, maskrcnn_model.DeepMaskRCNNModel): raise ValueError( ('Can only use image and boxes input for DeepMaskRCNNModel, ' 'Found {}'.format(type(self.model)))) return self.serve_image_and_boxes(images, boxes) def serve_image_and_boxes(self, images: tf.Tensor, boxes: tf.Tensor): """Function used to export a model that consumes and image and boxes. The model predicts the class-agnostic masks at the given box locations. Args: images: A [batch_size, height, width, channels] float tensor. boxes: A [batch_size, num_boxes, 4] float tensor containing boxes normalized to the input image. Returns: result: A dict containing: 'detection_masks': A [batch_size, num_boxes, mask_height, mask_width] float tensor containing per-pixel mask probabilities. """ images, _, image_info = self.preprocess(images) boxes = reverse_input_box_transformation(boxes, image_info) result = self.model.call_images_and_boxes(images, boxes) return result def get_inference_signatures(self, function_keys: Dict[Text, Text]): signatures = {} if 'image_and_boxes_tensor' in function_keys: def_name = function_keys['image_and_boxes_tensor'] image_signature = tf.TensorSpec( shape=[self._batch_size] + [None] * len(self._input_image_size) + [self._num_channels], dtype=tf.uint8) boxes_signature = tf.TensorSpec(shape=[self._batch_size, None, 4], dtype=tf.float32) tf_function = self.inference_for_tflite_image_and_boxes signatures[def_name] = tf_function.get_concrete_function( image_signature, boxes_signature) function_keys.pop('image_and_boxes_tensor', None) parent_signatures = super(DetectionModule, self).get_inference_signatures( function_keys) signatures.update(parent_signatures) return signatures