# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Lint as: python3
"""Detection input and model functions for serving/inference."""

from typing import Mapping, Text, Tuple
import tensorflow as tf

from official.vision.beta import configs
from official.vision.beta.modeling import factory
from official.vision.beta.ops import anchor
from official.vision.beta.ops import box_ops
from official.vision.beta.ops import preprocess_ops
from official.vision.beta.serving import export_base


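# Channel-wise ImageNet mean and stddev (computed on images in [0, 1]),
# rescaled to the [0, 255] pixel range of the raw input images.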
MEAN_RGB = (0.485 * 255, 0.456 * 255, 0.406 * 255)
STDDEV_RGB = (0.229 * 255, 0.224 * 255, 0.225 * 255)


class DetectionModule(export_base.ExportModule):
  """Detection Module."""

  def _build_model(self):
    """Builds the detection model from the task config."""

    if self._batch_size is None:
      raise ValueError('batch_size cannot be None for detection models.')
    input_specs = tf.keras.layers.InputSpec(shape=[self._batch_size] +
                                            self._input_image_size + [3])

    if isinstance(self.params.task.model, configs.maskrcnn.MaskRCNN):
      model = factory.build_maskrcnn(
          input_specs=input_specs, model_config=self.params.task.model)
    elif isinstance(self.params.task.model, configs.retinanet.RetinaNet):
      model = factory.build_retinanet(
          input_specs=input_specs, model_config=self.params.task.model)
    else:
      raise ValueError('Detection module not implemented for {} model.'.format(
          type(self.params.task.model)))

    return model

  def _build_anchor_boxes(self):
    """Builds and returns anchor boxes."""
    model_params = self.params.task.model
    input_anchor = anchor.build_anchor_generator(
        min_level=model_params.min_level,
        max_level=model_params.max_level,
        num_scales=model_params.anchor.num_scales,
        aspect_ratios=model_params.anchor.aspect_ratios,
        anchor_size=model_params.anchor.anchor_size)
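    # The generator returns a dict keyed by pyramid level as a string, one key
    # per level in [min_level, max_level], each value an anchor-box tensor of
    # shape [feature_height, feature_width, num_scales * num_aspect_ratios * 4]
    # (matching the TensorSpecs built in `preprocess` below).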
    return input_anchor(
        image_size=(self._input_image_size[0], self._input_image_size[1]))

  def _build_inputs(self, image):
    """Builds detection model inputs for serving."""
    model_params = self.params.task.model
    # Normalizes image with mean and std pixel values.
    image = preprocess_ops.normalize_image(image,
                                           offset=MEAN_RGB,
                                           scale=STDDEV_RGB)

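    # Resizes to the fixed serving size, padding to a multiple of the largest
    # feature stride (2**max_level) so every pyramid level divides evenly.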
    image, image_info = preprocess_ops.resize_and_crop_image(
        image,
        self._input_image_size,
        padded_size=preprocess_ops.compute_padded_size(
            self._input_image_size, 2**model_params.max_level),
        aug_scale_min=1.0,
        aug_scale_max=1.0)
    anchor_boxes = self._build_anchor_boxes()

    return image, anchor_boxes, image_info

  def preprocess(self, images: tf.Tensor) -> Tuple[
      tf.Tensor, Mapping[Text, tf.Tensor], tf.Tensor]:
    """Preprocess inputs to be suitable for the model.

    Args:
      images: The images tensor.

    Returns:
      images: The images tensor cast to float.
      anchor_boxes: Dict mapping anchor levels to anchor boxes.
      image_info: Tensor containing the details of the image resizing.
    """
    model_params = self.params.task.model
    with tf.device('cpu:0'):
      images = tf.cast(images, dtype=tf.float32)

      # Tensor Specs for map_fn outputs (images, anchor_boxes, and image_info).
      images_spec = tf.TensorSpec(shape=self._input_image_size + [3],
                                  dtype=tf.float32)

      num_anchors = model_params.anchor.num_scales * len(
          model_params.anchor.aspect_ratios) * 4
      anchor_shapes = []
      for level in range(model_params.min_level, model_params.max_level + 1):
        anchor_level_spec = tf.TensorSpec(
            shape=[
                self._input_image_size[0] // 2**level,
                self._input_image_size[1] // 2**level, num_anchors
            ],
            dtype=tf.float32)
        anchor_shapes.append((str(level), anchor_level_spec))

      image_info_spec = tf.TensorSpec(shape=[4, 2], dtype=tf.float32)

      images, anchor_boxes, image_info = tf.nest.map_structure(
          tf.identity,
          tf.map_fn(
              self._build_inputs,
              elems=images,
              fn_output_signature=(images_spec, dict(anchor_shapes),
                                   image_info_spec),
              parallel_iterations=32))

      return images, anchor_boxes, image_info

  def serve(self, images: tf.Tensor):
    """Cast image to float and run inference.

    Args:
      images: uint8 Tensor of shape [batch_size, None, None, 3]
    Returns:
      Tensor holding detection output logits.
    """

    # Skip image preprocessing when input_type is tflite so it is compatible
    # with TFLite quantization.
    if self._input_type != 'tflite':
      images, anchor_boxes, image_info = self.preprocess(images)
    else:
      with tf.device('cpu:0'):
        anchor_boxes = self._build_anchor_boxes()
        # image_info is a 3D tensor of shape [batch_size, 4, 2]. It is in the
        # format of [[original_height, original_width],
        # [desired_height, desired_width], [y_scale, x_scale],
        # [y_offset, x_offset]]. When input_type is tflite, input image is
        # supposed to be preprocessed already.
        image_info = tf.convert_to_tensor([[
            self._input_image_size, self._input_image_size, [1.0, 1.0], [0, 0]
        ]],
                                          dtype=tf.float32)
    # Row 1 of image_info holds the desired (resized, pre-padding) image size.
    input_image_shape = image_info[:, 1, :]

    # To work around a keras.Model limitation when saving a model whose layers
    # take multiple inputs, we invoke `model.call` directly to trigger the
    # forward pass. Note that this bypasses some of the Keras magic that
    # happens in `__call__`.
    detections = self.model.call(
        images=images,
        image_shape=input_image_shape,
        anchor_boxes=anchor_boxes,
        training=False)

    if self.params.task.model.detection_generator.apply_nms:
      # For RetinaNet model, apply export_config.
      # TODO(huizhongc): Add export_config to fasterrcnn and maskrcnn as needed.
      if isinstance(self.params.task.model, configs.retinanet.RetinaNet):
        export_config = self.params.task.export_config
        # Normalize detection box coordinates to [0, 1].
        if export_config.output_normalized_coordinates:
          detection_boxes = (
              detections['detection_boxes'] /
              tf.tile(image_info[:, 2:3, :], [1, 1, 2]))
          detections['detection_boxes'] = box_ops.normalize_boxes(
              detection_boxes, image_info[:, 0:1, :])

        # Cast num_detections and detection_classes to float. This allows the
        # model inference to work on chain (go/chain) as chain requires floating
        # point outputs.
        if export_config.cast_num_detections_to_float:
          detections['num_detections'] = tf.cast(
              detections['num_detections'], dtype=tf.float32)
        if export_config.cast_detection_classes_to_float:
          detections['detection_classes'] = tf.cast(
              detections['detection_classes'], dtype=tf.float32)

      final_outputs = {
          'detection_boxes': detections['detection_boxes'],
          'detection_scores': detections['detection_scores'],
          'detection_classes': detections['detection_classes'],
          'num_detections': detections['num_detections']
      }
    else:
      final_outputs = {
          'decoded_boxes': detections['decoded_boxes'],
          'decoded_box_scores': detections['decoded_box_scores']
      }

    if 'detection_masks' in detections:
      final_outputs['detection_masks'] = detections['detection_masks']

    final_outputs.update({'image_info': image_info})
    return final_outputs
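

# A minimal usage sketch (illustrative, not part of the module): how this
# class is typically wired up for export. The experiment name and constructor
# arguments below are assumptions based on `export_base.ExportModule`; adjust
# them to your own config.
#
#   from official.core import exp_factory
#   from official.vision.beta.serving import detection
#
#   params = exp_factory.get_exp_config('retinanet_resnetfpn_coco')
#   module = detection.DetectionModule(
#       params=params,
#       batch_size=1,
#       input_image_size=[640, 640],
#       input_type='image_tensor')
#   outputs = module.serve(tf.zeros([1, 640, 640, 3], dtype=tf.uint8))
#   # `outputs` maps keys such as 'detection_boxes', 'detection_scores',
#   # 'detection_classes', 'num_detections', and 'image_info' to tensors.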