# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Panoptic MaskRCNN task definition."""
from typing import Any, Dict, List, Mapping, Optional, Tuple

from absl import logging
import tensorflow as tf

from official.common import dataset_fn
from official.core import task_factory
from official.projects.panoptic.configs import panoptic_maskrcnn as exp_cfg
from official.projects.panoptic.dataloaders import panoptic_maskrcnn_input
from official.projects.panoptic.modeling import factory
from official.vision.dataloaders import input_reader_factory
from official.vision.evaluation import panoptic_quality_evaluator
from official.vision.evaluation import segmentation_metrics
from official.vision.losses import segmentation_losses
from official.vision.tasks import maskrcnn


@task_factory.register_task_cls(exp_cfg.PanopticMaskRCNNTask)
class PanopticMaskRCNNTask(maskrcnn.MaskRCNNTask):
  """A single-replica view of the training procedure.

  The Panoptic Mask R-CNN task provides artifacts for training/evaluation
  procedures, including loading/iterating over datasets, initializing the
  model, calculating the loss, post-processing, and customized metrics with
  reduction.
  """

  def build_model(self) -> tf.keras.Model:
    """Build Panoptic Mask R-CNN model."""

    input_specs = tf.keras.layers.InputSpec(
        shape=[None] + self.task_config.model.input_size)

    l2_weight_decay = self.task_config.losses.l2_weight_decay
    # Divide weight decay by 2.0 to match the implementation of tf.nn.l2_loss.
    # (https://www.tensorflow.org/api_docs/python/tf/keras/regularizers/l2)
    # (https://www.tensorflow.org/api_docs/python/tf/nn/l2_loss)
    l2_regularizer = (tf.keras.regularizers.l2(
        l2_weight_decay / 2.0) if l2_weight_decay else None)

    model = factory.build_panoptic_maskrcnn(
        input_specs=input_specs,
        model_config=self.task_config.model,
        l2_regularizer=l2_regularizer)
    return model
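
  # Worked example (illustrative): tf.keras.regularizers.l2(c) adds
  # c * sum(w**2) to the loss, whereas weight decay expressed through
  # tf.nn.l2_loss(w) == sum(w**2) / 2 contributes
  # l2_weight_decay * sum(w**2) / 2. Passing c = l2_weight_decay / 2.0 above
  # makes the two conventions agree.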

  def initialize(self, model: tf.keras.Model) -> None:
    """Loading pretrained checkpoint."""

    if not self.task_config.init_checkpoint:
      return

    def _get_checkpoint_path(checkpoint_dir_or_file):
      checkpoint_path = checkpoint_dir_or_file
      if tf.io.gfile.isdir(checkpoint_dir_or_file):
        checkpoint_path = tf.train.latest_checkpoint(
            checkpoint_dir_or_file)
      return checkpoint_path

    for init_module in self.task_config.init_checkpoint_modules:
      # Restoring checkpoint.
      if init_module == 'all':
        checkpoint_path = _get_checkpoint_path(
            self.task_config.init_checkpoint)
        ckpt = tf.train.Checkpoint(**model.checkpoint_items)
        status = ckpt.read(checkpoint_path)
        status.expect_partial().assert_existing_objects_matched()

      elif init_module == 'backbone':
        checkpoint_path = _get_checkpoint_path(
            self.task_config.init_checkpoint)
        ckpt = tf.train.Checkpoint(backbone=model.backbone)
        status = ckpt.read(checkpoint_path)
        status.expect_partial().assert_existing_objects_matched()

      elif init_module == 'segmentation_backbone':
        checkpoint_path = _get_checkpoint_path(
            self.task_config.segmentation_init_checkpoint)
        ckpt = tf.train.Checkpoint(
            segmentation_backbone=model.segmentation_backbone)
        status = ckpt.read(checkpoint_path)
        status.expect_partial().assert_existing_objects_matched()

      elif init_module == 'segmentation_decoder':
        checkpoint_path = _get_checkpoint_path(
            self.task_config.segmentation_init_checkpoint)
        ckpt = tf.train.Checkpoint(
            segmentation_decoder=model.segmentation_decoder)
        status = ckpt.read(checkpoint_path)
        status.expect_partial().assert_existing_objects_matched()

      else:
        raise ValueError(
            "Only 'all', 'backbone', 'segmentation_backbone' and/or "
            "'segmentation_decoder' can be used to initialize the model, but "
            "got {}".format(init_module))
      logging.info('Finished loading pretrained checkpoint from %s for %s',
                   checkpoint_path, init_module)
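
  # Example (illustrative sketch): a config that restores the detection
  # backbone and the segmentation backbone from separate checkpoints. Field
  # names follow exp_cfg.PanopticMaskRCNNTask; the paths are placeholders.
  #
  #   task_config.init_checkpoint = '/path/to/maskrcnn_checkpoint'
  #   task_config.segmentation_init_checkpoint = '/path/to/segmentation_ckpt'
  #   task_config.init_checkpoint_modules = [
  #       'backbone', 'segmentation_backbone']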

  def build_inputs(
      self,
      params: exp_cfg.DataConfig,
      input_context: Optional[tf.distribute.InputContext] = None
  ) -> tf.data.Dataset:
    """Build input dataset."""
    decoder_cfg = params.decoder.get()
    if params.decoder.type == 'simple_decoder':
      decoder = panoptic_maskrcnn_input.TfExampleDecoder(
          regenerate_source_id=decoder_cfg.regenerate_source_id,
          mask_binarize_threshold=decoder_cfg.mask_binarize_threshold,
          include_panoptic_masks=decoder_cfg.include_panoptic_masks,
          panoptic_category_mask_key=decoder_cfg.panoptic_category_mask_key,
          panoptic_instance_mask_key=decoder_cfg.panoptic_instance_mask_key)
    else:
      raise ValueError('Unknown decoder type: {}!'.format(params.decoder.type))

    parser = panoptic_maskrcnn_input.Parser(
        output_size=self.task_config.model.input_size[:2],
        min_level=self.task_config.model.min_level,
        max_level=self.task_config.model.max_level,
        num_scales=self.task_config.model.anchor.num_scales,
        aspect_ratios=self.task_config.model.anchor.aspect_ratios,
        anchor_size=self.task_config.model.anchor.anchor_size,
        dtype=params.dtype,
        rpn_match_threshold=params.parser.rpn_match_threshold,
        rpn_unmatched_threshold=params.parser.rpn_unmatched_threshold,
        rpn_batch_size_per_im=params.parser.rpn_batch_size_per_im,
        rpn_fg_fraction=params.parser.rpn_fg_fraction,
        aug_rand_hflip=params.parser.aug_rand_hflip,
        aug_scale_min=params.parser.aug_scale_min,
        aug_scale_max=params.parser.aug_scale_max,
        skip_crowd_during_training=params.parser.skip_crowd_during_training,
        max_num_instances=params.parser.max_num_instances,
        mask_crop_size=params.parser.mask_crop_size,
        segmentation_resize_eval_groundtruth=params.parser
        .segmentation_resize_eval_groundtruth,
        segmentation_groundtruth_padded_size=params.parser
        .segmentation_groundtruth_padded_size,
        segmentation_ignore_label=params.parser.segmentation_ignore_label,
        panoptic_ignore_label=params.parser.panoptic_ignore_label,
        include_panoptic_masks=params.parser.include_panoptic_masks)

    reader = input_reader_factory.input_reader_generator(
        params,
        dataset_fn=dataset_fn.pick_dataset_fn(params.file_type),
        decoder_fn=decoder.decode,
        parser_fn=parser.parse_fn(params.is_training))
    dataset = reader.read(input_context=input_context)

    return dataset
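
  # Example (illustrative sketch): building and inspecting one training batch.
  # `task` is assumed to be a constructed PanopticMaskRCNNTask and `params`
  # its exp_cfg.DataConfig.
  #
  #   dataset = task.build_inputs(params)
  #   images, labels = next(iter(dataset))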

  def build_losses(self,
                   outputs: Mapping[str, Any],
                   labels: Mapping[str, Any],
                   aux_losses: Optional[Any] = None) -> Dict[str, tf.Tensor]:
    """Build Panoptic Mask R-CNN losses."""
    params = self.task_config.losses

    use_groundtruth_dimension = (
        params.semantic_segmentation_use_groundtruth_dimension)

    segmentation_loss_fn = segmentation_losses.SegmentationLoss(
        label_smoothing=params.semantic_segmentation_label_smoothing,
        class_weights=params.semantic_segmentation_class_weights,
        ignore_label=params.semantic_segmentation_ignore_label,
        use_groundtruth_dimension=use_groundtruth_dimension,
        top_k_percent_pixels=params.semantic_segmentation_top_k_percent_pixels)

    instance_segmentation_weight = params.instance_segmentation_weight
    semantic_segmentation_weight = params.semantic_segmentation_weight

    losses = super(PanopticMaskRCNNTask, self).build_losses(
        outputs=outputs,
        labels=labels,
        aux_losses=None)
    maskrcnn_loss = losses['model_loss']
    segmentation_loss = segmentation_loss_fn(
        outputs['segmentation_outputs'],
        labels['gt_segmentation_mask'])

    model_loss = (
        instance_segmentation_weight * maskrcnn_loss +
        semantic_segmentation_weight * segmentation_loss)

    total_loss = model_loss
    if aux_losses:
      reg_loss = tf.reduce_sum(aux_losses)
      total_loss = model_loss + reg_loss

    losses.update({
        'total_loss': total_loss,
        'maskrcnn_loss': maskrcnn_loss,
        'segmentation_loss': segmentation_loss,
        'model_loss': model_loss,
    })
    return losses
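
  # Loss composition above (illustrative): with instance weight w_i and
  # semantic weight w_s taken from the losses config,
  #
  #   model_loss = w_i * maskrcnn_loss + w_s * segmentation_loss
  #   total_loss = model_loss + sum(aux_losses)  # aux: e.g. L2 regularization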

  def build_metrics(self, training: bool = True) -> List[
      tf.keras.metrics.Metric]:
    """Build detection metrics."""
    metrics = []
    num_segmentation_classes = (
        self.task_config.model.segmentation_model.num_classes)
    if training:
      metric_names = [
          'total_loss',
          'rpn_score_loss',
          'rpn_box_loss',
          'frcnn_cls_loss',
          'frcnn_box_loss',
          'mask_loss',
          'maskrcnn_loss',
          'segmentation_loss',
          'model_loss'
      ]
      for name in metric_names:
        metrics.append(tf.keras.metrics.Mean(name, dtype=tf.float32))

      if self.task_config.segmentation_evaluation.report_train_mean_iou:
        self.segmentation_train_mean_iou = segmentation_metrics.MeanIoU(
            name='train_mean_iou',
            num_classes=num_segmentation_classes,
            rescale_predictions=False,
            dtype=tf.float32)

    else:
      self._build_coco_metrics()

      rescale_predictions = (not self.task_config.validation_data.parser
                             .segmentation_resize_eval_groundtruth)

      self.segmentation_perclass_iou_metric = segmentation_metrics.PerClassIoU(
          name='per_class_iou',
          num_classes=num_segmentation_classes,
          rescale_predictions=rescale_predictions,
          dtype=tf.float32)

      # On TPU, the per-class IoU update is deferred to the host CPU via
      # `aggregate_logs`.
      self._process_iou_metric_on_cpu = isinstance(
          tf.distribute.get_strategy(), tf.distribute.TPUStrategy)

      if self.task_config.model.generate_panoptic_masks:
        if not self.task_config.validation_data.parser.include_panoptic_masks:
          raise ValueError('`include_panoptic_masks` should be set to True when'
                           ' computing panoptic quality.')
        pq_config = self.task_config.panoptic_quality_evaluator
        self.panoptic_quality_metric = (
            panoptic_quality_evaluator.PanopticQualityEvaluator(
                num_categories=pq_config.num_categories,
                ignored_label=pq_config.ignored_label,
                max_instances_per_category=pq_config.max_instances_per_category,
                offset=pq_config.offset,
                is_thing=pq_config.is_thing,
                rescale_predictions=pq_config.rescale_predictions))

    return metrics
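
  # Note (descriptive): in evaluation mode the returned list stays empty; the
  # COCO, per-class IoU, and panoptic quality evaluators are held on the task
  # and updated through `validation_step`/`aggregate_logs` instead.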

  def train_step(self,
                 inputs: Tuple[Any, Any],
                 model: tf.keras.Model,
                 optimizer: tf.keras.optimizers.Optimizer,
                 metrics: Optional[List[Any]] = None) -> Dict[str, Any]:
    """Does forward and backward.

    Args:
      inputs: a dictionary of input tensors.
      model: the model, forward pass definition.
      optimizer: the optimizer for this training step.
      metrics: a nested structure of metrics objects.

    Returns:
      A dictionary of logs.
    """
    images, labels = inputs
    num_replicas = tf.distribute.get_strategy().num_replicas_in_sync

    with tf.GradientTape() as tape:
      outputs = model(
          images,
          image_info=labels['image_info'],
          anchor_boxes=labels['anchor_boxes'],
          gt_boxes=labels['gt_boxes'],
          gt_classes=labels['gt_classes'],
          gt_masks=(labels['gt_masks'] if self.task_config.model.include_mask
                    else None),
          training=True)
      outputs = tf.nest.map_structure(
          lambda x: tf.cast(x, tf.float32), outputs)

      # Computes per-replica loss.
      losses = self.build_losses(
          outputs=outputs, labels=labels, aux_losses=model.losses)
      scaled_loss = losses['total_loss'] / num_replicas

      # For mixed_precision policy, when LossScaleOptimizer is used, loss is
      # scaled for numerical stability.
      if isinstance(optimizer, tf.keras.mixed_precision.LossScaleOptimizer):
        scaled_loss = optimizer.get_scaled_loss(scaled_loss)

    tvars = model.trainable_variables
    grads = tape.gradient(scaled_loss, tvars)
    # Scales back gradient when LossScaleOptimizer is used.
    if isinstance(optimizer, tf.keras.mixed_precision.LossScaleOptimizer):
      grads = optimizer.get_unscaled_gradients(grads)
    optimizer.apply_gradients(list(zip(grads, tvars)))

    logs = {self.loss: losses['total_loss']}

    if metrics:
      for m in metrics:
        m.update_state(losses[m.name])

    if self.task_config.segmentation_evaluation.report_train_mean_iou:
      segmentation_labels = {
          'masks': labels['gt_segmentation_mask'],
          'valid_masks': labels['gt_segmentation_valid_mask'],
          'image_info': labels['image_info']
      }
      self.process_metrics(
          metrics=[self.segmentation_train_mean_iou],
          labels=segmentation_labels,
          model_outputs=outputs['segmentation_outputs'])
      logs.update({
          self.segmentation_train_mean_iou.name:
              self.segmentation_train_mean_iou.result()
      })

    return logs
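
  # Example (illustrative sketch): one manual training iteration; in practice
  # the orchestration loop is provided by the official trainer.
  #
  #   logs = task.train_step(next(iter(dataset)), model, optimizer, metrics)
  #   print(logs[task.loss])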

  def validation_step(self,
                      inputs: Tuple[Any, Any],
                      model: tf.keras.Model,
                      metrics: Optional[List[Any]] = None) -> Dict[str, Any]:
    """Validatation step.

    Args:
      inputs: a dictionary of input tensors.
      model: the keras.Model.
      metrics: a nested structure of metrics objects.

    Returns:
      A dictionary of logs.
    """
    images, labels = inputs

    outputs = model(
        images,
        anchor_boxes=labels['anchor_boxes'],
        image_info=labels['image_info'],
        training=False)

    logs = {self.loss: 0}
    coco_model_outputs = {
        'detection_masks': outputs['detection_masks'],
        'detection_boxes': outputs['detection_boxes'],
        'detection_scores': outputs['detection_scores'],
        'detection_classes': outputs['detection_classes'],
        'num_detections': outputs['num_detections'],
        'source_id': labels['groundtruths']['source_id'],
        'image_info': labels['image_info']
    }
    segmentation_labels = {
        'masks': labels['groundtruths']['gt_segmentation_mask'],
        'valid_masks': labels['groundtruths']['gt_segmentation_valid_mask'],
        'image_info': labels['image_info']
    }

    logs.update(
        {self.coco_metric.name: (labels['groundtruths'], coco_model_outputs)})
    if self._process_iou_metric_on_cpu:
      logs.update({
          self.segmentation_perclass_iou_metric.name:
              (segmentation_labels, outputs['segmentation_outputs'])
      })
    else:
      self.segmentation_perclass_iou_metric.update_state(
          segmentation_labels,
          outputs['segmentation_outputs'])

    if self.task_config.model.generate_panoptic_masks:
      pq_metric_labels = {
          'category_mask':
              labels['groundtruths']['gt_panoptic_category_mask'],
          'instance_mask':
              labels['groundtruths']['gt_panoptic_instance_mask'],
          'image_info': labels['image_info']
      }
      logs.update({
          self.panoptic_quality_metric.name:
              (pq_metric_labels, outputs['panoptic_outputs'])})
    return logs

  def aggregate_logs(self, state=None, step_outputs=None):
    if state is None:
      self.coco_metric.reset_states()
      self.segmentation_perclass_iou_metric.reset_states()
      state = [self.coco_metric, self.segmentation_perclass_iou_metric]
      if self.task_config.model.generate_panoptic_masks:
        state += [self.panoptic_quality_metric]

    self.coco_metric.update_state(
        step_outputs[self.coco_metric.name][0],
        step_outputs[self.coco_metric.name][1])

    if self._process_iou_metric_on_cpu:
      self.segmentation_perclass_iou_metric.update_state(
          step_outputs[self.segmentation_perclass_iou_metric.name][0],
          step_outputs[self.segmentation_perclass_iou_metric.name][1])

    if self.task_config.model.generate_panoptic_masks:
      self.panoptic_quality_metric.update_state(
          step_outputs[self.panoptic_quality_metric.name][0],
          step_outputs[self.panoptic_quality_metric.name][1])

    return state

  def reduce_aggregated_logs(self, aggregated_logs, global_step=None):
    result = super(PanopticMaskRCNNTask, self).reduce_aggregated_logs(
        aggregated_logs=aggregated_logs,
        global_step=global_step)

    ious = self.segmentation_perclass_iou_metric.result()
    if self.task_config.segmentation_evaluation.report_per_class_iou:
      for i, value in enumerate(ious.numpy()):
        result.update({'segmentation_iou/class_{}'.format(i): value})
    # Computes mean IoU
    result.update({'segmentation_mean_iou': tf.reduce_mean(ious).numpy()})

    if self.task_config.model.generate_panoptic_masks:
      report_per_class_metrics = (
          self.task_config.panoptic_quality_evaluator.report_per_class_metrics)
      panoptic_quality_results = self.panoptic_quality_metric.result()
      for k, value in panoptic_quality_results.items():
        if k.endswith('per_class'):
          if report_per_class_metrics:
            for i, per_class_value in enumerate(value):
              metric_key = 'panoptic_quality/{}/class_{}'.format(k, i)
              result[metric_key] = per_class_value
        else:
          result['panoptic_quality/{}'.format(k)] = value

    return result
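
# Example (illustrative sketch): a custom evaluation loop driving the
# aggregation methods above; `task`, `model`, and `eval_dataset` are assumed
# to be built elsewhere.
#
#   state = None
#   for batch in eval_dataset:
#     logs = task.validation_step(batch, model)
#     state = task.aggregate_logs(state, logs)
#   results = task.reduce_aggregated_logs(state)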