Commit 10989f9d authored by A. Unique TensorFlower

Internal change

PiperOrigin-RevId: 388500087
parent d7e2f581
@@ -14,7 +14,7 @@
"""Mask R-CNN model."""
from typing import Any, List, Mapping, Optional, Union
from typing import Any, List, Mapping, Optional, Tuple, Union
import tensorflow as tf
@@ -143,6 +143,34 @@ class MaskRCNNModel(tf.keras.Model):
gt_classes: Optional[tf.Tensor] = None,
gt_masks: Optional[tf.Tensor] = None,
training: Optional[bool] = None) -> Mapping[str, tf.Tensor]:
model_outputs, intermediate_outputs = self._call_box_outputs(
images=images, image_shape=image_shape, anchor_boxes=anchor_boxes,
gt_boxes=gt_boxes, gt_classes=gt_classes, training=training)
if not self._include_mask:
return model_outputs
model_mask_outputs = self._call_mask_outputs(
model_box_outputs=model_outputs,
features=intermediate_outputs['features'],
current_rois=intermediate_outputs['current_rois'],
matched_gt_indices=intermediate_outputs['matched_gt_indices'],
matched_gt_boxes=intermediate_outputs['matched_gt_boxes'],
matched_gt_classes=intermediate_outputs['matched_gt_classes'],
gt_masks=gt_masks,
training=training)
model_outputs.update(model_mask_outputs)
return model_outputs
def _call_box_outputs(
self, images: tf.Tensor,
image_shape: tf.Tensor,
anchor_boxes: Optional[Mapping[str, tf.Tensor]] = None,
gt_boxes: Optional[tf.Tensor] = None,
gt_classes: Optional[tf.Tensor] = None,
training: Optional[bool] = None) -> Tuple[
Mapping[str, tf.Tensor], Mapping[str, tf.Tensor]]:
"""Implementation of the Faster-RCNN logic for boxes."""
model_outputs = {}
# Feature extraction.
@@ -239,9 +267,28 @@ class MaskRCNNModel(tf.keras.Model):
'decoded_box_scores': detections['decoded_box_scores']
})
if not self._include_mask:
return model_outputs
intermediate_outputs = {
'matched_gt_boxes': matched_gt_boxes,
'matched_gt_indices': matched_gt_indices,
'matched_gt_classes': matched_gt_classes,
'features': features,
'current_rois': current_rois,
}
return (model_outputs, intermediate_outputs)
def _call_mask_outputs(
self,
model_box_outputs: Mapping[str, tf.Tensor],
features: tf.Tensor,
current_rois: tf.Tensor,
matched_gt_indices: tf.Tensor,
matched_gt_boxes: tf.Tensor,
matched_gt_classes: tf.Tensor,
gt_masks: tf.Tensor,
training: Optional[bool] = None) -> Mapping[str, tf.Tensor]:
"""Implementation of Mask-RCNN mask prediction logic."""
model_outputs = dict(model_box_outputs)
if training:
current_rois, roi_classes, roi_masks = self.mask_sampler(
current_rois, matched_gt_boxes, matched_gt_classes,
......
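The hunks above split MaskRCNNModel.call into _call_box_outputs and _call_mask_outputs, so subclasses can reuse the box logic and customize only the mask branch. A minimal sketch of that pattern, assuming the method signatures shown in this diff; the ClippedMaskRCNNModel class and its clipping tweak are purely illustrative and not part of this change:

import tensorflow as tf

from official.vision.beta.modeling import maskrcnn_model


class ClippedMaskRCNNModel(maskrcnn_model.MaskRCNNModel):
  """Illustrative subclass that customizes only the mask branch."""

  def _call_mask_outputs(self,
                         model_box_outputs,
                         features,
                         current_rois,
                         matched_gt_indices,
                         matched_gt_boxes,
                         matched_gt_classes,
                         gt_masks,
                         training=None):
    # Reuse the parent's mask logic unchanged.
    outputs = dict(super()._call_mask_outputs(
        model_box_outputs=model_box_outputs,
        features=features,
        current_rois=current_rois,
        matched_gt_indices=matched_gt_indices,
        matched_gt_boxes=matched_gt_boxes,
        matched_gt_classes=matched_gt_classes,
        gt_masks=gt_masks,
        training=training))
    # Example post-processing at inference time: keep mask probabilities
    # strictly inside (0, 1).
    if not training and 'detection_masks' in outputs:
      outputs['detection_masks'] = tf.clip_by_value(
          outputs['detection_masks'], 1e-4, 1.0 - 1e-4)
    return outputs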
@@ -22,6 +22,9 @@ import dataclasses
from official.core import config_definitions as cfg
from official.core import exp_factory
from official.modeling import optimization
from official.vision.beta.configs import backbones
from official.vision.beta.configs import common
from official.vision.beta.configs import decoders
from official.vision.beta.configs import maskrcnn as maskrcnn_config
from official.vision.beta.configs import retinanet as retinanet_config
@@ -59,20 +62,18 @@ def deep_mask_head_rcnn_resnetfpn_coco() -> cfg.ExperimentConfig:
annotation_file=os.path.join(maskrcnn_config.COCO_INPUT_PATH_BASE,
'instances_val2017.json'),
model=DeepMaskHeadRCNN(
num_classes=91,
input_size=[1024, 1024, 3],
include_mask=True), # pytype: disable=wrong-keyword-args
num_classes=91, input_size=[1024, 1024, 3], include_mask=True), # pytype: disable=wrong-keyword-args
losses=maskrcnn_config.Losses(l2_weight_decay=0.00004),
train_data=maskrcnn_config.DataConfig(
input_path=os.path.join(
maskrcnn_config.COCO_INPUT_PATH_BASE, 'train*'),
input_path=os.path.join(maskrcnn_config.COCO_INPUT_PATH_BASE,
'train*'),
is_training=True,
global_batch_size=global_batch_size,
parser=maskrcnn_config.Parser(
aug_rand_hflip=True, aug_scale_min=0.8, aug_scale_max=1.25)),
validation_data=maskrcnn_config.DataConfig(
input_path=os.path.join(
maskrcnn_config.COCO_INPUT_PATH_BASE, 'val*'),
input_path=os.path.join(maskrcnn_config.COCO_INPUT_PATH_BASE,
'val*'),
is_training=False,
global_batch_size=8)), # pytype: disable=wrong-keyword-args
trainer=cfg.TrainerConfig(
@@ -110,3 +111,87 @@ def deep_mask_head_rcnn_resnetfpn_coco() -> cfg.ExperimentConfig:
])
return config
@exp_factory.register_config_factory('deep_mask_head_rcnn_spinenet_coco')
def deep_mask_head_rcnn_spinenet_coco() -> cfg.ExperimentConfig:
"""COCO object detection with Mask R-CNN with SpineNet backbone."""
steps_per_epoch = 463
coco_val_samples = 5000
train_batch_size = 256
eval_batch_size = 8
config = cfg.ExperimentConfig(
runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'),
task=DeepMaskHeadRCNNTask(
annotation_file=os.path.join(maskrcnn_config.COCO_INPUT_PATH_BASE,
'instances_val2017.json'), # pytype: disable=wrong-keyword-args
model=DeepMaskHeadRCNN(
backbone=backbones.Backbone(
type='spinenet',
spinenet=backbones.SpineNet(
model_id='49',
min_level=3,
max_level=7,
)),
decoder=decoders.Decoder(
type='identity', identity=decoders.Identity()),
anchor=maskrcnn_config.Anchor(anchor_size=3),
norm_activation=common.NormActivation(use_sync_bn=True),
num_classes=91,
input_size=[640, 640, 3],
min_level=3,
max_level=7,
include_mask=True), # pytype: disable=wrong-keyword-args
losses=maskrcnn_config.Losses(l2_weight_decay=0.00004),
train_data=maskrcnn_config.DataConfig(
input_path=os.path.join(maskrcnn_config.COCO_INPUT_PATH_BASE,
'train*'),
is_training=True,
global_batch_size=train_batch_size,
parser=maskrcnn_config.Parser(
aug_rand_hflip=True, aug_scale_min=0.5, aug_scale_max=2.0)),
validation_data=maskrcnn_config.DataConfig(
input_path=os.path.join(maskrcnn_config.COCO_INPUT_PATH_BASE,
'val*'),
is_training=False,
global_batch_size=eval_batch_size,
drop_remainder=False)), # pytype: disable=wrong-keyword-args
trainer=cfg.TrainerConfig(
train_steps=steps_per_epoch * 350,
validation_steps=coco_val_samples // eval_batch_size,
validation_interval=steps_per_epoch,
steps_per_loop=steps_per_epoch,
summary_interval=steps_per_epoch,
checkpoint_interval=steps_per_epoch,
optimizer_config=optimization.OptimizationConfig({
'optimizer': {
'type': 'sgd',
'sgd': {
'momentum': 0.9
}
},
'learning_rate': {
'type': 'stepwise',
'stepwise': {
'boundaries': [
steps_per_epoch * 320, steps_per_epoch * 340
],
'values': [0.32, 0.032, 0.0032],
}
},
'warmup': {
'type': 'linear',
'linear': {
'warmup_steps': 2000,
'warmup_learning_rate': 0.0067
}
}
})),
restrictions=[
'task.train_data.is_training != None',
'task.validation_data.is_training != None',
'task.model.min_level == task.model.backbone.spinenet.min_level',
'task.model.max_level == task.model.backbone.spinenet.max_level',
])
return config
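Since the factory is registered under the name 'deep_mask_head_rcnn_spinenet_coco', the experiment can also be looked up through exp_factory and adjusted before training. A small usage sketch; the import path of the config module and the overridden values are assumptions for illustration:

from official.core import exp_factory
# Importing the config module runs the @exp_factory.register_config_factory
# decorators above; the exact project path is assumed here.
from official.vision.beta.projects.deepmac_maskrcnn.configs import deep_mask_head_rcnn  # pylint: disable=unused-import

config = exp_factory.get_exp_config('deep_mask_head_rcnn_spinenet_coco')
# Override a couple of fields for a small local run (example values only).
config.task.train_data.global_batch_size = 8
config.trainer.train_steps = 100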
@@ -25,6 +25,10 @@ class DeepMaskHeadRcnnConfigTest(tf.test.TestCase):
config = deep_mask_head_rcnn.deep_mask_head_rcnn_resnetfpn_coco()
self.assertIsInstance(config.task, deep_mask_head_rcnn.DeepMaskHeadRCNNTask)
def test_config_spinenet(self):
config = deep_mask_head_rcnn.deep_mask_head_rcnn_spinenet_coco()
self.assertIsInstance(config.task, deep_mask_head_rcnn.DeepMaskHeadRCNNTask)
if __name__ == '__main__':
tf.test.main()
@@ -14,12 +14,14 @@
"""Mask R-CNN model."""
from typing import List, Mapping, Optional, Union
# Import libraries
from absl import logging
import tensorflow as tf
from official.vision.beta.ops import box_ops
from official.vision.beta.modeling import maskrcnn_model
def resize_as(source, size):
@@ -30,21 +32,30 @@ def resize_as(source, size):
@tf.keras.utils.register_keras_serializable(package='Vision')
class DeepMaskRCNNModel(tf.keras.Model):
class DeepMaskRCNNModel(maskrcnn_model.MaskRCNNModel):
"""The Mask R-CNN model."""
def __init__(self,
backbone,
decoder,
rpn_head,
detection_head,
roi_generator,
roi_sampler,
roi_aligner,
detection_generator,
mask_head=None,
mask_sampler=None,
mask_roi_aligner=None,
backbone: tf.keras.Model,
decoder: tf.keras.Model,
rpn_head: tf.keras.layers.Layer,
detection_head: Union[tf.keras.layers.Layer,
List[tf.keras.layers.Layer]],
roi_generator: tf.keras.layers.Layer,
roi_sampler: Union[tf.keras.layers.Layer,
List[tf.keras.layers.Layer]],
roi_aligner: tf.keras.layers.Layer,
detection_generator: tf.keras.layers.Layer,
mask_head: Optional[tf.keras.layers.Layer] = None,
mask_sampler: Optional[tf.keras.layers.Layer] = None,
mask_roi_aligner: Optional[tf.keras.layers.Layer] = None,
class_agnostic_bbox_pred: bool = False,
cascade_class_ensemble: bool = False,
min_level: Optional[int] = None,
max_level: Optional[int] = None,
num_scales: Optional[int] = None,
aspect_ratios: Optional[List[float]] = None,
anchor_size: Optional[float] = None,
use_gt_boxes_for_masks=False,
**kwargs):
"""Initializes the Mask R-CNN model.
@@ -53,122 +64,99 @@ class DeepMaskRCNNModel(tf.keras.Model):
backbone: `tf.keras.Model`, the backbone network.
decoder: `tf.keras.Model`, the decoder network.
rpn_head: the RPN head.
detection_head: the detection head.
detection_head: the detection head or a list of heads.
roi_generator: the ROI generator.
roi_sampler: the ROI sampler.
roi_sampler: a single ROI sampler or a list of ROI samplers for cascade
detection heads.
roi_aligner: the ROI aligner.
detection_generator: the detection generator.
mask_head: the mask head.
mask_sampler: the mask sampler.
mask_roi_aligner: the ROI aligner for mask prediction.
use_gt_boxes_for_masks: bool, if set, crop using groundtruth boxes
instead of proposals for training mask head
class_agnostic_bbox_pred: if True, perform class agnostic bounding box
prediction. Needs to be `True` for Cascade RCNN models.
cascade_class_ensemble: if True, ensemble classification scores over all
detection heads.
min_level: Minimum level in output feature maps.
max_level: Maximum level in output feature maps.
num_scales: A number representing intermediate scales added on each level.
For instance, num_scales=2 adds one additional intermediate anchor scale
per level, giving scales [2^0, 2^0.5].
aspect_ratios: A list representing the aspect ratio anchors added on each
level. Each number indicates the ratio of width to height. For instance,
aspect_ratios=[1.0, 2.0, 0.5] adds three anchors on each scale level.
anchor_size: A number representing the size of the base anchor relative to
the feature stride 2^level.
use_gt_boxes_for_masks: bool, if set, crop using groundtruth boxes instead
of proposals when training the mask head.
**kwargs: keyword arguments to be passed.
"""
super(DeepMaskRCNNModel, self).__init__(**kwargs)
self._config_dict = {
'backbone': backbone,
'decoder': decoder,
'rpn_head': rpn_head,
'detection_head': detection_head,
'roi_generator': roi_generator,
'roi_sampler': roi_sampler,
'roi_aligner': roi_aligner,
'detection_generator': detection_generator,
'mask_head': mask_head,
'mask_sampler': mask_sampler,
'mask_roi_aligner': mask_roi_aligner,
'use_gt_boxes_for_masks': use_gt_boxes_for_masks
}
self.backbone = backbone
self.decoder = decoder
self.rpn_head = rpn_head
self.detection_head = detection_head
self.roi_generator = roi_generator
self.roi_sampler = roi_sampler
self.roi_aligner = roi_aligner
self.detection_generator = detection_generator
self._include_mask = mask_head is not None
self.mask_head = mask_head
if self._include_mask and mask_sampler is None:
raise ValueError('`mask_sampler` is not provided in Mask R-CNN.')
self.mask_sampler = mask_sampler
if self._include_mask and mask_roi_aligner is None:
raise ValueError('`mask_roi_aligner` is not provided in Mask R-CNN.')
self.mask_roi_aligner = mask_roi_aligner
super(DeepMaskRCNNModel, self).__init__(
backbone=backbone,
decoder=decoder,
rpn_head=rpn_head,
detection_head=detection_head,
roi_generator=roi_generator,
roi_sampler=roi_sampler,
roi_aligner=roi_aligner,
detection_generator=detection_generator,
mask_head=mask_head,
mask_sampler=mask_sampler,
mask_roi_aligner=mask_roi_aligner,
class_agnostic_bbox_pred=class_agnostic_bbox_pred,
cascade_class_ensemble=cascade_class_ensemble,
min_level=min_level,
max_level=max_level,
num_scales=num_scales,
aspect_ratios=aspect_ratios,
anchor_size=anchor_size,
**kwargs)
self._config_dict['use_gt_boxes_for_masks'] = use_gt_boxes_for_masks
def call(self,
images,
image_shape,
anchor_boxes=None,
gt_boxes=None,
gt_classes=None,
gt_masks=None,
training=None):
model_outputs = {}
# Feature extraction.
features = self.backbone(images)
if self.decoder:
features = self.decoder(features)
# Region proposal network.
rpn_scores, rpn_boxes = self.rpn_head(features)
model_outputs.update({
'rpn_boxes': rpn_boxes,
'rpn_scores': rpn_scores
})
# Generate RoIs.
rois, _ = self.roi_generator(
rpn_boxes, rpn_scores, anchor_boxes, image_shape, training)
if training:
rois = tf.stop_gradient(rois)
rois, matched_gt_boxes, matched_gt_classes, matched_gt_indices = (
self.roi_sampler(rois, gt_boxes, gt_classes))
# Assign target for the 2nd stage classification.
box_targets = box_ops.encode_boxes(
matched_gt_boxes, rois, weights=[10.0, 10.0, 5.0, 5.0])
# If the target is background, the box target is set to all 0s.
box_targets = tf.where(
tf.tile(
tf.expand_dims(tf.equal(matched_gt_classes, 0), axis=-1),
[1, 1, 4]),
tf.zeros_like(box_targets),
box_targets)
model_outputs.update({
'class_targets': matched_gt_classes,
'box_targets': box_targets,
})
# RoI align.
roi_features = self.roi_aligner(features, rois)
# Detection head.
raw_scores, raw_boxes = self.detection_head(roi_features)
if training:
model_outputs.update({
'class_outputs': raw_scores,
'box_outputs': raw_boxes,
})
else:
# Post-processing.
detections = self.detection_generator(
raw_boxes, raw_scores, rois, image_shape)
model_outputs.update({
'detection_boxes': detections['detection_boxes'],
'detection_scores': detections['detection_scores'],
'detection_classes': detections['detection_classes'],
'num_detections': detections['num_detections'],
})
images: tf.Tensor,
image_shape: tf.Tensor,
anchor_boxes: Optional[Mapping[str, tf.Tensor]] = None,
gt_boxes: Optional[tf.Tensor] = None,
gt_classes: Optional[tf.Tensor] = None,
gt_masks: Optional[tf.Tensor] = None,
training: Optional[bool] = None) -> Mapping[str, tf.Tensor]:
model_outputs, intermediate_outputs = self._call_box_outputs(
images=images, image_shape=image_shape, anchor_boxes=anchor_boxes,
gt_boxes=gt_boxes, gt_classes=gt_classes, training=training)
if not self._include_mask:
return model_outputs
model_mask_outputs = self._call_mask_outputs(
model_box_outputs=model_outputs,
features=intermediate_outputs['features'],
current_rois=intermediate_outputs['current_rois'],
matched_gt_indices=intermediate_outputs['matched_gt_indices'],
matched_gt_boxes=intermediate_outputs['matched_gt_boxes'],
matched_gt_classes=intermediate_outputs['matched_gt_classes'],
gt_masks=gt_masks,
gt_classes=gt_classes,
gt_boxes=gt_boxes,
training=training)
model_outputs.update(model_mask_outputs)
return model_outputs
def _call_mask_outputs(
self,
model_box_outputs: Mapping[str, tf.Tensor],
features: tf.Tensor,
current_rois: tf.Tensor,
matched_gt_indices: tf.Tensor,
matched_gt_boxes: tf.Tensor,
matched_gt_classes: tf.Tensor,
gt_masks: tf.Tensor,
gt_classes: tf.Tensor,
gt_boxes: tf.Tensor,
training: Optional[bool] = None) -> Mapping[str, tf.Tensor]:
model_outputs = dict(model_box_outputs)
if training:
if self._config_dict['use_gt_boxes_for_masks']:
mask_size = (
@@ -184,11 +172,8 @@ class DeepMaskRCNNModel(tf.keras.Model):
})
else:
rois, roi_classes, roi_masks = self.mask_sampler(
rois,
matched_gt_boxes,
matched_gt_classes,
matched_gt_indices,
gt_masks)
current_rois, matched_gt_boxes, matched_gt_classes,
matched_gt_indices, gt_masks)
roi_masks = tf.stop_gradient(roi_masks)
model_outputs.update({
'mask_class_targets': roi_classes,
@@ -219,24 +204,3 @@ class DeepMaskRCNNModel(tf.keras.Model):
'detection_masks': tf.math.sigmoid(raw_masks),
})
return model_outputs
@property
def checkpoint_items(self):
"""Returns a dictionary of items to be additionally checkpointed."""
items = dict(
backbone=self.backbone,
rpn_head=self.rpn_head,
detection_head=self.detection_head)
if self.decoder is not None:
items.update(decoder=self.decoder)
if self._include_mask:
items.update(mask_head=self.mask_head)
return items
def get_config(self):
return self._config_dict
@classmethod
def from_config(cls, config):
return cls(**config)
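With construction now delegated to MaskRCNNModel, the checkpoint_items property and the get_config/from_config pair deleted here are simply inherited from the parent; only use_gt_boxes_for_masks is added to the config dict. A hedged sketch of relying on the inherited behavior; the module path and the helper function are assumptions, not part of this commit:

import tensorflow as tf

# Assumed location of the model defined in this file.
from official.vision.beta.projects.deepmac_maskrcnn.modeling import maskrcnn_model as deep_maskrcnn


def save_and_clone(model: deep_maskrcnn.DeepMaskRCNNModel, ckpt_dir: str):
  """Illustrates the inherited checkpoint_items and get_config/from_config."""
  # checkpoint_items comes from MaskRCNNModel and exposes backbone, heads, etc.
  ckpt_path = tf.train.Checkpoint(**model.checkpoint_items).save(ckpt_dir)
  # get_config() now includes 'use_gt_boxes_for_masks' alongside the parent's
  # keys, so from_config() can rebuild an equivalent model.
  clone = deep_maskrcnn.DeepMaskRCNNModel.from_config(model.get_config())
  return ckpt_path, clone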