"git@developer.sourcefind.cn:OpenDAS/megatron-lm.git" did not exist on "7a77abd9b6267dc0020a60b424b4748fc22790bb"
Commit 5294dd29 authored by A. Unique TensorFlower's avatar A. Unique TensorFlower
Browse files

Support building anchors inside the Mask-RCNN model if they are not provided.

PiperOrigin-RevId: 370732694
parent 8717bca2
...@@ -207,7 +207,12 @@ def build_maskrcnn( ...@@ -207,7 +207,12 @@ def build_maskrcnn(
mask_sampler=mask_sampler_obj, mask_sampler=mask_sampler_obj,
mask_roi_aligner=mask_roi_aligner_obj, mask_roi_aligner=mask_roi_aligner_obj,
class_agnostic_bbox_pred=detection_head_config.class_agnostic_bbox_pred, class_agnostic_bbox_pred=detection_head_config.class_agnostic_bbox_pred,
cascade_class_ensemble=detection_head_config.cascade_class_ensemble) cascade_class_ensemble=detection_head_config.cascade_class_ensemble,
min_level=model_config.min_level,
max_level=model_config.max_level,
num_scales=model_config.anchor.num_scales,
aspect_ratios=model_config.anchor.aspect_ratios,
anchor_size=model_config.anchor.anchor_size)
return model return model
......
...@@ -19,6 +19,7 @@ from typing import Any, List, Mapping, Optional, Union ...@@ -19,6 +19,7 @@ from typing import Any, List, Mapping, Optional, Union
# Import libraries # Import libraries
import tensorflow as tf import tensorflow as tf
from official.vision.beta.ops import anchor
from official.vision.beta.ops import box_ops from official.vision.beta.ops import box_ops
...@@ -41,6 +42,11 @@ class MaskRCNNModel(tf.keras.Model): ...@@ -41,6 +42,11 @@ class MaskRCNNModel(tf.keras.Model):
mask_roi_aligner: Optional[tf.keras.layers.Layer] = None, mask_roi_aligner: Optional[tf.keras.layers.Layer] = None,
class_agnostic_bbox_pred: bool = False, class_agnostic_bbox_pred: bool = False,
cascade_class_ensemble: bool = False, cascade_class_ensemble: bool = False,
min_level: Optional[int] = None,
max_level: Optional[int] = None,
num_scales: Optional[int] = None,
aspect_ratios: Optional[List[float]] = None,
anchor_size: Optional[float] = None,
**kwargs): **kwargs):
"""Initializes the Mask R-CNN model. """Initializes the Mask R-CNN model.
...@@ -61,6 +67,17 @@ class MaskRCNNModel(tf.keras.Model): ...@@ -61,6 +67,17 @@ class MaskRCNNModel(tf.keras.Model):
prediction. Needs to be `True` for Cascade RCNN models. prediction. Needs to be `True` for Cascade RCNN models.
cascade_class_ensemble: if True, ensemble classification scores over cascade_class_ensemble: if True, ensemble classification scores over
all detection heads. all detection heads.
min_level: Minimum level in output feature maps.
max_level: Maximum level in output feature maps.
num_scales: A number representing intermediate scales added
on each level. For instance, num_scales=2 adds one additional
intermediate anchor scale, giving scales [2^0, 2^0.5] on each level.
aspect_ratios: A list representing the aspect ratio
anchors added on each level. Each number indicates the ratio of width to
height. For instance, aspect_ratios=[1.0, 2.0, 0.5] adds three anchors
on each scale level.
anchor_size: A number representing the scale of the base anchor size
relative to the feature stride 2^level.
**kwargs: keyword arguments to be passed. **kwargs: keyword arguments to be passed.
""" """
super(MaskRCNNModel, self).__init__(**kwargs) super(MaskRCNNModel, self).__init__(**kwargs)
...@@ -78,6 +95,11 @@ class MaskRCNNModel(tf.keras.Model): ...@@ -78,6 +95,11 @@ class MaskRCNNModel(tf.keras.Model):
'mask_roi_aligner': mask_roi_aligner, 'mask_roi_aligner': mask_roi_aligner,
'class_agnostic_bbox_pred': class_agnostic_bbox_pred, 'class_agnostic_bbox_pred': class_agnostic_bbox_pred,
'cascade_class_ensemble': cascade_class_ensemble, 'cascade_class_ensemble': cascade_class_ensemble,
'min_level': min_level,
'max_level': max_level,
'num_scales': num_scales,
'aspect_ratios': aspect_ratios,
'anchor_size': anchor_size,
} }
self.backbone = backbone self.backbone = backbone
self.decoder = decoder self.decoder = decoder
...@@ -133,6 +155,21 @@ class MaskRCNNModel(tf.keras.Model): ...@@ -133,6 +155,21 @@ class MaskRCNNModel(tf.keras.Model):
'rpn_scores': rpn_scores 'rpn_scores': rpn_scores
}) })
# Generate anchor boxes for this batch if not provided.
if anchor_boxes is None:
_, image_height, image_width, _ = images.get_shape().as_list()
anchor_boxes = anchor.Anchor(
min_level=self._config_dict['min_level'],
max_level=self._config_dict['max_level'],
num_scales=self._config_dict['num_scales'],
aspect_ratios=self._config_dict['aspect_ratios'],
anchor_size=self._config_dict['anchor_size'],
image_size=(image_height, image_width)).multilevel_boxes
for l in anchor_boxes:
anchor_boxes[l] = tf.tile(
tf.expand_dims(anchor_boxes[l], axis=0),
[tf.shape(images)[0], 1, 1, 1])
# Generate RoIs. # Generate RoIs.
current_rois, _ = self.roi_generator(rpn_boxes, rpn_scores, anchor_boxes, current_rois, _ = self.roi_generator(rpn_boxes, rpn_scores, anchor_boxes,
image_shape, training) image_shape, training)
...@@ -255,7 +292,7 @@ class MaskRCNNModel(tf.keras.Model): ...@@ -255,7 +292,7 @@ class MaskRCNNModel(tf.keras.Model):
# Only used during training. # Only used during training.
matched_gt_boxes, matched_gt_classes, matched_gt_indices = (None, None, matched_gt_boxes, matched_gt_classes, matched_gt_indices = (None, None,
None) None)
if training: if training and gt_boxes is not None:
rois = tf.stop_gradient(rois) rois = tf.stop_gradient(rois)
current_roi_sampler = self.roi_sampler[layer_num] current_roi_sampler = self.roi_sampler[layer_num]
......
...@@ -21,6 +21,8 @@ from absl.testing import parameterized ...@@ -21,6 +21,8 @@ from absl.testing import parameterized
import numpy as np import numpy as np
import tensorflow as tf import tensorflow as tf
from tensorflow.python.distribute import combinations
from tensorflow.python.distribute import strategy_combinations
from official.vision.beta.modeling import maskrcnn_model from official.vision.beta.modeling import maskrcnn_model
from official.vision.beta.modeling.backbones import resnet from official.vision.beta.modeling.backbones import resnet
from official.vision.beta.modeling.decoders import fpn from official.vision.beta.modeling.decoders import fpn
...@@ -36,34 +38,39 @@ from official.vision.beta.ops import anchor ...@@ -36,34 +38,39 @@ from official.vision.beta.ops import anchor
class MaskRCNNModelTest(parameterized.TestCase, tf.test.TestCase): class MaskRCNNModelTest(parameterized.TestCase, tf.test.TestCase):
@parameterized.parameters( @combinations.generate(
(3, 3, 7, 3, [1.0], 50, False, False, 41953246), combinations.combine(
) include_mask=[True, False],
def test_num_params(self, use_separable_conv=[True, False],
num_classes, build_anchor_boxes=[True, False],
min_level, is_training=[True, False]))
max_level, def test_build_model(self, include_mask, use_separable_conv,
num_scales, build_anchor_boxes, is_training):
aspect_ratios, num_classes = 3
resnet_model_id, min_level = 3
use_separable_conv, max_level = 7
include_mask, num_scales = 3
expected_num_params): aspect_ratios = [1.0]
anchor_size = 3
resnet_model_id = 50
num_anchors_per_location = num_scales * len(aspect_ratios) num_anchors_per_location = num_scales * len(aspect_ratios)
image_size = 384 image_size = 384
images = np.random.rand(2, image_size, image_size, 3) images = np.random.rand(2, image_size, image_size, 3)
image_shape = np.array([[image_size, image_size], [image_size, image_size]]) image_shape = np.array([[image_size, image_size], [image_size, image_size]])
anchor_boxes = anchor.Anchor( if build_anchor_boxes:
min_level=min_level, anchor_boxes = anchor.Anchor(
max_level=max_level, min_level=min_level,
num_scales=num_scales, max_level=max_level,
aspect_ratios=aspect_ratios, num_scales=num_scales,
anchor_size=3, aspect_ratios=aspect_ratios,
image_size=(image_size, image_size)).multilevel_boxes anchor_size=3,
for l in anchor_boxes: image_size=(image_size, image_size)).multilevel_boxes
anchor_boxes[l] = tf.tile( for l in anchor_boxes:
tf.expand_dims(anchor_boxes[l], axis=0), [2, 1, 1, 1]) anchor_boxes[l] = tf.tile(
tf.expand_dims(anchor_boxes[l], axis=0), [2, 1, 1, 1])
else:
anchor_boxes = None
backbone = resnet.ResNet(model_id=resnet_model_id) backbone = resnet.ResNet(model_id=resnet_model_id)
decoder = fpn.FPN( decoder = fpn.FPN(
...@@ -76,8 +83,7 @@ class MaskRCNNModelTest(parameterized.TestCase, tf.test.TestCase): ...@@ -76,8 +83,7 @@ class MaskRCNNModelTest(parameterized.TestCase, tf.test.TestCase):
max_level=max_level, max_level=max_level,
num_anchors_per_location=num_anchors_per_location, num_anchors_per_location=num_anchors_per_location,
num_convs=1) num_convs=1)
detection_head = instance_heads.DetectionHead( detection_head = instance_heads.DetectionHead(num_classes=num_classes)
num_classes=num_classes)
roi_generator_obj = roi_generator.MultilevelROIGenerator() roi_generator_obj = roi_generator.MultilevelROIGenerator()
roi_sampler_obj = roi_sampler.ROISampler() roi_sampler_obj = roi_sampler.ROISampler()
roi_aligner_obj = roi_aligner.MultilevelROIAligner() roi_aligner_obj = roi_aligner.MultilevelROIAligner()
...@@ -103,7 +109,12 @@ class MaskRCNNModelTest(parameterized.TestCase, tf.test.TestCase): ...@@ -103,7 +109,12 @@ class MaskRCNNModelTest(parameterized.TestCase, tf.test.TestCase):
detection_generator_obj, detection_generator_obj,
mask_head, mask_head,
mask_sampler_obj, mask_sampler_obj,
mask_roi_aligner_obj) mask_roi_aligner_obj,
min_level=min_level,
max_level=max_level,
num_scales=num_scales,
aspect_ratios=aspect_ratios,
anchor_size=anchor_size)
gt_boxes = np.array( gt_boxes = np.array(
[[[10, 10, 15, 15], [2.5, 2.5, 7.5, 7.5], [-1, -1, -1, -1]], [[[10, 10, 15, 15], [2.5, 2.5, 7.5, 7.5], [-1, -1, -1, -1]],
...@@ -115,31 +126,35 @@ class MaskRCNNModelTest(parameterized.TestCase, tf.test.TestCase): ...@@ -115,31 +126,35 @@ class MaskRCNNModelTest(parameterized.TestCase, tf.test.TestCase):
else: else:
gt_masks = None gt_masks = None
_ = model(images, # Results will be checked in test_forward.
image_shape, _ = model(
anchor_boxes, images,
gt_boxes, image_shape,
gt_classes, anchor_boxes,
gt_masks, gt_boxes,
training=True) gt_classes,
self.assertEqual(expected_num_params, model.count_params()) gt_masks,
training=is_training)
@parameterized.parameters( @combinations.generate(
(False, False, False), combinations.combine(
(False, True, False), strategy=[
(False, False, True), strategy_combinations.cloud_tpu_strategy,
(False, True, True), strategy_combinations.one_device_strategy_gpu,
(True, False, False), ],
(True, True, False), include_mask=[True, False],
(True, False, True), build_anchor_boxes=[True, False],
(True, True, True), use_cascade_heads=[True, False],
) training=[True, False],
def test_forward(self, include_mask, training, use_cascade_heads): ))
def test_forward(self, strategy, include_mask, build_anchor_boxes, training,
use_cascade_heads):
num_classes = 3 num_classes = 3
min_level = 3 min_level = 3
max_level = 4 max_level = 4
num_scales = 3 num_scales = 3
aspect_ratios = [1.0] aspect_ratios = [1.0]
anchor_size = 3
if use_cascade_heads: if use_cascade_heads:
cascade_iou_thresholds = [0.6] cascade_iou_thresholds = [0.6]
class_agnostic_bbox_pred = True class_agnostic_bbox_pred = True
...@@ -152,87 +167,96 @@ class MaskRCNNModelTest(parameterized.TestCase, tf.test.TestCase): ...@@ -152,87 +167,96 @@ class MaskRCNNModelTest(parameterized.TestCase, tf.test.TestCase):
image_size = (256, 256) image_size = (256, 256)
images = np.random.rand(2, image_size[0], image_size[1], 3) images = np.random.rand(2, image_size[0], image_size[1], 3)
image_shape = np.array([[224, 100], [100, 224]]) image_shape = np.array([[224, 100], [100, 224]])
anchor_boxes = anchor.Anchor( with strategy.scope():
min_level=min_level, if build_anchor_boxes:
max_level=max_level, anchor_boxes = anchor.Anchor(
num_scales=num_scales, min_level=min_level,
aspect_ratios=aspect_ratios, max_level=max_level,
anchor_size=3, num_scales=num_scales,
image_size=image_size).multilevel_boxes aspect_ratios=aspect_ratios,
num_anchors_per_location = len(aspect_ratios) * num_scales anchor_size=anchor_size,
image_size=image_size).multilevel_boxes
else:
anchor_boxes = None
num_anchors_per_location = len(aspect_ratios) * num_scales
input_specs = tf.keras.layers.InputSpec(shape=[None, None, None, 3]) input_specs = tf.keras.layers.InputSpec(shape=[None, None, None, 3])
backbone = resnet.ResNet(model_id=50, input_specs=input_specs) backbone = resnet.ResNet(model_id=50, input_specs=input_specs)
decoder = fpn.FPN( decoder = fpn.FPN(
min_level=min_level, min_level=min_level,
max_level=max_level, max_level=max_level,
input_specs=backbone.output_specs) input_specs=backbone.output_specs)
rpn_head = dense_prediction_heads.RPNHead( rpn_head = dense_prediction_heads.RPNHead(
min_level=min_level, min_level=min_level,
max_level=max_level, max_level=max_level,
num_anchors_per_location=num_anchors_per_location) num_anchors_per_location=num_anchors_per_location)
detection_head = instance_heads.DetectionHead( detection_head = instance_heads.DetectionHead(
num_classes=num_classes, num_classes=num_classes,
class_agnostic_bbox_pred=class_agnostic_bbox_pred) class_agnostic_bbox_pred=class_agnostic_bbox_pred)
roi_generator_obj = roi_generator.MultilevelROIGenerator() roi_generator_obj = roi_generator.MultilevelROIGenerator()
roi_sampler_cascade = []
roi_sampler_obj = roi_sampler.ROISampler()
roi_sampler_cascade.append(roi_sampler_obj)
if cascade_iou_thresholds:
for iou in cascade_iou_thresholds:
roi_sampler_obj = roi_sampler.ROISampler(
mix_gt_boxes=False,
foreground_iou_threshold=iou,
background_iou_high_threshold=iou,
background_iou_low_threshold=0.0,
skip_subsampling=True)
roi_sampler_cascade.append(roi_sampler_obj)
roi_aligner_obj = roi_aligner.MultilevelROIAligner() roi_sampler_cascade = []
detection_generator_obj = detection_generator.DetectionGenerator() roi_sampler_obj = roi_sampler.ROISampler()
if include_mask: roi_sampler_cascade.append(roi_sampler_obj)
mask_head = instance_heads.MaskHead( if cascade_iou_thresholds:
num_classes=num_classes, upsample_factor=2) for iou in cascade_iou_thresholds:
mask_sampler_obj = mask_sampler.MaskSampler( roi_sampler_obj = roi_sampler.ROISampler(
mask_target_size=28, num_sampled_masks=1) mix_gt_boxes=False,
mask_roi_aligner_obj = roi_aligner.MultilevelROIAligner(crop_size=14) foreground_iou_threshold=iou,
else: background_iou_high_threshold=iou,
mask_head = None background_iou_low_threshold=0.0,
mask_sampler_obj = None skip_subsampling=True)
mask_roi_aligner_obj = None roi_sampler_cascade.append(roi_sampler_obj)
model = maskrcnn_model.MaskRCNNModel( roi_aligner_obj = roi_aligner.MultilevelROIAligner()
backbone, detection_generator_obj = detection_generator.DetectionGenerator()
decoder, if include_mask:
rpn_head, mask_head = instance_heads.MaskHead(
detection_head, num_classes=num_classes, upsample_factor=2)
roi_generator_obj, mask_sampler_obj = mask_sampler.MaskSampler(
roi_sampler_cascade, mask_target_size=28, num_sampled_masks=1)
roi_aligner_obj, mask_roi_aligner_obj = roi_aligner.MultilevelROIAligner(crop_size=14)
detection_generator_obj, else:
mask_head, mask_head = None
mask_sampler_obj, mask_sampler_obj = None
mask_roi_aligner_obj, mask_roi_aligner_obj = None
class_agnostic_bbox_pred=class_agnostic_bbox_pred, model = maskrcnn_model.MaskRCNNModel(
cascade_class_ensemble=cascade_class_ensemble) backbone,
decoder,
rpn_head,
detection_head,
roi_generator_obj,
roi_sampler_obj,
roi_aligner_obj,
detection_generator_obj,
mask_head,
mask_sampler_obj,
mask_roi_aligner_obj,
class_agnostic_bbox_pred=class_agnostic_bbox_pred,
cascade_class_ensemble=cascade_class_ensemble,
min_level=min_level,
max_level=max_level,
num_scales=num_scales,
aspect_ratios=aspect_ratios,
anchor_size=anchor_size)
gt_boxes = np.array( gt_boxes = np.array(
[[[10, 10, 15, 15], [2.5, 2.5, 7.5, 7.5], [-1, -1, -1, -1]], [[[10, 10, 15, 15], [2.5, 2.5, 7.5, 7.5], [-1, -1, -1, -1]],
[[100, 100, 150, 150], [-1, -1, -1, -1], [-1, -1, -1, -1]]], [[100, 100, 150, 150], [-1, -1, -1, -1], [-1, -1, -1, -1]]],
dtype=np.float32) dtype=np.float32)
gt_classes = np.array([[2, 1, -1], [1, -1, -1]], dtype=np.int32) gt_classes = np.array([[2, 1, -1], [1, -1, -1]], dtype=np.int32)
if include_mask: if include_mask:
gt_masks = np.ones((2, 3, 100, 100)) gt_masks = np.ones((2, 3, 100, 100))
else: else:
gt_masks = None gt_masks = None
results = model(images, results = model(
image_shape, images,
anchor_boxes, image_shape,
gt_boxes, anchor_boxes,
gt_classes, gt_boxes,
gt_masks, gt_classes,
training=training) gt_masks,
training=training)
self.assertIn('rpn_boxes', results) self.assertIn('rpn_boxes', results)
self.assertIn('rpn_scores', results) self.assertIn('rpn_scores', results)
...@@ -259,22 +283,16 @@ class MaskRCNNModelTest(parameterized.TestCase, tf.test.TestCase): ...@@ -259,22 +283,16 @@ class MaskRCNNModelTest(parameterized.TestCase, tf.test.TestCase):
input_specs = tf.keras.layers.InputSpec(shape=[None, None, None, 3]) input_specs = tf.keras.layers.InputSpec(shape=[None, None, None, 3])
backbone = resnet.ResNet(model_id=50, input_specs=input_specs) backbone = resnet.ResNet(model_id=50, input_specs=input_specs)
decoder = fpn.FPN( decoder = fpn.FPN(
min_level=3, min_level=3, max_level=7, input_specs=backbone.output_specs)
max_level=7,
input_specs=backbone.output_specs)
rpn_head = dense_prediction_heads.RPNHead( rpn_head = dense_prediction_heads.RPNHead(
min_level=3, min_level=3, max_level=7, num_anchors_per_location=3)
max_level=7, detection_head = instance_heads.DetectionHead(num_classes=2)
num_anchors_per_location=3)
detection_head = instance_heads.DetectionHead(
num_classes=2)
roi_generator_obj = roi_generator.MultilevelROIGenerator() roi_generator_obj = roi_generator.MultilevelROIGenerator()
roi_sampler_obj = roi_sampler.ROISampler() roi_sampler_obj = roi_sampler.ROISampler()
roi_aligner_obj = roi_aligner.MultilevelROIAligner() roi_aligner_obj = roi_aligner.MultilevelROIAligner()
detection_generator_obj = detection_generator.DetectionGenerator() detection_generator_obj = detection_generator.DetectionGenerator()
if include_mask: if include_mask:
mask_head = instance_heads.MaskHead( mask_head = instance_heads.MaskHead(num_classes=2, upsample_factor=2)
num_classes=2, upsample_factor=2)
mask_sampler_obj = mask_sampler.MaskSampler( mask_sampler_obj = mask_sampler.MaskSampler(
mask_target_size=28, num_sampled_masks=1) mask_target_size=28, num_sampled_masks=1)
mask_roi_aligner_obj = roi_aligner.MultilevelROIAligner(crop_size=14) mask_roi_aligner_obj = roi_aligner.MultilevelROIAligner(crop_size=14)
...@@ -293,7 +311,12 @@ class MaskRCNNModelTest(parameterized.TestCase, tf.test.TestCase): ...@@ -293,7 +311,12 @@ class MaskRCNNModelTest(parameterized.TestCase, tf.test.TestCase):
detection_generator_obj, detection_generator_obj,
mask_head, mask_head,
mask_sampler_obj, mask_sampler_obj,
mask_roi_aligner_obj) mask_roi_aligner_obj,
min_level=3,
max_level=7,
num_scales=3,
aspect_ratios=[1.0],
anchor_size=3)
config = model.get_config() config = model.get_config()
new_model = maskrcnn_model.MaskRCNNModel.from_config(config) new_model = maskrcnn_model.MaskRCNNModel.from_config(config)
...@@ -329,11 +352,23 @@ class MaskRCNNModelTest(parameterized.TestCase, tf.test.TestCase): ...@@ -329,11 +352,23 @@ class MaskRCNNModelTest(parameterized.TestCase, tf.test.TestCase):
mask_head = None mask_head = None
mask_sampler_obj = None mask_sampler_obj = None
mask_roi_aligner_obj = None mask_roi_aligner_obj = None
model = maskrcnn_model.MaskRCNNModel(backbone, decoder, rpn_head, model = maskrcnn_model.MaskRCNNModel(
detection_head, roi_generator_obj, backbone,
roi_sampler_obj, roi_aligner_obj, decoder,
detection_generator_obj, mask_head, rpn_head,
mask_sampler_obj, mask_roi_aligner_obj) detection_head,
roi_generator_obj,
roi_sampler_obj,
roi_aligner_obj,
detection_generator_obj,
mask_head,
mask_sampler_obj,
mask_roi_aligner_obj,
min_level=3,
max_level=7,
num_scales=3,
aspect_ratios=[1.0],
anchor_size=3)
expect_checkpoint_items = dict( expect_checkpoint_items = dict(
backbone=backbone, backbone=backbone,
decoder=decoder, decoder=decoder,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment