"test/old-api/spectests.cpp" did not exist on "a5607f82a302c35b30f05141551f734284da1e5f"
Unverified commit c127d527, authored by Srihari Humbarwadi, committed by GitHub

Merge branch 'panoptic-segmentation' into panoptic-deeplab-modeling

parents 78657911 457bcb85
@@ -64,6 +64,72 @@ class MovinetLayersTest(parameterized.TestCase, tf.test.TestCase):
    self.assertEqual(predicted.shape, expected.shape)
    self.assertAllClose(predicted, expected)

  def test_mobile_conv2d_bn(self):
    batch_norm_op = tf.keras.layers.BatchNormalization(
        momentum=0.9,
        epsilon=1.,
        name='bn')
    conv2d = movinet_layers.MobileConv2D(
        filters=3,
        kernel_size=(3, 3),
        strides=(1, 1),
        padding='same',
        kernel_initializer='ones',
        use_bias=False,
        use_depthwise=False,
        use_temporal=False,
        use_buffered_input=True,
        batch_norm_op=batch_norm_op,
    )
    inputs = tf.ones([1, 2, 2, 2, 3])

    predicted = conv2d(inputs)

    expected = tf.constant(
        [[[[[8.48528, 8.48528, 8.48528],
            [8.48528, 8.48528, 8.48528]],
           [[8.48528, 8.48528, 8.48528],
            [8.48528, 8.48528, 8.48528]]],
          [[[8.48528, 8.48528, 8.48528],
            [8.48528, 8.48528, 8.48528]],
           [[8.48528, 8.48528, 8.48528],
            [8.48528, 8.48528, 8.48528]]]]])

    self.assertEqual(predicted.shape, expected.shape)
    self.assertAllClose(predicted, expected)
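For reference, the 8.48528 constant: with a 3x3 ones kernel and 'same' padding, every output position of the 2x2x3 all-ones input sums to 12, and a freshly built BatchNormalization layer in inference mode (moving mean 0, moving variance 1) divides by sqrt(variance + epsilon) = sqrt(2). A quick check in plain Python:

    import math

    conv_sum = 2 * 2 * 3  # 'same' padding: every output sees all 12 ones
    moving_mean, moving_var, epsilon = 0.0, 1.0, 1.0
    print((conv_sum - moving_mean) / math.sqrt(moving_var + epsilon))  # 8.485281...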

  def test_mobile_conv2d_activation(self):
    conv2d = movinet_layers.MobileConv2D(
        filters=3,
        kernel_size=(3, 3),
        strides=(1, 1),
        padding='same',
        kernel_initializer='ones',
        use_bias=False,
        use_depthwise=False,
        use_temporal=False,
        use_buffered_input=True,
        activation_op=tf.nn.relu6,
    )
    inputs = tf.ones([1, 2, 2, 2, 3])

    predicted = conv2d(inputs)

    expected = tf.constant(
        [[[[[6., 6., 6.],
            [6., 6., 6.]],
           [[6., 6., 6.],
            [6., 6., 6.]]],
          [[[6., 6., 6.],
            [6., 6., 6.]],
           [[6., 6., 6.],
            [6., 6., 6.]]]]])

    self.assertEqual(predicted.shape, expected.shape)
    self.assertAllClose(predicted, expected)
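For reference, the constant 6 here is the same per-position sum of 12 as above, clipped by relu6:

    import tensorflow as tf

    # relu6 clips activations at 6, so the summed value 12 becomes 6.
    print(tf.nn.relu6(tf.constant(12.0)).numpy())  # 6.0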

  def test_mobile_conv2d_temporal(self):
    conv2d = movinet_layers.MobileConv2D(
        filters=3,
@@ -378,6 +444,35 @@ class MovinetLayersTest(parameterized.TestCase, tf.test.TestCase):
    self.assertEqual(predicted.shape, expected.shape)
    self.assertAllClose(predicted, expected)

  def test_stream_movinet_block_none_se(self):
    block = movinet_layers.MovinetBlock(
        out_filters=3,
        expand_filters=6,
        kernel_size=(3, 3, 3),
        strides=(1, 2, 2),
        causal=True,
        se_type='none',
        state_prefix='test',
    )

    inputs = tf.range(4, dtype=tf.float32) + 1.
    inputs = tf.reshape(inputs, [1, 4, 1, 1, 1])
    inputs = tf.tile(inputs, [1, 1, 2, 1, 3])
    expected, expected_states = block(inputs)

    for num_splits in [1, 2, 4]:
      frames = tf.split(inputs, inputs.shape[1] // num_splits, axis=1)
      states = {}
      predicted = []
      for frame in frames:
        x, states = block(frame, states=states)
        predicted.append(x)
      predicted = tf.concat(predicted, axis=1)

      self.assertEqual(predicted.shape, expected.shape)
      self.assertAllClose(predicted, expected)
      self.assertAllEqual(list(expected_states.keys()), ['test_stream_buffer'])
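For context on what this test exercises: a causal temporal convolution over a whole clip produces the same activations as running it frame by frame while carrying the last kernel_size - 1 frames in a buffer, which is what the `stream_buffer` state stores. A minimal sketch with a plain 1-D convolution (illustrative only, not the library's implementation):

    import tensorflow as tf

    # Causal temporal convolution over the full clip, kernel size 3.
    kernel = tf.ones([3, 1, 1])                     # [time, in_ch, out_ch]
    clip = tf.reshape(tf.range(6, dtype=tf.float32), [1, 6, 1])
    padded = tf.pad(clip, [[0, 0], [2, 0], [0, 0]])  # pad the past only (causal)
    full = tf.nn.conv1d(padded, kernel, stride=1, padding='VALID')

    # The same result frame by frame, carrying the last kernel_size - 1 frames.
    stream_buffer = tf.zeros([1, 2, 1])
    outputs = []
    for t in range(6):
      frame = clip[:, t:t + 1, :]
      window = tf.concat([stream_buffer, frame], axis=1)  # full receptive field
      outputs.append(tf.nn.conv1d(window, kernel, stride=1, padding='VALID'))
      stream_buffer = window[:, 1:, :]                    # slide the buffer
    streamed = tf.concat(outputs, axis=1)

    tf.debugging.assert_near(full, streamed)  # identical activations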

  def test_stream_classifier_head(self):
    head = movinet_layers.Head(project_filters=5)
    classifier_head = movinet_layers.ClassifierHead(
...
@@ -99,6 +99,49 @@ class MoViNetTest(parameterized.TestCase, tf.test.TestCase):
    self.assertEqual(predicted.shape, expected.shape)
    self.assertAllClose(predicted, expected, 1e-5, 1e-5)

  def test_movinet_stream_nse(self):
    """Tests that the backbone runs in streaming mode without SE layers."""
    tf.keras.backend.set_image_data_format('channels_last')

    backbone = movinet.Movinet(
        model_id='a0',
        causal=True,
        use_external_states=True,
        se_type='none',
    )
    inputs = tf.ones([1, 5, 128, 128, 3])

    init_states = backbone.init_states(tf.shape(inputs))
    expected_endpoints, _ = backbone({**init_states, 'image': inputs})

    frames = tf.split(inputs, inputs.shape[1], axis=1)

    states = init_states
    for frame in frames:
      output, states = backbone({**states, 'image': frame})
    predicted_endpoints = output

    predicted = predicted_endpoints['head']

    # The expected final output is simply the mean across frames.
    expected = expected_endpoints['head']
    expected = tf.reduce_mean(expected, 1, keepdims=True)

    self.assertEqual(predicted.shape, expected.shape)
    self.assertAllClose(predicted, expected, 1e-5, 1e-5)

    # Check the contents of the states dictionary.
    state_keys = list(init_states.keys())
    self.assertIn('state_head_pool_buffer', state_keys)
    self.assertIn('state_head_pool_frame_count', state_keys)
    state_keys.remove('state_head_pool_buffer')
    state_keys.remove('state_head_pool_frame_count')
    # The remaining states are the 'stream_buffer' entries of the convolutions.
    for state_key in state_keys:
      self.assertIn(
          'stream_buffer', state_key,
          msg=f'Expecting stream_buffer only, found {state_key}')
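For reference, the head's causal pooling keeps a running sum and a frame counter across calls, so after the final frame their ratio matches the mean over the full clip, which is why the test compares against `tf.reduce_mean`. A minimal sketch mirroring the `state_head_pool_buffer` / `state_head_pool_frame_count` split (not the library's implementation):

    import tensorflow as tf

    frames = tf.random.normal([5, 4])   # 5 frames of head features
    pool_buffer = tf.zeros([4])         # running sum lives in the state
    frame_count = tf.constant(0.)
    for frame in frames:
      pool_buffer += frame
      frame_count += 1.
    tf.debugging.assert_near(pool_buffer / frame_count,
                             tf.reduce_mean(frames, axis=0))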

  def test_movinet_2plus1d_stream(self):
    tf.keras.backend.set_image_data_format('channels_last')
...
@@ -82,6 +82,9 @@ flags.DEFINE_string(
flags.DEFINE_string(
    'activation', 'swish',
    'The main activation to use across layers.')
flags.DEFINE_string(
    'classifier_activation', 'swish',
    'The classifier activation to use.')
flags.DEFINE_string(
    'gating_activation', 'sigmoid',
    'The gating activation to use in squeeze-excitation layers.')
@@ -124,11 +127,15 @@ def main(_) -> None:
  # states. These dimensions can be set to `None` once the model is built.
  input_shape = [1 if s is None else s for s in input_specs.shape]

  # Override swish activation implementation to remove custom gradients
  activation = FLAGS.activation
  if activation == 'swish':
    activation = 'simple_swish'
  classifier_activation = FLAGS.classifier_activation
  if classifier_activation == 'swish':
    classifier_activation = 'simple_swish'
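For reference, `simple_swish` is the composite x * sigmoid(x) form of the activation; the default `swish` in tf-models wraps a custom gradient, which the comment above says must be removed for export. A sketch of the composite form, assuming only that definition:

    import tensorflow as tf

    def simple_swish(x):
      # Composite x * sigmoid(x); no custom gradient, so it serializes
      # cleanly into an exported SavedModel.
      return x * tf.math.sigmoid(x)

    print(simple_swish(tf.constant(1.0)).numpy())  # ~0.73106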
  backbone = movinet.Movinet(
      model_id=FLAGS.model_id,
      causal=FLAGS.causal,
@@ -145,9 +152,7 @@ def main(_) -> None:
      num_classes=FLAGS.num_classes,
      output_states=FLAGS.causal,
      input_specs=dict(image=input_specs),
      activation=classifier_activation)
  model.build(input_shape)

  # Compile model to generate some internal Keras variables.
...
@@ -18,7 +18,7 @@ from absl import flags

import tensorflow as tf
import tensorflow_hub as hub

from official.projects.movinet.tools import export_saved_model

FLAGS = flags.FLAGS
...
@@ -145,7 +145,7 @@ class Encoder(tf.keras.layers.Layer):
    self._encoder_layers = []
    # Set layer norm epsilons to 1e-6 to be consistent with JAX implementation.
    # https://flax.readthedocs.io/en/latest/_autosummary/flax.deprecated.nn.LayerNorm.html
    for i in range(self._num_layers):
      encoder_layer = nn_blocks.TransformerEncoderBlock(
          inner_activation=activations.gelu,
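For reference, Keras' `LayerNormalization` defaults to `epsilon=1e-3`, so matching Flax's 1e-6 default requires setting it explicitly, as the comment above notes. An illustrative layer only, not the encoder code:

    import tensorflow as tf

    norm = tf.keras.layers.LayerNormalization(epsilon=1e-6)
    print(norm(tf.constant([[1., 2., 3.]])).numpy())  # ~[-1.2247, 0., 1.2247]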
...
@@ -12,3 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""Vision package definition."""
# Lint as: python3
# pylint: disable=unused-import
from official.vision.beta import configs
from official.vision.beta import tasks
@@ -55,6 +55,20 @@ depth, label smoothing and dropout.
| ResNet-RS-350 | 256x256 | 164.3 | 83.7 | 96.7 | [config](https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/image_classification/imagenet_resnetrs350_i256.yaml) \| [ckpt](https://storage.cloud.google.com/tf_model_garden/vision/resnet-rs/resnet-rs-350-i256.tar.gz) |
| ResNet-RS-350 | 320x320 | 164.3 | 84.2 | 96.9 | [config](https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/image_classification/imagenet_resnetrs420_i256.yaml) \| [ckpt](https://storage.cloud.google.com/tf_model_garden/vision/resnet-rs/resnet-rs-350-i320.tar.gz) |
#### Vision Transformer (ViT)

We support [ViT](https://arxiv.org/abs/2010.11929) and [DeiT](https://arxiv.org/abs/2012.12877) implementations in a TF Vision [project](https://github.com/tensorflow/models/tree/master/official/projects/vit). ViT models trained under the DeiT settings:

model   | resolution | Top-1 | Top-5 |
--------- | :--------: | ----: | ----: |
ViT-s16 | 224x224 | 79.4 | 94.7 |
ViT-b16 | 224x224 | 81.8 | 95.8 |
ViT-l16 | 224x224 | 82.2 | 95.8 |
## Object Detection and Instance Segmentation

### Common Settings and Notes

@@ -123,6 +137,7 @@ evaluated on [COCO](https://cocodataset.org/) val2017.

| Backbone     | Resolution | Epochs | Params (M) | Box AP | Mask AP | Download
| ------------ | :--------: | -----: | ---------: | -----: | ------: | -------:
| SpineNet-49  | 640x640    | 500    | 56.4       | 46.4   | 40.0    | [config](https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/maskrcnn/coco_spinenet49_cascadercnn_tpu.yaml)|
| SpineNet-96  | 1024x1024  | 500    | 70.8       | 50.9   | 43.8    | [config](https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/maskrcnn/coco_spinenet96_cascadercnn_tpu.yaml)|
| SpineNet-143 | 1280x1280  | 500    | 94.9       | 51.9   | 45.0    | [config](https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/maskrcnn/coco_spinenet143_cascadercnn_tpu.yaml)|

## Semantic Segmentation
...
# MobileNetV3Small ImageNet classification. 67.5% top-1 and 87.6% top-5 accuracy.
runtime:
  distribution_strategy: 'tpu'
  mixed_precision_dtype: 'bfloat16'
task:
  model:
    num_classes: 1001
    input_size: [224, 224, 3]
    backbone:
      type: 'mobilenet'
      mobilenet:
        model_id: 'MobileNetV3Small'
        filter_size_scale: 1.0
    norm_activation:
      activation: 'relu'
      norm_momentum: 0.997
      norm_epsilon: 0.001
      use_sync_bn: false
    dropout_rate: 0.2
  losses:
    l2_weight_decay: 0.00001
    one_hot: true
    label_smoothing: 0.1
  train_data:
    input_path: 'imagenet-2012-tfrecord/train*'
    is_training: true
    global_batch_size: 4096
    dtype: 'bfloat16'
  validation_data:
    input_path: 'imagenet-2012-tfrecord/valid*'
    is_training: false
    global_batch_size: 4096
    dtype: 'bfloat16'
    drop_remainder: false
trainer:
  train_steps: 312000  # 1000 epochs
  validation_steps: 12
  validation_interval: 312
  steps_per_loop: 312  # NUM_EXAMPLES (1281167) // global_batch_size
  summary_interval: 312
  checkpoint_interval: 312
  optimizer_config:
    optimizer:
      type: 'rmsprop'
      rmsprop:
        rho: 0.9
        momentum: 0.9
        epsilon: 0.002
    learning_rate:
      type: 'exponential'
      exponential:
        initial_learning_rate: 0.01
        decay_steps: 936  # 3 * steps_per_epoch
        decay_rate: 0.99
        staircase: true
    ema:
      average_decay: 0.9999
      trainable_weights_only: false
    warmup:
      type: 'linear'
      linear:
        warmup_steps: 1560
        warmup_learning_rate: 0.001
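For reference, the step counts in this config all derive from the ImageNet epoch length; a quick check in plain Python (the 50k validation-set size is an assumption from standard ImageNet-2012):

    num_examples = 1281167                   # ImageNet-2012 training set
    global_batch_size = 4096
    steps_per_epoch = num_examples // global_batch_size
    assert steps_per_epoch == 312            # steps_per_loop / summary_interval
    assert steps_per_epoch * 1000 == 312000  # train_steps: 1000 epochs
    assert steps_per_epoch * 3 == 936        # decay_steps: decay every 3 epochs
    assert steps_per_epoch * 5 == 1560       # warmup_steps: 5 epochs of warmup
    assert 50000 // global_batch_size == 12  # validation_steps over 50k images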
# --experiment_type=cascadercnn_spinenet_coco
# Expect to reach: box mAP: 51.9%, mask mAP: 45.0% on COCO
runtime:
  distribution_strategy: 'tpu'
  mixed_precision_dtype: 'bfloat16'
@@ -8,12 +10,12 @@ task:
    parser:
      aug_rand_hflip: true
      aug_scale_min: 0.1
      aug_scale_max: 2.5
  losses:
    l2_weight_decay: 0.00004
  model:
    anchor:
      anchor_size: 4.0
      num_scales: 3
      min_level: 3
      max_level: 7
...
@@ -714,7 +714,8 @@ class InvertedBottleneckBlock(tf.keras.layers.Layer):
        'use_depthwise': self._use_depthwise,
        'use_residual': self._use_residual,
        'norm_momentum': self._norm_momentum,
        'norm_epsilon': self._norm_epsilon,
        'output_intermediate_endpoints': self._output_intermediate_endpoints
    }
    base_config = super(InvertedBottleneckBlock, self).get_config()
    return dict(list(base_config.items()) + list(config.items()))
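For reference, the new `output_intermediate_endpoints` entry follows the Keras `get_config` contract: every constructor argument must be serialized, or `from_config` cannot rebuild the layer. A minimal stand-in layer showing the round trip (not the real `InvertedBottleneckBlock`):

    import tensorflow as tf

    class Block(tf.keras.layers.Layer):

      def __init__(self, output_intermediate_endpoints=False, **kwargs):
        super().__init__(**kwargs)
        self._output_intermediate_endpoints = output_intermediate_endpoints

      def get_config(self):
        config = {
            # Every constructor argument must appear here, or from_config()
            # cannot rebuild an equivalent layer.
            'output_intermediate_endpoints': self._output_intermediate_endpoints,
        }
        base_config = super().get_config()
        return dict(list(base_config.items()) + list(config.items()))

    block = Block(output_intermediate_endpoints=True)
    clone = Block.from_config(block.get_config())
    print(clone._output_intermediate_endpoints)  # True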
...
@@ -2284,8 +2284,9 @@ class MixupAndCutmix:
        lambda x: _fill_rectangle(*x),
        (images, random_center_width, random_center_height, cut_width // 2,
         cut_height // 2, tf.reverse(images, [0])),
        dtype=(
            images.dtype, tf.int32, tf.int32, tf.int32, tf.int32, images.dtype),
        fn_output_signature=tf.TensorSpec(images.shape[1:], dtype=images.dtype))

    return images, labels, lam

@@ -2294,7 +2295,8 @@ class MixupAndCutmix:
    lam = MixupAndCutmix._sample_from_beta(self.mixup_alpha, self.mixup_alpha,
                                           labels.shape)
    lam = tf.reshape(lam, [-1, 1, 1, 1])

    lam_cast = tf.cast(lam, dtype=images.dtype)
    images = lam_cast * images + (1. - lam_cast) * tf.reverse(images, [0])

    return images, labels, tf.squeeze(lam)
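For reference, the casts above matter because TensorFlow does not implicitly promote dtypes: blending `bfloat16` images with a `float32` mixing weight raises a dtype mismatch error. A minimal sketch of the pattern (illustrative shapes and values):

    import tensorflow as tf

    images = tf.cast(tf.random.normal([4, 8, 8, 3]), tf.bfloat16)
    lam = tf.constant([0.3, 0.7, 0.5, 0.9])   # Beta samples stay float32
    lam = tf.reshape(lam, [-1, 1, 1, 1])

    # Cast the mixing weight to the image dtype before blending; multiplying
    # a float32 tensor into bfloat16 images fails otherwise.
    lam_cast = tf.cast(lam, dtype=images.dtype)
    mixed = lam_cast * images + (1. - lam_cast) * tf.reverse(images, [0])
    print(mixed.dtype)  # <dtype: 'bfloat16'>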
...
@@ -366,14 +366,19 @@ class RandomErasingTest(tf.test.TestCase, parameterized.TestCase):
    self.assertNotEqual(0, tf.reduce_max(aug_image))


@parameterized.named_parameters([
    ('float16_images', tf.float16),
    ('bfloat16_images', tf.bfloat16),
    ('float32_images', tf.float32),
])
class MixupAndCutmixTest(parameterized.TestCase, tf.test.TestCase):

  def test_mixup_and_cutmix_smoothes_labels(self, image_dtype):
    batch_size = 12
    num_classes = 1000
    label_smoothing = 0.1

    images = tf.random.normal((batch_size, 224, 224, 3), dtype=image_dtype)
    labels = tf.range(batch_size)
    augmenter = augment.MixupAndCutmix(
        num_classes=num_classes, label_smoothing=label_smoothing)

@@ -388,12 +393,12 @@ class MixupAndCutmixTest(tf.test.TestCase, parameterized.TestCase):
    self.assertAllGreaterEqual(aug_labels, label_smoothing / num_classes -
                               1e4)  # With tolerance

  def test_mixup_changes_image(self, image_dtype):
    batch_size = 12
    num_classes = 1000
    label_smoothing = 0.1

    images = tf.random.normal((batch_size, 224, 224, 3), dtype=image_dtype)
    labels = tf.range(batch_size)
    augmenter = augment.MixupAndCutmix(
        mixup_alpha=1., cutmix_alpha=0., num_classes=num_classes)

@@ -409,12 +414,12 @@ class MixupAndCutmixTest(tf.test.TestCase, parameterized.TestCase):
                               1e4)  # With tolerance
    self.assertFalse(tf.math.reduce_all(images == aug_images))

  def test_cutmix_changes_image(self, image_dtype):
    batch_size = 12
    num_classes = 1000
    label_smoothing = 0.1

    images = tf.random.normal((batch_size, 224, 224, 3), dtype=image_dtype)
    labels = tf.range(batch_size)
    augmenter = augment.MixupAndCutmix(
        mixup_alpha=0., cutmix_alpha=1., num_classes=num_classes)
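For reference, decorating the test class (rather than each method) with `parameterized.named_parameters` forwards the dtype parameter to every test method, which is why each `test_*` above now takes `image_dtype`. A minimal example of the pattern:

    from absl.testing import parameterized
    import tensorflow as tf

    @parameterized.named_parameters([
        ('float16_images', tf.float16),
        ('float32_images', tf.float32),
    ])
    class DtypeTest(parameterized.TestCase, tf.test.TestCase):

      # The class-level decorator passes the parameter to every test method.
      def test_ones_keep_dtype(self, image_dtype):
        self.assertEqual(tf.ones([2], dtype=image_dtype).dtype, image_dtype)

    if __name__ == '__main__':
      tf.test.main()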
...
@@ -25,6 +25,7 @@ from official.modeling import optimization
from official.vision.beta.configs import common
from official.vision.beta.configs import maskrcnn
from official.vision.beta.configs import semantic_segmentation
from official.vision.beta.projects.deepmac_maskrcnn.configs import deep_mask_head_rcnn as deepmac_maskrcnn

SEGMENTATION_MODEL = semantic_segmentation.SemanticSegmentationModel

@@ -89,7 +90,7 @@ class PanopticSegmentationGenerator(hyperparams.Config):

@dataclasses.dataclass
class PanopticMaskRCNN(deepmac_maskrcnn.DeepMaskHeadRCNN):
  """Panoptic Mask R-CNN model config."""

  segmentation_model: semantic_segmentation.SemanticSegmentationModel = (
      SEGMENTATION_MODEL(num_classes=2))
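For reference, rebasing the config on `DeepMaskHeadRCNN` means `PanopticMaskRCNN` inherits the deepmac fields (such as `use_gt_boxes_for_masks`) in addition to its own. A sketch with stand-in dataclasses, not the real hyperparams classes:

    import dataclasses

    @dataclasses.dataclass
    class DeepMaskHeadRCNN:                    # stand-in for the deepmac config
      use_gt_boxes_for_masks: bool = False

    @dataclasses.dataclass
    class PanopticMaskRCNN(DeepMaskHeadRCNN):  # stand-in for the panoptic config
      segmentation_num_classes: int = 2

    cfg = PanopticMaskRCNN()
    print(cfg.use_gt_boxes_for_masks)  # False, inherited from the deepmac base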
...
@@ -17,10 +17,10 @@

import tensorflow as tf

from official.vision.beta.modeling import backbones
from official.vision.beta.modeling.decoders import factory as decoder_factory
from official.vision.beta.modeling.heads import segmentation_heads
from official.vision.beta.projects.panoptic_maskrcnn.configs import panoptic_deeplab as panoptic_deeplab_cfg
from official.vision.beta.projects.deepmac_maskrcnn.tasks import deep_mask_head_rcnn
from official.vision.beta.projects.panoptic_maskrcnn.configs import panoptic_maskrcnn as panoptic_maskrcnn_cfg
from official.vision.beta.projects.panoptic_maskrcnn.modeling import panoptic_deeplab_model
from official.vision.beta.projects.panoptic_maskrcnn.modeling.heads import panoptic_deeplab_heads

@@ -50,7 +50,7 @@ def build_panoptic_maskrcnn(
  segmentation_config = model_config.segmentation_model

  # Builds the maskrcnn model.
  maskrcnn_model = deep_mask_head_rcnn.build_maskrcnn(
      input_specs=input_specs,
      model_config=model_config,
      l2_regularizer=l2_regularizer)

@@ -120,6 +120,7 @@ def build_panoptic_maskrcnn(
  # Combines maskrcnn, and segmentation models to build panoptic segmentation
  # model.
  model = panoptic_maskrcnn_model.PanopticMaskRCNNModel(
      backbone=maskrcnn_model.backbone,
      decoder=maskrcnn_model.decoder,
...
@@ -18,10 +18,10 @@ from typing import List, Mapping, Optional, Union

import tensorflow as tf

from official.vision.beta.projects.deepmac_maskrcnn.modeling import maskrcnn_model


class PanopticMaskRCNNModel(maskrcnn_model.DeepMaskRCNNModel):
  """The Panoptic Segmentation model."""

  def __init__(
@@ -49,7 +49,8 @@ class PanopticMaskRCNNModel(maskrcnn_model.MaskRCNNModel):
      max_level: Optional[int] = None,
      num_scales: Optional[int] = None,
      aspect_ratios: Optional[List[float]] = None,
      anchor_size: Optional[float] = None,
      use_gt_boxes_for_masks: bool = False,  # pytype: disable=annotation-type-mismatch  # typed-keras
      **kwargs):
    """Initializes the Panoptic Mask R-CNN model.

@@ -94,6 +95,7 @@ class PanopticMaskRCNNModel(maskrcnn_model.MaskRCNNModel):
        aspect_ratios=[1.0, 2.0, 0.5] adds three anchors on each scale level.
      anchor_size: A number representing the scale of size of the base anchor
        to the feature stride 2^level.
      use_gt_boxes_for_masks: `bool`, whether to use only gt boxes for masks.
      **kwargs: keyword arguments to be passed.
    """
    super(PanopticMaskRCNNModel, self).__init__(
@@ -115,6 +117,7 @@ class PanopticMaskRCNNModel(maskrcnn_model.MaskRCNNModel):
        num_scales=num_scales,
        aspect_ratios=aspect_ratios,
        anchor_size=anchor_size,
        use_gt_boxes_for_masks=use_gt_boxes_for_masks,
        **kwargs)

    self._config_dict.update({
...
@@ -97,6 +97,20 @@ class PanopticSegmentationModule(detection.DetectionModule):
        anchor_boxes=anchor_boxes,
        training=False)

    detections.pop('rpn_boxes')
    detections.pop('rpn_scores')
    detections.pop('cls_outputs')
    detections.pop('box_outputs')
    detections.pop('backbone_features')
    detections.pop('decoder_features')

    # Normalize detection boxes to [0, 1]. Here we first map them to the
    # original image size, then normalize them to [0, 1].
    detections['detection_boxes'] = (
        detections['detection_boxes'] /
        tf.tile(image_info[:, 2:3, :], [1, 1, 2]) /
        tf.tile(image_info[:, 0:1, :], [1, 1, 2]))
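For reference, this assumes the tf-models `image_info` layout: row 0 is the original (height, width), row 1 the desired size, row 2 the y/x resize scale, row 3 the offset. Dividing by the tiled scale maps boxes back to original-image pixels; dividing by the tiled original size then normalizes to [0, 1]. A worked example with illustrative numbers:

    import tensorflow as tf

    # image_info layout assumed from tf-models preprocessing (see above).
    image_info = tf.constant([[[480., 640.],
                               [512., 512.],
                               [512. / 480., 512. / 640.],
                               [0., 0.]]])
    boxes = tf.constant([[[51.2, 64., 512., 512.]]])  # ymin, xmin, ymax, xmax

    boxes /= tf.tile(image_info[:, 2:3, :], [1, 1, 2])  # back to original pixels
    boxes /= tf.tile(image_info[:, 0:1, :], [1, 1, 2])  # normalize to [0, 1]
    print(boxes.numpy())  # [[[0.1 0.125 1. 1.]]]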
    if model_params.detection_generator.apply_nms:
      final_outputs = {
          'detection_boxes': detections['detection_boxes'],
@@ -109,10 +123,15 @@ class PanopticSegmentationModule(detection.DetectionModule):
          'decoded_boxes': detections['decoded_boxes'],
          'decoded_box_scores': detections['decoded_box_scores']
      }

    masks = detections['segmentation_outputs']
    masks = tf.image.resize(masks, self._input_image_size, method='bilinear')
    classes = tf.math.argmax(masks, axis=-1)
    scores = tf.nn.softmax(masks, axis=-1)

    final_outputs.update({
        'detection_masks': detections['detection_masks'],
        'masks': masks,
        'scores': scores,
        'classes': classes,
        'image_info': image_info
    })

    if model_params.generate_panoptic_masks:
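For reference, the shapes involved in the new segmentation outputs: resizing keeps the channel (class) axis, softmax produces per-pixel class confidences, and argmax collapses the class axis into a hard label map. A quick shape walk-through with illustrative sizes:

    import tensorflow as tf

    logits = tf.random.normal([1, 16, 16, 21])   # per-pixel class logits
    masks = tf.image.resize(logits, [64, 64], method='bilinear')
    scores = tf.nn.softmax(masks, axis=-1)       # per-pixel class confidences
    classes = tf.math.argmax(masks, axis=-1)     # hard label per pixel
    print(masks.shape, scores.shape, classes.shape)
    # (1, 64, 64, 21) (1, 64, 64, 21) (1, 64, 64)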
...
@@ -61,7 +61,7 @@ class PanopticMaskRCNNTask(maskrcnn.MaskRCNNTask):

  def initialize(self, model: tf.keras.Model) -> None:
    """Loading pretrained checkpoint."""
    if not self.task_config.init_checkpoint:
      return

    def _get_checkpoint_path(checkpoint_dir_or_file):
...
@@ -34,7 +34,7 @@ import PIL.ImageFont as ImageFont

import six
import tensorflow as tf

from official.vision.ops import box_ops
from official.vision.utils.object_detection import shape_utils

_TITLE_LEFT_MARGIN = 10
...
@@ -1050,6 +1050,8 @@ class CenterNetCenterHeatmapTargetAssigner(object):
      else:
        raise ValueError(f'Unknown heatmap type - {self._box_heatmap_type}')

      heatmap = tf.stop_gradient(heatmap)
      heatmaps.append(heatmap)

    # Return the stacked heatmaps over the batch.
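For reference, wrapping the assembled heatmap in `tf.stop_gradient` marks it as a training target rather than part of the differentiable graph, so no gradient flows back through the target construction. A minimal demonstration:

    import tensorflow as tf

    x = tf.Variable(2.0)
    with tf.GradientTape() as tape:
      target = tf.stop_gradient(x * 3.0)   # treat the assembled target as data
      loss = (x - target) ** 2
    print(tape.gradient(loss, x).numpy())  # -8.0; nothing flows through target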
...