Unverified Commit 5ffcc5b6 authored by Anirudh Vegesana's avatar Anirudh Vegesana Committed by GitHub
Browse files

Merge branch 'purdue-yolo' into detection_generator_pr

parents 0b81a843 76e0c014
...@@ -53,6 +53,18 @@ flags.DEFINE_string( ...@@ -53,6 +53,18 @@ flags.DEFINE_string(
'3x3 followed by 5x1 conv). 3d_2plus1d uses (2+1)D convolution with ' '3x3 followed by 5x1 conv). 3d_2plus1d uses (2+1)D convolution with '
'Conv3D and no 2D reshaping (e.g., a 5x3x3 kernel becomes 1x3x3 ' 'Conv3D and no 2D reshaping (e.g., a 5x3x3 kernel becomes 1x3x3 '
'followed by 5x1x1 conv).') 'followed by 5x1x1 conv).')
# Squeeze-excitation pooling variant; read back as FLAGS.se_type.
# NOTE: adjacent string literals are concatenated verbatim, so every piece
# except the last must end with a space to keep the help text readable.
flags.DEFINE_string(
    'se_type', '3d',
    '3d, 2d, or 2plus3d. 3d uses the default 3D spatiotemporal global average '
    'pooling for squeeze excitation. 2d uses 2D spatial global average pooling '
    'on each frame. 2plus3d concatenates both 3D and 2D global average '
    'pooling.')
# Main activation name; read back as FLAGS.activation.
flags.DEFINE_string(
    'activation', 'swish',
    'The main activation to use across layers.')
# Gating activation name for squeeze-excitation layers; read back as
# FLAGS.gating_activation.
flags.DEFINE_string(
    'gating_activation', 'sigmoid',
    'The gating activation to use in squeeze-excitation layers.')
flags.DEFINE_bool( flags.DEFINE_bool(
'use_positional_encoding', False, 'use_positional_encoding', False,
'Whether to use positional encoding (only applied when causal=True).') 'Whether to use positional encoding (only applied when causal=True).')
...@@ -94,6 +106,9 @@ def main(_) -> None: ...@@ -94,6 +106,9 @@ def main(_) -> None:
conv_type=FLAGS.conv_type, conv_type=FLAGS.conv_type,
use_external_states=FLAGS.causal, use_external_states=FLAGS.causal,
input_specs=input_specs, input_specs=input_specs,
activation=FLAGS.activation,
gating_activation=FLAGS.gating_activation,
se_type=FLAGS.se_type,
use_positional_encoding=FLAGS.use_positional_encoding) use_positional_encoding=FLAGS.use_positional_encoding)
model = movinet_model.MovinetClassifier( model = movinet_model.MovinetClassifier(
backbone, backbone,
......
...@@ -307,8 +307,10 @@ class Movinet(tf.keras.Model): ...@@ -307,8 +307,10 @@ class Movinet(tf.keras.Model):
causal: bool = False, causal: bool = False,
use_positional_encoding: bool = False, use_positional_encoding: bool = False,
conv_type: str = '3d', conv_type: str = '3d',
se_type: str = '3d',
input_specs: Optional[tf.keras.layers.InputSpec] = None, input_specs: Optional[tf.keras.layers.InputSpec] = None,
activation: str = 'swish', activation: str = 'swish',
gating_activation: str = 'sigmoid',
use_sync_bn: bool = True, use_sync_bn: bool = True,
norm_momentum: float = 0.99, norm_momentum: float = 0.99,
norm_epsilon: float = 0.001, norm_epsilon: float = 0.001,
...@@ -332,8 +334,13 @@ class Movinet(tf.keras.Model): ...@@ -332,8 +334,13 @@ class Movinet(tf.keras.Model):
3x3 followed by 5x1 conv). '3d_2plus1d' uses (2+1)D convolution with 3x3 followed by 5x1 conv). '3d_2plus1d' uses (2+1)D convolution with
Conv3D and no 2D reshaping (e.g., a 5x3x3 kernel becomes 1x3x3 followed Conv3D and no 2D reshaping (e.g., a 5x3x3 kernel becomes 1x3x3 followed
by 5x1x1 conv). by 5x1x1 conv).
se_type: '3d', '2d', or '2plus3d'. '3d' uses the default 3D
spatiotemporal global average pooling for squeeze excitation. '2d'
uses 2D spatial global average pooling on each frame. '2plus3d'
concatenates both 3D and 2D global average pooling.
input_specs: the model input spec to use. input_specs: the model input spec to use.
activation: name of the activation function. activation: name of the main activation function.
gating_activation: gating activation to use in squeeze excitation layers.
use_sync_bn: if True, use synchronized batch normalization. use_sync_bn: if True, use synchronized batch normalization.
norm_momentum: normalization momentum for the moving average. norm_momentum: normalization momentum for the moving average.
norm_epsilon: small float added to variance to avoid dividing by norm_epsilon: small float added to variance to avoid dividing by
...@@ -354,15 +361,19 @@ class Movinet(tf.keras.Model): ...@@ -354,15 +361,19 @@ class Movinet(tf.keras.Model):
if conv_type not in ('3d', '2plus1d', '3d_2plus1d'): if conv_type not in ('3d', '2plus1d', '3d_2plus1d'):
raise ValueError('Unknown conv type: {}'.format(conv_type)) raise ValueError('Unknown conv type: {}'.format(conv_type))
if se_type not in ('3d', '2d', '2plus3d'):
raise ValueError('Unknown squeeze excitation type: {}'.format(se_type))
self._model_id = model_id self._model_id = model_id
self._block_specs = block_specs self._block_specs = block_specs
self._causal = causal self._causal = causal
self._use_positional_encoding = use_positional_encoding self._use_positional_encoding = use_positional_encoding
self._conv_type = conv_type self._conv_type = conv_type
self._se_type = se_type
self._input_specs = input_specs self._input_specs = input_specs
self._use_sync_bn = use_sync_bn self._use_sync_bn = use_sync_bn
self._activation = activation self._activation = activation
self._gating_activation = gating_activation
self._norm_momentum = norm_momentum self._norm_momentum = norm_momentum
self._norm_epsilon = norm_epsilon self._norm_epsilon = norm_epsilon
if use_sync_bn: if use_sync_bn:
...@@ -475,10 +486,12 @@ class Movinet(tf.keras.Model): ...@@ -475,10 +486,12 @@ class Movinet(tf.keras.Model):
strides=strides, strides=strides,
causal=self._causal, causal=self._causal,
activation=self._activation, activation=self._activation,
gating_activation=self._gating_activation,
stochastic_depth_drop_rate=stochastic_depth_drop_rate, stochastic_depth_drop_rate=stochastic_depth_drop_rate,
conv_type=self._conv_type, conv_type=self._conv_type,
use_positional_encoding=self._use_positional_encoding and se_type=self._se_type,
self._causal, use_positional_encoding=
self._use_positional_encoding and self._causal,
kernel_initializer=self._kernel_initializer, kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer, kernel_regularizer=self._kernel_regularizer,
batch_norm_layer=self._norm, batch_norm_layer=self._norm,
...@@ -691,8 +704,10 @@ def build_movinet( ...@@ -691,8 +704,10 @@ def build_movinet(
causal=backbone_cfg.causal, causal=backbone_cfg.causal,
use_positional_encoding=backbone_cfg.use_positional_encoding, use_positional_encoding=backbone_cfg.use_positional_encoding,
conv_type=backbone_cfg.conv_type, conv_type=backbone_cfg.conv_type,
se_type=backbone_cfg.se_type,
input_specs=input_specs, input_specs=input_specs,
activation=norm_activation_config.activation, activation=backbone_cfg.activation,
gating_activation=backbone_cfg.gating_activation,
use_sync_bn=norm_activation_config.use_sync_bn, use_sync_bn=norm_activation_config.use_sync_bn,
norm_momentum=norm_activation_config.norm_momentum, norm_momentum=norm_activation_config.norm_momentum,
norm_epsilon=norm_activation_config.norm_epsilon, norm_epsilon=norm_activation_config.norm_epsilon,
......
...@@ -314,6 +314,43 @@ class MovinetLayersTest(parameterized.TestCase, tf.test.TestCase): ...@@ -314,6 +314,43 @@ class MovinetLayersTest(parameterized.TestCase, tf.test.TestCase):
[[4., 4., 4.]]]]], [[4., 4., 4.]]]]],
1e-5, 1e-5) 1e-5, 1e-5)
def test_stream_squeeze_excitation_2plus3d(self):
# Checks that a causal StreamSqueezeExcitation layer with se_type='2plus3d'
# produces the same output when the clip is fed chunk by chunk (threading
# `states` between calls) as when the whole clip is fed in a single call.
# NOTE(review): this view shows the method without indentation; the
# statements below are kept verbatim, so block nesting must be confirmed
# against the real file.
se = movinet_layers.StreamSqueezeExcitation(
3,
se_type='2plus3d',
causal=True,
activation='hard_swish',
gating_activation='hard_sigmoid',
kernel_initializer='ones')
# Build an input of shape [1, 4, 2, 1, 3] whose value along axis 1 is
# 1, 2, 3, 4 (constant across the other axes).
inputs = tf.range(4, dtype=tf.float32) + 1.
inputs = tf.reshape(inputs, [1, 4, 1, 1, 1])
inputs = tf.tile(inputs, [1, 1, 2, 1, 3])
# Reference output: the full clip in one call; returned states are discarded.
expected, _ = se(inputs)
# tf.split receives `inputs.shape[1] // num_splits` as its split count, so
# `num_splits` effectively acts as the chunk length along axis 1: the clip is
# cut into 4, 2 and then 1 chunk(s).
for num_splits in [1, 2, 4]:
frames = tf.split(inputs, inputs.shape[1] // num_splits, axis=1)
# Start every streaming pass from an empty state dict.
states = {}
predicted = []
for frame in frames:
# Feed the states returned by the previous chunk into the next call.
x, states = se(frame, states=states)
predicted.append(x)
# Stitch the per-chunk outputs back together along axis 1.
predicted = tf.concat(predicted, axis=1)
self.assertEqual(predicted.shape, expected.shape)
self.assertAllClose(predicted, expected)
# The expected literal below has the same values as `inputs`.
self.assertAllClose(
predicted,
[[[[[1., 1., 1.]],
[[1., 1., 1.]]],
[[[2., 2., 2.]],
[[2., 2., 2.]]],
[[[3., 3., 3.]],
[[3., 3., 3.]]],
[[[4., 4., 4.]],
[[4., 4., 4.]]]]])
def test_stream_movinet_block(self): def test_stream_movinet_block(self):
block = movinet_layers.MovinetBlock( block = movinet_layers.MovinetBlock(
out_filters=3, out_filters=3,
......
...@@ -46,6 +46,7 @@ from official.modeling import performance ...@@ -46,6 +46,7 @@ from official.modeling import performance
# Import movinet libraries to register the backbone and model into tf.vision # Import movinet libraries to register the backbone and model into tf.vision
# model garden factory. # model garden factory.
# pylint: disable=unused-import # pylint: disable=unused-import
# The following imports are required.
from official.vision.beta.projects.movinet.modeling import movinet from official.vision.beta.projects.movinet.modeling import movinet
from official.vision.beta.projects.movinet.modeling import movinet_model from official.vision.beta.projects.movinet.modeling import movinet_model
# pylint: enable=unused-import # pylint: enable=unused-import
......
# Panoptic Segmentation
## Description
Panoptic Segmentation combines two distinct vision tasks: semantic
segmentation and instance segmentation. These tasks are unified such that each
pixel in the image is assigned both the label of the class it belongs to and
the instance identifier of the object it is a part of.
## Environment setup
The code can be run on multiple GPUs or TPUs with different distribution
strategies. See the TensorFlow distributed training
[guide](https://www.tensorflow.org/guide/distributed_training) for an overview
of `tf.distribute`.
The code is compatible with TensorFlow 2.4+. See requirements.txt for all
prerequisites; you can install them with the following command:
`pip install -r ./official/requirements.txt`
**DISCLAIMER**: Panoptic MaskRCNN is still under active development, stay tuned!
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment