Merge branch 'purdue-yolo' into detection_generator_pr

5ffcc5b6 · Anirudh Vegesana · GitHub · 0b81a843 · 76e0c014 · 5ffcc5b6
Unverified Commit 5ffcc5b6 authored Jul 21, 2021 by Anirudh Vegesana Committed by GitHub Jul 21, 2021
20 changed files
--- a/official/vision/beta/projects/movinet/export_saved_model.py
+++ b/official/vision/beta/projects/movinet/export_saved_model.py
@@ -53,6 +53,18 @@ flags.DEFINE_string(
    '3x3 followed by 5x1 conv). 3d_2plus1d uses (2+1)D convolution with '
    'Conv3D and no 2D reshaping (e.g., a 5x3x3 kernel becomes 1x3x3 '
    'followed by 5x1x1 conv).')
+flags.DEFINE_string(
+    'se_type', '3d',
+    '3d, 2d, or 2plus3d. 3d uses the default 3D spatiotemporal global average '
+    'pooling for squeeze excitation. 2d uses 2D spatial global average pooling '
+    'on each frame. 2plus3d concatenates both 3D and 2D global average '
+    'pooling.')
+flags.DEFINE_string(
+    'activation', 'swish',
+    'The main activation to use across layers.')
+flags.DEFINE_string(
+    'gating_activation', 'sigmoid',
+    'The gating activation to use in squeeze-excitation layers.')
 flags.DEFINE_bool(
    'use_positional_encoding', False,
    'Whether to use positional encoding (only applied when causal=True).')
@@ -94,6 +106,9 @@ def main(_) -> None:
      conv_type=FLAGS.conv_type,
      use_external_states=FLAGS.causal,
      input_specs=input_specs,
+      activation=FLAGS.activation,
+      gating_activation=FLAGS.gating_activation,
+      se_type=FLAGS.se_type,
      use_positional_encoding=FLAGS.use_positional_encoding)
  model = movinet_model.MovinetClassifier(
      backbone,

--- a/official/vision/beta/projects/movinet/modeling/movinet.py
+++ b/official/vision/beta/projects/movinet/modeling/movinet.py
@@ -307,8 +307,10 @@ class Movinet(tf.keras.Model):
               causal: bool = False,
               use_positional_encoding: bool = False,
               conv_type: str = '3d',
+               se_type: str = '3d',
               input_specs: Optional[tf.keras.layers.InputSpec] = None,
               activation: str = 'swish',
+               gating_activation: str = 'sigmoid',
               use_sync_bn: bool = True,
               norm_momentum: float = 0.99,
               norm_epsilon: float = 0.001,
@@ -332,8 +334,13 @@ class Movinet(tf.keras.Model):
        3x3 followed by 5x1 conv). '3d_2plus1d' uses (2+1)D convolution with
        Conv3D and no 2D reshaping (e.g., a 5x3x3 kernel becomes 1x3x3 followed
        by 5x1x1 conv).
+      se_type: '3d', '2d', or '2plus3d'. '3d' uses the default 3D
+          spatiotemporal global average pooling for squeeze excitation. '2d'
+          uses 2D spatial global average pooling on each frame. '2plus3d'
+          concatenates both 3D and 2D global average pooling.
      input_specs: the model input spec to use.
-      activation: name of the activation function.
+      activation: name of the main activation function.
+      gating_activation: gating activation to use in squeeze excitation layers.
      use_sync_bn: if True, use synchronized batch normalization.
      norm_momentum: normalization momentum for the moving average.
      norm_epsilon: small float added to variance to avoid dividing by
@@ -354,15 +361,19 @@ class Movinet(tf.keras.Model):
    if conv_type not in ('3d', '2plus1d', '3d_2plus1d'):
      raise ValueError('Unknown conv type: {}'.format(conv_type))
+    if se_type not in ('3d', '2d', '2plus3d'):
+      raise ValueError('Unknown squeeze excitation type: {}'.format(se_type))
    self._model_id = model_id
    self._block_specs = block_specs
    self._causal = causal
    self._use_positional_encoding = use_positional_encoding
    self._conv_type = conv_type
+    self._se_type = se_type
    self._input_specs = input_specs
    self._use_sync_bn = use_sync_bn
    self._activation = activation
+    self._gating_activation = gating_activation
    self._norm_momentum = norm_momentum
    self._norm_epsilon = norm_epsilon
    if use_sync_bn:
@@ -475,10 +486,12 @@ class Movinet(tf.keras.Model):
              strides=strides,
              causal=self._causal,
              activation=self._activation,
+              gating_activation=self._gating_activation,
              stochastic_depth_drop_rate=stochastic_depth_drop_rate,
              conv_type=self._conv_type,
-              use_positional_encoding=self._use_positional_encoding and
+              se_type=self._se_type,
-              self._causal,
+              use_positional_encoding=
+              self._use_positional_encoding and self._causal,
              kernel_initializer=self._kernel_initializer,
              kernel_regularizer=self._kernel_regularizer,
              batch_norm_layer=self._norm,
@@ -691,8 +704,10 @@ def build_movinet(
      causal=backbone_cfg.causal,
      use_positional_encoding=backbone_cfg.use_positional_encoding,
      conv_type=backbone_cfg.conv_type,
+      se_type=backbone_cfg.se_type,
      input_specs=input_specs,
-      activation=norm_activation_config.activation,
+      activation=backbone_cfg.activation,
+      gating_activation=backbone_cfg.gating_activation,
      use_sync_bn=norm_activation_config.use_sync_bn,
      norm_momentum=norm_activation_config.norm_momentum,
      norm_epsilon=norm_activation_config.norm_epsilon,

--- a/official/vision/beta/projects/movinet/modeling/movinet_layers.py
+++ b/official/vision/beta/projects/movinet/modeling/movinet_layers.py
--- a/official/vision/beta/projects/movinet/modeling/movinet_layers_test.py
+++ b/official/vision/beta/projects/movinet/modeling/movinet_layers_test.py
@@ -314,6 +314,43 @@ class MovinetLayersTest(parameterized.TestCase, tf.test.TestCase):
             [[4., 4., 4.]]]]],
          1e-5, 1e-5)
+  def test_stream_squeeze_excitation_2plus3d(self):
+    se = movinet_layers.StreamSqueezeExcitation(
+        3,
+        se_type='2plus3d',
+        causal=True,
+        activation='hard_swish',
+        gating_activation='hard_sigmoid',
+        kernel_initializer='ones')
+    inputs = tf.range(4, dtype=tf.float32) + 1.
+    inputs = tf.reshape(inputs, [1, 4, 1, 1, 1])
+    inputs = tf.tile(inputs, [1, 1, 2, 1, 3])
+    expected, _ = se(inputs)
+    for num_splits in [1, 2, 4]:
+      frames = tf.split(inputs, inputs.shape[1] // num_splits, axis=1)
+      states = {}
+      predicted = []
+      for frame in frames:
+        x, states = se(frame, states=states)
+        predicted.append(x)
+      predicted = tf.concat(predicted, axis=1)
+      self.assertEqual(predicted.shape, expected.shape)
+      self.assertAllClose(predicted, expected)
+      self.assertAllClose(
+          predicted,
+          [[[[[1., 1., 1.]],
+             [[1., 1., 1.]]],
+            [[[2., 2., 2.]],
+             [[2., 2., 2.]]],
+            [[[3., 3., 3.]],
+             [[3., 3., 3.]]],
+            [[[4., 4., 4.]],
+             [[4., 4., 4.]]]]])
  def test_stream_movinet_block(self):
    block = movinet_layers.MovinetBlock(
        out_filters=3,

--- a/official/vision/beta/projects/movinet/modeling/movinet_model_test.py
+++ b/official/vision/beta/projects/movinet/modeling/movinet_model_test.py
--- a/official/vision/beta/projects/movinet/tools/convert_3d_2plus1d.py
+++ b/official/vision/beta/projects/movinet/tools/convert_3d_2plus1d.py
--- a/official/vision/beta/projects/movinet/tools/convert_3d_2plus1d_test.py
+++ b/official/vision/beta/projects/movinet/tools/convert_3d_2plus1d_test.py
--- a/official/vision/beta/projects/movinet/train.py
+++ b/official/vision/beta/projects/movinet/train.py
@@ -46,6 +46,7 @@ from official.modeling import performance
 # Import movinet libraries to register the backbone and model into tf.vision
 # model garden factory.
 # pylint: disable=unused-import
+# The following imports are necessary.
 from official.vision.beta.projects.movinet.modeling import movinet
 from official.vision.beta.projects.movinet.modeling import movinet_model
 # pylint: enable=unused-import

--- a/official/vision/beta/projects/panoptic_maskrcnn/README.md
+++ b/official/vision/beta/projects/panoptic_maskrcnn/README.md
+# Panoptic Segmentation
+## Description
+Panoptic Segmentation combines two distinct vision tasks - semantic
+segmentation and instance segmentation. These tasks are unified such that each
+pixel in the image is assigned the label of the class it belongs to, and also
+the instance identifier of the object it is a part of.
+## Environment setup
+The code can be run on multiple GPUs or TPUs with different distribution
+strategies. See the TensorFlow distributed training
+[guide](https://www.tensorflow.org/guide/distributed_training) for an overview
+of `tf.distribute`.
+The code is compatible with TensorFlow 2.4+. See requirements.txt for all
+prerequisites; you can also install them using the following command:
+`pip install -r ./official/requirements.txt`
+**DISCLAIMER**: Panoptic MaskRCNN is still under active development, stay tuned!
--- a/official/vision/beta/projects/panoptic_maskrcnn/__init__.py
+++ b/official/vision/beta/projects/panoptic_maskrcnn/__init__.py
--- a/official/vision/beta/projects/panoptic_maskrcnn/configs/panoptic_maskrcnn.py
+++ b/official/vision/beta/projects/panoptic_maskrcnn/configs/panoptic_maskrcnn.py
--- a/official/vision/beta/projects/panoptic_maskrcnn/modeling/factory.py
+++ b/official/vision/beta/projects/panoptic_maskrcnn/modeling/factory.py
--- a/official/vision/beta/projects/panoptic_maskrcnn/modeling/factory_test.py
+++ b/official/vision/beta/projects/panoptic_maskrcnn/modeling/factory_test.py
--- a/official/vision/beta/projects/panoptic_maskrcnn/modeling/panoptic_maskrcnn_model.py
+++ b/official/vision/beta/projects/panoptic_maskrcnn/modeling/panoptic_maskrcnn_model.py
--- a/official/vision/beta/projects/panoptic_maskrcnn/modeling/panoptic_maskrcnn_model_test.py
+++ b/official/vision/beta/projects/panoptic_maskrcnn/modeling/panoptic_maskrcnn_model_test.py
--- a/official/vision/beta/projects/simclr/heads/simclr_head.py
+++ b/official/vision/beta/projects/simclr/heads/simclr_head.py
--- a/official/vision/beta/projects/simclr/modeling/simclr_model.py
+++ b/official/vision/beta/projects/simclr/modeling/simclr_model.py
--- a/official/vision/beta/projects/simclr/tasks/simclr.py
+++ b/official/vision/beta/projects/simclr/tasks/simclr.py
--- a/official/vision/beta/projects/vit/README.md
+++ b/official/vision/beta/projects/vit/README.md
--- a/official/vision/beta/projects/vit/configs/__init__.py
+++ b/official/vision/beta/projects/vit/configs/__init__.py