Internal change to docstring.

PiperOrigin-RevId: 362111110

Internal change to docstring.
PiperOrigin-RevId: 362111110
5b952c08 · Fan Yang · A. Unique TensorFlower · 5df0cd30 · 5b952c08 · 5b952c08
Commit 5b952c08 authored Mar 10, 2021 by Fan Yang Committed by A. Unique TensorFlower Mar 10, 2021
9 changed files
--- a/official/vision/beta/modeling/layers/box_sampler.py
+++ b/official/vision/beta/modeling/layers/box_sampler.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Box sampler."""
+"""Contains definitions of box sampler."""

 # Import libraries
 import tensorflow as tf
@@ -22,19 +22,19 @@ from official.vision.beta.ops import sampling_ops

 @tf.keras.utils.register_keras_serializable(package='Vision')
 class BoxSampler(tf.keras.layers.Layer):
-  """Sample positive and negative boxes."""
+  """Creates a BoxSampler to sample positive and negative boxes."""

  def __init__(self,
               num_samples=512,
               foreground_fraction=0.25,
               **kwargs):
-    """Initializes a ROI sampler.
+    """Initializes a box sampler.

    Args:
-      num_samples: int, the number of sampled boxes per image.
-      foreground_fraction: float in [0, 1], what percentage of boxes should be
-        sampled from the positive examples.
-      **kwargs: other key word arguments passed to Layer.
+      num_samples: An `int` of the number of sampled boxes per image.
+      foreground_fraction: A `float` in [0, 1], what percentage of boxes should
+        be sampled from the positive examples.
+      **kwargs: Additional keyword arguments passed to Layer.
    """
    self._config_dict = {
        'num_samples': num_samples,
@@ -43,22 +43,22 @@ class BoxSampler(tf.keras.layers.Layer):
    super(BoxSampler, self).__init__(**kwargs)

  def call(self, positive_matches, negative_matches, ignored_matches):
-    """Sample and select positive and negative instances.
+    """Samples and selects positive and negative instances.

    Args:
-      positive_matches: a `bool` tensor of shape of [batch, N] where N is the
+      positive_matches: A `bool` tensor of shape of [batch, N] where N is the
        number of instances. For each element, `True` means the instance
        corresponds to a positive example.
-      negative_matches: a `bool` tensor of shape of [batch, N] where N is the
+      negative_matches: A `bool` tensor of shape of [batch, N] where N is the
        number of instances. For each element, `True` means the instance
        corresponds to a negative example.
-      ignored_matches: a `bool` tensor of shape of [batch, N] where N is the
-        number of instances. For each element, `True` means the instance
-        should be ignored.
+      ignored_matches: A `bool` tensor of shape of [batch, N] where N is the
+        number of instances. For each element, `True` means the instance should
+        be ignored.

    Returns:
-      selected_indices: a tensor of shape of [batch_size, K], storing the
-        indices of the sampled examples, where K is `num_samples`.
+      A `tf.Tensor` of shape of [batch_size, K], storing the indices of the
+        sampled examples, where K is `num_samples`.
    """
    sample_candidates = tf.logical_and(
        tf.logical_or(positive_matches, negative_matches),

--- a/official/vision/beta/modeling/layers/detection_generator.py
+++ b/official/vision/beta/modeling/layers/detection_generator.py
--- a/official/vision/beta/modeling/layers/mask_sampler.py
+++ b/official/vision/beta/modeling/layers/mask_sampler.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Mask sampler."""
+"""Contains definitions of mask sampler."""

 # Import libraries
 import tensorflow as tf
@@ -30,34 +30,34 @@ def _sample_and_crop_foreground_masks(candidate_rois,
  """Samples and creates cropped foreground masks for training.

  Args:
-    candidate_rois: a tensor of shape of [batch_size, N, 4], where N is the
+    candidate_rois: A `tf.Tensor` of shape of [batch_size, N, 4], where N is the
      number of candidate RoIs to be considered for mask sampling. It includes
      both positive and negative RoIs. The `num_mask_samples_per_image` positive
      RoIs will be sampled to create mask training targets.
-    candidate_gt_boxes: a tensor of shape of [batch_size, N, 4], storing the
-      corresponding groundtruth boxes to the `candidate_rois`.
-    candidate_gt_classes: a tensor of shape of [batch_size, N], storing the
+    candidate_gt_boxes: A `tf.Tensor` of shape of [batch_size, N, 4], storing
+      the corresponding groundtruth boxes to the `candidate_rois`.
+    candidate_gt_classes: A `tf.Tensor` of shape of [batch_size, N], storing the
      corresponding groundtruth classes to the `candidate_rois`. 0 in the tensor
      corresponds to the background class, i.e. negative RoIs.
-    candidate_gt_indices: a tensor of shape [batch_size, N], storing the
+    candidate_gt_indices: A `tf.Tensor` of shape [batch_size, N], storing the
      corresponding groundtruth instance indices to the `candidate_gt_boxes`,
      i.e. gt_boxes[candidate_gt_indices[:, i]] = candidate_gt_boxes[:, i] and
-      gt_boxes which is of shape [batch_size, MAX_INSTANCES, 4], M >= N, is the
-      superset of candidate_gt_boxes.
-    gt_masks: a tensor of [batch_size, MAX_INSTANCES, mask_height, mask_width]
-      containing all the groundtruth masks which sample masks are drawn from.
-    num_sampled_masks: an integer which specifies the number of masks
-      to sample.
-    mask_target_size: an integer which specifies the final cropped mask size
-      after sampling. The output masks are resized w.r.t the sampled RoIs.
+      gt_boxes, which is of shape [batch_size, MAX_INSTANCES, 4] with
+      MAX_INSTANCES >= N, is the superset of candidate_gt_boxes.
+    gt_masks: A `tf.Tensor` of [batch_size, MAX_INSTANCES, mask_height,
+      mask_width] containing all the groundtruth masks which sample masks are
+      drawn from.
+    num_sampled_masks: An `int` that specifies the number of masks to sample.
+    mask_target_size: An `int` that specifies the final cropped mask size after
+      sampling. The output masks are resized w.r.t the sampled RoIs.

  Returns:
-    foreground_rois: a tensor of shape of [batch_size, K, 4] storing the RoI
-      that corresponds to the sampled foreground masks, where
+    foreground_rois: A `tf.Tensor` of shape of [batch_size, K, 4] storing the
+      RoI that corresponds to the sampled foreground masks, where
      K = num_mask_samples_per_image.
-    foreground_classes: a tensor of shape of [batch_size, K] storing the classes
-      corresponding to the sampled foreground masks.
-    cropoped_foreground_masks: a tensor of shape of
+    foreground_classes: A `tf.Tensor` of shape of [batch_size, K] storing the
+      classes corresponding to the sampled foreground masks.
+    cropped_foreground_masks: A `tf.Tensor` of shape of
      [batch_size, K, mask_target_size, mask_target_size] storing the cropped
      foreground masks used for training.
  """
@@ -120,34 +120,36 @@ class MaskSampler(tf.keras.layers.Layer):
           candidate_gt_classes,
           candidate_gt_indices,
           gt_masks):
-    """Sample and create mask targets for training.
+    """Samples and creates mask targets for training.

    Args:
-      candidate_rois: a tensor of shape of [batch_size, N, 4], where N is the
-        number of candidate RoIs to be considered for mask sampling. It includes
-        both positive and negative RoIs. The `num_mask_samples_per_image`
-        positive RoIs will be sampled to create mask training targets.
-      candidate_gt_boxes: a tensor of shape of [batch_size, N, 4], storing the
-        corresponding groundtruth boxes to the `candidate_rois`.
-      candidate_gt_classes: a tensor of shape of [batch_size, N], storing the
-        corresponding groundtruth classes to the `candidate_rois`. 0 in the
+      candidate_rois: A `tf.Tensor` of shape of [batch_size, N, 4], where N is
+        the number of candidate RoIs to be considered for mask sampling. It
+        includes both positive and negative RoIs. The
+        `num_mask_samples_per_image` positive RoIs will be sampled to create
+        mask training targets.
+      candidate_gt_boxes: A `tf.Tensor` of shape of [batch_size, N, 4], storing
+        the corresponding groundtruth boxes to the `candidate_rois`.
+      candidate_gt_classes: A `tf.Tensor` of shape of [batch_size, N], storing
+        the corresponding groundtruth classes to the `candidate_rois`. 0 in the
        tensor corresponds to the background class, i.e. negative RoIs.
-      candidate_gt_indices: a tensor of shape [batch_size, N], storing the
+      candidate_gt_indices: A `tf.Tensor` of shape [batch_size, N], storing the
        corresponding groundtruth instance indices to the `candidate_gt_boxes`,
        i.e. gt_boxes[candidate_gt_indices[:, i]] = candidate_gt_boxes[:, i],
-        where gt_boxes which is of shape [batch_size, MAX_INSTANCES, 4], M >= N,
-        is the superset of candidate_gt_boxes.
-      gt_masks: a tensor of [batch_size, MAX_INSTANCES, mask_height, mask_width]
-        containing all the groundtruth masks which sample masks are drawn from.
-        after sampling. The output masks are resized w.r.t the sampled RoIs.
+        where gt_boxes, which is of shape [batch_size, MAX_INSTANCES, 4] with
+        MAX_INSTANCES >= N, is the superset of candidate_gt_boxes.
+      gt_masks: A `tf.Tensor` of [batch_size, MAX_INSTANCES, mask_height,
+        mask_width] containing all the groundtruth masks which sample masks are
+        drawn from. After sampling, the output masks are resized w.r.t. the
+        sampled RoIs.

    Returns:
-      foreground_rois: a tensor of shape of [batch_size, K, 4] storing the RoI
-        that corresponds to the sampled foreground masks, where
+      foreground_rois: A `tf.Tensor` of shape of [batch_size, K, 4] storing the
+        RoI that corresponds to the sampled foreground masks, where
        K = num_mask_samples_per_image.
-      foreground_classes: a tensor of shape of [batch_size, K] storing the
+      foreground_classes: A `tf.Tensor` of shape of [batch_size, K] storing the
        classes corresponding to the sampled foreground masks.
-      cropoped_foreground_masks: a tensor of shape of
+      cropped_foreground_masks: A `tf.Tensor` of shape of
        [batch_size, K, mask_target_size, mask_target_size] storing the
        cropped foreground masks used for training.
    """

--- a/official/vision/beta/modeling/layers/nn_blocks.py
+++ b/official/vision/beta/modeling/layers/nn_blocks.py
--- a/official/vision/beta/modeling/layers/nn_blocks_3d.py
+++ b/official/vision/beta/modeling/layers/nn_blocks_3d.py
@@ -21,14 +21,21 @@ from official.modeling import tf_utils

 @tf.keras.utils.register_keras_serializable(package='Vision')
 class SelfGating(tf.keras.layers.Layer):
-  """Feature gating as used in S3D-G (https://arxiv.org/pdf/1712.04851.pdf)."""
+  """Feature gating as used in S3D-G.
+
+  This implements the S3D-G network from:
+  Saining Xie, Chen Sun, Jonathan Huang, Zhuowen Tu, Kevin Murphy.
+  Rethinking Spatiotemporal Feature Learning: Speed-Accuracy Trade-offs in Video
+  Classification.
+  (https://arxiv.org/pdf/1712.04851.pdf)
+  """

  def __init__(self, filters, **kwargs):
-    """Constructor.
+    """Initializes a self-gating layer.

    Args:
-      filters: `int` number of filters for the convolutional layer.
-      **kwargs: keyword arguments to be passed.
+      filters: An `int` number of filters for the convolutional layer.
+      **kwargs: Additional keyword arguments to be passed.
    """
    super(SelfGating, self).__init__(**kwargs)
    self._filters = filters
@@ -61,7 +68,7 @@ class SelfGating(tf.keras.layers.Layer):

 @tf.keras.utils.register_keras_serializable(package='Vision')
 class BottleneckBlock3D(tf.keras.layers.Layer):
-  """A 3D bottleneck block."""
+  """Creates a 3D bottleneck block."""

  def __init__(self,
               filters,
@@ -77,28 +84,29 @@ class BottleneckBlock3D(tf.keras.layers.Layer):
               norm_momentum=0.99,
               norm_epsilon=0.001,
               **kwargs):
-    """A 3D bottleneck block with BN after convolutions.
+    """Initializes a 3D bottleneck block with BN after convolutions.

    Args:
-      filters: `int` number of filters for the first two convolutions. Note that
-        the third and final convolution will use 4 times as many filters.
-      temporal_kernel_size: `int` kernel size for the temporal convolutional
-        layer.
-      temporal_strides: `int` temporal stride for the temporal convolutional
+      filters: An `int` number of filters for the first two convolutions. Note
+        that the third and final convolution will use 4 times as many filters.
+      temporal_kernel_size: An `int` of kernel size for the temporal
+        convolutional layer.
+      temporal_strides: An `int` of temporal stride for the temporal
+        convolutional layer.
+      spatial_strides: An `int` of spatial stride for the spatial convolutional
        layer.
-      spatial_strides: `int` spatial stride for the spatial convolutional layer.
-      use_self_gating: `bool` apply self-gating module or not.
-      kernel_initializer: kernel_initializer for convolutional layers.
-      kernel_regularizer: tf.keras.regularizers.Regularizer object for Conv2D.
-        Default to None.
-      bias_regularizer: tf.keras.regularizers.Regularizer object for Conv2d.
+      use_self_gating: A `bool` of whether to apply self-gating module or not.
+      kernel_initializer: A `str` of kernel_initializer for convolutional
+        layers.
+      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
+        Conv2D. Default to None.
+      bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2d.
        Default to None.
-      activation: `str` name of the activation function.
-      use_sync_bn: if True, use synchronized batch normalization.
-      norm_momentum: `float` normalization omentum for the moving average.
-      norm_epsilon: `float` small float added to variance to avoid dividing by
-        zero.
-      **kwargs: keyword arguments to be passed.
+      activation: A `str` name of the activation function.
+      use_sync_bn: A `bool`. If True, use synchronized batch normalization.
+      norm_momentum: A `float` of normalization momentum for the moving average.
+      norm_epsilon: A `float` added to variance to avoid dividing by zero.
+      **kwargs: Additional keyword arguments to be passed.
    """
    super(BottleneckBlock3D, self).__init__(**kwargs)


--- a/official/vision/beta/modeling/layers/nn_layers.py
+++ b/official/vision/beta/modeling/layers/nn_layers.py
@@ -22,7 +22,7 @@ import tensorflow as tf
 from official.modeling import tf_utils


-# Type annotations
+# Type annotations.
 States = Dict[str, tf.Tensor]
 Activation = Union[str, Callable]

@@ -34,12 +34,12 @@ def make_divisible(value: float,
  """This is to ensure that all layers have channels that are divisible by 8.

  Args:
-    value: `float` original value.
-    divisor: `int` the divisor that need to be checked upon.
-    min_value: `float` minimum value threshold.
+    value: A `float` of original value.
+    divisor: An `int` of the divisor that needs to be checked upon.
+    min_value: A `float` of minimum value threshold.

  Returns:
-    The adjusted value in `int` that divisible against divisor.
+    The adjusted value in `int` that is divisible by divisor.
  """
  if min_value is None:
    min_value = divisor
@@ -55,7 +55,7 @@ def round_filters(filters: int,
                  divisor: int = 8,
                  min_depth: Optional[int] = None,
                  skip: bool = False):
-  """Round number of filters based on width multiplier."""
+  """Rounds number of filters based on width multiplier."""
  orig_f = filters
  if skip or not multiplier:
    return filters
@@ -70,7 +70,7 @@ def round_filters(filters: int,

 @tf.keras.utils.register_keras_serializable(package='Vision')
 class SqueezeExcitation(tf.keras.layers.Layer):
-  """Squeeze and excitation layer."""
+  """Creates a squeeze and excitation layer."""

  def __init__(self,
               in_filters,
@@ -84,25 +84,26 @@ class SqueezeExcitation(tf.keras.layers.Layer):
               activation='relu',
               gating_activation='sigmoid',
               **kwargs):
-    """Implementation for squeeze and excitation.
+    """Initializes a squeeze and excitation layer.

    Args:
-      in_filters: `int` number of filters of the input tensor.
-      out_filters: `int` number of filters of the output tensor.
-      se_ratio: `float` or None. If not None, se ratio for the squeeze and
+      in_filters: An `int` number of filters of the input tensor.
+      out_filters: An `int` number of filters of the output tensor.
+      se_ratio: A `float` or None. If not None, se ratio for the squeeze and
        excitation layer.
-      divisible_by: `int` ensures all inner dimensions are divisible by this
-        number.
-      use_3d_input: `bool` 2D image or 3D input type.
-      kernel_initializer: kernel_initializer for convolutional layers.
-      kernel_regularizer: tf.keras.regularizers.Regularizer object for Conv2D.
+      divisible_by: An `int` that ensures all inner dimensions are divisible by
+        this number.
+      use_3d_input: A `bool` of whether input is 2D or 3D image.
+      kernel_initializer: A `str` of kernel_initializer for convolutional
+        layers.
+      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
+        Conv2D. Default to None.
+      bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2d.
        Default to None.
-      bias_regularizer: tf.keras.regularizers.Regularizer object for Conv2d.
-        Default to None.
-      activation: `str` name of the activation function.
-      gating_activation: `str` name of the activation function for final gating
-        function.
-      **kwargs: keyword arguments to be passed.
+      activation: A `str` name of the activation function.
+      gating_activation: A `str` name of the activation function for final
+        gating function.
+      **kwargs: Additional keyword arguments to be passed.
    """
    super(SqueezeExcitation, self).__init__(**kwargs)

@@ -183,9 +184,9 @@ def get_stochastic_depth_rate(init_rate, i, n):
  """Get drop connect rate for the ith block.

  Args:
-    init_rate: `float` initial drop rate.
-    i: `int` order of the current block.
-    n: `int` total number of blocks.
+    init_rate: A `float` of initial drop rate.
+    i: An `int` of order of the current block.
+    n: An `int` of total number of blocks.

  Returns:
    Drop rate of the ith block.
@@ -201,17 +202,17 @@ def get_stochastic_depth_rate(init_rate, i, n):

 @tf.keras.utils.register_keras_serializable(package='Vision')
 class StochasticDepth(tf.keras.layers.Layer):
-  """Stochastic depth layer."""
+  """Creates a stochastic depth layer."""

  def __init__(self, stochastic_depth_drop_rate, **kwargs):
-    """Initialize stochastic depth.
+    """Initializes a stochastic depth layer.

    Args:
-      stochastic_depth_drop_rate: `float` drop rate.
-      **kwargs: keyword arguments to be passed.
+      stochastic_depth_drop_rate: A `float` of drop rate.
+      **kwargs: Additional keyword arguments to be passed.

    Returns:
-      A output tensor, which should have the same shape as input.
+      An output `tf.Tensor`, which should have the same shape as input.
    """
    super(StochasticDepth, self).__init__(**kwargs)
    self._drop_rate = stochastic_depth_drop_rate
@@ -239,15 +240,15 @@ class StochasticDepth(tf.keras.layers.Layer):

 @tf.keras.utils.register_keras_serializable(package='Vision')
 def pyramid_feature_fusion(inputs, target_level):
-  """Fuse all feature maps in the feature pyramid at the target level.
+  """Fuses all feature maps in the feature pyramid at the target level.

  Args:
-    inputs: a dictionary containing the feature pyramid. The size of the input
+    inputs: A dictionary containing the feature pyramid. The size of the input
      tensor needs to be fixed.
-    target_level: `int` the target feature level for feature fusion.
+    target_level: An `int` of the target feature level for feature fusion.

  Returns:
-    A float Tensor of shape [batch_size, feature_height, feature_width,
+    A `float` `tf.Tensor` of shape [batch_size, feature_height, feature_width,
      feature_channel].
  """
  # Convert keys to int.
@@ -279,8 +280,13 @@ def pyramid_feature_fusion(inputs, target_level):
 class Scale(tf.keras.layers.Layer):
  """Scales the input by a trainable scalar weight.

-  Useful for applying ReZero to layers, which improves convergence speed.
-  Reference: https://arxiv.org/pdf/2003.04887.pdf
+  This is useful for applying ReZero to layers, which improves convergence
+  speed. This implements the paper:
+
+  Thomas Bachlechner, Bodhisattwa Prasad Majumder, Huanru Henry Mao,
+  Garrison W. Cottrell, Julian McAuley.
+  ReZero is All You Need: Fast Convergence at Large Depth.
+  (https://arxiv.org/pdf/2003.04887.pdf).
  """

  def __init__(
@@ -288,15 +294,15 @@ class Scale(tf.keras.layers.Layer):
      initializer: tf.keras.initializers.Initializer = 'ones',
      regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      **kwargs):
-    """Initializes scale layer.
+    """Initializes a scale layer.

    Args:
-      initializer: initializer for the scalar weight.
-      regularizer: regularizer for the scalar weight.
-      **kwargs: keyword arguments to be passed to this layer.
+      initializer: A `str` of initializer for the scalar weight.
+      regularizer: A `tf.keras.regularizers.Regularizer` for the scalar weight.
+      **kwargs: Additional keyword arguments to be passed to this layer.

    Returns:
-      A output tensor, which should have the same shape as input.
+      A `tf.Tensor`, which should have the same shape as input.
    """
    super(Scale, self).__init__(**kwargs)

@@ -328,11 +334,15 @@ class Scale(tf.keras.layers.Layer):

 @tf.keras.utils.register_keras_serializable(package='Vision')
 class TemporalSoftmaxPool(tf.keras.layers.Layer):
-  """Network layer corresponding to temporal softmax pooling.
+  """Creates a network layer corresponding to temporal softmax pooling.

-  This is useful for multi-class logits (used in e.g., Charades).
-  Modified from AssembleNet Charades evaluation.
-  Reference: https://arxiv.org/pdf/1905.13209.pdf.
+  This is useful for multi-class logits (used in e.g., Charades). Modified from
+  AssembleNet Charades evaluation from:
+
+  Michael S. Ryoo, AJ Piergiovanni, Mingxing Tan, Anelia Angelova.
+  AssembleNet: Searching for Multi-Stream Neural Connectivity in Video
+  Architectures.
+  (https://arxiv.org/pdf/1905.13209.pdf).
  """

  def call(self, inputs):
@@ -347,13 +357,16 @@ class TemporalSoftmaxPool(tf.keras.layers.Layer):

 @tf.keras.utils.register_keras_serializable(package='Vision')
 class PositionalEncoding(tf.keras.layers.Layer):
-  """Network layer that adds a sinusoidal positional encoding.
+  """Creates a network layer that adds a sinusoidal positional encoding.

  Positional encoding is incremented across frames, and is added to the input.
  The positional encoding is first weighted at 0 so that the network can choose
-  to ignore it.
+  to ignore it. This implements:

-  Reference: https://arxiv.org/pdf/1706.03762.pdf
+  Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones,
+  Aidan N. Gomez, Lukasz Kaiser, Illia Polosukhin.
+  Attention Is All You Need.
+  (https://arxiv.org/pdf/1706.03762.pdf).
  """

  def __init__(self,
@@ -363,15 +376,15 @@ class PositionalEncoding(tf.keras.layers.Layer):
    """Initializes positional encoding.

    Args:
-      initializer: initializer for weighting the positional encoding.
-      cache_encoding: if True, cache the positional encoding tensor after
-          calling build. Otherwise, rebuild the tensor for every call. Setting
-          this to False can be useful when we want to input a variable number of
-          frames, so the positional encoding tensor can change shape.
-      **kwargs: keyword arguments to be passed to this layer.
+      initializer: A `str` of initializer for weighting the positional encoding.
+      cache_encoding: A `bool`. If True, cache the positional encoding tensor
+        after calling build. Otherwise, rebuild the tensor for every call.
+        Setting this to False can be useful when we want to input a variable
+        number of frames, so the positional encoding tensor can change shape.
+      **kwargs: Additional keyword arguments to be passed to this layer.

    Returns:
-      An output tensor, which should have the same shape as input.
+      A `tf.Tensor`, which should have the same shape as input.
    """
    super(PositionalEncoding, self).__init__(**kwargs)
    self._initializer = initializer
@@ -395,9 +408,9 @@ class PositionalEncoding(tf.keras.layers.Layer):
    """Creates a sequence of sinusoidal positional encoding vectors.

    Args:
-      num_positions: the number of positions (frames).
-      hidden_size: the number of channels used for the hidden vectors.
-      dtype: the dtype of the output tensor.
+      num_positions: An `int` of number of positions (frames).
+      hidden_size: An `int` of number of channels used for the hidden vectors.
+      dtype: The dtype of the output tensor.

    Returns:
      The positional encoding tensor with shape [num_positions, hidden_size].
@@ -430,10 +443,10 @@ class PositionalEncoding(tf.keras.layers.Layer):
    """Builds the layer with the given input shape.

    Args:
-      input_shape: the input shape.
+      input_shape: The input shape.

    Raises:
-      ValueError: if using 'channels_first' data format.
+      ValueError: If using 'channels_first' data format.
    """
    if tf.keras.backend.image_data_format() == 'channels_first':
      raise ValueError('"channels_first" mode is unsupported.')
@@ -457,7 +470,7 @@ class PositionalEncoding(tf.keras.layers.Layer):

 @tf.keras.utils.register_keras_serializable(package='Vision')
 class GlobalAveragePool3D(tf.keras.layers.Layer):
-  """Global average pooling layer with causal mode.
+  """Creates a global average pooling layer with causal mode.

  Implements causal mode, which runs a cumulative sum (with `tf.cumsum`) across
  frames in the time dimension, allowing the use of a stream buffer. Sums any
@@ -469,15 +482,16 @@ class GlobalAveragePool3D(tf.keras.layers.Layer):
               keepdims: bool = False,
               causal: bool = False,
               **kwargs):
-    """Initializes global average pool.
+    """Initializes a global average pool layer.

    Args:
-      keepdims: if True, keep the averaged dimensions.
-      causal: run in causal mode with a cumulative sum across frames.
-      **kwargs: keyword arguments to be passed to this layer.
+      keepdims: A `bool`. If True, keep the averaged dimensions.
+      causal: A `bool` of whether to run in causal mode with a cumulative sum
+        across frames.
+      **kwargs: Additional keyword arguments to be passed to this layer.

    Returns:
-      An output tensor.
+      An output `tf.Tensor`.
    """
    super(GlobalAveragePool3D, self).__init__(**kwargs)

@@ -514,14 +528,14 @@ class GlobalAveragePool3D(tf.keras.layers.Layer):
    """Calls the layer with the given inputs.

    Args:
-      inputs: the input tensor.
-      states: a dict of states such that, if any of the keys match for this
-          layer, will overwrite the contents of the buffer(s).
-      output_states: if True, returns the output tensor and output states.
-          Returns just the output tensor otherwise.
+      inputs: An input `tf.Tensor`.
+      states: A `dict` of states such that, if any of the keys match for this
+        layer, will overwrite the contents of the buffer(s).
+      output_states: A `bool`. If True, returns the output tensor and output
+        states. Returns just the output tensor otherwise.

    Returns:
-      the output tensor (and optionally the states if `output_states=True`).
+      An output `tf.Tensor` (and optionally the states if `output_states=True`).
      If `causal=True`, the output tensor will have shape
      `[batch_size, num_frames, 1, 1, channels]` if `keepdims=True`. We keep
      the frame dimension in this case to simulate a cumulative global average
@@ -531,7 +545,7 @@ class GlobalAveragePool3D(tf.keras.layers.Layer):
      buffer stored in `states`).

    Raises:
-      ValueError: if using 'channels_first' data format.
+      ValueError: If using 'channels_first' data format.
    """
    states = dict(states) if states is not None else {}

@@ -592,18 +606,17 @@ class GlobalAveragePool3D(tf.keras.layers.Layer):

 @tf.keras.utils.register_keras_serializable(package='Vision')
 class SpatialAveragePool3D(tf.keras.layers.Layer):
-  """Global average pooling layer pooling across spatial dimentions.
-  """
+  """Creates a global average pooling layer pooling across spatial dimensions."""

  def __init__(self, keepdims: bool = False, **kwargs):
-    """Initializes global average pool.
+    """Initializes a global average pool layer.

    Args:
-      keepdims: if True, keep the averaged dimensions.
-      **kwargs: keyword arguments to be passed to this layer.
+      keepdims: A `bool`. If True, keep the averaged dimensions.
+      **kwargs: Additional keyword arguments to be passed to this layer.

    Returns:
-      An output tensor.
+      An output `tf.Tensor`.
    """
    super(SpatialAveragePool3D, self).__init__(**kwargs)
    self._keepdims = keepdims
@@ -650,10 +663,10 @@ class CausalConvMixin:
    """Calculates padding for 'causal' option for conv layers.

    Args:
-      inputs: optional input tensor to be padded.
-      use_buffered_input: if True, use 'valid' padding along the time dimension.
-          This should be set when applying the stream buffer.
-      time_axis: the axis of the time dimension
+      inputs: An optional input `tf.Tensor` to be padded.
+      use_buffered_input: A `bool`. If True, use 'valid' padding along the time
+        dimension. This should be set when applying the stream buffer.
+      time_axis: An `int` of the axis of the time dimension.

    Returns:
      A list of paddings for `tf.pad`.
@@ -719,14 +732,14 @@ class Conv2D(tf.keras.layers.Conv2D, CausalConvMixin):
    """Initializes conv2d.

    Args:
-      *args: arguments to be passed.
-      use_buffered_input: if True, the input is expected to be padded
-          beforehand. In effect, calling this layer will use 'valid' padding on
-          the temporal dimension to simulate 'causal' padding.
-      **kwargs: keyword arguments to be passed.
+      *args: Arguments to be passed.
+      use_buffered_input: A `bool`. If True, the input is expected to be padded
+        beforehand. In effect, calling this layer will use 'valid' padding on
+        the temporal dimension to simulate 'causal' padding.
+      **kwargs: Additional keyword arguments to be passed.

    Returns:
-      A output tensor of the Conv2D operation.
+      An output `tf.Tensor` of the Conv2D operation.
    """
    super(Conv2D, self).__init__(*args, **kwargs)
    self._use_buffered_input = use_buffered_input
@@ -767,14 +780,14 @@ class DepthwiseConv2D(tf.keras.layers.DepthwiseConv2D, CausalConvMixin):
    """Initializes depthwise conv2d.

    Args:
-      *args: arguments to be passed.
-      use_buffered_input: if True, the input is expected to be padded
-          beforehand. In effect, calling this layer will use 'valid' padding on
-          the temporal dimension to simulate 'causal' padding.
-      **kwargs: keyword arguments to be passed.
+      *args: Arguments to be passed.
+      use_buffered_input: A `bool`. If True, the input is expected to be padded
+        beforehand. In effect, calling this layer will use 'valid' padding on
+        the temporal dimension to simulate 'causal' padding.
+      **kwargs: Additional keyword arguments to be passed.

    Returns:
-      A output tensor of the DepthwiseConv2D operation.
+      An output `tf.Tensor` of the DepthwiseConv2D operation.
    """
    super(DepthwiseConv2D, self).__init__(*args, **kwargs)
    self._use_buffered_input = use_buffered_input
@@ -829,14 +842,14 @@ class Conv3D(tf.keras.layers.Conv3D, CausalConvMixin):
    """Initializes conv3d.

    Args:
-      *args: arguments to be passed.
-      use_buffered_input: if True, the input is expected to be padded
-          beforehand. In effect, calling this layer will use 'valid' padding on
-          the temporal dimension to simulate 'causal' padding.
-      **kwargs: keyword arguments to be passed.
+      *args: Arguments to be passed.
+      use_buffered_input: A `bool`. If True, the input is expected to be padded
+        beforehand. In effect, calling this layer will use 'valid' padding on
+        the temporal dimension to simulate 'causal' padding.
+      **kwargs: Additional keyword arguments to be passed.

    Returns:
-      A output tensor of the Conv3D operation.
+      An output `tf.Tensor` of the Conv3D operation.
    """
    super(Conv3D, self).__init__(*args, **kwargs)
    self._use_buffered_input = use_buffered_input

--- a/official/vision/beta/modeling/layers/roi_aligner.py
+++ b/official/vision/beta/modeling/layers/roi_aligner.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""ROI align."""
+"""Contains definitions of ROI aligner."""

 import tensorflow as tf

@@ -30,9 +30,9 @@ class MultilevelROIAligner(tf.keras.layers.Layer):
    """Initializes a ROI aligner.

    Args:
-      crop_size: int, the output size of the cropped features.
-      sample_offset: float in [0, 1], the subpixel sample offset.
-      **kwargs: other key word arguments passed to Layer.
+      crop_size: An `int` of the output size of the cropped features.
+      sample_offset: A `float` in [0, 1] of the subpixel sample offset.
+      **kwargs: Additional keyword arguments passed to Layer.
    """
    self._config_dict = {
        'crop_size': crop_size,
@@ -47,13 +47,13 @@ class MultilevelROIAligner(tf.keras.layers.Layer):
      features: A dictionary with key as pyramid level and value as features.
        The features are in shape of
        [batch_size, height_l, width_l, num_filters].
-      boxes: A 3-D Tensor of shape [batch_size, num_boxes, 4]. Each row
+      boxes: A 3-D `tf.Tensor` of shape [batch_size, num_boxes, 4]. Each row
        represents a box with [y1, x1, y2, x2] in un-normalized coordinates.
        from grid point.
-      training: bool, whether it is in training mode.
+      training: A `bool` of whether it is in training mode.

    Returns:
-      roi_features: A 5-D tensor representing feature crop of shape
+      A 5-D `tf.Tensor` representing feature crop of shape
      [batch_size, num_boxes, crop_size, crop_size, num_filters].
    """
    roi_features = spatial_transform_ops.multilevel_crop_and_resize(

--- a/official/vision/beta/modeling/layers/roi_generator.py
+++ b/official/vision/beta/modeling/layers/roi_generator.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""ROI generator."""
+"""Contains definitions of ROI generator."""

 # Import libraries
 import tensorflow as tf
@@ -48,46 +48,48 @@ def _multilevel_propose_rois(raw_boxes,
    3. Apply an overall top k to generate the final selected RoIs.

  Args:
-    raw_boxes: a dict with keys representing FPN levels and values representing
-      box tenors of shape [batch_size, feature_h, feature_w, num_anchors * 4].
-    raw_scores: a dict with keys representing FPN levels and values representing
-      logit tensors of shape [batch_size, feature_h, feature_w, num_anchors].
-    anchor_boxes: a dict with keys representing FPN levels and values
+    raw_boxes: A `dict` with keys representing FPN levels and values
+      representing box tensors of shape
+      [batch_size, feature_h, feature_w, num_anchors * 4].
+    raw_scores: A `dict` with keys representing FPN levels and values
+      representing logit tensors of shape
+      [batch_size, feature_h, feature_w, num_anchors].
+    anchor_boxes: A `dict` with keys representing FPN levels and values
      representing anchor box tensors of shape
      [batch_size, feature_h * feature_w * num_anchors, 4].
-    image_shape: a tensor of shape [batch_size, 2] where the last dimension are
-      [height, width] of the scaled image.
-    pre_nms_top_k: an integer of top scoring RPN proposals *per level* to
-      keep before applying NMS. Default: 2000.
-    pre_nms_score_threshold: a float between 0 and 1 representing the minimal
-      box  score to keep before applying NMS. This is often used as a
+    image_shape: A `tf.Tensor` of shape [batch_size, 2] where the last dimension
+      is [height, width] of the scaled image.
+    pre_nms_top_k: An `int` of top scoring RPN proposals *per level* to keep
+      before applying NMS. Default: 2000.
+    pre_nms_score_threshold: A `float` between 0 and 1 representing the minimal
+      box score to keep before applying NMS. This is often used as a
      pre-filtering step for better performance. Default: 0, no filtering is
      applied.
-    pre_nms_min_size_threshold: a float representing the minimal box size in
+    pre_nms_min_size_threshold: A `float` representing the minimal box size in
      each side (w.r.t. the scaled image) to keep before applying NMS. This is
      often used as a pre-filtering step for better performance. Default: 0, no
      filtering is applied.
-    nms_iou_threshold: a float between 0 and 1 representing the IoU threshold
+    nms_iou_threshold: A `float` between 0 and 1 representing the IoU threshold
      used for NMS. If 0.0, no NMS is applied. Default: 0.7.
-    num_proposals: an integer of top scoring RPN proposals *in total* to
-      keep after applying NMS. Default: 1000.
-    use_batched_nms: a boolean indicating whether NMS is applied in batch using
+    num_proposals: An `int` of top scoring RPN proposals *in total* to keep
+      after applying NMS. Default: 1000.
+    use_batched_nms: A `bool` indicating whether NMS is applied in batch using
      `tf.image.combined_non_max_suppression`. Currently only available in
-      CPU/GPU. Default: False.
-    decode_boxes: a boolean indicating whether `raw_boxes` needs to be decoded
+      CPU/GPU. Default is False.
+    decode_boxes: A `bool` indicating whether `raw_boxes` needs to be decoded
      using `anchor_boxes`. If False, use `raw_boxes` directly and ignore
-      `anchor_boxes`. Default: True.
-    clip_boxes: a boolean indicating whether boxes are first clipped to the
+      `anchor_boxes`. Default is True.
+    clip_boxes: A `bool` indicating whether boxes are first clipped to the
      scaled image size before appliying NMS. If False, no clipping is applied
-      and `image_shape` is ignored. Default: True.
-    apply_sigmoid_to_score: a boolean indicating whether apply sigmoid to
-      `raw_scores` before applying NMS. Default: True.
+      and `image_shape` is ignored. Default is True.
+    apply_sigmoid_to_score: A `bool` indicating whether to apply sigmoid to
+      `raw_scores` before applying NMS. Default is True.

  Returns:
-    selected_rois: a tensor of shape [batch_size, num_proposals, 4],
+    selected_rois: A `tf.Tensor` of shape [batch_size, num_proposals, 4],
      representing the box coordinates of the selected proposals w.r.t. the
      scaled image.
-    selected_roi_scores: a tensor of shape [batch_size, num_proposals, 1],
+    selected_roi_scores: A `tf.Tensor` of shape [batch_size, num_proposals, 1],
      representing the scores of the selected proposals.
  """
  with tf.name_scope('multilevel_propose_rois'):
@@ -196,30 +198,31 @@ class MultilevelROIGenerator(tf.keras.layers.Layer):
    The ROI generator transforms the raw predictions from RPN to ROIs.

    Args:
-      pre_nms_top_k: int, the number of top scores proposals to be kept before
-        applying NMS.
-      pre_nms_score_threshold: float, the score threshold to apply before
+      pre_nms_top_k: An `int` of the number of top scores proposals to be kept
+        before applying NMS.
+      pre_nms_score_threshold: A `float` of the score threshold to apply before
        applying NMS. Proposals whose scores are below this threshold are
        thrown away.
-      pre_nms_min_size_threshold: float, the threshold of each side of the box
-        (w.r.t. the scaled image). Proposals whose sides are below this
+      pre_nms_min_size_threshold: A `float` of the threshold of each side of the
+        box (w.r.t. the scaled image). Proposals whose sides are below this
        threshold are thrown away.
-      nms_iou_threshold: float in [0, 1], the NMS IoU threshold.
-      num_proposals: int, the final number of proposals to generate.
-      test_pre_nms_top_k: int, the number of top scores proposals to be kept
-        before applying NMS in testing.
-      test_pre_nms_score_threshold: float, the score threshold to apply before
-        applying NMS in testing. Proposals whose scores are below this threshold
-        are thrown away.
-      test_pre_nms_min_size_threshold: float, the threshold of each side of the
-        box (w.r.t. the scaled image) in testing. Proposals whose sides are
-        below this threshold are thrown away.
-      test_nms_iou_threshold: float in [0, 1], the NMS IoU threshold in testing.
-      test_num_proposals: int, the final number of proposals to generate in
+      nms_iou_threshold: A `float` in [0, 1], the NMS IoU threshold.
+      num_proposals: An `int` of the final number of proposals to generate.
+      test_pre_nms_top_k: An `int` of the number of top scores proposals to be
+        kept before applying NMS in testing.
+      test_pre_nms_score_threshold: A `float` of the score threshold to apply
+        before applying NMS in testing. Proposals whose scores are below this
+        threshold are thrown away.
+      test_pre_nms_min_size_threshold: A `float` of the threshold of each side
+        of the box (w.r.t. the scaled image) in testing. Proposals whose sides
+        are below this threshold are thrown away.
+      test_nms_iou_threshold: A `float` in [0, 1] of the NMS IoU threshold in
        testing.
-      use_batched_nms: bool, whether or not use
+      test_num_proposals: An `int` of the final number of proposals to generate
+        in testing.
+      use_batched_nms: A `bool` of whether or not to use
        `tf.image.combined_non_max_suppression`.
-      **kwargs: other key word arguments passed to Layer.
+      **kwargs: Additional keyword arguments passed to Layer.
    """
    self._config_dict = {
        'pre_nms_top_k': pre_nms_top_k,
@@ -257,23 +260,24 @@ class MultilevelROIGenerator(tf.keras.layers.Layer):
      3. Apply an overall top k to generate the final selected RoIs.

    Args:
-      raw_boxes: a dict with keys representing FPN levels and values
+      raw_boxes: A `dict` with keys representing FPN levels and values
        representing box tenors of shape
        [batch, feature_h, feature_w, num_anchors * 4].
-      raw_scores: a dict with keys representing FPN levels and values
+      raw_scores: A `dict` with keys representing FPN levels and values
        representing logit tensors of shape
        [batch, feature_h, feature_w, num_anchors].
-      anchor_boxes: a dict with keys representing FPN levels and values
+      anchor_boxes: A `dict` with keys representing FPN levels and values
        representing anchor box tensors of shape
        [batch, feature_h * feature_w * num_anchors, 4].
-      image_shape: a tensor of shape [batch, 2] where the last dimension are
-        [height, width] of the scaled image.
-      training: a bool indicat whether it is in training mode.
+      image_shape: A `tf.Tensor` of shape [batch, 2] where the last dimension
+        is [height, width] of the scaled image.
+      training: A `bool` that indicates whether it is in training mode.

    Returns:
-     roi_boxes: [batch, num_proposals, 4], the proposed ROIs in the scaled
-        image coordinate.
-      roi_scores: [batch, num_proposals], scores of the proposed ROIs.
+      roi_boxes: A `tf.Tensor` of shape [batch, num_proposals, 4], the proposed
+        ROIs in the scaled image coordinate.
+      roi_scores: A `tf.Tensor` of shape [batch, num_proposals], scores of the
+        proposed ROIs.
    """
    roi_boxes, roi_scores = _multilevel_propose_rois(
        raw_boxes,

--- a/official/vision/beta/modeling/layers/roi_sampler.py
+++ b/official/vision/beta/modeling/layers/roi_sampler.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""ROI sampler."""
+"""Contains definitions of ROI sampler."""

 # Import libraries
 import tensorflow as tf
@@ -23,7 +23,7 @@ from official.vision.beta.modeling.layers import box_sampler

 @tf.keras.utils.register_keras_serializable(package='Vision')
 class ROISampler(tf.keras.layers.Layer):
-  """Sample ROIs and assign targets to the sampled ROIs."""
+  """Samples ROIs and assigns targets to the sampled ROIs."""

  def __init__(self,
               mix_gt_boxes=True,
@@ -36,20 +36,20 @@ class ROISampler(tf.keras.layers.Layer):
    """Initializes a ROI sampler.

    Args:
-      mix_gt_boxes: bool, whether to mix the groundtruth boxes with proposed
-        ROIs.
-      num_sampled_rois: int, the number of sampled ROIs per image.
-      foreground_fraction: float in [0, 1], what percentage of proposed ROIs
+      mix_gt_boxes: A `bool` of whether to mix the groundtruth boxes with
+        proposed ROIs.
+      num_sampled_rois: An `int` of the number of sampled ROIs per image.
+      foreground_fraction: A `float` in [0, 1], what percentage of proposed ROIs
        should be sampled from the foreground boxes.
-      foreground_iou_threshold: float, represent the IoU threshold for a box to
-        be considered as positive (if >= `foreground_iou_threshold`).
-      background_iou_high_threshold: float, represent the IoU threshold for a
-        box to be considered as negative (if overlap in
+      foreground_iou_threshold: A `float` that represents the IoU threshold for
+        a box to be considered as positive (if >= `foreground_iou_threshold`).
+      background_iou_high_threshold: A `float` that represents the IoU threshold
+        for a box to be considered as negative (if overlap in
        [`background_iou_low_threshold`, `background_iou_high_threshold`]).
-      background_iou_low_threshold: float, represent the IoU threshold for a box
-        to be considered as negative (if overlap in
+      background_iou_low_threshold: A `float` that represents the IoU threshold
+        for a box to be considered as negative (if overlap in
        [`background_iou_low_threshold`, `background_iou_high_threshold`])
-      **kwargs: other key word arguments passed to Layer.
+      **kwargs: Additional keyword arguments passed to Layer.
    """
    self._config_dict = {
        'mix_gt_boxes': mix_gt_boxes,
@@ -85,29 +85,30 @@ class ROISampler(tf.keras.layers.Layer):
         returns box_targets, class_targets, and RoIs.

    Args:
-      boxes: a tensor of shape of [batch_size, N, 4]. N is the number of
+      boxes: A `tf.Tensor` of shape of [batch_size, N, 4]. N is the number of
        proposals before groundtruth assignment. The last dimension is the
        box coordinates w.r.t. the scaled images in [ymin, xmin, ymax, xmax]
        format.
-      gt_boxes: a tensor of shape of [batch_size, MAX_NUM_INSTANCES, 4].
+      gt_boxes: A `tf.Tensor` of shape of [batch_size, MAX_NUM_INSTANCES, 4].
        The coordinates of gt_boxes are in the pixel coordinates of the scaled
        image. This tensor might have padding of values -1 indicating the
        invalid box coordinates.
-      gt_classes: a tensor with a shape of [batch_size, MAX_NUM_INSTANCES]. This
-        tensor might have paddings with values of -1 indicating the invalid
+      gt_classes: A `tf.Tensor` with a shape of [batch_size, MAX_NUM_INSTANCES].
+        This tensor might have paddings with values of -1 indicating the invalid
        classes.

    Returns:
-      sampled_rois: a tensor of shape of [batch_size, K, 4], representing the
-        coordinates of the sampled RoIs, where K is the number of the sampled
-        RoIs, i.e. K = num_samples_per_image.
-      sampled_gt_boxes: a tensor of shape of [batch_size, K, 4], storing the
-        box coordinates of the matched groundtruth boxes of the samples RoIs.
-      sampled_gt_classes: a tensor of shape of [batch_size, K], storing the
+      sampled_rois: A `tf.Tensor` of shape of [batch_size, K, 4], representing
+        the coordinates of the sampled RoIs, where K is the number of the
+        sampled RoIs, i.e. K = num_samples_per_image.
+      sampled_gt_boxes: A `tf.Tensor` of shape of [batch_size, K, 4], storing
+        the box coordinates of the matched groundtruth boxes of the sampled
+        RoIs.
+      sampled_gt_classes: A `tf.Tensor` of shape of [batch_size, K], storing the
        classes of the matched groundtruth boxes of the sampled RoIs.
-      sampled_gt_indices: a tensor of shape of [batch_size, K], storing the
+      sampled_gt_indices: A `tf.Tensor` of shape of [batch_size, K], storing the
        indices of the sampled groudntruth boxes in the original `gt_boxes`
-        tensor, i.e.
+        tensor, i.e.,
        gt_boxes[sampled_gt_indices[:, i]] = sampled_gt_boxes[:, i].
    """
    if self._config_dict['mix_gt_boxes']: