"megatron/git@developer.sourcefind.cn:OpenDAS/megatron-lm.git" did not exist on "80389ef6e28cfe6cb85fe98e5313d6876c4391e4"
Commit 5b952c08 authored by Fan Yang's avatar Fan Yang Committed by A. Unique TensorFlower
Browse files

Internal change to docstring.

PiperOrigin-RevId: 362111110
parent 5df0cd30
......@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Box sampler."""
"""Contains definitions of box sampler."""
# Import libraries
import tensorflow as tf
......@@ -22,19 +22,19 @@ from official.vision.beta.ops import sampling_ops
@tf.keras.utils.register_keras_serializable(package='Vision')
class BoxSampler(tf.keras.layers.Layer):
"""Sample positive and negative boxes."""
"""Creates a BoxSampler to sample positive and negative boxes."""
def __init__(self,
num_samples=512,
foreground_fraction=0.25,
**kwargs):
"""Initializes a ROI sampler.
"""Initializes a box sampler.
Args:
num_samples: int, the number of sampled boxes per image.
foreground_fraction: float in [0, 1], what percentage of boxes should be
sampled from the positive examples.
**kwargs: other key word arguments passed to Layer.
num_samples: An `int` of the number of sampled boxes per image.
foreground_fraction: A `float` in [0, 1], what percentage of boxes should
be sampled from the positive examples.
**kwargs: Additional keyword arguments passed to Layer.
"""
self._config_dict = {
'num_samples': num_samples,
......@@ -43,22 +43,22 @@ class BoxSampler(tf.keras.layers.Layer):
super(BoxSampler, self).__init__(**kwargs)
def call(self, positive_matches, negative_matches, ignored_matches):
"""Sample and select positive and negative instances.
"""Samples and selects positive and negative instances.
Args:
positive_matches: a `bool` tensor of shape of [batch, N] where N is the
positive_matches: A `bool` tensor of shape of [batch, N] where N is the
number of instances. For each element, `True` means the instance
corresponds to a positive example.
negative_matches: a `bool` tensor of shape of [batch, N] where N is the
negative_matches: A `bool` tensor of shape of [batch, N] where N is the
number of instances. For each element, `True` means the instance
corresponds to a negative example.
ignored_matches: a `bool` tensor of shape of [batch, N] where N is the
number of instances. For each element, `True` means the instance
should be ignored.
ignored_matches: A `bool` tensor of shape of [batch, N] where N is the
number of instances. For each element, `True` means the instance should
be ignored.
Returns:
selected_indices: a tensor of shape of [batch_size, K], storing the
indices of the sampled examples, where K is `num_samples`.
A `tf.tensor` of shape of [batch_size, K], storing the indices of the
sampled examples, where K is `num_samples`.
"""
sample_candidates = tf.logical_and(
tf.logical_or(positive_matches, negative_matches),
......
......@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Mask sampler."""
"""Contains definitions of mask sampler."""
# Import libraries
import tensorflow as tf
......@@ -30,34 +30,34 @@ def _sample_and_crop_foreground_masks(candidate_rois,
"""Samples and creates cropped foreground masks for training.
Args:
candidate_rois: a tensor of shape of [batch_size, N, 4], where N is the
candidate_rois: A `tf.Tensor` of shape of [batch_size, N, 4], where N is the
number of candidate RoIs to be considered for mask sampling. It includes
both positive and negative RoIs. The `num_mask_samples_per_image` positive
RoIs will be sampled to create mask training targets.
candidate_gt_boxes: a tensor of shape of [batch_size, N, 4], storing the
corresponding groundtruth boxes to the `candidate_rois`.
candidate_gt_classes: a tensor of shape of [batch_size, N], storing the
candidate_gt_boxes: A `tf.Tensor` of shape of [batch_size, N, 4], storing
the corresponding groundtruth boxes to the `candidate_rois`.
candidate_gt_classes: A `tf.Tensor` of shape of [batch_size, N], storing the
corresponding groundtruth classes to the `candidate_rois`. 0 in the tensor
corresponds to the background class, i.e. negative RoIs.
candidate_gt_indices: a tensor of shape [batch_size, N], storing the
candidate_gt_indices: A `tf.Tensor` of shape [batch_size, N], storing the
corresponding groundtruth instance indices to the `candidate_gt_boxes`,
i.e. gt_boxes[candidate_gt_indices[:, i]] = candidate_gt_boxes[:, i] and
gt_boxes which is of shape [batch_size, MAX_INSTANCES, 4], M >= N, is the
superset of candidate_gt_boxes.
gt_masks: a tensor of [batch_size, MAX_INSTANCES, mask_height, mask_width]
containing all the groundtruth masks which sample masks are drawn from.
num_sampled_masks: an integer which specifies the number of masks
to sample.
mask_target_size: an integer which specifies the final cropped mask size
after sampling. The output masks are resized w.r.t the sampled RoIs.
gt_boxes which is of shape [batch_size, MAX_INSTANCES, 4], M >= N, is
the superset of candidate_gt_boxes.
gt_masks: A `tf.Tensor` of [batch_size, MAX_INSTANCES, mask_height,
mask_width] containing all the groundtruth masks which sample masks are
drawn from.
num_sampled_masks: An `int` that specifies the number of masks to sample.
mask_target_size: An `int` that specifies the final cropped mask size after
sampling. The output masks are resized w.r.t the sampled RoIs.
Returns:
foreground_rois: a tensor of shape of [batch_size, K, 4] storing the RoI
that corresponds to the sampled foreground masks, where
foreground_rois: A `tf.Tensor` of shape of [batch_size, K, 4] storing the
RoI that corresponds to the sampled foreground masks, where
K = num_mask_samples_per_image.
foreground_classes: a tensor of shape of [batch_size, K] storing the classes
corresponding to the sampled foreground masks.
cropoped_foreground_masks: a tensor of shape of
foreground_classes: A `tf.Tensor` of shape of [batch_size, K] storing the
classes corresponding to the sampled foreground masks.
cropoped_foreground_masks: A `tf.Tensor` of shape of
[batch_size, K, mask_target_size, mask_target_size] storing the cropped
foreground masks used for training.
"""
......@@ -120,34 +120,36 @@ class MaskSampler(tf.keras.layers.Layer):
candidate_gt_classes,
candidate_gt_indices,
gt_masks):
"""Sample and create mask targets for training.
"""Samples and creates mask targets for training.
Args:
candidate_rois: a tensor of shape of [batch_size, N, 4], where N is the
number of candidate RoIs to be considered for mask sampling. It includes
both positive and negative RoIs. The `num_mask_samples_per_image`
positive RoIs will be sampled to create mask training targets.
candidate_gt_boxes: a tensor of shape of [batch_size, N, 4], storing the
corresponding groundtruth boxes to the `candidate_rois`.
candidate_gt_classes: a tensor of shape of [batch_size, N], storing the
corresponding groundtruth classes to the `candidate_rois`. 0 in the
candidate_rois: A `tf.Tensor` of shape of [batch_size, N, 4], where N is
the number of candidate RoIs to be considered for mask sampling. It
includes both positive and negative RoIs. The
`num_mask_samples_per_image` positive RoIs will be sampled to create
mask training targets.
candidate_gt_boxes: A `tf.Tensor` of shape of [batch_size, N, 4], storing
the corresponding groundtruth boxes to the `candidate_rois`.
candidate_gt_classes: A `tf.Tensor` of shape of [batch_size, N], storing
the corresponding groundtruth classes to the `candidate_rois`. 0 in the
tensor corresponds to the background class, i.e. negative RoIs.
candidate_gt_indices: a tensor of shape [batch_size, N], storing the
candidate_gt_indices: A `tf.Tensor` of shape [batch_size, N], storing the
corresponding groundtruth instance indices to the `candidate_gt_boxes`,
i.e. gt_boxes[candidate_gt_indices[:, i]] = candidate_gt_boxes[:, i],
where gt_boxes which is of shape [batch_size, MAX_INSTANCES, 4], M >= N,
is the superset of candidate_gt_boxes.
gt_masks: a tensor of [batch_size, MAX_INSTANCES, mask_height, mask_width]
containing all the groundtruth masks which sample masks are drawn from.
after sampling. The output masks are resized w.r.t the sampled RoIs.
where gt_boxes which is of shape [batch_size, MAX_INSTANCES, 4], M >=
N, is the superset of candidate_gt_boxes.
gt_masks: A `tf.Tensor` of [batch_size, MAX_INSTANCES, mask_height,
mask_width] containing all the groundtruth masks which sample masks are
drawn from. after sampling. The output masks are resized w.r.t the
sampled RoIs.
Returns:
foreground_rois: a tensor of shape of [batch_size, K, 4] storing the RoI
that corresponds to the sampled foreground masks, where
foreground_rois: A `tf.Tensor` of shape of [batch_size, K, 4] storing the
RoI that corresponds to the sampled foreground masks, where
K = num_mask_samples_per_image.
foreground_classes: a tensor of shape of [batch_size, K] storing the
foreground_classes: A `tf.Tensor` of shape of [batch_size, K] storing the
classes corresponding to the sampled foreground masks.
cropoped_foreground_masks: a tensor of shape of
cropoped_foreground_masks: A `tf.Tensor` of shape of
[batch_size, K, mask_target_size, mask_target_size] storing the
cropped foreground masks used for training.
"""
......
......@@ -21,14 +21,21 @@ from official.modeling import tf_utils
@tf.keras.utils.register_keras_serializable(package='Vision')
class SelfGating(tf.keras.layers.Layer):
"""Feature gating as used in S3D-G (https://arxiv.org/pdf/1712.04851.pdf)."""
"""Feature gating as used in S3D-G.
This implements the S3D-G network from:
Saining Xie, Chen Sun, Jonathan Huang, Zhuowen Tu, Kevin Murphy.
Rethinking Spatiotemporal Feature Learning: Speed-Accuracy Trade-offs in Video
Classification.
(https://arxiv.org/pdf/1712.04851.pdf)
"""
def __init__(self, filters, **kwargs):
"""Constructor.
"""Initializes a self-gating layer.
Args:
filters: `int` number of filters for the convolutional layer.
**kwargs: keyword arguments to be passed.
filters: An `int` number of filters for the convolutional layer.
**kwargs: Additional keyword arguments to be passed.
"""
super(SelfGating, self).__init__(**kwargs)
self._filters = filters
......@@ -61,7 +68,7 @@ class SelfGating(tf.keras.layers.Layer):
@tf.keras.utils.register_keras_serializable(package='Vision')
class BottleneckBlock3D(tf.keras.layers.Layer):
"""A 3D bottleneck block."""
"""Creates a 3D bottleneck block."""
def __init__(self,
filters,
......@@ -77,28 +84,29 @@ class BottleneckBlock3D(tf.keras.layers.Layer):
norm_momentum=0.99,
norm_epsilon=0.001,
**kwargs):
"""A 3D bottleneck block with BN after convolutions.
"""Initializes a 3D bottleneck block with BN after convolutions.
Args:
filters: `int` number of filters for the first two convolutions. Note that
the third and final convolution will use 4 times as many filters.
temporal_kernel_size: `int` kernel size for the temporal convolutional
layer.
temporal_strides: `int` temporal stride for the temporal convolutional
filters: An `int` number of filters for the first two convolutions. Note
that the third and final convolution will use 4 times as many filters.
temporal_kernel_size: An `int` of kernel size for the temporal
convolutional layer.
temporal_strides: An `int` of ftemporal stride for the temporal
convolutional layer.
spatial_strides: An `int` of spatial stride for the spatial convolutional
layer.
spatial_strides: `int` spatial stride for the spatial convolutional layer.
use_self_gating: `bool` apply self-gating module or not.
kernel_initializer: kernel_initializer for convolutional layers.
kernel_regularizer: tf.keras.regularizers.Regularizer object for Conv2D.
Default to None.
bias_regularizer: tf.keras.regularizers.Regularizer object for Conv2d.
use_self_gating: A `bool` of whether to apply self-gating module or not.
kernel_initializer: A `str` of kernel_initializer for convolutional
layers.
kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
Conv2D. Default to None.
bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2d.
Default to None.
activation: `str` name of the activation function.
use_sync_bn: if True, use synchronized batch normalization.
norm_momentum: `float` normalization omentum for the moving average.
norm_epsilon: `float` small float added to variance to avoid dividing by
zero.
**kwargs: keyword arguments to be passed.
activation: A `str` name of the activation function.
use_sync_bn: A `bool`. If True, use synchronized batch normalization.
norm_momentum: A `float` of normalization momentum for the moving average.
norm_epsilon: A `float` added to variance to avoid dividing by zero.
**kwargs: Additional keyword arguments to be passed.
"""
super(BottleneckBlock3D, self).__init__(**kwargs)
......
......@@ -22,7 +22,7 @@ import tensorflow as tf
from official.modeling import tf_utils
# Type annotations
# Type annotations.
States = Dict[str, tf.Tensor]
Activation = Union[str, Callable]
......@@ -34,12 +34,12 @@ def make_divisible(value: float,
"""This is to ensure that all layers have channels that are divisible by 8.
Args:
value: `float` original value.
divisor: `int` the divisor that need to be checked upon.
min_value: `float` minimum value threshold.
value: A `float` of original value.
divisor: An `int` off the divisor that need to be checked upon.
min_value: A `float` of minimum value threshold.
Returns:
The adjusted value in `int` that divisible against divisor.
The adjusted value in `int` that is divisible against divisor.
"""
if min_value is None:
min_value = divisor
......@@ -55,7 +55,7 @@ def round_filters(filters: int,
divisor: int = 8,
min_depth: Optional[int] = None,
skip: bool = False):
"""Round number of filters based on width multiplier."""
"""Rounds number of filters based on width multiplier."""
orig_f = filters
if skip or not multiplier:
return filters
......@@ -70,7 +70,7 @@ def round_filters(filters: int,
@tf.keras.utils.register_keras_serializable(package='Vision')
class SqueezeExcitation(tf.keras.layers.Layer):
"""Squeeze and excitation layer."""
"""Creates a squeeze and excitation layer."""
def __init__(self,
in_filters,
......@@ -84,25 +84,26 @@ class SqueezeExcitation(tf.keras.layers.Layer):
activation='relu',
gating_activation='sigmoid',
**kwargs):
"""Implementation for squeeze and excitation.
"""Initializes a squeeze and excitation layer.
Args:
in_filters: `int` number of filters of the input tensor.
out_filters: `int` number of filters of the output tensor.
se_ratio: `float` or None. If not None, se ratio for the squeeze and
in_filters: An `int` number of filters of the input tensor.
out_filters: An `int` number of filters of the output tensor.
se_ratio: A `float` or None. If not None, se ratio for the squeeze and
excitation layer.
divisible_by: `int` ensures all inner dimensions are divisible by this
number.
use_3d_input: `bool` 2D image or 3D input type.
kernel_initializer: kernel_initializer for convolutional layers.
kernel_regularizer: tf.keras.regularizers.Regularizer object for Conv2D.
divisible_by: An `int` that ensures all inner dimensions are divisible by
this number.
use_3d_input: A `bool` of whether input is 2D or 3D image.
kernel_initializer: A `str` of kernel_initializer for convolutional
layers.
kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
Conv2D. Default to None.
bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2d.
Default to None.
bias_regularizer: tf.keras.regularizers.Regularizer object for Conv2d.
Default to None.
activation: `str` name of the activation function.
gating_activation: `str` name of the activation function for final gating
function.
**kwargs: keyword arguments to be passed.
activation: A `str` name of the activation function.
gating_activation: A `str` name of the activation function for final
gating function.
**kwargs: Additional keyword arguments to be passed.
"""
super(SqueezeExcitation, self).__init__(**kwargs)
......@@ -183,9 +184,9 @@ def get_stochastic_depth_rate(init_rate, i, n):
"""Get drop connect rate for the ith block.
Args:
init_rate: `float` initial drop rate.
i: `int` order of the current block.
n: `int` total number of blocks.
init_rate: A `float` of initial drop rate.
i: An `int` of order of the current block.
n: An `int` total number of blocks.
Returns:
Drop rate of the ith block.
......@@ -201,17 +202,17 @@ def get_stochastic_depth_rate(init_rate, i, n):
@tf.keras.utils.register_keras_serializable(package='Vision')
class StochasticDepth(tf.keras.layers.Layer):
"""Stochastic depth layer."""
"""Creates a stochastic depth layer."""
def __init__(self, stochastic_depth_drop_rate, **kwargs):
"""Initialize stochastic depth.
"""Initializes a stochastic depth layer.
Args:
stochastic_depth_drop_rate: `float` drop rate.
**kwargs: keyword arguments to be passed.
stochastic_depth_drop_rate: A `float` of drop rate.
**kwargs: Additional keyword arguments to be passed.
Returns:
A output tensor, which should have the same shape as input.
A output `tf.Tensor` of which should have the same shape as input.
"""
super(StochasticDepth, self).__init__(**kwargs)
self._drop_rate = stochastic_depth_drop_rate
......@@ -239,15 +240,15 @@ class StochasticDepth(tf.keras.layers.Layer):
@tf.keras.utils.register_keras_serializable(package='Vision')
def pyramid_feature_fusion(inputs, target_level):
"""Fuse all feature maps in the feature pyramid at the target level.
"""Fuses all feature maps in the feature pyramid at the target level.
Args:
inputs: a dictionary containing the feature pyramid. The size of the input
inputs: A dictionary containing the feature pyramid. The size of the input
tensor needs to be fixed.
target_level: `int` the target feature level for feature fusion.
target_level: An `int` of the target feature level for feature fusion.
Returns:
A float Tensor of shape [batch_size, feature_height, feature_width,
A `float` `tf.Tensor` of shape [batch_size, feature_height, feature_width,
feature_channel].
"""
# Convert keys to int.
......@@ -279,8 +280,13 @@ def pyramid_feature_fusion(inputs, target_level):
class Scale(tf.keras.layers.Layer):
"""Scales the input by a trainable scalar weight.
Useful for applying ReZero to layers, which improves convergence speed.
Reference: https://arxiv.org/pdf/2003.04887.pdf
This is useful for applying ReZero to layers, which improves convergence
speed. This implements the paper:
Thomas Bachlechner, Bodhisattwa Prasad Majumder, Huanru Henry Mao,
Garrison W. Cottrell, Julian McAuley.
ReZero is All You Need: Fast Convergence at Large Depth.
(https://arxiv.org/pdf/2003.04887.pdf).
"""
def __init__(
......@@ -288,15 +294,15 @@ class Scale(tf.keras.layers.Layer):
initializer: tf.keras.initializers.Initializer = 'ones',
regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
**kwargs):
"""Initializes scale layer.
"""Initializes a scale layer.
Args:
initializer: initializer for the scalar weight.
regularizer: regularizer for the scalar weight.
**kwargs: keyword arguments to be passed to this layer.
initializer: A `str` of initializer for the scalar weight.
regularizer: A `tf.keras.regularizers.Regularizer` for the scalar weight.
**kwargs: Additional keyword arguments to be passed to this layer.
Returns:
A output tensor, which should have the same shape as input.
An `tf.Tensor` of which should have the same shape as input.
"""
super(Scale, self).__init__(**kwargs)
......@@ -328,11 +334,15 @@ class Scale(tf.keras.layers.Layer):
@tf.keras.utils.register_keras_serializable(package='Vision')
class TemporalSoftmaxPool(tf.keras.layers.Layer):
"""Network layer corresponding to temporal softmax pooling.
"""Creates a network layer corresponding to temporal softmax pooling.
This is useful for multi-class logits (used in e.g., Charades).
Modified from AssembleNet Charades evaluation.
Reference: https://arxiv.org/pdf/1905.13209.pdf.
This is useful for multi-class logits (used in e.g., Charades). Modified from
AssembleNet Charades evaluation from:
Michael S. Ryoo, AJ Piergiovanni, Mingxing Tan, Anelia Angelova.
AssembleNet: Searching for Multi-Stream Neural Connectivity in Video
Architectures.
(https://arxiv.org/pdf/1905.13209.pdf).
"""
def call(self, inputs):
......@@ -347,13 +357,16 @@ class TemporalSoftmaxPool(tf.keras.layers.Layer):
@tf.keras.utils.register_keras_serializable(package='Vision')
class PositionalEncoding(tf.keras.layers.Layer):
"""Network layer that adds a sinusoidal positional encoding.
"""Creates a network layer that adds a sinusoidal positional encoding.
Positional encoding is incremented across frames, and is added to the input.
The positional encoding is first weighted at 0 so that the network can choose
to ignore it.
to ignore it. This implements:
Reference: https://arxiv.org/pdf/1706.03762.pdf
Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones,
Aidan N. Gomez, Lukasz Kaiser, Illia Polosukhin.
Attention Is All You Need.
(https://arxiv.org/pdf/1706.03762.pdf).
"""
def __init__(self,
......@@ -363,15 +376,15 @@ class PositionalEncoding(tf.keras.layers.Layer):
"""Initializes positional encoding.
Args:
initializer: initializer for weighting the positional encoding.
cache_encoding: if True, cache the positional encoding tensor after
calling build. Otherwise, rebuild the tensor for every call. Setting
this to False can be useful when we want to input a variable number of
frames, so the positional encoding tensor can change shape.
**kwargs: keyword arguments to be passed to this layer.
initializer: A `str` of initializer for weighting the positional encoding.
cache_encoding: A `bool`. If True, cache the positional encoding tensor
after calling build. Otherwise, rebuild the tensor for every call.
Setting this to False can be useful when we want to input a variable
number of frames, so the positional encoding tensor can change shape.
**kwargs: Additional keyword arguments to be passed to this layer.
Returns:
An output tensor, which should have the same shape as input.
A `tf.Tensor` of which should have the same shape as input.
"""
super(PositionalEncoding, self).__init__(**kwargs)
self._initializer = initializer
......@@ -395,9 +408,9 @@ class PositionalEncoding(tf.keras.layers.Layer):
"""Creates a sequence of sinusoidal positional encoding vectors.
Args:
num_positions: the number of positions (frames).
hidden_size: the number of channels used for the hidden vectors.
dtype: the dtype of the output tensor.
num_positions: An `int` of number of positions (frames).
hidden_size: An `int` of number of channels used for the hidden vectors.
dtype: The dtype of the output tensor.
Returns:
The positional encoding tensor with shape [num_positions, hidden_size].
......@@ -430,10 +443,10 @@ class PositionalEncoding(tf.keras.layers.Layer):
"""Builds the layer with the given input shape.
Args:
input_shape: the input shape.
input_shape: The input shape.
Raises:
ValueError: if using 'channels_first' data format.
ValueError: If using 'channels_first' data format.
"""
if tf.keras.backend.image_data_format() == 'channels_first':
raise ValueError('"channels_first" mode is unsupported.')
......@@ -457,7 +470,7 @@ class PositionalEncoding(tf.keras.layers.Layer):
@tf.keras.utils.register_keras_serializable(package='Vision')
class GlobalAveragePool3D(tf.keras.layers.Layer):
"""Global average pooling layer with causal mode.
"""Creates a global average pooling layer with causal mode.
Implements causal mode, which runs a cumulative sum (with `tf.cumsum`) across
frames in the time dimension, allowing the use of a stream buffer. Sums any
......@@ -469,15 +482,16 @@ class GlobalAveragePool3D(tf.keras.layers.Layer):
keepdims: bool = False,
causal: bool = False,
**kwargs):
"""Initializes global average pool.
"""Initializes a global average pool layer.
Args:
keepdims: if True, keep the averaged dimensions.
causal: run in causal mode with a cumulative sum across frames.
**kwargs: keyword arguments to be passed to this layer.
keepdims: A `bool`. If True, keep the averaged dimensions.
causal: A `bool` of whether to run in causal mode with a cumulative sum
across frames.
**kwargs: Additional keyword arguments to be passed to this layer.
Returns:
An output tensor.
An output `tf.Tensor`.
"""
super(GlobalAveragePool3D, self).__init__(**kwargs)
......@@ -514,14 +528,14 @@ class GlobalAveragePool3D(tf.keras.layers.Layer):
"""Calls the layer with the given inputs.
Args:
inputs: the input tensor.
states: a dict of states such that, if any of the keys match for this
layer, will overwrite the contents of the buffer(s).
output_states: if True, returns the output tensor and output states.
Returns just the output tensor otherwise.
inputs: An input `tf.Tensor`.
states: A `dict` of states such that, if any of the keys match for this
layer, will overwrite the contents of the buffer(s).
output_states: A `bool`. If True, returns the output tensor and output
states. Returns just the output tensor otherwise.
Returns:
the output tensor (and optionally the states if `output_states=True`).
An output `tf.Tensor` (and optionally the states if `output_states=True`).
If `causal=True`, the output tensor will have shape
`[batch_size, num_frames, 1, 1, channels]` if `keepdims=True`. We keep
the frame dimension in this case to simulate a cumulative global average
......@@ -531,7 +545,7 @@ class GlobalAveragePool3D(tf.keras.layers.Layer):
buffer stored in `states`).
Raises:
ValueError: if using 'channels_first' data format.
ValueError: If using 'channels_first' data format.
"""
states = dict(states) if states is not None else {}
......@@ -592,18 +606,17 @@ class GlobalAveragePool3D(tf.keras.layers.Layer):
@tf.keras.utils.register_keras_serializable(package='Vision')
class SpatialAveragePool3D(tf.keras.layers.Layer):
"""Global average pooling layer pooling across spatial dimentions.
"""
"""Creates a global average pooling layer pooling across spatial dimentions."""
def __init__(self, keepdims: bool = False, **kwargs):
"""Initializes global average pool.
"""Initializes a global average pool layer.
Args:
keepdims: if True, keep the averaged dimensions.
**kwargs: keyword arguments to be passed to this layer.
keepdims: A `bool`. If True, keep the averaged dimensions.
**kwargs: Additional keyword arguments to be passed to this layer.
Returns:
An output tensor.
An output `tf.Tensor`.
"""
super(SpatialAveragePool3D, self).__init__(**kwargs)
self._keepdims = keepdims
......@@ -650,10 +663,10 @@ class CausalConvMixin:
"""Calculates padding for 'causal' option for conv layers.
Args:
inputs: optional input tensor to be padded.
use_buffered_input: if True, use 'valid' padding along the time dimension.
This should be set when applying the stream buffer.
time_axis: the axis of the time dimension
inputs: An optional input `tf.Tensor` to be padded.
use_buffered_input: A `bool`. If True, use 'valid' padding along the time
dimension. This should be set when applying the stream buffer.
time_axis: An `int` of the axis of the time dimension.
Returns:
A list of paddings for `tf.pad`.
......@@ -719,14 +732,14 @@ class Conv2D(tf.keras.layers.Conv2D, CausalConvMixin):
"""Initializes conv2d.
Args:
*args: arguments to be passed.
use_buffered_input: if True, the input is expected to be padded
beforehand. In effect, calling this layer will use 'valid' padding on
the temporal dimension to simulate 'causal' padding.
**kwargs: keyword arguments to be passed.
*args: Arguments to be passed.
use_buffered_input: A `bool`. If True, the input is expected to be padded
beforehand. In effect, calling this layer will use 'valid' padding on
the temporal dimension to simulate 'causal' padding.
**kwargs: Additional keyword arguments to be passed.
Returns:
A output tensor of the Conv2D operation.
An output `tf.Tensor` of the Conv2D operation.
"""
super(Conv2D, self).__init__(*args, **kwargs)
self._use_buffered_input = use_buffered_input
......@@ -767,14 +780,14 @@ class DepthwiseConv2D(tf.keras.layers.DepthwiseConv2D, CausalConvMixin):
"""Initializes depthwise conv2d.
Args:
*args: arguments to be passed.
use_buffered_input: if True, the input is expected to be padded
beforehand. In effect, calling this layer will use 'valid' padding on
the temporal dimension to simulate 'causal' padding.
**kwargs: keyword arguments to be passed.
*args: Arguments to be passed.
use_buffered_input: A `bool`. If True, the input is expected to be padded
beforehand. In effect, calling this layer will use 'valid' padding on
the temporal dimension to simulate 'causal' padding.
**kwargs: Additional keyword arguments to be passed.
Returns:
A output tensor of the DepthwiseConv2D operation.
An output `tf.Tensor` of the DepthwiseConv2D operation.
"""
super(DepthwiseConv2D, self).__init__(*args, **kwargs)
self._use_buffered_input = use_buffered_input
......@@ -829,14 +842,14 @@ class Conv3D(tf.keras.layers.Conv3D, CausalConvMixin):
"""Initializes conv3d.
Args:
*args: arguments to be passed.
use_buffered_input: if True, the input is expected to be padded
beforehand. In effect, calling this layer will use 'valid' padding on
the temporal dimension to simulate 'causal' padding.
**kwargs: keyword arguments to be passed.
*args: Arguments to be passed.
use_buffered_input: A `bool`. If True, the input is expected to be padded
beforehand. In effect, calling this layer will use 'valid' padding on
the temporal dimension to simulate 'causal' padding.
**kwargs: Additional keyword arguments to be passed.
Returns:
A output tensor of the Conv3D operation.
An output `tf.Tensor` of the Conv3D operation.
"""
super(Conv3D, self).__init__(*args, **kwargs)
self._use_buffered_input = use_buffered_input
......
......@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""ROI align."""
"""Contains definitions of ROI aligner."""
import tensorflow as tf
......@@ -30,9 +30,9 @@ class MultilevelROIAligner(tf.keras.layers.Layer):
"""Initializes a ROI aligner.
Args:
crop_size: int, the output size of the cropped features.
sample_offset: float in [0, 1], the subpixel sample offset.
**kwargs: other key word arguments passed to Layer.
crop_size: An `int` of the output size of the cropped features.
sample_offset: A `float` in [0, 1] of the subpixel sample offset.
**kwargs: Additional keyword arguments passed to Layer.
"""
self._config_dict = {
'crop_size': crop_size,
......@@ -47,13 +47,13 @@ class MultilevelROIAligner(tf.keras.layers.Layer):
features: A dictionary with key as pyramid level and value as features.
The features are in shape of
[batch_size, height_l, width_l, num_filters].
boxes: A 3-D Tensor of shape [batch_size, num_boxes, 4]. Each row
boxes: A 3-D `tf.Tensor` of shape [batch_size, num_boxes, 4]. Each row
represents a box with [y1, x1, y2, x2] in un-normalized coordinates.
from grid point.
training: bool, whether it is in training mode.
training: A `bool` of whether it is in training mode.
Returns:
roi_features: A 5-D tensor representing feature crop of shape
A 5-D `tf.Tensor` representing feature crop of shape
[batch_size, num_boxes, crop_size, crop_size, num_filters].
"""
roi_features = spatial_transform_ops.multilevel_crop_and_resize(
......
......@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""ROI generator."""
"""Contains definitions of ROI generator."""
# Import libraries
import tensorflow as tf
......@@ -48,46 +48,48 @@ def _multilevel_propose_rois(raw_boxes,
3. Apply an overall top k to generate the final selected RoIs.
Args:
raw_boxes: a dict with keys representing FPN levels and values representing
box tenors of shape [batch_size, feature_h, feature_w, num_anchors * 4].
raw_scores: a dict with keys representing FPN levels and values representing
logit tensors of shape [batch_size, feature_h, feature_w, num_anchors].
anchor_boxes: a dict with keys representing FPN levels and values
raw_boxes: A `dict` with keys representing FPN levels and values
representing box tenors of shape
[batch_size, feature_h, feature_w, num_anchors * 4].
raw_scores: A `dict` with keys representing FPN levels and values
representing logit tensors of shape
[batch_size, feature_h, feature_w, num_anchors].
anchor_boxes: A `dict` with keys representing FPN levels and values
representing anchor box tensors of shape
[batch_size, feature_h * feature_w * num_anchors, 4].
image_shape: a tensor of shape [batch_size, 2] where the last dimension are
[height, width] of the scaled image.
pre_nms_top_k: an integer of top scoring RPN proposals *per level* to
keep before applying NMS. Default: 2000.
pre_nms_score_threshold: a float between 0 and 1 representing the minimal
box score to keep before applying NMS. This is often used as a
image_shape: A `tf.Tensor` of shape [batch_size, 2] where the last dimension
are [height, width] of the scaled image.
pre_nms_top_k: An `int` of top scoring RPN proposals *per level* to keep
before applying NMS. Default: 2000.
pre_nms_score_threshold: A `float` between 0 and 1 representing the minimal
box score to keep before applying NMS. This is often used as a
pre-filtering step for better performance. Default: 0, no filtering is
applied.
pre_nms_min_size_threshold: a float representing the minimal box size in
pre_nms_min_size_threshold: A `float` representing the minimal box size in
each side (w.r.t. the scaled image) to keep before applying NMS. This is
often used as a pre-filtering step for better performance. Default: 0, no
filtering is applied.
nms_iou_threshold: a float between 0 and 1 representing the IoU threshold
nms_iou_threshold: A `float` between 0 and 1 representing the IoU threshold
used for NMS. If 0.0, no NMS is applied. Default: 0.7.
num_proposals: an integer of top scoring RPN proposals *in total* to
keep after applying NMS. Default: 1000.
use_batched_nms: a boolean indicating whether NMS is applied in batch using
num_proposals: An `int` of top scoring RPN proposals *in total* to keep
after applying NMS. Default: 1000.
use_batched_nms: A `bool` indicating whether NMS is applied in batch using
`tf.image.combined_non_max_suppression`. Currently only available in
CPU/GPU. Default: False.
decode_boxes: a boolean indicating whether `raw_boxes` needs to be decoded
CPU/GPU. Default is False.
decode_boxes: A `bool` indicating whether `raw_boxes` needs to be decoded
using `anchor_boxes`. If False, use `raw_boxes` directly and ignore
`anchor_boxes`. Default: True.
clip_boxes: a boolean indicating whether boxes are first clipped to the
`anchor_boxes`. Default is True.
clip_boxes: A `bool` indicating whether boxes are first clipped to the
scaled image size before appliying NMS. If False, no clipping is applied
and `image_shape` is ignored. Default: True.
apply_sigmoid_to_score: a boolean indicating whether apply sigmoid to
`raw_scores` before applying NMS. Default: True.
and `image_shape` is ignored. Default is True.
apply_sigmoid_to_score: A `bool` indicating whether apply sigmoid to
`raw_scores` before applying NMS. Default is True.
Returns:
selected_rois: a tensor of shape [batch_size, num_proposals, 4],
selected_rois: A `tf.Tensor` of shape [batch_size, num_proposals, 4],
representing the box coordinates of the selected proposals w.r.t. the
scaled image.
selected_roi_scores: a tensor of shape [batch_size, num_proposals, 1],
selected_roi_scores: A `tf.Tensor` of shape [batch_size, num_proposals, 1],
representing the scores of the selected proposals.
"""
with tf.name_scope('multilevel_propose_rois'):
......@@ -196,30 +198,31 @@ class MultilevelROIGenerator(tf.keras.layers.Layer):
The ROI generator transforms the raw predictions from RPN to ROIs.
Args:
pre_nms_top_k: int, the number of top scores proposals to be kept before
applying NMS.
pre_nms_score_threshold: float, the score threshold to apply before
pre_nms_top_k: An `int` of the number of top scores proposals to be kept
before applying NMS.
pre_nms_score_threshold: A `float` of the score threshold to apply before
applying NMS. Proposals whose scores are below this threshold are
thrown away.
pre_nms_min_size_threshold: float, the threshold of each side of the box
(w.r.t. the scaled image). Proposals whose sides are below this
pre_nms_min_size_threshold: A `float` of the threshold of each side of the
box (w.r.t. the scaled image). Proposals whose sides are below this
threshold are thrown away.
nms_iou_threshold: float in [0, 1], the NMS IoU threshold.
num_proposals: int, the final number of proposals to generate.
test_pre_nms_top_k: int, the number of top scores proposals to be kept
before applying NMS in testing.
test_pre_nms_score_threshold: float, the score threshold to apply before
applying NMS in testing. Proposals whose scores are below this threshold
are thrown away.
test_pre_nms_min_size_threshold: float, the threshold of each side of the
box (w.r.t. the scaled image) in testing. Proposals whose sides are
below this threshold are thrown away.
test_nms_iou_threshold: float in [0, 1], the NMS IoU threshold in testing.
test_num_proposals: int, the final number of proposals to generate in
nms_iou_threshold: A `float` in [0, 1], the NMS IoU threshold.
num_proposals: An `int` of the final number of proposals to generate.
test_pre_nms_top_k: An `int` of the number of top scores proposals to be
kept before applying NMS in testing.
test_pre_nms_score_threshold: A `float` of the score threshold to apply
before applying NMS in testing. Proposals whose scores are below this
threshold are thrown away.
test_pre_nms_min_size_threshold: A `float` of the threshold of each side
of the box (w.r.t. the scaled image) in testing. Proposals whose sides
are below this threshold are thrown away.
test_nms_iou_threshold: A `float` in [0, 1] of the NMS IoU threshold in
testing.
use_batched_nms: bool, whether or not use
test_num_proposals: An `int` of the final number of proposals to generate
in testing.
use_batched_nms: A `bool` of whether or not use
`tf.image.combined_non_max_suppression`.
**kwargs: other key word arguments passed to Layer.
**kwargs: Additional keyword arguments passed to Layer.
"""
self._config_dict = {
'pre_nms_top_k': pre_nms_top_k,
......@@ -257,23 +260,24 @@ class MultilevelROIGenerator(tf.keras.layers.Layer):
3. Apply an overall top k to generate the final selected RoIs.
Args:
raw_boxes: a dict with keys representing FPN levels and values
raw_boxes: A `dict` with keys representing FPN levels and values
representing box tenors of shape
[batch, feature_h, feature_w, num_anchors * 4].
raw_scores: a dict with keys representing FPN levels and values
raw_scores: A `dict` with keys representing FPN levels and values
representing logit tensors of shape
[batch, feature_h, feature_w, num_anchors].
anchor_boxes: a dict with keys representing FPN levels and values
anchor_boxes: A `dict` with keys representing FPN levels and values
representing anchor box tensors of shape
[batch, feature_h * feature_w * num_anchors, 4].
image_shape: a tensor of shape [batch, 2] where the last dimension are
[height, width] of the scaled image.
training: a bool indicat whether it is in training mode.
image_shape: A `tf.Tensor` of shape [batch, 2] where the last dimension
are [height, width] of the scaled image.
training: A `bool` that indicates whether it is in training mode.
Returns:
roi_boxes: [batch, num_proposals, 4], the proposed ROIs in the scaled
image coordinate.
roi_scores: [batch, num_proposals], scores of the proposed ROIs.
roi_boxes: A `tf.Tensor` of shape [batch, num_proposals, 4], the proposed
ROIs in the scaled image coordinate.
roi_scores: A `tf.Tensor` of shape [batch, num_proposals], scores of the
proposed ROIs.
"""
roi_boxes, roi_scores = _multilevel_propose_rois(
raw_boxes,
......
......@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""ROI sampler."""
"""Contains definitions of ROI sampler."""
# Import libraries
import tensorflow as tf
......@@ -23,7 +23,7 @@ from official.vision.beta.modeling.layers import box_sampler
@tf.keras.utils.register_keras_serializable(package='Vision')
class ROISampler(tf.keras.layers.Layer):
"""Sample ROIs and assign targets to the sampled ROIs."""
"""Samples ROIs and assigns targets to the sampled ROIs."""
def __init__(self,
mix_gt_boxes=True,
......@@ -36,20 +36,20 @@ class ROISampler(tf.keras.layers.Layer):
"""Initializes a ROI sampler.
Args:
mix_gt_boxes: bool, whether to mix the groundtruth boxes with proposed
ROIs.
num_sampled_rois: int, the number of sampled ROIs per image.
foreground_fraction: float in [0, 1], what percentage of proposed ROIs
mix_gt_boxes: A `bool` of whether to mix the groundtruth boxes with
proposed ROIs.
num_sampled_rois: An `int` of the number of sampled ROIs per image.
foreground_fraction: A `float` in [0, 1], what percentage of proposed ROIs
should be sampled from the foreground boxes.
foreground_iou_threshold: float, represent the IoU threshold for a box to
be considered as positive (if >= `foreground_iou_threshold`).
background_iou_high_threshold: float, represent the IoU threshold for a
box to be considered as negative (if overlap in
foreground_iou_threshold: A `float` that represents the IoU threshold for
a box to be considered as positive (if >= `foreground_iou_threshold`).
background_iou_high_threshold: A `float` that represents the IoU threshold
for a box to be considered as negative (if overlap in
[`background_iou_low_threshold`, `background_iou_high_threshold`]).
background_iou_low_threshold: float, represent the IoU threshold for a box
to be considered as negative (if overlap in
background_iou_low_threshold: A `float` that represents the IoU threshold
for a box to be considered as negative (if overlap in
[`background_iou_low_threshold`, `background_iou_high_threshold`])
**kwargs: other key word arguments passed to Layer.
**kwargs: Additional keyword arguments passed to Layer.
"""
self._config_dict = {
'mix_gt_boxes': mix_gt_boxes,
......@@ -85,29 +85,30 @@ class ROISampler(tf.keras.layers.Layer):
returns box_targets, class_targets, and RoIs.
Args:
boxes: a tensor of shape of [batch_size, N, 4]. N is the number of
boxes: A `tf.Tensor` of shape of [batch_size, N, 4]. N is the number of
proposals before groundtruth assignment. The last dimension is the
box coordinates w.r.t. the scaled images in [ymin, xmin, ymax, xmax]
format.
gt_boxes: a tensor of shape of [batch_size, MAX_NUM_INSTANCES, 4].
gt_boxes: A `tf.Tensor` of shape of [batch_size, MAX_NUM_INSTANCES, 4].
The coordinates of gt_boxes are in the pixel coordinates of the scaled
image. This tensor might have padding of values -1 indicating the
invalid box coordinates.
gt_classes: a tensor with a shape of [batch_size, MAX_NUM_INSTANCES]. This
tensor might have paddings with values of -1 indicating the invalid
gt_classes: A `tf.Tensor` with a shape of [batch_size, MAX_NUM_INSTANCES].
This tensor might have paddings with values of -1 indicating the invalid
classes.
Returns:
sampled_rois: a tensor of shape of [batch_size, K, 4], representing the
coordinates of the sampled RoIs, where K is the number of the sampled
RoIs, i.e. K = num_samples_per_image.
sampled_gt_boxes: a tensor of shape of [batch_size, K, 4], storing the
box coordinates of the matched groundtruth boxes of the samples RoIs.
sampled_gt_classes: a tensor of shape of [batch_size, K], storing the
sampled_rois: A `tf.Tensor` of shape of [batch_size, K, 4], representing
the coordinates of the sampled RoIs, where K is the number of the
sampled RoIs, i.e. K = num_samples_per_image.
sampled_gt_boxes: A `tf.Tensor` of shape of [batch_size, K, 4], storing
the box coordinates of the matched groundtruth boxes of the samples
RoIs.
sampled_gt_classes: A `tf.Tensor` of shape of [batch_size, K], storing the
classes of the matched groundtruth boxes of the sampled RoIs.
sampled_gt_indices: a tensor of shape of [batch_size, K], storing the
sampled_gt_indices: A `tf.Tensor` of shape of [batch_size, K], storing the
indices of the sampled groudntruth boxes in the original `gt_boxes`
tensor, i.e.
tensor, i.e.,
gt_boxes[sampled_gt_indices[:, i]] = sampled_gt_boxes[:, i].
"""
if self._config_dict['mix_gt_boxes']:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment