Commit 05972650 authored by A. Unique TensorFlower
Browse files

Internal change

PiperOrigin-RevId: 428641380
parent 92bcecc9
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""S3D model configurations."""
import dataclasses
from typing import Text
from official.modeling import hyperparams
from official.vision.beta.configs import backbones_3d
from official.vision.beta.configs import video_classification
@dataclasses.dataclass
class S3D(hyperparams.Config):
  """S3D backbone config.

  Attributes:
    final_endpoint: Specifies the endpoint to construct the network up to. It
      can be one of ['Conv2d_1a_7x7', 'MaxPool_2a_3x3', 'Conv2d_2b_1x1',
      'Conv2d_2c_3x3', 'MaxPool_3a_3x3', 'Mixed_3b', 'Mixed_3c',
      'MaxPool_4a_3x3', 'Mixed_4b', 'Mixed_4c', 'Mixed_4d', 'Mixed_4e',
      'Mixed_4f', 'MaxPool_5a_2x2', 'Mixed_5b', 'Mixed_5c']
    first_temporal_kernel_size: Specifies the temporal kernel size for the first
      conv3d filter. A larger value slows down the model but provides little
      accuracy improvement. Must be set to one of 1, 3, 5 or 7.
    temporal_conv_start_at: Specifies the first conv block to use separable 3D
      convs rather than 2D convs (implemented as [1, k, k] 3D conv). This is
      used to construct the inverted pyramid models. 'Conv2d_2c_3x3' is the
      first valid block to use separable 3D convs. If provided block name is
      not present, all valid blocks will use separable 3D convs.
    gating_start_at: Specifies the first conv block to use self gating.
      'Conv2d_2c_3x3' is the first valid block to use self gating.
    swap_pool_and_1x1x1: If True, in Branch_3 1x1x1 convolution is performed
      first, then followed by max pooling. 1x1x1 convolution is used to reduce
      the number of filters. Thus, max pooling is performed on less filters.
    gating_style: Self gating can be applied after each branch and/or after each
      inception cell. It can be one of ['BRANCH', 'CELL', 'BRANCH_AND_CELL'].
    use_sync_bn: If True, use synchronized batch normalization.
    norm_momentum: A `float` of normalization momentum for the moving average.
    norm_epsilon: A `float` added to variance to avoid dividing by zero.
    temporal_conv_type: It can be one of ['3d', '2+1d', '1+2d', '1+1+1d'] where
      '3d' is SPATIOTEMPORAL 3d convolution, '2+1d' is SPATIAL_TEMPORAL_SEPARATE
      with 2D convolution on the spatial dimensions followed by 1D convolution
      on the temporal dimension, '1+2d' is TEMPORAL_SPATIAL_SEPARATE with 1D
      convolution on the temporal dimension followed by 2D convolution on the
      spatial dimensions, and '1+1+1d' is FULLY_SEPARATE with 1D convolutions on
      the horizontal, vertical, and temporal dimensions, respectively.
    depth_multiplier: Float multiplier for the depth (number of channels) for
      all convolution ops. The value must be greater than zero. Typical usage
      will be to set this value in (0, 1) to reduce the number of parameters or
      computation cost of the model.
  """
  # NOTE(review): defaults ('2+1d' separable convs with per-cell self gating)
  # appear to reproduce the gated S3D configuration — confirm against paper.
  final_endpoint: Text = 'Mixed_5c'
  first_temporal_kernel_size: int = 3
  temporal_conv_start_at: Text = 'Conv2d_2c_3x3'
  gating_start_at: Text = 'Conv2d_2c_3x3'
  swap_pool_and_1x1x1: bool = True
  gating_style: Text = 'CELL'
  use_sync_bn: bool = False
  norm_momentum: float = 0.999
  norm_epsilon: float = 0.001
  temporal_conv_type: Text = '2+1d'
  depth_multiplier: float = 1.0
@dataclasses.dataclass
class Backbone3D(backbones_3d.Backbone3D):
  """Configuration for backbones.

  Attributes:
    type: 'str', type of backbone to be used, one of the fields below.
    s3d: s3d backbone config.
  """
  type: str = 's3d'
  # Use a default_factory rather than a shared S3D() instance: a dataclass
  # instance is an unhashable mutable default, which Python 3.11+ rejects with
  # ValueError and which would otherwise be shared across all Backbone3D
  # instances.
  s3d: S3D = dataclasses.field(default_factory=S3D)
@dataclasses.dataclass
class S3DModel(video_classification.VideoClassificationModel):
  """The S3D model config.

  Attributes:
    model_type: 'str', type of model to be used, here fixed to 's3d'.
    backbone: backbone config.
  """
  model_type: str = 's3d'
  # default_factory avoids sharing one Backbone3D instance across all
  # S3DModel instances (and the Python 3.11+ mutable-default ValueError).
  backbone: Backbone3D = dataclasses.field(default_factory=Backbone3D)
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Contains modules related to Inception networks."""
from typing import Callable, Dict, Optional, Sequence, Set, Text, Tuple, Type, Union
import tensorflow as tf
from official.projects.s3d.modeling import net_utils
from official.vision.beta.modeling.layers import nn_blocks_3d
INCEPTION_V1_CONV_ENDPOINTS = [
'Conv2d_1a_7x7', 'Conv2d_2c_3x3', 'Mixed_3b', 'Mixed_3c', 'Mixed_4b',
'Mixed_4c', 'Mixed_4d', 'Mixed_4e', 'Mixed_4f', 'Mixed_5b', 'Mixed_5c'
]
# Mapping from endpoint to branch filters. The endpoint shapes below are
# specific for input 64x224x224.
INCEPTION_V1_ARCH_SKELETON = [
('Mixed_3b', [[64], [96, 128], [16, 32], [32]]), # 32x28x28x256
('Mixed_3c', [[128], [128, 192], [32, 96], [64]]), # 32x28x28x480
('MaxPool_4a_3x3', [[3, 3, 3], [2, 2, 2]]), # 16x14x14x480
('Mixed_4b', [[192], [96, 208], [16, 48], [64]]), # 16x14x14x512
('Mixed_4c', [[160], [112, 224], [24, 64], [64]]), # 16x14x14x512
('Mixed_4d', [[128], [128, 256], [24, 64], [64]]), # 16x14x14x512
('Mixed_4e', [[112], [144, 288], [32, 64], [64]]), # 16x14x14x528
('Mixed_4f', [[256], [160, 320], [32, 128], [128]]), # 16x14x14x832
('MaxPool_5a_2x2', [[2, 2, 2], [2, 2, 2]]), # 8x7x7x832
('Mixed_5b', [[256], [160, 320], [32, 128], [128]]), # 8x7x7x832
('Mixed_5c', [[384], [192, 384], [48, 128], [128]]), # 8x7x7x1024
]
INCEPTION_V1_LOCAL_SKELETON = [
('MaxPool_5a_2x2_local', [[2, 2, 2], [2, 2, 2]]), # 8x7x7x832
('Mixed_5b_local', [[256], [160, 320], [32, 128], [128]]), # 8x7x7x832
('Mixed_5c_local', [[384], [192, 384], [48, 128], [128]]), # 8x7x7x1024
]
initializers = tf.keras.initializers
regularizers = tf.keras.regularizers
def inception_v1_stem_cells(
    inputs: tf.Tensor,
    depth_multiplier: float,
    final_endpoint: Text,
    temporal_conv_endpoints: Optional[Set[Text]] = None,
    self_gating_endpoints: Optional[Set[Text]] = None,
    temporal_conv_type: Text = '3d',
    first_temporal_kernel_size: int = 7,
    use_sync_bn: bool = False,
    norm_momentum: float = 0.999,
    norm_epsilon: float = 0.001,
    temporal_conv_initializer: Union[
        Text, initializers.Initializer] = initializers.TruncatedNormal(
            mean=0.0, stddev=0.01),
    kernel_initializer: Union[Text,
                              initializers.Initializer] = 'truncated_normal',
    kernel_regularizer: Union[Text, regularizers.Regularizer] = 'l2',
    parameterized_conv_layer: Type[
        net_utils.ParameterizedConvLayer] = net_utils.ParameterizedConvLayer,
    layer_naming_fn: Callable[[Text], Text] = lambda end_point: None,
) -> Tuple[tf.Tensor, Dict[Text, tf.Tensor]]:
  """Stem cells used in the original I3D/S3D model.

  Args:
    inputs: A 5-D float tensor of size [batch_size, num_frames, height, width,
      channels].
    depth_multiplier: A float to reduce/increase number of channels.
    final_endpoint: Specifies the endpoint to construct the network up to. It
      can be one of ['Conv2d_1a_7x7', 'MaxPool_2a_3x3', 'Conv2d_2b_1x1',
      'Conv2d_2c_3x3', 'MaxPool_3a_3x3'].
    temporal_conv_endpoints: Specifies the endpoints where to perform temporal
      convolution.
    self_gating_endpoints: Specifies the endpoints where to perform self gating.
    temporal_conv_type: '3d' for I3D model and '2+1d' for S3D model.
    first_temporal_kernel_size: temporal kernel size of the first convolution
      layer.
    use_sync_bn: If True, use synchronized batch normalization.
    norm_momentum: A `float` of normalization momentum for the moving average.
    norm_epsilon: A `float` added to variance to avoid dividing by zero.
    temporal_conv_initializer: Weight initializer for temporal convolution
      inside the cell. It only applies to 2+1d and 1+2d cases.
    kernel_initializer: Weight initializer for convolutional layers other than
      temporal convolution.
    kernel_regularizer: Weight regularizer for all convolutional layers.
    parameterized_conv_layer: class for parameterized conv layer.
    layer_naming_fn: function to customize conv / pooling layer names given
      endpoint name of the block. This is mainly used to create a model that is
      compatible with TF1 checkpoints.

  Returns:
    A tuple of the output tensor at `final_endpoint` (or 'MaxPool_3a_3x3' if
    `final_endpoint` is never matched) and a dictionary mapping each
    constructed endpoint name to its activation tensor.
  """
  if temporal_conv_endpoints is None:
    temporal_conv_endpoints = set()
  if self_gating_endpoints is None:
    self_gating_endpoints = set()
  if use_sync_bn:
    batch_norm = tf.keras.layers.experimental.SyncBatchNormalization
  else:
    batch_norm = tf.keras.layers.BatchNormalization
  if tf.keras.backend.image_data_format() == 'channels_last':
    bn_axis = -1
  else:
    bn_axis = 1
  end_points = {}
  # Shape comments below assume a 64x224x224 input in channels-last layout.
  # batch_size x 32 x 112 x 112 x 64
  end_point = 'Conv2d_1a_7x7'
  net = tf.keras.layers.Conv3D(
      filters=net_utils.apply_depth_multiplier(64, depth_multiplier),
      kernel_size=[first_temporal_kernel_size, 7, 7],
      strides=[2, 2, 2],
      padding='same',
      use_bias=False,  # BatchNorm directly follows, so a bias is redundant.
      kernel_initializer=kernel_initializer,
      kernel_regularizer=kernel_regularizer,
      name=layer_naming_fn(end_point))(
          inputs)
  net = batch_norm(
      axis=bn_axis,
      momentum=norm_momentum,
      epsilon=norm_epsilon,
      scale=False,
      gamma_initializer='ones',
      name=layer_naming_fn(end_point + '/BatchNorm'))(
          net)
  net = tf.nn.relu(net)
  end_points[end_point] = net
  if final_endpoint == end_point:
    return net, end_points
  # batch_size x 32 x 56 x 56 x 64
  end_point = 'MaxPool_2a_3x3'
  # Spatial-only pooling: pool_size/strides are 1 on the temporal axis.
  net = tf.keras.layers.MaxPool3D(
      pool_size=[1, 3, 3],
      strides=[1, 2, 2],
      padding='same',
      name=layer_naming_fn(end_point))(
          net)
  end_points[end_point] = net
  if final_endpoint == end_point:
    return net, end_points
  # batch_size x 32 x 56 x 56 x 64
  end_point = 'Conv2d_2b_1x1'
  net = tf.keras.layers.Conv3D(
      filters=net_utils.apply_depth_multiplier(64, depth_multiplier),
      strides=[1, 1, 1],
      kernel_size=[1, 1, 1],
      padding='same',
      use_bias=False,
      kernel_initializer=kernel_initializer,
      kernel_regularizer=kernel_regularizer,
      name=layer_naming_fn(end_point))(
          net)
  net = batch_norm(
      axis=bn_axis,
      momentum=norm_momentum,
      epsilon=norm_epsilon,
      scale=False,
      gamma_initializer='ones',
      name=layer_naming_fn(end_point + '/BatchNorm'))(
          net)
  net = tf.nn.relu(net)
  end_points[end_point] = net
  if final_endpoint == end_point:
    return net, end_points
  # batch_size x 32 x 56 x 56 x 192
  end_point = 'Conv2d_2c_3x3'
  # Fall back to a 2D (spatial-only) conv unless this endpoint was explicitly
  # requested to carry temporal convolution.
  if end_point not in temporal_conv_endpoints:
    temporal_conv_type = '2d'
  net = parameterized_conv_layer(
      conv_type=temporal_conv_type,
      kernel_size=3,
      filters=net_utils.apply_depth_multiplier(192, depth_multiplier),
      strides=[1, 1, 1],
      rates=[1, 1, 1],
      use_sync_bn=use_sync_bn,
      norm_momentum=norm_momentum,
      norm_epsilon=norm_epsilon,
      temporal_conv_initializer=temporal_conv_initializer,
      kernel_initializer=kernel_initializer,
      kernel_regularizer=kernel_regularizer,
      name=layer_naming_fn(end_point))(
          net)
  if end_point in self_gating_endpoints:
    net = nn_blocks_3d.SelfGating(
        filters=net_utils.apply_depth_multiplier(192, depth_multiplier),
        name=layer_naming_fn(end_point + '/self_gating'))(
            net)
  end_points[end_point] = net
  if final_endpoint == end_point:
    return net, end_points
  # batch_size x 32 x 28 x 28 x 192
  end_point = 'MaxPool_3a_3x3'
  net = tf.keras.layers.MaxPool3D(
      pool_size=[1, 3, 3],
      strides=[1, 2, 2],
      padding='same',
      name=layer_naming_fn(end_point))(
          net)
  end_points[end_point] = net
  return net, end_points
def _construct_branch_3_layers(
    channels: int,
    swap_pool_and_1x1x1: bool,
    pool_type: Text,
    batch_norm_layer: tf.keras.layers.Layer,
    kernel_initializer: Union[Text, initializers.Initializer],
    kernel_regularizer: Union[Text, regularizers.Regularizer],
):
  """Helper function for Branch 3 inside Inception module."""
  # Pool spatially only for '2d', over all three dims otherwise.
  pool_window = [1, 3, 3] if pool_type == '2d' else [3, 3, 3]
  max_pool = tf.keras.layers.MaxPool3D(
      pool_size=pool_window, strides=[1, 1, 1], padding='same')
  pointwise_conv = tf.keras.layers.Conv3D(
      filters=channels,
      kernel_size=[1, 1, 1],
      padding='same',
      use_bias=False,
      kernel_initializer=kernel_initializer,
      kernel_regularizer=kernel_regularizer)
  relu = tf.keras.layers.Activation('relu')
  # Either reduce channels first (cheaper pooling) or pool first.
  conv_bn_relu = [pointwise_conv, batch_norm_layer, relu]
  if swap_pool_and_1x1x1:
    return conv_bn_relu + [max_pool]
  return [max_pool] + conv_bn_relu
class InceptionV1CellLayer(tf.keras.layers.Layer):
  """A single Tensorflow 2 cell used in the original I3D/S3D model."""

  def __init__(
      self,
      branch_filters: Sequence[Sequence[int]],
      conv_type: Text = '3d',
      temporal_dilation_rate: int = 1,
      swap_pool_and_1x1x1: bool = False,
      use_self_gating_on_branch: bool = False,
      use_self_gating_on_cell: bool = False,
      use_sync_bn: bool = False,
      norm_momentum: float = 0.999,
      norm_epsilon: float = 0.001,
      temporal_conv_initializer: Union[
          Text, initializers.Initializer] = initializers.TruncatedNormal(
              mean=0.0, stddev=0.01),
      kernel_initializer: Union[Text,
                                initializers.Initializer] = 'truncated_normal',
      kernel_regularizer: Union[Text, regularizers.Regularizer] = 'l2',
      parameterized_conv_layer: Type[
          net_utils.ParameterizedConvLayer] = net_utils.ParameterizedConvLayer,
      **kwargs):
    """A cell structure inspired by Inception V1.

    When called, the cell runs the input through four parallel branches and
    concatenates their outputs along the channel axis, producing a 5-D float
    tensor of size [batch_size, num_frames, height, width, channels].

    Args:
      branch_filters: Specifies the number of filters in four branches
        (Branch_0, Branch_1, Branch_2, Branch_3). Single number for Branch_0 and
        Branch_3. For Branch_1 and Branch_2, each need to specify two numbers,
        one for 1x1x1 and one for 3x3x3.
      conv_type: The type of parameterized convolution. Currently, we support
        '2d', '3d', '2+1d', '1+2d'.
      temporal_dilation_rate: The dilation rate for temporal convolution.
      swap_pool_and_1x1x1: A boolean flag indicates that whether to swap the
        order of convolution and max pooling in Branch_3.
      use_self_gating_on_branch: Whether or not to apply self gating on each
        branch of the inception cell.
      use_self_gating_on_cell: Whether or not to apply self gating on each cell
        after the concatenation of all branches.
      use_sync_bn: If True, use synchronized batch normalization.
      norm_momentum: A `float` of normalization momentum for the moving average.
      norm_epsilon: A `float` added to variance to avoid dividing by zero.
      temporal_conv_initializer: Weight initializer for temporal convolution
        inside the cell. It only applies to 2+1d and 1+2d cases.
      kernel_initializer: Weight initializer for convolutional layers other than
        temporal convolution.
      kernel_regularizer: Weight regularizer for all convolutional layers.
      parameterized_conv_layer: class for parameterized conv layer.
      **kwargs: keyword arguments to be passed.
    """
    super(InceptionV1CellLayer, self).__init__(**kwargs)
    self._branch_filters = branch_filters
    self._conv_type = conv_type
    self._temporal_dilation_rate = temporal_dilation_rate
    self._swap_pool_and_1x1x1 = swap_pool_and_1x1x1
    self._use_self_gating_on_branch = use_self_gating_on_branch
    self._use_self_gating_on_cell = use_self_gating_on_cell
    self._use_sync_bn = use_sync_bn
    self._norm_momentum = norm_momentum
    self._norm_epsilon = norm_epsilon
    self._temporal_conv_initializer = temporal_conv_initializer
    self._kernel_initializer = kernel_initializer
    self._kernel_regularizer = kernel_regularizer
    self._parameterized_conv_layer = parameterized_conv_layer
    # Pick the normalization class once; instances are created in build().
    if use_sync_bn:
      self._norm = tf.keras.layers.experimental.SyncBatchNormalization
    else:
      self._norm = tf.keras.layers.BatchNormalization
    if tf.keras.backend.image_data_format() == 'channels_last':
      self._channel_axis = -1
    else:
      self._channel_axis = 1

  def _build_branch_params(self):
    """Builds per-layer kwargs for the four branches (and optional gating).

    Returns a list of five lists of dicts, one list per branch plus one for
    the optional cell-level self-gating layer. The dict order inside each
    branch list must match the layer order constructed in build().
    """
    branch_0_params = [
        # Conv3D
        dict(
            filters=self._branch_filters[0][0],
            kernel_size=[1, 1, 1],
            padding='same',
            use_bias=False,
            kernel_initializer=self._kernel_initializer,
            kernel_regularizer=self._kernel_regularizer),
        # norm
        dict(
            axis=self._channel_axis,
            momentum=self._norm_momentum,
            epsilon=self._norm_epsilon,
            scale=False,
            gamma_initializer='ones'),
        # relu
        dict(),
    ]
    branch_1_params = [
        # Conv3D
        dict(
            filters=self._branch_filters[1][0],
            kernel_size=[1, 1, 1],
            padding='same',
            use_bias=False,
            kernel_initializer=self._kernel_initializer,
            kernel_regularizer=self._kernel_regularizer),
        # norm
        dict(
            axis=self._channel_axis,
            momentum=self._norm_momentum,
            epsilon=self._norm_epsilon,
            scale=False,
            gamma_initializer='ones'),
        # relu
        dict(),
        # ParameterizedConvLayer
        dict(
            conv_type=self._conv_type,
            kernel_size=3,
            filters=self._branch_filters[1][1],
            strides=[1, 1, 1],
            rates=[self._temporal_dilation_rate, 1, 1],
            use_sync_bn=self._use_sync_bn,
            norm_momentum=self._norm_momentum,
            norm_epsilon=self._norm_epsilon,
            temporal_conv_initializer=self._temporal_conv_initializer,
            kernel_initializer=self._kernel_initializer,
            kernel_regularizer=self._kernel_regularizer),
    ]
    branch_2_params = [
        # Conv3D
        dict(
            filters=self._branch_filters[2][0],
            kernel_size=[1, 1, 1],
            padding='same',
            use_bias=False,
            kernel_initializer=self._kernel_initializer,
            kernel_regularizer=self._kernel_regularizer),
        # norm
        dict(
            axis=self._channel_axis,
            momentum=self._norm_momentum,
            epsilon=self._norm_epsilon,
            scale=False,
            gamma_initializer='ones'),
        # relu
        dict(),
        # ParameterizedConvLayer
        dict(
            conv_type=self._conv_type,
            kernel_size=3,
            filters=self._branch_filters[2][1],
            strides=[1, 1, 1],
            rates=[self._temporal_dilation_rate, 1, 1],
            use_sync_bn=self._use_sync_bn,
            norm_momentum=self._norm_momentum,
            norm_epsilon=self._norm_epsilon,
            temporal_conv_initializer=self._temporal_conv_initializer,
            kernel_initializer=self._kernel_initializer,
            kernel_regularizer=self._kernel_regularizer)
    ]
    branch_3_params = [
        # Conv3D
        dict(
            filters=self._branch_filters[3][0],
            kernel_size=[1, 1, 1],
            padding='same',
            use_bias=False,
            kernel_initializer=self._kernel_initializer,
            kernel_regularizer=self._kernel_regularizer),
        # norm
        dict(
            axis=self._channel_axis,
            momentum=self._norm_momentum,
            epsilon=self._norm_epsilon,
            scale=False,
            gamma_initializer='ones'),
        # relu
        dict(),
        # pool: spatial-only window for '2d', cubic window otherwise.
        dict(
            pool_size=([1, 3, 3] if self._conv_type == '2d' else [3] * 3),
            strides=[1, 1, 1],
            padding='same')
    ]
    if self._use_self_gating_on_branch:
      # Gating preserves channel count, so filters mirror each branch's final
      # output width.
      branch_0_params.append(dict(filters=self._branch_filters[0][0]))
      branch_1_params.append(dict(filters=self._branch_filters[1][1]))
      branch_2_params.append(dict(filters=self._branch_filters[2][1]))
      branch_3_params.append(dict(filters=self._branch_filters[3][0]))
    out_gating_params = []
    if self._use_self_gating_on_cell:
      # Cell-level gating acts on the concatenation of all four branches.
      out_channels = (
          self._branch_filters[0][0] + self._branch_filters[1][1] +
          self._branch_filters[2][1] + self._branch_filters[3][0])
      out_gating_params.append(dict(filters=out_channels))
    return [
        branch_0_params, branch_1_params, branch_2_params, branch_3_params,
        out_gating_params
    ]

  def build(self, input_shape):
    """Instantiates the branch layers from the params built above."""
    branch_params = self._build_branch_params()
    self._branch_0_layers = [
        tf.keras.layers.Conv3D(**branch_params[0][0]),
        self._norm(**branch_params[0][1]),
        tf.keras.layers.Activation('relu', **branch_params[0][2]),
    ]
    self._branch_1_layers = [
        tf.keras.layers.Conv3D(**branch_params[1][0]),
        self._norm(**branch_params[1][1]),
        tf.keras.layers.Activation('relu', **branch_params[1][2]),
        self._parameterized_conv_layer(**branch_params[1][3]),
    ]
    self._branch_2_layers = [
        tf.keras.layers.Conv3D(**branch_params[2][0]),
        self._norm(**branch_params[2][1]),
        tf.keras.layers.Activation('relu', **branch_params[2][2]),
        self._parameterized_conv_layer(**branch_params[2][3])
    ]
    # Branch 3 is conv-then-pool or pool-then-conv depending on the flag;
    # both orders reuse the same parameter dicts.
    if self._swap_pool_and_1x1x1:
      self._branch_3_layers = [
          tf.keras.layers.Conv3D(**branch_params[3][0]),
          self._norm(**branch_params[3][1]),
          tf.keras.layers.Activation('relu', **branch_params[3][2]),
          tf.keras.layers.MaxPool3D(**branch_params[3][3]),
      ]
    else:
      self._branch_3_layers = [
          tf.keras.layers.MaxPool3D(**branch_params[3][3]),
          tf.keras.layers.Conv3D(**branch_params[3][0]),
          self._norm(**branch_params[3][1]),
          tf.keras.layers.Activation('relu', **branch_params[3][2]),
      ]
    if self._use_self_gating_on_branch:
      # Gating params were appended last, hence the [-1] index.
      self._branch_0_layers.append(
          nn_blocks_3d.SelfGating(**branch_params[0][-1]))
      self._branch_1_layers.append(
          nn_blocks_3d.SelfGating(**branch_params[1][-1]))
      self._branch_2_layers.append(
          nn_blocks_3d.SelfGating(**branch_params[2][-1]))
      self._branch_3_layers.append(
          nn_blocks_3d.SelfGating(**branch_params[3][-1]))
    if self._use_self_gating_on_cell:
      self.cell_self_gating = nn_blocks_3d.SelfGating(**branch_params[4][0])
    super(InceptionV1CellLayer, self).build(input_shape)

  def call(self, inputs):
    """Runs the four branches on `inputs` and concatenates over channels."""
    x = inputs
    for layer in self._branch_0_layers:
      x = layer(x)
    branch_0 = x
    x = inputs
    for layer in self._branch_1_layers:
      x = layer(x)
    branch_1 = x
    x = inputs
    for layer in self._branch_2_layers:
      x = layer(x)
    branch_2 = x
    x = inputs
    for layer in self._branch_3_layers:
      x = layer(x)
    branch_3 = x
    out_tensor = tf.concat([branch_0, branch_1, branch_2, branch_3],
                           axis=self._channel_axis)
    if self._use_self_gating_on_cell:
      out_tensor = self.cell_self_gating(out_tensor)
    return out_tensor
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
from absl.testing import parameterized
import tensorflow as tf
from official.projects.s3d.modeling import inception_utils
class InceptionUtilsTest(parameterized.TestCase, tf.test.TestCase):
  """Shape/endpoint tests for the inception_utils building blocks."""

  @parameterized.parameters((1.0, 3, {'Conv2d_1a_7x7', 'Conv2d_2c_3x3'}),
                            (0.5, 5, {'Conv2d_1a_7x7', 'Conv2d_2c_3x3'}),
                            (0.25, 7, {'Conv2d_1a_7x7', 'Conv2d_2c_3x3'}))
  def test_s3d_stem_cells(self, depth_multiplier, first_temporal_kernel_size,
                          temporal_conv_endpoints):
    # Symbolic video input: 1 clip of 64 frames at 224x224 RGB.
    video_input = tf.keras.layers.Input(
        shape=(64, 224, 224, 3), batch_size=1)
    outputs, output_endpoints = inception_utils.inception_v1_stem_cells(
        video_input,
        depth_multiplier,
        'Mixed_5c',
        temporal_conv_endpoints=temporal_conv_endpoints,
        self_gating_endpoints={'Conv2d_2c_3x3'},
        first_temporal_kernel_size=first_temporal_kernel_size)
    # The stem halves time once and space three times; channels scale with
    # the depth multiplier.
    self.assertListEqual(outputs.shape.as_list(),
                         [1, 32, 28, 28, int(192 * depth_multiplier)])
    self.assertSetEqual(
        {
            'Conv2d_1a_7x7', 'MaxPool_2a_3x3', 'Conv2d_2b_1x1',
            'Conv2d_2c_3x3', 'MaxPool_3a_3x3'
        }, set(output_endpoints.keys()))

  @parameterized.parameters(
      ('3d', True, True, True),
      ('2d', False, False, True),
      ('1+2d', True, False, False),
      ('2+1d', False, True, False),
  )
  def test_inception_v1_cell_endpoint_match(self, conv_type,
                                            swap_pool_and_1x1x1,
                                            use_self_gating_on_branch,
                                            use_self_gating_on_cell):
    cell = inception_utils.InceptionV1CellLayer(
        [[64], [96, 128], [16, 32], [32]],
        conv_type=conv_type,
        swap_pool_and_1x1x1=swap_pool_and_1x1x1,
        use_self_gating_on_branch=use_self_gating_on_branch,
        use_self_gating_on_cell=use_self_gating_on_cell,
        name='test')
    cell_input = tf.keras.layers.Input(
        shape=(32, 28, 28, 128), batch_size=5)
    outputs = cell(cell_input)
    # 64 + 128 + 32 + 32 = 256 channels after branch concatenation; the cell
    # preserves the spatiotemporal dimensions.
    self.assertListEqual(outputs.shape.as_list(), [5, 32, 28, 28, 256])


if __name__ == '__main__':
  tf.test.main()
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Commonly used TensorFlow 2 network blocks."""
from typing import Any, Text, Sequence, Union
import tensorflow as tf
# Weight-initializer factories selectable by name. 'Gaussian' is wrapped in a
# lambda so each lookup produces a fresh initializer instance.
WEIGHT_INITIALIZER = {
    'Xavier': tf.keras.initializers.GlorotUniform,
    'Gaussian': lambda: tf.keras.initializers.RandomNormal(stddev=0.01),
}

# Short aliases for frequently referenced Keras namespaces.
initializers = tf.keras.initializers
regularizers = tf.keras.regularizers
def make_set_from_start_endpoint(start_endpoint: Text,
                                 endpoints: Sequence[Text]):
  """Returns the suffix of `endpoints` from `start_endpoint` on, as a set.

  An empty set is returned when `start_endpoint` does not occur in
  `endpoints`.
  """
  try:
    first = endpoints.index(start_endpoint)
  except ValueError:
    return set()
  return set(endpoints[first:])
def apply_depth_multiplier(d: Union[int, Sequence[Any]],
                           depth_multiplier: float):
  """Applies depth_multiplier recursively to ints.

  Ints are scaled and truncated back to int; sequences are mapped
  element-wise, preserving nesting structure (always returned as lists).
  """
  if not isinstance(d, int):
    return [apply_depth_multiplier(item, depth_multiplier) for item in d]
  return int(d * depth_multiplier)
class ParameterizedConvLayer(tf.keras.layers.Layer):
  """Convolution layer based on the input conv_type.

  Expands one logical convolution into a sequence of (Conv3D, norm, ReLU)
  stages whose kernel factorization is selected by `conv_type`:
  '3d', '2d', '1+2d' (temporal then spatial), '2+1d' (spatial then temporal),
  or '1+1+1d' (fully separable along width, height, then time).
  """

  def __init__(
      self,
      conv_type: Text,
      kernel_size: int,
      filters: int,
      strides: Sequence[int],
      rates: Sequence[int],
      use_sync_bn: bool = False,
      norm_momentum: float = 0.999,
      norm_epsilon: float = 0.001,
      temporal_conv_initializer: Union[
          Text, initializers.Initializer] = 'glorot_uniform',
      kernel_initializer: Union[Text,
                                initializers.Initializer] = 'truncated_normal',
      kernel_regularizer: Union[Text, regularizers.Regularizer] = 'l2',
      **kwargs):
    super(ParameterizedConvLayer, self).__init__(**kwargs)
    self._conv_type = conv_type
    self._kernel_size = kernel_size
    self._filters = filters
    # strides/rates are [time, height, width] triples; factorized variants
    # pick out the relevant components per sub-convolution below.
    self._strides = strides
    self._rates = rates
    self._use_sync_bn = use_sync_bn
    self._norm_momentum = norm_momentum
    self._norm_epsilon = norm_epsilon
    if use_sync_bn:
      self._norm = tf.keras.layers.experimental.SyncBatchNormalization
    else:
      self._norm = tf.keras.layers.BatchNormalization
    if tf.keras.backend.image_data_format() == 'channels_last':
      self._channel_axis = -1
    else:
      self._channel_axis = 1
    self._temporal_conv_initializer = temporal_conv_initializer
    self._kernel_initializer = kernel_initializer
    self._kernel_regularizer = kernel_regularizer

  def _build_conv_layer_params(self, input_shape):
    """Builds params for conv layers."""
    conv_layer_params = []
    if self._conv_type == '3d':
      # Single full spatiotemporal convolution.
      conv_layer_params.append(
          dict(
              filters=self._filters,
              kernel_size=[self._kernel_size] * 3,
              strides=self._strides,
              dilation_rate=self._rates,
              kernel_initializer=self._kernel_initializer,
          ))
    elif self._conv_type == '2d':
      # Spatial-only convolution; temporal dim untouched.
      conv_layer_params.append(
          dict(
              filters=self._filters,
              kernel_size=[1, self._kernel_size, self._kernel_size],
              strides=[1, self._strides[1], self._strides[2]],
              dilation_rate=[1, self._rates[1], self._rates[2]],
              kernel_initializer=self._kernel_initializer,
          ))
    elif self._conv_type == '1+2d':
      # Temporal conv first, keeping the input channel count, then the
      # spatial conv maps to the target filter count.
      channels_in = input_shape[self._channel_axis]
      conv_layer_params.append(
          dict(
              filters=channels_in,
              kernel_size=[self._kernel_size, 1, 1],
              strides=[self._strides[0], 1, 1],
              dilation_rate=[self._rates[0], 1, 1],
              kernel_initializer=self._temporal_conv_initializer,
          ))
      conv_layer_params.append(
          dict(
              filters=self._filters,
              kernel_size=[1, self._kernel_size, self._kernel_size],
              strides=[1, self._strides[1], self._strides[2]],
              dilation_rate=[1, self._rates[1], self._rates[2]],
              kernel_initializer=self._kernel_initializer,
          ))
    elif self._conv_type == '2+1d':
      # Spatial conv to the target filter count, then temporal conv.
      conv_layer_params.append(
          dict(
              filters=self._filters,
              kernel_size=[1, self._kernel_size, self._kernel_size],
              strides=[1, self._strides[1], self._strides[2]],
              dilation_rate=[1, self._rates[1], self._rates[2]],
              kernel_initializer=self._kernel_initializer,
          ))
      conv_layer_params.append(
          dict(
              filters=self._filters,
              kernel_size=[self._kernel_size, 1, 1],
              strides=[self._strides[0], 1, 1],
              dilation_rate=[self._rates[0], 1, 1],
              kernel_initializer=self._temporal_conv_initializer,
          ))
    elif self._conv_type == '1+1+1d':
      # Fully separable: width, then height, then time.
      conv_layer_params.append(
          dict(
              filters=self._filters,
              kernel_size=[1, 1, self._kernel_size],
              strides=[1, 1, self._strides[2]],
              dilation_rate=[1, 1, self._rates[2]],
              kernel_initializer=self._kernel_initializer,
          ))
      conv_layer_params.append(
          dict(
              filters=self._filters,
              kernel_size=[1, self._kernel_size, 1],
              strides=[1, self._strides[1], 1],
              dilation_rate=[1, self._rates[1], 1],
              kernel_initializer=self._kernel_initializer,
          ))
      conv_layer_params.append(
          dict(
              filters=self._filters,
              kernel_size=[self._kernel_size, 1, 1],
              strides=[self._strides[0], 1, 1],
              dilation_rate=[self._rates[0], 1, 1],
              kernel_initializer=self._kernel_initializer,
          ))
    else:
      raise ValueError('Unsupported conv_type: {}'.format(self._conv_type))
    return conv_layer_params

  def _build_norm_layer_params(self, conv_param):
    """Builds params for the norm layer after one conv layer."""
    # `conv_param` is currently unused — presumably kept so subclasses can
    # vary normalization per conv stage; confirm before removing.
    return dict(
        axis=self._channel_axis,
        momentum=self._norm_momentum,
        epsilon=self._norm_epsilon,
        scale=False,
        gamma_initializer='ones')

  def _build_activation_layer_params(self, conv_param):
    """Builds params for the activation layer after one conv layer."""
    return {}

  def _append_conv_layer(self, param):
    """Appends conv, normalization and activation layers."""
    self._parameterized_conv_layers.append(
        tf.keras.layers.Conv3D(
            padding='same',
            use_bias=False,  # bias is redundant before normalization
            kernel_regularizer=self._kernel_regularizer,
            **param,
        ))
    norm_layer_params = self._build_norm_layer_params(param)
    self._parameterized_conv_layers.append(self._norm(**norm_layer_params))
    relu_layer_params = self._build_activation_layer_params(param)
    self._parameterized_conv_layers.append(
        tf.keras.layers.Activation('relu', **relu_layer_params))

  def build(self, input_shape):
    # Materialize one (conv, norm, relu) triple per factorized convolution.
    self._parameterized_conv_layers = []
    for conv_layer_param in self._build_conv_layer_params(input_shape):
      self._append_conv_layer(conv_layer_param)
    super(ParameterizedConvLayer, self).build(input_shape)

  def call(self, inputs):
    # Apply the stages strictly in sequence.
    x = inputs
    for layer in self._parameterized_conv_layers:
      x = layer(x)
    return x
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
from absl import logging
from absl.testing import parameterized
import tensorflow as tf
from official.projects.s3d.modeling import net_utils
class Tf2NetUtilsTest(parameterized.TestCase, tf.test.TestCase):
  """Output-shape tests for `net_utils.ParameterizedConvLayer`."""

  @parameterized.parameters(
      ('3d', [2, 1, 1], [5, 16, 28, 28, 256]),
      ('3d', [2, 2, 2], [5, 16, 14, 14, 256]),
      ('3d', [1, 2, 1], [5, 32, 14, 28, 256]),
      ('2d', [2, 2, 2], [5, 32, 14, 14, 256]),
      ('2d', [1, 1, 2], [5, 32, 28, 14, 256]),
      ('1+2d', [2, 2, 2], [5, 16, 14, 14, 256]),
      ('1+2d', [2, 1, 1], [5, 16, 28, 28, 256]),
      ('1+2d', [1, 1, 1], [5, 32, 28, 28, 256]),
      ('1+2d', [1, 1, 2], [5, 32, 28, 14, 256]),
      ('2+1d', [2, 2, 2], [5, 16, 14, 14, 256]),
      ('2+1d', [1, 1, 1], [5, 32, 28, 28, 256]),
      ('2+1d', [2, 1, 2], [5, 16, 28, 14, 256]),
      ('1+1+1d', [2, 2, 2], [5, 16, 14, 14, 256]),
      ('1+1+1d', [1, 1, 1], [5, 32, 28, 28, 256]),
      ('1+1+1d', [2, 1, 2], [5, 16, 28, 14, 256]),
  )
  def test_parameterized_conv_layer_creation(self, conv_type, strides,
                                             expected_shape):
    """Builds a layer of `conv_type` and verifies its output shape."""
    # Fixed input geometry; only conv_type and strides vary per test case.
    input_batch = 5
    input_frames = 32
    input_size = 28
    input_channels = 128
    conv_kernel_size = 3
    conv_filters = 256
    conv_rates = [1, 1, 1]
    video = tf.keras.Input(
        shape=(input_frames, input_size, input_size, input_channels),
        batch_size=input_batch)
    layer_under_test = net_utils.ParameterizedConvLayer(
        conv_type,
        conv_kernel_size,
        conv_filters,
        strides,
        conv_rates,
        name='ParameterizedConv')
    outputs = layer_under_test(video)
    logging.info(outputs.shape.as_list())
    logging.info([w.name for w in layer_under_test.weights])
    self.assertAllEqual(outputs.shape.as_list(), expected_shape)
# Run all test cases when invoked as a script.
if __name__ == '__main__':
  tf.test.main()
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Contains the Tensorflow 2 version definition of S3D model.
S3D model is described in the following paper:
https://arxiv.org/abs/1712.04851.
"""
from typing import Any, Dict, Mapping, Optional, Sequence, Text, Tuple, Union
import tensorflow as tf
from official.modeling import hyperparams
from official.projects.s3d.configs import s3d as cfg
from official.projects.s3d.modeling import inception_utils
from official.projects.s3d.modeling import net_utils
from official.vision.beta.modeling import factory_3d as model_factory
from official.vision.beta.modeling.backbones import factory as backbone_factory
initializers = tf.keras.initializers
regularizers = tf.keras.regularizers
class S3D(tf.keras.Model):
  """Class to build S3D family model.

  Built as a Keras functional model whose outputs are a dict mapping every
  constructed endpoint name to its output tensor.
  """

  def __init__(self,
               input_specs: tf.keras.layers.InputSpec,
               final_endpoint: Text = 'Mixed_5c',
               first_temporal_kernel_size: int = 3,
               temporal_conv_start_at: Text = 'Conv2d_2c_3x3',
               gating_start_at: Text = 'Conv2d_2c_3x3',
               swap_pool_and_1x1x1: bool = True,
               gating_style: Text = 'CELL',
               use_sync_bn: bool = False,
               norm_momentum: float = 0.999,
               norm_epsilon: float = 0.001,
               temporal_conv_initializer: Union[
                   Text,
                   initializers.Initializer] = initializers.TruncatedNormal(
                       mean=0.0, stddev=0.01),
               temporal_conv_type: Text = '2+1d',
               kernel_initializer: Union[
                   Text,
                   initializers.Initializer] = initializers.TruncatedNormal(
                       mean=0.0, stddev=0.01),
               kernel_regularizer: Union[Text, regularizers.Regularizer] = 'l2',
               depth_multiplier: float = 1.0,
               **kwargs):
    """Constructor.

    Args:
      input_specs: `tf.keras.layers.InputSpec` specs of the input tensor.
      final_endpoint: Specifies the endpoint to construct the network up to.
      first_temporal_kernel_size: Temporal kernel size of the first convolution
        layer.
      temporal_conv_start_at: Specifies the endpoint where to start performing
        temporal convolution from.
      gating_start_at: Specifies the endpoint where to start performing self
        gating from.
      swap_pool_and_1x1x1: A boolean flag indicates that whether to swap the
        order of convolution and max pooling in Branch_3 of inception v1 cell.
      gating_style: A string that specifies self gating to be applied after each
        branch and/or after each cell. It can be one of ['BRANCH', 'CELL',
        'BRANCH_AND_CELL'].
      use_sync_bn: If True, use synchronized batch normalization.
      norm_momentum: A `float` of normalization momentum for the moving average.
      norm_epsilon: A `float` added to variance to avoid dividing by zero.
      temporal_conv_initializer: Weight initializer for temporal convolutional
        layers.
      temporal_conv_type: The type of parameterized convolution. Currently, we
        support '2d', '3d', '2+1d', '1+2d'.
      kernel_initializer: Weight initializer for convolutional layers other than
        temporal convolution.
      kernel_regularizer: Weight regularizer for all convolutional layers.
      depth_multiplier: A float to reduce/increase number of channels.
      **kwargs: keyword arguments to be passed.

    Raises:
      ValueError: If `final_endpoint` is not a recognized endpoint name.
    """
    self._input_specs = input_specs
    self._final_endpoint = final_endpoint
    self._first_temporal_kernel_size = first_temporal_kernel_size
    self._temporal_conv_start_at = temporal_conv_start_at
    self._gating_start_at = gating_start_at
    self._swap_pool_and_1x1x1 = swap_pool_and_1x1x1
    self._gating_style = gating_style
    self._use_sync_bn = use_sync_bn
    self._norm_momentum = norm_momentum
    self._norm_epsilon = norm_epsilon
    self._temporal_conv_initializer = temporal_conv_initializer
    self._temporal_conv_type = temporal_conv_type
    self._kernel_initializer = kernel_initializer
    self._kernel_regularizer = kernel_regularizer
    self._depth_multiplier = depth_multiplier
    # Endpoints from `temporal_conv_start_at` (resp. `gating_start_at`)
    # onwards use separable temporal convolutions (resp. self gating).
    self._temporal_conv_endpoints = net_utils.make_set_from_start_endpoint(
        temporal_conv_start_at, inception_utils.INCEPTION_V1_CONV_ENDPOINTS)
    self._self_gating_endpoints = net_utils.make_set_from_start_endpoint(
        gating_start_at, inception_utils.INCEPTION_V1_CONV_ENDPOINTS)

    # Build the stem (Conv2d_1a_7x7 .. MaxPool_3a_3x3), then stack the
    # inception cells until `final_endpoint` is reached.
    inputs = tf.keras.Input(shape=input_specs.shape[1:])
    net, end_points = inception_utils.inception_v1_stem_cells(
        inputs,
        depth_multiplier,
        final_endpoint,
        temporal_conv_endpoints=self._temporal_conv_endpoints,
        self_gating_endpoints=self._self_gating_endpoints,
        temporal_conv_type=self._temporal_conv_type,
        first_temporal_kernel_size=self._first_temporal_kernel_size,
        use_sync_bn=self._use_sync_bn,
        norm_momentum=self._norm_momentum,
        norm_epsilon=self._norm_epsilon,
        temporal_conv_initializer=self._temporal_conv_initializer,
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
        parameterized_conv_layer=self._get_parameterized_conv_layer_impl(),
        layer_naming_fn=self._get_layer_naming_fn(),
    )
    for end_point, filters in inception_utils.INCEPTION_V1_ARCH_SKELETON:
      net, end_points = self._s3d_cell(net, end_point, end_points, filters)
      if end_point == final_endpoint:
        break
    if final_endpoint not in end_points:
      raise ValueError(
          'Unrecognized final endpoint %s (available endpoints: %s).' %
          (final_endpoint, end_points.keys()))
    super(S3D, self).__init__(inputs=inputs, outputs=end_points, **kwargs)
    # Bug fix: the `output_specs` property reads `self._output_specs`, which
    # was never assigned, so accessing it raised AttributeError. Record the
    # shape of every endpoint output here.
    self._output_specs = {
        endpoint: tensor.get_shape()
        for endpoint, tensor in end_points.items()
    }

  def _s3d_cell(
      self,
      net: tf.Tensor,
      end_point: Text,
      end_points: Dict[Text, tf.Tensor],
      filters: Union[int, Sequence[Any]],
      non_local_block: Optional[tf.keras.layers.Layer] = None,
      attention_cell: Optional[tf.keras.layers.Layer] = None,
      attention_cell_super_graph: Optional[tf.keras.layers.Layer] = None
  ) -> Tuple[tf.Tensor, Dict[Text, tf.Tensor]]:
    """Appends one block named `end_point` to the network graph.

    Args:
      net: Input tensor of the block.
      end_point: Block name. Names starting with 'Mixed' build an inception
        v1 cell; any other name builds a 3D max pooling layer.
      end_points: Mapping of endpoint name to output tensor; updated in place
        with this block's output.
      filters: For inception cells, the per-branch filter configuration. For
        max pooling, a pair of (pool_size, strides).
      non_local_block: Not yet supported; must be None.
      attention_cell: Not yet supported; must be None.
      attention_cell_super_graph: Not yet supported; must be None.

    Returns:
      A tuple of the block output tensor and the updated `end_points` dict.

    Raises:
      NotImplementedError: If any of the unsupported optional layers is given.
    """
    if end_point.startswith('Mixed'):
      # Temporal conv and self gating are only enabled from their configured
      # start endpoints onwards; earlier cells stay 2D / ungated.
      conv_type = (
          self._temporal_conv_type
          if end_point in self._temporal_conv_endpoints else '2d')
      use_self_gating_on_branch = (
          end_point in self._self_gating_endpoints and
          (self._gating_style == 'BRANCH' or
           self._gating_style == 'BRANCH_AND_CELL'))
      use_self_gating_on_cell = (
          end_point in self._self_gating_endpoints and
          (self._gating_style == 'CELL' or
           self._gating_style == 'BRANCH_AND_CELL'))
      net = self._get_inception_v1_cell_layer_impl()(
          branch_filters=net_utils.apply_depth_multiplier(
              filters, self._depth_multiplier),
          conv_type=conv_type,
          temporal_dilation_rate=1,
          swap_pool_and_1x1x1=self._swap_pool_and_1x1x1,
          use_self_gating_on_branch=use_self_gating_on_branch,
          use_self_gating_on_cell=use_self_gating_on_cell,
          use_sync_bn=self._use_sync_bn,
          norm_momentum=self._norm_momentum,
          norm_epsilon=self._norm_epsilon,
          kernel_initializer=self._kernel_initializer,
          temporal_conv_initializer=self._temporal_conv_initializer,
          kernel_regularizer=self._kernel_regularizer,
          name=self._get_layer_naming_fn()(end_point))(
              net)
    else:
      net = tf.keras.layers.MaxPool3D(
          pool_size=filters[0],
          strides=filters[1],
          padding='same',
          name=self._get_layer_naming_fn()(end_point))(
              net)
    end_points[end_point] = net
    if non_local_block:
      # TODO(b/182299420): Implement non local block in TF2.
      raise NotImplementedError('Non local block is not implemented yet.')
    if attention_cell:
      # TODO(b/182299420): Implement attention cell in TF2.
      raise NotImplementedError('Attention cell is not implemented yet.')
    if attention_cell_super_graph:
      # TODO(b/182299420): Implement attention cell super graph in TF2.
      raise NotImplementedError('Attention cell super graph is not implemented'
                                ' yet.')
    return net, end_points

  def get_config(self):
    """Returns the constructor arguments needed to rebuild this model."""
    config_dict = {
        'input_specs': self._input_specs,
        'final_endpoint': self._final_endpoint,
        'first_temporal_kernel_size': self._first_temporal_kernel_size,
        'temporal_conv_start_at': self._temporal_conv_start_at,
        'gating_start_at': self._gating_start_at,
        'swap_pool_and_1x1x1': self._swap_pool_and_1x1x1,
        'gating_style': self._gating_style,
        'use_sync_bn': self._use_sync_bn,
        'norm_momentum': self._norm_momentum,
        'norm_epsilon': self._norm_epsilon,
        'temporal_conv_initializer': self._temporal_conv_initializer,
        'temporal_conv_type': self._temporal_conv_type,
        'kernel_initializer': self._kernel_initializer,
        'kernel_regularizer': self._kernel_regularizer,
        'depth_multiplier': self._depth_multiplier
    }
    return config_dict

  @classmethod
  def from_config(cls, config, custom_objects=None):
    """Creates an S3D instance from a `get_config` dictionary."""
    return cls(**config)

  @property
  def output_specs(self):
    """A dict of {level: TensorShape} pairs for the model output."""
    return self._output_specs

  def _get_inception_v1_cell_layer_impl(self):
    # Hook for subclasses to substitute a different cell implementation.
    return inception_utils.InceptionV1CellLayer

  def _get_parameterized_conv_layer_impl(self):
    # Hook for subclasses to substitute a different conv implementation.
    return net_utils.ParameterizedConvLayer

  def _get_layer_naming_fn(self):
    # Default naming lets Keras auto-generate layer names.
    return lambda end_point: None
class S3DModel(tf.keras.Model):
  """An S3D model builder."""

  def __init__(self,
               backbone: tf.keras.Model,
               num_classes: int,
               input_specs: Mapping[Text, tf.keras.layers.InputSpec],
               final_endpoint: Text = 'Mixed_5c',
               dropout_rate: float = 0.0,
               **kwargs):
    """Constructor.

    Args:
      backbone: S3D backbone Keras Model.
      num_classes: `int` number of possible classes for video classification.
      input_specs: input_specs: `tf.keras.layers.InputSpec` specs of the input
        tensor.
      final_endpoint: Specifies the endpoint to construct the network up to.
      dropout_rate: `float` between 0 and 1. Fraction of the input units to
        drop. Note that dropout_rate = 1.0 - dropout_keep_prob.
      **kwargs: keyword arguments to be passed.
    """
    # Disable Keras attribute tracking while stashing config values so the
    # backbone is not double-tracked.
    self._self_setattr_tracking = False
    self._backbone = backbone
    self._num_classes = num_classes
    self._input_specs = input_specs
    self._final_endpoint = final_endpoint
    self._dropout_rate = dropout_rate
    self._config_dict = {
        'backbone': backbone,
        'num_classes': num_classes,
        'input_specs': input_specs,
        'final_endpoint': final_endpoint,
        'dropout_rate': dropout_rate,
    }

    # Build the classification head on top of the chosen backbone endpoint.
    inputs = {
        name: tf.keras.Input(shape=spec.shape[1:])
        for name, spec in input_specs.items()
    }
    endpoints = self._backbone(inputs['image'])
    # Global average pool over the (time, height, width) axes.
    pooled = tf.math.reduce_mean(
        endpoints[self._final_endpoint], axis=[1, 2, 3])
    dropped = tf.keras.layers.Dropout(dropout_rate)(pooled)
    logits = tf.keras.layers.Dense(**self._build_dense_layer_params())(dropped)
    super(S3DModel, self).__init__(inputs=inputs, outputs=logits, **kwargs)

  @property
  def checkpoint_items(self):
    """Returns a dictionary of items to be additionally checkpointed."""
    return dict(backbone=self.backbone)

  @property
  def backbone(self):
    """The S3D backbone sub-model."""
    return self._backbone

  def get_config(self):
    """Returns the constructor arguments needed to rebuild this model."""
    return self._config_dict

  @classmethod
  def from_config(cls, config, custom_objects=None):
    """Creates an S3DModel instance from a `get_config` dictionary."""
    return cls(**config)

  def _build_dense_layer_params(self):
    # Keyword arguments for the final classification Dense layer.
    return {'units': self._num_classes, 'kernel_regularizer': 'l2'}
@backbone_factory.register_backbone_builder('s3d')
def build_s3d(
    input_specs: tf.keras.layers.InputSpec,
    backbone_config: hyperparams.Config,
    norm_activation_config: hyperparams.Config,
    l2_regularizer: tf.keras.regularizers.Regularizer = None
) -> tf.keras.Model:  # pytype: disable=annotation-type-mismatch  # typed-keras
  """Builds S3D backbone from its one-of backbone config."""
  backbone_cfg = backbone_config.get()
  assert backbone_config.type == 's3d'
  # Normalization options come from the backbone config itself.
  del norm_activation_config
  return S3D(
      input_specs=input_specs,
      final_endpoint=backbone_cfg.final_endpoint,
      first_temporal_kernel_size=backbone_cfg.first_temporal_kernel_size,
      temporal_conv_start_at=backbone_cfg.temporal_conv_start_at,
      gating_start_at=backbone_cfg.gating_start_at,
      swap_pool_and_1x1x1=backbone_cfg.swap_pool_and_1x1x1,
      gating_style=backbone_cfg.gating_style,
      use_sync_bn=backbone_cfg.use_sync_bn,
      norm_momentum=backbone_cfg.norm_momentum,
      norm_epsilon=backbone_cfg.norm_epsilon,
      temporal_conv_type=backbone_cfg.temporal_conv_type,
      kernel_regularizer=l2_regularizer,
      depth_multiplier=backbone_cfg.depth_multiplier)
@model_factory.register_model_builder('s3d')
def build_s3d_model(
    input_specs: tf.keras.layers.InputSpec,
    model_config: cfg.S3DModel,
    num_classes: int,
    l2_regularizer: tf.keras.regularizers.Regularizer = None
) -> tf.keras.Model:  # pytype: disable=annotation-type-mismatch  # typed-keras
  """Builds S3D model with classification layer."""
  backbone = build_s3d(input_specs, model_config.backbone,
                       model_config.norm_activation, l2_regularizer)
  # The classifier consumes the backbone output under the 'image' key.
  return S3DModel(
      backbone,
      num_classes=num_classes,
      input_specs={'image': input_specs},
      dropout_rate=model_config.dropout_rate)
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Tests for S3D model."""
from absl.testing import parameterized
import tensorflow as tf
from official.projects.s3d.modeling import s3d
class S3dTest(parameterized.TestCase, tf.test.TestCase):
  """Tests for the S3D backbone network."""

  @parameterized.parameters(
      (7, 224, 224, 3),
      (7, 128, 128, 3),
      (7, 256, 256, 3),
      (7, 192, 192, 3),
      (64, 224, 224, 3),
      (32, 224, 224, 3),
      (64, 224, 224, 11),
      (32, 224, 224, 11),
  )
  def test_build(self, num_frames, height, width, first_temporal_kernel_size):
    """Checks every endpoint's output shape for varied input geometries."""
    batch_size = 5
    input_shape = [batch_size, num_frames, height, width, 3]
    input_specs = tf.keras.layers.InputSpec(shape=input_shape)
    # Bug fix: `first_temporal_kernel_size` was previously never passed to the
    # network, so the kernel-size-11 parameterizations silently exercised the
    # default of 3. The expected shapes are unaffected: the stem's temporal
    # stride (not its kernel size) determines the output temporal extent.
    network = s3d.S3D(
        input_specs=input_specs,
        first_temporal_kernel_size=first_temporal_kernel_size)
    inputs = tf.keras.Input(shape=input_shape[1:], batch_size=input_shape[0])
    endpoints = network(inputs)
    # Temporal extent after the stride-2 stem convolution with 'same' padding.
    temporal_1a = (num_frames - 1)//2 + 1
    # Bug fix: several entries used `height` for both spatial axes; use
    # `width` for the second axis. All current parameterizations have
    # height == width, so the expectations themselves are unchanged.
    expected_shapes = {
        'Conv2d_1a_7x7': [5, temporal_1a, height//2, width//2, 64],
        'Conv2d_2b_1x1': [5, temporal_1a, height//4, width//4, 64],
        'Conv2d_2c_3x3': [5, temporal_1a, height//4, width//4, 192],
        'MaxPool_2a_3x3': [5, temporal_1a, height//4, width//4, 64],
        'MaxPool_3a_3x3': [5, temporal_1a, height//8, width//8, 192],
        'Mixed_3b': [5, temporal_1a, height//8, width//8, 256],
        'Mixed_3c': [5, temporal_1a, height//8, width//8, 480],
        'MaxPool_4a_3x3': [5, temporal_1a//2, height//16, width//16, 480],
        'Mixed_4b': [5, temporal_1a//2, height//16, width//16, 512],
        'Mixed_4c': [5, temporal_1a//2, height//16, width//16, 512],
        'Mixed_4d': [5, temporal_1a//2, height//16, width//16, 512],
        'Mixed_4e': [5, temporal_1a//2, height//16, width//16, 528],
        'Mixed_4f': [5, temporal_1a//2, height//16, width//16, 832],
        'MaxPool_5a_2x2': [5, temporal_1a//4, height//32, width//32, 832],
        'Mixed_5b': [5, temporal_1a//4, height//32, width//32, 832],
        'Mixed_5c': [5, temporal_1a//4, height//32, width//32, 1024],
    }
    output_shapes = {
        end_point: output_tensor.shape.as_list()
        for end_point, output_tensor in endpoints.items()
    }
    self.assertDictEqual(output_shapes, expected_shapes)

  def test_serialize_deserialize(self):
    """Round-trips the network through get_config/from_config and JSON."""
    # Create a network object that sets all of its config options.
    kwargs = dict(
        input_specs=tf.keras.layers.InputSpec(shape=(5, 64, 224, 224, 3)),
        final_endpoint='Mixed_5c',
        first_temporal_kernel_size=3,
        temporal_conv_start_at='Conv2d_2c_3x3',
        gating_start_at='Conv2d_2c_3x3',
        swap_pool_and_1x1x1=True,
        gating_style='CELL',
        use_sync_bn=False,
        norm_momentum=0.999,
        norm_epsilon=0.001,
        temporal_conv_initializer=tf.keras.initializers.TruncatedNormal(
            mean=0.0, stddev=0.01),
        temporal_conv_type='2+1d',
        kernel_initializer='truncated_normal',
        kernel_regularizer='l2',
        depth_multiplier=1.0
    )
    network = s3d.S3D(**kwargs)
    expected_config = dict(kwargs)
    self.assertEqual(network.get_config(), expected_config)
    # Create another network object from the first object's config.
    new_network = s3d.S3D.from_config(network.get_config())
    # Validate that the config can be forced to JSON.
    _ = new_network.to_json()
    # If the serialization was successful, the new config should match the old.
    self.assertAllEqual(network.get_config(), new_network.get_config())
# Run all test cases when invoked as a script.
if __name__ == '__main__':
  tf.test.main()
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""TensorFlow Model Garden Vision training driver for S3D."""
from absl import app
# pylint: disable=unused-import
from official.common import registry_imports
# pylint: enable=unused-import
from official.common import flags as tfm_flags
# pylint: disable=unused-import
from official.projects.s3d.configs.google import s3d as s3d_config
from official.projects.s3d.modeling import s3d
from official.projects.s3d.tasks.google import automl_video_classification
# pylint: enable=unused-import
from official.vision.beta import train
# Register the standard TFM flags and hand control to the shared training
# driver; the unused imports above register S3D configs/tasks as a side effect.
if __name__ == '__main__':
  tfm_flags.define_flags()
  app.run(train.main)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment