Commit 9c8cbd0c authored by A. Unique TensorFlower
Browse files

Internal change

PiperOrigin-RevId: 428641380
parent 8c3a1ef3
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""S3D model configurations."""
import dataclasses
from typing import Text
from official.modeling import hyperparams
from official.vision.beta.configs import backbones_3d
from official.vision.beta.configs import video_classification
@dataclasses.dataclass
class S3D(hyperparams.Config):
  """S3D backbone config.

  Attributes:
    final_endpoint: Specifies the endpoint to construct the network up to. It
      can be one of ['Conv2d_1a_7x7', 'MaxPool_2a_3x3', 'Conv2d_2b_1x1',
      'Conv2d_2c_3x3', 'MaxPool_3a_3x3', 'Mixed_3b', 'Mixed_3c',
      'MaxPool_4a_3x3', 'Mixed_4b', 'Mixed_4c', 'Mixed_4d', 'Mixed_4e',
      'Mixed_4f', 'MaxPool_5a_2x2', 'Mixed_5b', 'Mixed_5c'].
    first_temporal_kernel_size: Specifies the temporal kernel size for the
      first conv3d filter. A larger value slows down the model but provides
      little accuracy improvement. Must be set to one of 1, 3, 5 or 7.
    temporal_conv_start_at: Specifies the first conv block to use separable 3D
      convs rather than 2D convs (implemented as [1, k, k] 3D conv). This is
      used to construct the inverted pyramid models. 'Conv2d_2c_3x3' is the
      first valid block to use separable 3D convs. If the provided block name
      is not present, all valid blocks will use separable 3D convs.
    gating_start_at: Specifies the first conv block to use self gating.
      'Conv2d_2c_3x3' is the first valid block to use self gating.
    swap_pool_and_1x1x1: If True, in Branch_3 the 1x1x1 convolution is
      performed first, followed by max pooling. The 1x1x1 convolution is used
      to reduce the number of filters, so max pooling is performed on fewer
      filters.
    gating_style: Self gating can be applied after each branch and/or after
      each inception cell. It can be one of ['BRANCH', 'CELL',
      'BRANCH_AND_CELL'].
    use_sync_bn: If True, use synchronized batch normalization.
    norm_momentum: A `float` of normalization momentum for the moving average.
    norm_epsilon: A `float` added to variance to avoid dividing by zero.
    temporal_conv_type: It can be one of ['3d', '2+1d', '1+2d', '1+1+1d'] where
      '3d' is SPATIOTEMPORAL 3d convolution, '2+1d' is
      SPATIAL_TEMPORAL_SEPARATE with 2D convolution on the spatial dimensions
      followed by 1D convolution on the temporal dimension, '1+2d' is
      TEMPORAL_SPATIAL_SEPARATE with 1D convolution on the temporal dimension
      followed by 2D convolution on the spatial dimensions, and '1+1+1d' is
      FULLY_SEPARATE with 1D convolutions on the horizontal, vertical, and
      temporal dimensions, respectively.
    depth_multiplier: Float multiplier for the depth (number of channels) for
      all convolution ops. The value must be greater than zero. Typical usage
      is to set this value in (0, 1) to reduce the number of parameters or the
      computation cost of the model.
  """
  final_endpoint: Text = 'Mixed_5c'
  first_temporal_kernel_size: int = 3
  temporal_conv_start_at: Text = 'Conv2d_2c_3x3'
  gating_start_at: Text = 'Conv2d_2c_3x3'
  swap_pool_and_1x1x1: bool = True
  gating_style: Text = 'CELL'
  use_sync_bn: bool = False
  norm_momentum: float = 0.999
  norm_epsilon: float = 0.001
  temporal_conv_type: Text = '2+1d'
  depth_multiplier: float = 1.0
@dataclasses.dataclass
class Backbone3D(backbones_3d.Backbone3D):
  """Configuration for backbones.

  Attributes:
    type: 'str', type of backbone to be used, one of the fields below.
    s3d: s3d backbone config.
  """
  type: str = 's3d'
  # A config instance is a mutable, unhashable object. Using one directly as a
  # dataclass default shares it between all instances and is rejected with a
  # ValueError by the dataclasses module on Python 3.11+, so build a fresh
  # default per instance instead.
  s3d: S3D = dataclasses.field(default_factory=S3D)
@dataclasses.dataclass
class S3DModel(video_classification.VideoClassificationModel):
  """The S3D model config.

  Attributes:
    model_type: 'str', identifier of the model, 's3d' by default (presumably
      the name the model builder is registered under -- confirm against the
      model factory).
    backbone: backbone config.
  """
  model_type: str = 's3d'
  # Use a factory rather than a shared instance: a config object is mutable
  # and unhashable, which the dataclasses module rejects as a plain default on
  # Python 3.11+ (ValueError at class-definition time).
  backbone: Backbone3D = dataclasses.field(default_factory=Backbone3D)
This diff is collapsed.
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
from absl.testing import parameterized
import tensorflow as tf
from official.projects.s3d.modeling import inception_utils
class InceptionUtilsTest(parameterized.TestCase, tf.test.TestCase):
  """Output-shape and endpoint checks for the inception_utils helpers."""

  @parameterized.parameters((1.0, 3, {'Conv2d_1a_7x7', 'Conv2d_2c_3x3'}),
                            (0.5, 5, {'Conv2d_1a_7x7', 'Conv2d_2c_3x3'}),
                            (0.25, 7, {'Conv2d_1a_7x7', 'Conv2d_2c_3x3'}))
  def test_s3d_stem_cells(self, depth_multiplier, first_temporal_kernel_size,
                          temporal_conv_endpoints):
    """Stem cells yield the expected output shape and endpoint names."""
    batch_size, num_frames = 1, 64
    height = width = 224
    video = tf.keras.layers.Input(
        shape=(num_frames, height, width, 3), batch_size=batch_size)

    stem_output, stem_endpoints = inception_utils.inception_v1_stem_cells(
        video,
        depth_multiplier,
        'Mixed_5c',
        temporal_conv_endpoints=temporal_conv_endpoints,
        self_gating_endpoints={'Conv2d_2c_3x3'},
        first_temporal_kernel_size=first_temporal_kernel_size)

    # The channel count of the stem output scales with the depth multiplier.
    expected_shape = [batch_size, 32, 28, 28, int(192 * depth_multiplier)]
    self.assertListEqual(stem_output.shape.as_list(), expected_shape)

    self.assertSetEqual(
        {
            'Conv2d_1a_7x7', 'MaxPool_2a_3x3', 'Conv2d_2b_1x1',
            'Conv2d_2c_3x3', 'MaxPool_3a_3x3'
        },
        set(stem_endpoints.keys()))

  @parameterized.parameters(
      ('3d', True, True, True),
      ('2d', False, False, True),
      ('1+2d', True, False, False),
      ('2+1d', False, True, False),
  )
  def test_inception_v1_cell_endpoint_match(self, conv_type,
                                            swap_pool_and_1x1x1,
                                            use_self_gating_on_branch,
                                            use_self_gating_on_cell):
    """An inception cell keeps the T/H/W extents and emits 256 channels."""
    batch_size, num_frames, channels = 5, 32, 128
    height = width = 28
    features = tf.keras.layers.Input(
        shape=(num_frames, height, width, channels), batch_size=batch_size)

    cell = inception_utils.InceptionV1CellLayer(
        [[64], [96, 128], [16, 32], [32]],
        conv_type=conv_type,
        swap_pool_and_1x1x1=swap_pool_and_1x1x1,
        use_self_gating_on_branch=use_self_gating_on_branch,
        use_self_gating_on_cell=use_self_gating_on_cell,
        name='test')
    cell_output = cell(features)

    self.assertListEqual(cell_output.shape.as_list(),
                         [batch_size, 32, 28, 28, 256])
# Run the test cases in this module when it is executed as a script.
if __name__ == '__main__':
  tf.test.main()
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Commonly used TensorFlow 2 network blocks."""
from typing import Any, Text, Sequence, Union
import tensorflow as tf
# Maps an initializer name to a zero-argument callable that creates the
# initializer: 'Xavier' is the GlorotUniform class itself, 'Gaussian' is a
# lambda returning a RandomNormal initializer with stddev 0.01.
WEIGHT_INITIALIZER = {
    'Xavier': tf.keras.initializers.GlorotUniform,
    'Gaussian': lambda: tf.keras.initializers.RandomNormal(stddev=0.01),
}
# Short aliases for the Keras initializer and regularizer namespaces.
initializers = tf.keras.initializers
regularizers = tf.keras.regularizers
def make_set_from_start_endpoint(start_endpoint: Text,
                                 endpoints: Sequence[Text]):
  """Returns the set of endpoints from `start_endpoint` to the end.

  Args:
    start_endpoint: Name of the first endpoint to include.
    endpoints: Ordered sequence of endpoint names.

  Returns:
    A set holding `start_endpoint` and every endpoint after it in
    `endpoints`, or an empty set when `start_endpoint` is not in `endpoints`.
  """
  try:
    first = endpoints.index(start_endpoint)
  except ValueError:
    # Unknown starting endpoint: nothing is selected.
    return set()
  return set(endpoints[first:])
def apply_depth_multiplier(d: Union[int, Sequence[Any]],
                           depth_multiplier: float):
  """Scales every int in a (possibly nested) structure by `depth_multiplier`.

  Args:
    d: Either an int or a sequence whose items are ints or further sequences.
    depth_multiplier: Factor applied to each int.

  Returns:
    `int(d * depth_multiplier)` when `d` is an int; otherwise a list with the
    same nesting as `d` in which each int has been scaled the same way.
  """
  if not isinstance(d, int):
    return [apply_depth_multiplier(item, depth_multiplier) for item in d]
  # int() truncates the scaled value.
  return int(d * depth_multiplier)
class ParameterizedConvLayer(tf.keras.layers.Layer):
  """Conv3D -> normalization -> ReLU stack selected by `conv_type`.

  Supported values of `conv_type` are '3d', '2d', '1+2d', '2+1d' and '1+1+1d';
  each one expands to one or more Conv3D layers, and every Conv3D layer is
  followed by a normalization layer and a ReLU activation.
  """

  def __init__(
      self,
      conv_type: Text,
      kernel_size: int,
      filters: int,
      strides: Sequence[int],
      rates: Sequence[int],
      use_sync_bn: bool = False,
      norm_momentum: float = 0.999,
      norm_epsilon: float = 0.001,
      temporal_conv_initializer: Union[
          Text, initializers.Initializer] = 'glorot_uniform',
      kernel_initializer: Union[Text,
                                initializers.Initializer] = 'truncated_normal',
      kernel_regularizer: Union[Text, regularizers.Regularizer] = 'l2',
      **kwargs):
    """Stores the layer configuration; sub-layers are created in `build`.

    Args:
      conv_type: One of '3d', '2d', '1+2d', '2+1d', '1+1+1d'.
      kernel_size: Kernel extent used along every convolved dimension.
      filters: Number of output filters.
      strides: Three strides, indexed as [0], [1], [2] by the conv params.
      rates: Three dilation rates, indexed the same way as `strides`.
      use_sync_bn: If True, use synchronized batch normalization.
      norm_momentum: Momentum passed to the normalization layers.
      norm_epsilon: Epsilon passed to the normalization layers.
      temporal_conv_initializer: Kernel initializer of the temporal conv in
        the '1+2d' and '2+1d' types.
      kernel_initializer: Kernel initializer of all other convolutions.
      kernel_regularizer: Kernel regularizer of every convolution.
      **kwargs: Forwarded to `tf.keras.layers.Layer`.
    """
    super().__init__(**kwargs)

    # Convolution configuration.
    self._conv_type = conv_type
    self._kernel_size = kernel_size
    self._filters = filters
    self._strides = strides
    self._rates = rates
    self._temporal_conv_initializer = temporal_conv_initializer
    self._kernel_initializer = kernel_initializer
    self._kernel_regularizer = kernel_regularizer

    # Normalization configuration.
    self._use_sync_bn = use_sync_bn
    self._norm_momentum = norm_momentum
    self._norm_epsilon = norm_epsilon
    self._norm = (
        tf.keras.layers.experimental.SyncBatchNormalization
        if use_sync_bn else tf.keras.layers.BatchNormalization)

    channels_last = tf.keras.backend.image_data_format() == 'channels_last'
    self._channel_axis = -1 if channels_last else 1

  def _build_conv_layer_params(self, input_shape):
    """Returns the list of Conv3D kwargs dicts for the configured conv type.

    Args:
      input_shape: Shape of the layer input; only read for '1+2d', where the
        temporal conv keeps the input channel count.

    Returns:
      A list of dicts, one per Conv3D layer, in application order.

    Raises:
      ValueError: If the conv type is not supported.
    """
    k = self._kernel_size
    strides, rates = self._strides, self._rates

    def spatial_conv():
      # [1, k, k] convolution over the two trailing kernel dimensions.
      return dict(
          filters=self._filters,
          kernel_size=[1, k, k],
          strides=[1, strides[1], strides[2]],
          dilation_rate=[1, rates[1], rates[2]],
          kernel_initializer=self._kernel_initializer,
      )

    def temporal_conv(filters, initializer):
      # [k, 1, 1] convolution over the leading kernel dimension.
      return dict(
          filters=filters,
          kernel_size=[k, 1, 1],
          strides=[strides[0], 1, 1],
          dilation_rate=[rates[0], 1, 1],
          kernel_initializer=initializer,
      )

    conv_type = self._conv_type
    if conv_type == '3d':
      return [
          dict(
              filters=self._filters,
              kernel_size=[k, k, k],
              strides=strides,
              dilation_rate=rates,
              kernel_initializer=self._kernel_initializer,
          )
      ]
    if conv_type == '2d':
      return [spatial_conv()]
    if conv_type == '1+2d':
      # The leading temporal conv preserves the number of input channels.
      channels_in = input_shape[self._channel_axis]
      return [
          temporal_conv(channels_in, self._temporal_conv_initializer),
          spatial_conv(),
      ]
    if conv_type == '2+1d':
      return [
          spatial_conv(),
          temporal_conv(self._filters, self._temporal_conv_initializer),
      ]
    if conv_type == '1+1+1d':
      # NOTE(review): unlike '1+2d' and '2+1d', the temporal conv here uses
      # the regular kernel initializer, not the temporal one -- confirm this
      # is intended.
      return [
          dict(
              filters=self._filters,
              kernel_size=[1, 1, k],
              strides=[1, 1, strides[2]],
              dilation_rate=[1, 1, rates[2]],
              kernel_initializer=self._kernel_initializer,
          ),
          dict(
              filters=self._filters,
              kernel_size=[1, k, 1],
              strides=[1, strides[1], 1],
              dilation_rate=[1, rates[1], 1],
              kernel_initializer=self._kernel_initializer,
          ),
          temporal_conv(self._filters, self._kernel_initializer),
      ]
    raise ValueError('Unsupported conv_type: {}'.format(self._conv_type))

  def _build_norm_layer_params(self, conv_param):
    """Returns the kwargs of the norm layer that follows one conv layer."""
    norm_params = {
        'axis': self._channel_axis,
        'momentum': self._norm_momentum,
        'epsilon': self._norm_epsilon,
        'scale': False,
        'gamma_initializer': 'ones',
    }
    return norm_params

  def _build_activation_layer_params(self, conv_param):
    """Returns the kwargs of the activation that follows one conv layer."""
    return dict()

  def _append_conv_layer(self, param):
    """Appends one Conv3D, its normalization layer and a ReLU activation."""
    stack = self._parameterized_conv_layers

    conv = tf.keras.layers.Conv3D(
        padding='same',
        use_bias=False,
        kernel_regularizer=self._kernel_regularizer,
        **param,
    )
    stack.append(conv)

    norm = self._norm(**self._build_norm_layer_params(param))
    stack.append(norm)

    activation = tf.keras.layers.Activation(
        'relu', **self._build_activation_layer_params(param))
    stack.append(activation)

  def build(self, input_shape):
    """Creates the conv/norm/activation sub-layers for `input_shape`."""
    self._parameterized_conv_layers = []
    for conv_kwargs in self._build_conv_layer_params(input_shape):
      self._append_conv_layer(conv_kwargs)
    super().build(input_shape)

  def call(self, inputs):
    """Applies the sub-layers to `inputs` in the order they were created."""
    outputs = inputs
    for sub_layer in self._parameterized_conv_layers:
      outputs = sub_layer(outputs)
    return outputs
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
from absl import logging
from absl.testing import parameterized
import tensorflow as tf
from official.projects.s3d.modeling import net_utils
class Tf2NetUtilsTest(parameterized.TestCase, tf.test.TestCase):
  """Output-shape checks for net_utils.ParameterizedConvLayer."""

  @parameterized.parameters(
      ('3d', [2, 1, 1], [5, 16, 28, 28, 256]),
      ('3d', [2, 2, 2], [5, 16, 14, 14, 256]),
      ('3d', [1, 2, 1], [5, 32, 14, 28, 256]),
      ('2d', [2, 2, 2], [5, 32, 14, 14, 256]),
      ('2d', [1, 1, 2], [5, 32, 28, 14, 256]),
      ('1+2d', [2, 2, 2], [5, 16, 14, 14, 256]),
      ('1+2d', [2, 1, 1], [5, 16, 28, 28, 256]),
      ('1+2d', [1, 1, 1], [5, 32, 28, 28, 256]),
      ('1+2d', [1, 1, 2], [5, 32, 28, 14, 256]),
      ('2+1d', [2, 2, 2], [5, 16, 14, 14, 256]),
      ('2+1d', [1, 1, 1], [5, 32, 28, 28, 256]),
      ('2+1d', [2, 1, 2], [5, 16, 28, 14, 256]),
      ('1+1+1d', [2, 2, 2], [5, 16, 14, 14, 256]),
      ('1+1+1d', [1, 1, 1], [5, 32, 28, 28, 256]),
      ('1+1+1d', [2, 1, 2], [5, 16, 28, 14, 256]),
  )
  def test_parameterized_conv_layer_creation(self, conv_type, strides,
                                             expected_shape):
    """The layer output shape matches the expectation for each stride set."""
    batch_size = 5
    temporal_size, spatial_size, channels = 32, 28, 128
    kernel_size, filters = 3, 256
    rates = [1, 1, 1]

    video = tf.keras.Input(
        shape=(temporal_size, spatial_size, spatial_size, channels),
        batch_size=batch_size)
    conv_layer = net_utils.ParameterizedConvLayer(
        conv_type, kernel_size, filters, strides, rates,
        name='ParameterizedConv')
    features = conv_layer(video)

    # Log the output shape and weight names to ease debugging of failures.
    logging.info(features.shape.as_list())
    logging.info([weight.name for weight in conv_layer.weights])

    self.assertAllEqual(features.shape.as_list(), expected_shape)
# Run the test cases in this module when it is executed as a script.
if __name__ == '__main__':
  tf.test.main()
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Contains the Tensorflow 2 version definition of S3D model.
S3D model is described in the following paper:
https://arxiv.org/abs/1712.04851.
"""
from typing import Any, Dict, Mapping, Optional, Sequence, Text, Tuple, Union
import tensorflow as tf
from official.modeling import hyperparams
from official.projects.s3d.configs import s3d as cfg
from official.projects.s3d.modeling import inception_utils
from official.projects.s3d.modeling import net_utils
from official.vision.beta.modeling import factory_3d as model_factory
from official.vision.beta.modeling.backbones import factory as backbone_factory
# Short aliases for the Keras initializer and regularizer namespaces.
initializers = tf.keras.initializers
regularizers = tf.keras.regularizers
class S3D(tf.keras.Model):
  """Class to build S3D family model."""

  def __init__(self,
               input_specs: tf.keras.layers.InputSpec,
               final_endpoint: Text = 'Mixed_5c',
               first_temporal_kernel_size: int = 3,
               temporal_conv_start_at: Text = 'Conv2d_2c_3x3',
               gating_start_at: Text = 'Conv2d_2c_3x3',
               swap_pool_and_1x1x1: bool = True,
               gating_style: Text = 'CELL',
               use_sync_bn: bool = False,
               norm_momentum: float = 0.999,
               norm_epsilon: float = 0.001,
               temporal_conv_initializer: Union[
                   Text,
                   initializers.Initializer] = initializers.TruncatedNormal(
                       mean=0.0, stddev=0.01),
               temporal_conv_type: Text = '2+1d',
               kernel_initializer: Union[
                   Text,
                   initializers.Initializer] = initializers.TruncatedNormal(
                       mean=0.0, stddev=0.01),
               kernel_regularizer: Union[Text, regularizers.Regularizer] = 'l2',
               depth_multiplier: float = 1.0,
               **kwargs):
    """Constructor.

    Args:
      input_specs: `tf.keras.layers.InputSpec` specs of the input tensor.
      final_endpoint: Specifies the endpoint to construct the network up to.
      first_temporal_kernel_size: Temporal kernel size of the first convolution
        layer.
      temporal_conv_start_at: Specifies the endpoint where to start performing
        temporal convolution from.
      gating_start_at: Specifies the endpoint where to start performing self
        gating from.
      swap_pool_and_1x1x1: A boolean flag indicating whether to swap the order
        of convolution and max pooling in Branch_3 of inception v1 cell.
      gating_style: A string that specifies self gating to be applied after each
        branch and/or after each cell. It can be one of ['BRANCH', 'CELL',
        'BRANCH_AND_CELL'].
      use_sync_bn: If True, use synchronized batch normalization.
      norm_momentum: A `float` of normalization momentum for the moving average.
      norm_epsilon: A `float` added to variance to avoid dividing by zero.
      temporal_conv_initializer: Weight initializer for temporal convolutional
        layers.
      temporal_conv_type: The type of parameterized convolution. Currently, we
        support '2d', '3d', '2+1d', '1+2d'.
      kernel_initializer: Weight initializer for convolutional layers other than
        temporal convolution.
      kernel_regularizer: Weight regularizer for all convolutional layers.
      depth_multiplier: A float to reduce/increase number of channels.
      **kwargs: keyword arguments to be passed.

    Raises:
      ValueError: If `final_endpoint` is not among the constructed endpoints.
    """
    self._input_specs = input_specs
    self._final_endpoint = final_endpoint
    self._first_temporal_kernel_size = first_temporal_kernel_size
    self._temporal_conv_start_at = temporal_conv_start_at
    self._gating_start_at = gating_start_at
    self._swap_pool_and_1x1x1 = swap_pool_and_1x1x1
    self._gating_style = gating_style
    self._use_sync_bn = use_sync_bn
    self._norm_momentum = norm_momentum
    self._norm_epsilon = norm_epsilon
    self._temporal_conv_initializer = temporal_conv_initializer
    self._temporal_conv_type = temporal_conv_type
    self._kernel_initializer = kernel_initializer
    self._kernel_regularizer = kernel_regularizer
    self._depth_multiplier = depth_multiplier

    # Endpoints from the given start endpoint onwards use temporal convolution
    # / self gating; the sets are empty when the start endpoint is unknown.
    self._temporal_conv_endpoints = net_utils.make_set_from_start_endpoint(
        temporal_conv_start_at, inception_utils.INCEPTION_V1_CONV_ENDPOINTS)
    self._self_gating_endpoints = net_utils.make_set_from_start_endpoint(
        gating_start_at, inception_utils.INCEPTION_V1_CONV_ENDPOINTS)

    # Drop the leading (batch) dimension of the input spec.
    inputs = tf.keras.Input(shape=input_specs.shape[1:])
    net, end_points = inception_utils.inception_v1_stem_cells(
        inputs,
        depth_multiplier,
        final_endpoint,
        temporal_conv_endpoints=self._temporal_conv_endpoints,
        self_gating_endpoints=self._self_gating_endpoints,
        temporal_conv_type=self._temporal_conv_type,
        first_temporal_kernel_size=self._first_temporal_kernel_size,
        use_sync_bn=self._use_sync_bn,
        norm_momentum=self._norm_momentum,
        norm_epsilon=self._norm_epsilon,
        temporal_conv_initializer=self._temporal_conv_initializer,
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
        parameterized_conv_layer=self._get_parameterized_conv_layer_impl(),
        layer_naming_fn=self._get_layer_naming_fn(),
    )

    # NOTE(review): if `final_endpoint` names a stem endpoint, this loop never
    # hits the `break` and builds every cell of the skeleton -- confirm
    # whether that is intended.
    for end_point, filters in inception_utils.INCEPTION_V1_ARCH_SKELETON:
      net, end_points = self._s3d_cell(net, end_point, end_points, filters)
      if end_point == final_endpoint:
        break

    if final_endpoint not in end_points:
      raise ValueError(
          'Unrecognized final endpoint %s (available endpoints: %s).' %
          (final_endpoint, end_points.keys()))

    # Record the shape of every endpoint so that the `output_specs` property
    # has something to return; it was previously never assigned, which made
    # the property raise AttributeError.
    self._output_specs = {
        name: tensor.shape for name, tensor in end_points.items()
    }

    super(S3D, self).__init__(inputs=inputs, outputs=end_points, **kwargs)

  def _s3d_cell(
      self,
      net: tf.Tensor,
      end_point: Text,
      end_points: Dict[Text, tf.Tensor],
      filters: Union[int, Sequence[Any]],
      non_local_block: Optional[tf.keras.layers.Layer] = None,
      attention_cell: Optional[tf.keras.layers.Layer] = None,
      attention_cell_super_graph: Optional[tf.keras.layers.Layer] = None
  ) -> Tuple[tf.Tensor, Dict[Text, tf.Tensor]]:
    """Applies one skeleton entry (inception cell or max pool) to `net`.

    Args:
      net: Input tensor of the cell.
      end_point: Name of the endpoint; names starting with 'Mixed' build an
        inception cell, every other name builds a `MaxPool3D` layer.
      end_points: Dict of endpoint name to tensor, updated in place.
      filters: Branch filters for an inception cell, or a
        (pool_size, strides) pair for a max pool.
      non_local_block: Not implemented; must be falsy.
      attention_cell: Not implemented; must be falsy.
      attention_cell_super_graph: Not implemented; must be falsy.

    Returns:
      A tuple of the cell output and the updated `end_points` dict.

    Raises:
      NotImplementedError: If any of the unimplemented blocks is requested.
    """
    if end_point.startswith('Mixed'):
      # Cells before the temporal-conv start endpoint fall back to 2D convs.
      conv_type = (
          self._temporal_conv_type
          if end_point in self._temporal_conv_endpoints else '2d')
      use_self_gating_on_branch = (
          end_point in self._self_gating_endpoints and
          (self._gating_style == 'BRANCH' or
           self._gating_style == 'BRANCH_AND_CELL'))
      use_self_gating_on_cell = (
          end_point in self._self_gating_endpoints and
          (self._gating_style == 'CELL' or
           self._gating_style == 'BRANCH_AND_CELL'))
      net = self._get_inception_v1_cell_layer_impl()(
          branch_filters=net_utils.apply_depth_multiplier(
              filters, self._depth_multiplier),
          conv_type=conv_type,
          temporal_dilation_rate=1,
          swap_pool_and_1x1x1=self._swap_pool_and_1x1x1,
          use_self_gating_on_branch=use_self_gating_on_branch,
          use_self_gating_on_cell=use_self_gating_on_cell,
          use_sync_bn=self._use_sync_bn,
          norm_momentum=self._norm_momentum,
          norm_epsilon=self._norm_epsilon,
          kernel_initializer=self._kernel_initializer,
          temporal_conv_initializer=self._temporal_conv_initializer,
          kernel_regularizer=self._kernel_regularizer,
          name=self._get_layer_naming_fn()(end_point))(
              net)
    else:
      net = tf.keras.layers.MaxPool3D(
          pool_size=filters[0],
          strides=filters[1],
          padding='same',
          name=self._get_layer_naming_fn()(end_point))(
              net)
    end_points[end_point] = net
    if non_local_block:
      # TODO(b/182299420): Implement non local block in TF2.
      raise NotImplementedError('Non local block is not implemented yet.')
    if attention_cell:
      # TODO(b/182299420): Implement attention cell in TF2.
      raise NotImplementedError('Attention cell is not implemented yet.')
    if attention_cell_super_graph:
      # TODO(b/182299420): Implement attention cell super graph in TF2.
      raise NotImplementedError('Attention cell super graph is not implemented'
                                ' yet.')
    return net, end_points

  def get_config(self):
    """Returns the constructor arguments this model was built with."""
    config_dict = {
        'input_specs': self._input_specs,
        'final_endpoint': self._final_endpoint,
        'first_temporal_kernel_size': self._first_temporal_kernel_size,
        'temporal_conv_start_at': self._temporal_conv_start_at,
        'gating_start_at': self._gating_start_at,
        'swap_pool_and_1x1x1': self._swap_pool_and_1x1x1,
        'gating_style': self._gating_style,
        'use_sync_bn': self._use_sync_bn,
        'norm_momentum': self._norm_momentum,
        'norm_epsilon': self._norm_epsilon,
        'temporal_conv_initializer': self._temporal_conv_initializer,
        'temporal_conv_type': self._temporal_conv_type,
        'kernel_initializer': self._kernel_initializer,
        'kernel_regularizer': self._kernel_regularizer,
        'depth_multiplier': self._depth_multiplier
    }
    return config_dict

  @classmethod
  def from_config(cls, config, custom_objects=None):
    """Builds a model from a `get_config` dict; `custom_objects` is unused."""
    return cls(**config)

  @property
  def output_specs(self):
    """A dict of {level: TensorShape} pairs for the model output."""
    return self._output_specs

  def _get_inception_v1_cell_layer_impl(self):
    """Returns the layer class used to build inception cells."""
    return inception_utils.InceptionV1CellLayer

  def _get_parameterized_conv_layer_impl(self):
    """Returns the layer class used for parameterized convolutions."""
    return net_utils.ParameterizedConvLayer

  def _get_layer_naming_fn(self):
    """Returns a function mapping an endpoint to a layer name (always None)."""
    return lambda end_point: None
class S3DModel(tf.keras.Model):
  """Video classifier: S3D backbone, global average pool, dropout, dense."""

  def __init__(self,
               backbone: tf.keras.Model,
               num_classes: int,
               input_specs: Mapping[Text, tf.keras.layers.InputSpec],
               final_endpoint: Text = 'Mixed_5c',
               dropout_rate: float = 0.0,
               **kwargs):
    """Constructor.

    Args:
      backbone: S3D backbone Keras Model.
      num_classes: `int` number of possible classes for video classification.
      input_specs: Mapping from input name to the `tf.keras.layers.InputSpec`
        of that input; the 'image' entry is fed to the backbone.
      final_endpoint: Name of the backbone endpoint the classifier reads.
      dropout_rate: `float` between 0 and 1. Fraction of the input units to
        drop. Note that dropout_rate = 1.0 - dropout_keep_prob.
      **kwargs: keyword arguments to be passed.
    """
    # Must stay the first assignment. Presumably this switches off Keras
    # attribute tracking so plain attributes can be set before
    # `super().__init__` runs -- confirm against the Keras base layer.
    self._self_setattr_tracking = False

    self._backbone = backbone
    self._num_classes = num_classes
    self._input_specs = input_specs
    self._final_endpoint = final_endpoint
    self._dropout_rate = dropout_rate
    self._config_dict = dict(
        backbone=backbone,
        num_classes=num_classes,
        input_specs=input_specs,
        final_endpoint=final_endpoint,
        dropout_rate=dropout_rate,
    )

    # One Keras input per spec, dropping the leading (batch) dimension.
    model_inputs = {}
    for input_name, spec in input_specs.items():
      model_inputs[input_name] = tf.keras.Input(shape=spec.shape[1:])

    endpoints = self._backbone(model_inputs['image'])
    # Average over axes 1-3 of the selected endpoint.
    pooled = tf.math.reduce_mean(
        endpoints[self._final_endpoint], axis=[1, 2, 3])
    dropped = tf.keras.layers.Dropout(dropout_rate)(pooled)
    classifier = tf.keras.layers.Dense(**self._build_dense_layer_params())
    logits = classifier(dropped)

    super().__init__(inputs=model_inputs, outputs=logits, **kwargs)

  @property
  def checkpoint_items(self):
    """Returns a dictionary of items to be additionally checkpointed."""
    return {'backbone': self.backbone}

  @property
  def backbone(self):
    """The backbone model passed to the constructor."""
    return self._backbone

  def get_config(self):
    """Returns the constructor arguments recorded at construction time."""
    return self._config_dict

  @classmethod
  def from_config(cls, config, custom_objects=None):
    """Builds a model from a `get_config` dict; `custom_objects` is unused."""
    return cls(**config)

  def _build_dense_layer_params(self):
    """Returns the kwargs of the final classification `Dense` layer."""
    return {'units': self._num_classes, 'kernel_regularizer': 'l2'}
@backbone_factory.register_backbone_builder('s3d')
def build_s3d(
    input_specs: tf.keras.layers.InputSpec,
    backbone_config: hyperparams.Config,
    norm_activation_config: hyperparams.Config,
    l2_regularizer: tf.keras.regularizers.Regularizer = None
) -> tf.keras.Model:  # pytype: disable=annotation-type-mismatch  # typed-keras
  """Builds S3D backbone.

  Args:
    input_specs: `tf.keras.layers.InputSpec` specs of the input tensor.
    backbone_config: Backbone config whose `type` must be 's3d'; the S3D
      settings are read from `backbone_config.get()`.
    norm_activation_config: Unused.
    l2_regularizer: Kernel regularizer passed to the backbone.

  Returns:
    The constructed `S3D` backbone.
  """
  selected_type = backbone_config.type
  s3d_cfg = backbone_config.get()
  assert selected_type == 's3d'
  del norm_activation_config

  # Config fields copied one-to-one into same-named S3D constructor arguments.
  forwarded_fields = (
      'final_endpoint',
      'first_temporal_kernel_size',
      'temporal_conv_start_at',
      'gating_start_at',
      'swap_pool_and_1x1x1',
      'gating_style',
      'use_sync_bn',
      'norm_momentum',
      'norm_epsilon',
      'temporal_conv_type',
      'depth_multiplier',
  )
  s3d_kwargs = {name: getattr(s3d_cfg, name) for name in forwarded_fields}
  return S3D(
      input_specs=input_specs,
      kernel_regularizer=l2_regularizer,
      **s3d_kwargs)
@model_factory.register_model_builder('s3d')
def build_s3d_model(
    input_specs: tf.keras.layers.InputSpec,
    model_config: cfg.S3DModel,
    num_classes: int,
    l2_regularizer: tf.keras.regularizers.Regularizer = None
) -> tf.keras.Model:  # pytype: disable=annotation-type-mismatch  # typed-keras
  """Builds S3D model with classification layer.

  Args:
    input_specs: `tf.keras.layers.InputSpec` specs of the input tensor.
    model_config: S3D model config; provides the backbone config, the
      norm/activation config and the dropout rate.
    num_classes: Number of output classes of the classification layer.
    l2_regularizer: Kernel regularizer passed to the backbone.

  Returns:
    The constructed `S3DModel`.
  """
  input_specs_dict = {'image': input_specs}
  backbone = build_s3d(input_specs, model_config.backbone,
                       model_config.norm_activation, l2_regularizer)
  # The classifier must read the endpoint the backbone was actually built up
  # to. Without forwarding it, S3DModel falls back to 'Mixed_5c' and fails to
  # find that endpoint whenever the config selects an earlier one.
  final_endpoint = model_config.backbone.get().final_endpoint
  model = S3DModel(
      backbone,
      num_classes=num_classes,
      input_specs=input_specs_dict,
      final_endpoint=final_endpoint,
      dropout_rate=model_config.dropout_rate)
  return model
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Tests for S3D model."""
from absl.testing import parameterized
import tensorflow as tf
from official.projects.s3d.modeling import s3d
class S3dTest(parameterized.TestCase, tf.test.TestCase):
  """Build and serialization tests for the S3D backbone."""

  @parameterized.parameters(
      (7, 224, 224, 3),
      (7, 128, 128, 3),
      (7, 256, 256, 3),
      (7, 192, 192, 3),
      (64, 224, 224, 3),
      (32, 224, 224, 3),
      (64, 224, 224, 11),
      (32, 224, 224, 11),
  )
  def test_build(self, num_frames, height, width, first_temporal_kernel_size):
    """Every endpoint of a default S3D has the expected output shape."""
    # NOTE(review): `first_temporal_kernel_size` comes from the parameter list
    # but is not forwarded to `s3d.S3D`, so every case builds with the default
    # kernel size -- confirm whether that is intended.
    batch_size = 5
    input_shape = [batch_size, num_frames, height, width, 3]
    input_specs = tf.keras.layers.InputSpec(shape=input_shape)
    network = s3d.S3D(
        input_specs=input_specs
    )
    inputs = tf.keras.Input(shape=input_shape[1:], batch_size=input_shape[0])
    endpoints = network(inputs)

    temporal_1a = (num_frames - 1)//2 + 1
    # The width slot of 'Conv2d_2c_3x3' and 'MaxPool_2a_3x3' previously used
    # `height//4`, which only held because all cases are square.
    expected_shapes = {
        'Conv2d_1a_7x7': [5, temporal_1a, height//2, width//2, 64],
        'Conv2d_2b_1x1': [5, temporal_1a, height//4, width//4, 64],
        'Conv2d_2c_3x3': [5, temporal_1a, height//4, width//4, 192],
        'MaxPool_2a_3x3': [5, temporal_1a, height//4, width//4, 64],
        'MaxPool_3a_3x3': [5, temporal_1a, height//8, width//8, 192],
        'Mixed_3b': [5, temporal_1a, height//8, width//8, 256],
        'Mixed_3c': [5, temporal_1a, height//8, width//8, 480],
        'MaxPool_4a_3x3': [5, temporal_1a//2, height//16, width//16, 480],
        'Mixed_4b': [5, temporal_1a//2, height//16, width//16, 512],
        'Mixed_4c': [5, temporal_1a//2, height//16, width//16, 512],
        'Mixed_4d': [5, temporal_1a//2, height//16, width//16, 512],
        'Mixed_4e': [5, temporal_1a//2, height//16, width//16, 528],
        'Mixed_4f': [5, temporal_1a//2, height//16, width//16, 832],
        'MaxPool_5a_2x2': [5, temporal_1a//4, height//32, width//32, 832],
        'Mixed_5b': [5, temporal_1a//4, height//32, width//32, 832],
        'Mixed_5c': [5, temporal_1a//4, height//32, width//32, 1024],
    }

    output_shapes = dict()
    for end_point, output_tensor in endpoints.items():
      output_shapes[end_point] = output_tensor.shape.as_list()
    self.assertDictEqual(output_shapes, expected_shapes)

  def test_serialize_deserialize(self):
    """`get_config` round-trips through `from_config`."""
    # Create a network object that sets all of its config options.
    kwargs = dict(
        input_specs=tf.keras.layers.InputSpec(shape=(5, 64, 224, 224, 3)),
        final_endpoint='Mixed_5c',
        first_temporal_kernel_size=3,
        temporal_conv_start_at='Conv2d_2c_3x3',
        gating_start_at='Conv2d_2c_3x3',
        swap_pool_and_1x1x1=True,
        gating_style='CELL',
        use_sync_bn=False,
        norm_momentum=0.999,
        norm_epsilon=0.001,
        temporal_conv_initializer=tf.keras.initializers.TruncatedNormal(
            mean=0.0, stddev=0.01),
        temporal_conv_type='2+1d',
        kernel_initializer='truncated_normal',
        kernel_regularizer='l2',
        depth_multiplier=1.0
    )
    network = s3d.S3D(**kwargs)

    expected_config = dict(kwargs)
    self.assertEqual(network.get_config(), expected_config)

    # Create another network object from the first object's config.
    new_network = s3d.S3D.from_config(network.get_config())

    # Validate that the config can be forced to JSON.
    _ = new_network.to_json()

    # If the serialization was successful, the new config should match the old.
    self.assertAllEqual(network.get_config(), new_network.get_config())
# Run the test cases in this module when it is executed as a script.
if __name__ == '__main__':
  tf.test.main()
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""TensorFlow Model Garden Vision training driver for S3D."""
from absl import app
# pylint: disable=unused-import
from official.common import registry_imports
# pylint: enable=unused-import
from official.common import flags as tfm_flags
# pylint: disable=unused-import
from official.projects.s3d.configs.google import s3d as s3d_config
from official.projects.s3d.modeling import s3d
from official.projects.s3d.tasks.google import automl_video_classification
# pylint: enable=unused-import
from official.vision.beta import train
# Script entry point: define the flags via `tfm_flags`, then let absl run
# `train.main`.
if __name__ == '__main__':
  tfm_flags.define_flags()
  app.run(train.main)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment