ModelZoo / ResNet50_tensorflow · Commits · 9c8cbd0c

Commit 9c8cbd0c, authored Feb 14, 2022 by A. Unique TensorFlower
Parent: 8c3a1ef3

Internal change

PiperOrigin-RevId: 428641380

Showing 8 changed files with 1492 additions and 0 deletions:

official/projects/s3d/configs/s3d.py                      +100  -0
official/projects/s3d/modeling/inception_utils.py         +530  -0
official/projects/s3d/modeling/inception_utils_test.py     +85  -0
official/projects/s3d/modeling/net_utils.py               +212  -0
official/projects/s3d/modeling/net_utils_test.py           +69  -0
official/projects/s3d/modeling/s3d.py                     +356  -0
official/projects/s3d/modeling/s3d_test.py                +107  -0
official/projects/s3d/train.py                             +33  -0


official/projects/s3d/configs/s3d.py  (new file, mode 100644)

# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""S3D model configurations."""
import dataclasses
from typing import Text

from official.modeling import hyperparams
from official.vision.beta.configs import backbones_3d
from official.vision.beta.configs import video_classification


@dataclasses.dataclass
class S3D(hyperparams.Config):
  """S3D backbone config.

  Attributes:
    final_endpoint: Specifies the endpoint to construct the network up to. It
      can be one of ['Conv2d_1a_7x7', 'MaxPool_2a_3x3', 'Conv2d_2b_1x1',
      'Conv2d_2c_3x3', 'MaxPool_3a_3x3', 'Mixed_3b', 'Mixed_3c',
      'MaxPool_4a_3x3', 'Mixed_4b', 'Mixed_4c', 'Mixed_4d', 'Mixed_4e',
      'Mixed_4f', 'MaxPool_5a_2x2', 'Mixed_5b', 'Mixed_5c'].
    first_temporal_kernel_size: Specifies the temporal kernel size for the
      first conv3d filter. A larger value slows down the model but provides
      little accuracy improvement. Must be set to one of 1, 3, 5 or 7.
    temporal_conv_start_at: Specifies the first conv block to use separable 3D
      convs rather than 2D convs (implemented as [1, k, k] 3D conv). This is
      used to construct the inverted pyramid models. 'Conv2d_2c_3x3' is the
      first valid block to use separable 3D convs. If the provided block name
      is not present, all valid blocks will use separable 3D convs.
    gating_start_at: Specifies the first conv block to use self gating.
      'Conv2d_2c_3x3' is the first valid block to use self gating.
    swap_pool_and_1x1x1: If True, in Branch_3 the 1x1x1 convolution is
      performed first and is followed by max pooling. The 1x1x1 convolution
      reduces the number of filters, so max pooling then operates on fewer
      filters.
    gating_style: Self gating can be applied after each branch and/or after
      each inception cell. It can be one of ['BRANCH', 'CELL',
      'BRANCH_AND_CELL'].
    use_sync_bn: If True, use synchronized batch normalization.
    norm_momentum: A `float` of normalization momentum for the moving average.
    norm_epsilon: A `float` added to variance to avoid dividing by zero.
    temporal_conv_type: It can be one of ['3d', '2+1d', '1+2d', '1+1+1d'],
      where '3d' is SPATIOTEMPORAL 3D convolution, '2+1d' is
      SPATIAL_TEMPORAL_SEPARATE with 2D convolution on the spatial dimensions
      followed by 1D convolution on the temporal dimension, '1+2d' is
      TEMPORAL_SPATIAL_SEPARATE with 1D convolution on the temporal dimension
      followed by 2D convolution on the spatial dimensions, and '1+1+1d' is
      FULLY_SEPARATE with 1D convolutions on the horizontal, vertical, and
      temporal dimensions, respectively.
    depth_multiplier: Float multiplier for the depth (number of channels) of
      all convolution ops. The value must be greater than zero. Typical usage
      is to set this value in (0, 1) to reduce the number of parameters or the
      computation cost of the model.
  """
  final_endpoint: Text = 'Mixed_5c'
  first_temporal_kernel_size: int = 3
  temporal_conv_start_at: Text = 'Conv2d_2c_3x3'
  gating_start_at: Text = 'Conv2d_2c_3x3'
  swap_pool_and_1x1x1: bool = True
  gating_style: Text = 'CELL'
  use_sync_bn: bool = False
  norm_momentum: float = 0.999
  norm_epsilon: float = 0.001
  temporal_conv_type: Text = '2+1d'
  depth_multiplier: float = 1.0


@dataclasses.dataclass
class Backbone3D(backbones_3d.Backbone3D):
  """Configuration for backbones.

  Attributes:
    type: 'str', type of backbone to be used, one of the fields below.
    s3d: s3d backbone config.
  """
  type: str = 's3d'
  s3d: S3D = S3D()


@dataclasses.dataclass
class S3DModel(video_classification.VideoClassificationModel):
  """The S3D model config.

  Attributes:
    model_type: 'str', type of model to be used, one of the fields below.
    backbone: backbone config.
  """
  model_type: str = 's3d'
  backbone: Backbone3D = Backbone3D()
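
A minimal usage sketch (not part of the commit), assuming the standard Model Garden pattern of constructing hyperparams.Config dataclasses with keyword overrides; the override values are illustrative only:

# Illustrative sketch: compose the S3D model config and shrink the backbone.
from official.projects.s3d.configs import s3d as s3d_cfg

backbone = s3d_cfg.Backbone3D(
    type='s3d',
    s3d=s3d_cfg.S3D(temporal_conv_type='2+1d', depth_multiplier=0.5))
model_cfg = s3d_cfg.S3DModel(backbone=backbone)
assert model_cfg.backbone.s3d.depth_multiplier == 0.5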

official/projects/s3d/modeling/inception_utils.py  (new file, mode 100644)

(This 530-line diff is collapsed in the page view; its contents are not shown here.)


official/projects/s3d/modeling/inception_utils_test.py  (new file, mode 100644)

# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
from absl.testing import parameterized
import tensorflow as tf
from official.projects.s3d.modeling import inception_utils


class InceptionUtilsTest(parameterized.TestCase, tf.test.TestCase):

  @parameterized.parameters(
      (1.0, 3, {'Conv2d_1a_7x7', 'Conv2d_2c_3x3'}),
      (0.5, 5, {'Conv2d_1a_7x7', 'Conv2d_2c_3x3'}),
      (0.25, 7, {'Conv2d_1a_7x7', 'Conv2d_2c_3x3'}))
  def test_s3d_stem_cells(self, depth_multiplier, first_temporal_kernel_size,
                          temporal_conv_endpoints):
    batch_size = 1
    num_frames = 64
    height, width = 224, 224

    inputs = tf.keras.layers.Input(
        shape=(num_frames, height, width, 3), batch_size=batch_size)
    outputs, output_endpoints = inception_utils.inception_v1_stem_cells(
        inputs,
        depth_multiplier,
        'Mixed_5c',
        temporal_conv_endpoints=temporal_conv_endpoints,
        self_gating_endpoints={'Conv2d_2c_3x3'},
        first_temporal_kernel_size=first_temporal_kernel_size)

    self.assertListEqual(
        outputs.shape.as_list(),
        [batch_size, 32, 28, 28, int(192 * depth_multiplier)])
    expected_endpoints = {
        'Conv2d_1a_7x7', 'MaxPool_2a_3x3', 'Conv2d_2b_1x1', 'Conv2d_2c_3x3',
        'MaxPool_3a_3x3'
    }
    self.assertSetEqual(expected_endpoints, set(output_endpoints.keys()))

  @parameterized.parameters(
      ('3d', True, True, True),
      ('2d', False, False, True),
      ('1+2d', True, False, False),
      ('2+1d', False, True, False),
  )
  def test_inception_v1_cell_endpoint_match(self, conv_type,
                                            swap_pool_and_1x1x1,
                                            use_self_gating_on_branch,
                                            use_self_gating_on_cell):
    batch_size = 5
    num_frames = 32
    channels = 128
    height, width = 28, 28

    inputs = tf.keras.layers.Input(
        shape=(num_frames, height, width, channels), batch_size=batch_size)
    inception_v1_cell_layer = inception_utils.InceptionV1CellLayer(
        [[64], [96, 128], [16, 32], [32]],
        conv_type=conv_type,
        swap_pool_and_1x1x1=swap_pool_and_1x1x1,
        use_self_gating_on_branch=use_self_gating_on_branch,
        use_self_gating_on_cell=use_self_gating_on_cell,
        name='test')
    outputs = inception_v1_cell_layer(inputs)

    # self.assertTrue(net.op.name.startswith('test'))
    self.assertListEqual(outputs.shape.as_list(),
                         [batch_size, 32, 28, 28, 256])


if __name__ == '__main__':
  tf.test.main()

official/projects/s3d/modeling/net_utils.py  (new file, mode 100644)

# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Commonly used TensorFlow 2 network blocks."""
from typing import Any, Text, Sequence, Union
import tensorflow as tf

WEIGHT_INITIALIZER = {
    'Xavier': tf.keras.initializers.GlorotUniform,
    'Gaussian': lambda: tf.keras.initializers.RandomNormal(stddev=0.01),
}

initializers = tf.keras.initializers
regularizers = tf.keras.regularizers


def make_set_from_start_endpoint(start_endpoint: Text,
                                 endpoints: Sequence[Text]):
  """Makes a subset of endpoints from the given starting position."""
  if start_endpoint not in endpoints:
    return set()
  start_index = endpoints.index(start_endpoint)
  return set(endpoints[start_index:])


def apply_depth_multiplier(d: Union[int, Sequence[Any]],
                           depth_multiplier: float):
  """Applies depth_multiplier recursively to ints."""
  if isinstance(d, int):
    return int(d * depth_multiplier)
  else:
    return [apply_depth_multiplier(x, depth_multiplier) for x in d]


class ParameterizedConvLayer(tf.keras.layers.Layer):
  """Convolution layer based on the input conv_type."""

  def __init__(self,
               conv_type: Text,
               kernel_size: int,
               filters: int,
               strides: Sequence[int],
               rates: Sequence[int],
               use_sync_bn: bool = False,
               norm_momentum: float = 0.999,
               norm_epsilon: float = 0.001,
               temporal_conv_initializer: Union[
                   Text, initializers.Initializer] = 'glorot_uniform',
               kernel_initializer: Union[
                   Text, initializers.Initializer] = 'truncated_normal',
               kernel_regularizer: Union[Text,
                                         regularizers.Regularizer] = 'l2',
               **kwargs):
    super(ParameterizedConvLayer, self).__init__(**kwargs)
    self._conv_type = conv_type
    self._kernel_size = kernel_size
    self._filters = filters
    self._strides = strides
    self._rates = rates
    self._use_sync_bn = use_sync_bn
    self._norm_momentum = norm_momentum
    self._norm_epsilon = norm_epsilon
    if use_sync_bn:
      self._norm = tf.keras.layers.experimental.SyncBatchNormalization
    else:
      self._norm = tf.keras.layers.BatchNormalization
    if tf.keras.backend.image_data_format() == 'channels_last':
      self._channel_axis = -1
    else:
      self._channel_axis = 1
    self._temporal_conv_initializer = temporal_conv_initializer
    self._kernel_initializer = kernel_initializer
    self._kernel_regularizer = kernel_regularizer

  def _build_conv_layer_params(self, input_shape):
    """Builds params for conv layers."""
    conv_layer_params = []
    if self._conv_type == '3d':
      conv_layer_params.append(
          dict(
              filters=self._filters,
              kernel_size=[self._kernel_size] * 3,
              strides=self._strides,
              dilation_rate=self._rates,
              kernel_initializer=self._kernel_initializer,
          ))
    elif self._conv_type == '2d':
      conv_layer_params.append(
          dict(
              filters=self._filters,
              kernel_size=[1, self._kernel_size, self._kernel_size],
              strides=[1, self._strides[1], self._strides[2]],
              dilation_rate=[1, self._rates[1], self._rates[2]],
              kernel_initializer=self._kernel_initializer,
          ))
    elif self._conv_type == '1+2d':
      channels_in = input_shape[self._channel_axis]
      conv_layer_params.append(
          dict(
              filters=channels_in,
              kernel_size=[self._kernel_size, 1, 1],
              strides=[self._strides[0], 1, 1],
              dilation_rate=[self._rates[0], 1, 1],
              kernel_initializer=self._temporal_conv_initializer,
          ))
      conv_layer_params.append(
          dict(
              filters=self._filters,
              kernel_size=[1, self._kernel_size, self._kernel_size],
              strides=[1, self._strides[1], self._strides[2]],
              dilation_rate=[1, self._rates[1], self._rates[2]],
              kernel_initializer=self._kernel_initializer,
          ))
    elif self._conv_type == '2+1d':
      conv_layer_params.append(
          dict(
              filters=self._filters,
              kernel_size=[1, self._kernel_size, self._kernel_size],
              strides=[1, self._strides[1], self._strides[2]],
              dilation_rate=[1, self._rates[1], self._rates[2]],
              kernel_initializer=self._kernel_initializer,
          ))
      conv_layer_params.append(
          dict(
              filters=self._filters,
              kernel_size=[self._kernel_size, 1, 1],
              strides=[self._strides[0], 1, 1],
              dilation_rate=[self._rates[0], 1, 1],
              kernel_initializer=self._temporal_conv_initializer,
          ))
    elif self._conv_type == '1+1+1d':
      conv_layer_params.append(
          dict(
              filters=self._filters,
              kernel_size=[1, 1, self._kernel_size],
              strides=[1, 1, self._strides[2]],
              dilation_rate=[1, 1, self._rates[2]],
              kernel_initializer=self._kernel_initializer,
          ))
      conv_layer_params.append(
          dict(
              filters=self._filters,
              kernel_size=[1, self._kernel_size, 1],
              strides=[1, self._strides[1], 1],
              dilation_rate=[1, self._rates[1], 1],
              kernel_initializer=self._kernel_initializer,
          ))
      conv_layer_params.append(
          dict(
              filters=self._filters,
              kernel_size=[self._kernel_size, 1, 1],
              strides=[self._strides[0], 1, 1],
              dilation_rate=[self._rates[0], 1, 1],
              kernel_initializer=self._kernel_initializer,
          ))
    else:
      raise ValueError('Unsupported conv_type: {}'.format(self._conv_type))
    return conv_layer_params

  def _build_norm_layer_params(self, conv_param):
    """Builds params for the norm layer after one conv layer."""
    return dict(
        axis=self._channel_axis,
        momentum=self._norm_momentum,
        epsilon=self._norm_epsilon,
        scale=False,
        gamma_initializer='ones')

  def _build_activation_layer_params(self, conv_param):
    """Builds params for the activation layer after one conv layer."""
    return {}

  def _append_conv_layer(self, param):
    """Appends conv, normalization and activation layers."""
    self._parameterized_conv_layers.append(
        tf.keras.layers.Conv3D(
            padding='same',
            use_bias=False,
            kernel_regularizer=self._kernel_regularizer,
            **param,
        ))

    norm_layer_params = self._build_norm_layer_params(param)
    self._parameterized_conv_layers.append(self._norm(**norm_layer_params))

    relu_layer_params = self._build_activation_layer_params(param)
    self._parameterized_conv_layers.append(
        tf.keras.layers.Activation('relu', **relu_layer_params))

  def build(self, input_shape):
    self._parameterized_conv_layers = []
    for conv_layer_param in self._build_conv_layer_params(input_shape):
      self._append_conv_layer(conv_layer_param)
    super(ParameterizedConvLayer, self).build(input_shape)

  def call(self, inputs):
    x = inputs
    for layer in self._parameterized_conv_layers:
      x = layer(x)
    return x
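
Two short sketches (not part of the commit) of how the helpers above behave; the branch filter spec mirrors the Inception cell used in the tests, and the layer call assumes an eager TF2 environment:

# Illustrative only: apply_depth_multiplier scales every filter count
# recursively, so a nested branch spec can be thinned in one call.
import tensorflow as tf
from official.projects.s3d.modeling import net_utils

branch_filters = [[64], [96, 128], [16, 32], [32]]
print(net_utils.apply_depth_multiplier(branch_filters, 0.5))
# -> [[32], [48, 64], [8, 16], [16]]

# A '2+1d' ParameterizedConvLayer factorizes a 3x3x3 conv into a 1x3x3 spatial
# conv followed by a 3x1x1 temporal conv, each with its own BN + ReLU.
layer = net_utils.ParameterizedConvLayer(
    conv_type='2+1d', kernel_size=3, filters=64, strides=[1, 1, 1],
    rates=[1, 1, 1])
x = tf.ones([2, 8, 28, 28, 16])
print(layer(x).shape)  # (2, 8, 28, 28, 64)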

official/projects/s3d/modeling/net_utils_test.py  (new file, mode 100644)

# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
from absl import logging
from absl.testing import parameterized
import tensorflow as tf
from official.projects.s3d.modeling import net_utils


class Tf2NetUtilsTest(parameterized.TestCase, tf.test.TestCase):

  @parameterized.parameters(
      ('3d', [2, 1, 1], [5, 16, 28, 28, 256]),
      ('3d', [2, 2, 2], [5, 16, 14, 14, 256]),
      ('3d', [1, 2, 1], [5, 32, 14, 28, 256]),
      ('2d', [2, 2, 2], [5, 32, 14, 14, 256]),
      ('2d', [1, 1, 2], [5, 32, 28, 14, 256]),
      ('1+2d', [2, 2, 2], [5, 16, 14, 14, 256]),
      ('1+2d', [2, 1, 1], [5, 16, 28, 28, 256]),
      ('1+2d', [1, 1, 1], [5, 32, 28, 28, 256]),
      ('1+2d', [1, 1, 2], [5, 32, 28, 14, 256]),
      ('2+1d', [2, 2, 2], [5, 16, 14, 14, 256]),
      ('2+1d', [1, 1, 1], [5, 32, 28, 28, 256]),
      ('2+1d', [2, 1, 2], [5, 16, 28, 14, 256]),
      ('1+1+1d', [2, 2, 2], [5, 16, 14, 14, 256]),
      ('1+1+1d', [1, 1, 1], [5, 32, 28, 28, 256]),
      ('1+1+1d', [2, 1, 2], [5, 16, 28, 14, 256]),
  )
  def test_parameterized_conv_layer_creation(self, conv_type, strides,
                                             expected_shape):
    batch_size = 5
    temporal_size = 32
    spatial_size = 28
    channels = 128
    kernel_size = 3
    filters = 256
    rates = [1, 1, 1]
    name = 'ParameterizedConv'

    inputs = tf.keras.Input(
        shape=(temporal_size, spatial_size, spatial_size, channels),
        batch_size=batch_size)
    parameterized_conv_layer = net_utils.ParameterizedConvLayer(
        conv_type, kernel_size, filters, strides, rates, name=name)

    features = parameterized_conv_layer(inputs)
    logging.info(features.shape.as_list())
    logging.info([w.name for w in parameterized_conv_layer.weights])

    self.assertAllEqual(features.shape.as_list(), expected_shape)


if __name__ == '__main__':
  tf.test.main()

official/projects/s3d/modeling/s3d.py  (new file, mode 100644)

# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Contains the Tensorflow 2 version definition of S3D model.
S3D model is described in the following paper:
https://arxiv.org/abs/1712.04851.
"""
from typing import Any, Dict, Mapping, Optional, Sequence, Text, Tuple, Union

import tensorflow as tf

from official.modeling import hyperparams
from official.projects.s3d.configs import s3d as cfg
from official.projects.s3d.modeling import inception_utils
from official.projects.s3d.modeling import net_utils
from official.vision.beta.modeling import factory_3d as model_factory
from official.vision.beta.modeling.backbones import factory as backbone_factory

initializers = tf.keras.initializers
regularizers = tf.keras.regularizers


class S3D(tf.keras.Model):
  """Class to build S3D family model."""

  def __init__(
      self,
      input_specs: tf.keras.layers.InputSpec,
      final_endpoint: Text = 'Mixed_5c',
      first_temporal_kernel_size: int = 3,
      temporal_conv_start_at: Text = 'Conv2d_2c_3x3',
      gating_start_at: Text = 'Conv2d_2c_3x3',
      swap_pool_and_1x1x1: bool = True,
      gating_style: Text = 'CELL',
      use_sync_bn: bool = False,
      norm_momentum: float = 0.999,
      norm_epsilon: float = 0.001,
      temporal_conv_initializer: Union[
          Text, initializers.Initializer] = initializers.TruncatedNormal(
              mean=0.0, stddev=0.01),
      temporal_conv_type: Text = '2+1d',
      kernel_initializer: Union[
          Text, initializers.Initializer] = initializers.TruncatedNormal(
              mean=0.0, stddev=0.01),
      kernel_regularizer: Union[Text, regularizers.Regularizer] = 'l2',
      depth_multiplier: float = 1.0,
      **kwargs):
"""Constructor.
Args:
input_specs: `tf.keras.layers.InputSpec` specs of the input tensor.
final_endpoint: Specifies the endpoint to construct the network up to.
first_temporal_kernel_size: Temporal kernel size of the first convolution
layer.
temporal_conv_start_at: Specifies the endpoint where to start performimg
temporal convolution from.
gating_start_at: Specifies the endpoint where to start performimg self
gating from.
swap_pool_and_1x1x1: A boolean flag indicates that whether to swap the
order of convolution and max pooling in Branch_3 of inception v1 cell.
gating_style: A string that specifies self gating to be applied after each
branch and/or after each cell. It can be one of ['BRANCH', 'CELL',
'BRANCH_AND_CELL'].
use_sync_bn: If True, use synchronized batch normalization.
norm_momentum: A `float` of normalization momentum for the moving average.
norm_epsilon: A `float` added to variance to avoid dividing by zero.
temporal_conv_initializer: Weight initializer for temporal convolutional
layers.
temporal_conv_type: The type of parameterized convolution. Currently, we
support '2d', '3d', '2+1d', '1+2d'.
kernel_initializer: Weight initializer for convolutional layers other than
temporal convolution.
kernel_regularizer: Weight regularizer for all convolutional layers.
depth_multiplier: A float to reduce/increase number of channels.
**kwargs: keyword arguments to be passed.
"""
    self._input_specs = input_specs
    self._final_endpoint = final_endpoint
    self._first_temporal_kernel_size = first_temporal_kernel_size
    self._temporal_conv_start_at = temporal_conv_start_at
    self._gating_start_at = gating_start_at
    self._swap_pool_and_1x1x1 = swap_pool_and_1x1x1
    self._gating_style = gating_style
    self._use_sync_bn = use_sync_bn
    self._norm_momentum = norm_momentum
    self._norm_epsilon = norm_epsilon
    self._temporal_conv_initializer = temporal_conv_initializer
    self._temporal_conv_type = temporal_conv_type
    self._kernel_initializer = kernel_initializer
    self._kernel_regularizer = kernel_regularizer
    self._depth_multiplier = depth_multiplier

    self._temporal_conv_endpoints = net_utils.make_set_from_start_endpoint(
        temporal_conv_start_at, inception_utils.INCEPTION_V1_CONV_ENDPOINTS)
    self._self_gating_endpoints = net_utils.make_set_from_start_endpoint(
        gating_start_at, inception_utils.INCEPTION_V1_CONV_ENDPOINTS)

    inputs = tf.keras.Input(shape=input_specs.shape[1:])
    net, end_points = inception_utils.inception_v1_stem_cells(
        inputs,
        depth_multiplier,
        final_endpoint,
        temporal_conv_endpoints=self._temporal_conv_endpoints,
        self_gating_endpoints=self._self_gating_endpoints,
        temporal_conv_type=self._temporal_conv_type,
        first_temporal_kernel_size=self._first_temporal_kernel_size,
        use_sync_bn=self._use_sync_bn,
        norm_momentum=self._norm_momentum,
        norm_epsilon=self._norm_epsilon,
        temporal_conv_initializer=self._temporal_conv_initializer,
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
        parameterized_conv_layer=self._get_parameterized_conv_layer_impl(),
        layer_naming_fn=self._get_layer_naming_fn(),
    )

    for end_point, filters in inception_utils.INCEPTION_V1_ARCH_SKELETON:
      net, end_points = self._s3d_cell(net, end_point, end_points, filters)
      if end_point == final_endpoint:
        break

    if final_endpoint not in end_points:
      raise ValueError(
          'Unrecognized final endpoint %s (available endpoints: %s).' %
          (final_endpoint, end_points.keys()))

    super(S3D, self).__init__(inputs=inputs, outputs=end_points, **kwargs)

  def _s3d_cell(
      self,
      net: tf.Tensor,
      end_point: Text,
      end_points: Dict[Text, tf.Tensor],
      filters: Union[int, Sequence[Any]],
      non_local_block: Optional[tf.keras.layers.Layer] = None,
      attention_cell: Optional[tf.keras.layers.Layer] = None,
      attention_cell_super_graph: Optional[tf.keras.layers.Layer] = None
  ) -> Tuple[tf.Tensor, Dict[Text, tf.Tensor]]:
    if end_point.startswith('Mixed'):
      conv_type = (
          self._temporal_conv_type
          if end_point in self._temporal_conv_endpoints else '2d')
      use_self_gating_on_branch = (
          end_point in self._self_gating_endpoints and
          (self._gating_style == 'BRANCH' or
           self._gating_style == 'BRANCH_AND_CELL'))
      use_self_gating_on_cell = (
          end_point in self._self_gating_endpoints and
          (self._gating_style == 'CELL' or
           self._gating_style == 'BRANCH_AND_CELL'))
      net = self._get_inception_v1_cell_layer_impl()(
          branch_filters=net_utils.apply_depth_multiplier(
              filters, self._depth_multiplier),
          conv_type=conv_type,
          temporal_dilation_rate=1,
          swap_pool_and_1x1x1=self._swap_pool_and_1x1x1,
          use_self_gating_on_branch=use_self_gating_on_branch,
          use_self_gating_on_cell=use_self_gating_on_cell,
          use_sync_bn=self._use_sync_bn,
          norm_momentum=self._norm_momentum,
          norm_epsilon=self._norm_epsilon,
          kernel_initializer=self._kernel_initializer,
          temporal_conv_initializer=self._temporal_conv_initializer,
          kernel_regularizer=self._kernel_regularizer,
          name=self._get_layer_naming_fn()(end_point))(
              net)
    else:
      net = tf.keras.layers.MaxPool3D(
          pool_size=filters[0],
          strides=filters[1],
          padding='same',
          name=self._get_layer_naming_fn()(end_point))(
              net)
    end_points[end_point] = net

    if non_local_block:
      # TODO(b/182299420): Implement non local block in TF2.
      raise NotImplementedError('Non local block is not implemented yet.')
    if attention_cell:
      # TODO(b/182299420): Implement attention cell in TF2.
      raise NotImplementedError('Attention cell is not implemented yet.')
    if attention_cell_super_graph:
      # TODO(b/182299420): Implement attention cell super graph in TF2.
      raise NotImplementedError('Attention cell super graph is not implemented'
                                ' yet.')
    return net, end_points

  def get_config(self):
    config_dict = {
        'input_specs': self._input_specs,
        'final_endpoint': self._final_endpoint,
        'first_temporal_kernel_size': self._first_temporal_kernel_size,
        'temporal_conv_start_at': self._temporal_conv_start_at,
        'gating_start_at': self._gating_start_at,
        'swap_pool_and_1x1x1': self._swap_pool_and_1x1x1,
        'gating_style': self._gating_style,
        'use_sync_bn': self._use_sync_bn,
        'norm_momentum': self._norm_momentum,
        'norm_epsilon': self._norm_epsilon,
        'temporal_conv_initializer': self._temporal_conv_initializer,
        'temporal_conv_type': self._temporal_conv_type,
        'kernel_initializer': self._kernel_initializer,
        'kernel_regularizer': self._kernel_regularizer,
        'depth_multiplier': self._depth_multiplier
    }
    return config_dict

  @classmethod
  def from_config(cls, config, custom_objects=None):
    return cls(**config)

  @property
  def output_specs(self):
    """A dict of {level: TensorShape} pairs for the model output."""
    return self._output_specs

  def _get_inception_v1_cell_layer_impl(self):
    return inception_utils.InceptionV1CellLayer

  def _get_parameterized_conv_layer_impl(self):
    return net_utils.ParameterizedConvLayer

  def _get_layer_naming_fn(self):
    return lambda end_point: None


class S3DModel(tf.keras.Model):
  """An S3D model builder."""

  def __init__(self,
               backbone: tf.keras.Model,
               num_classes: int,
               input_specs: Mapping[Text, tf.keras.layers.InputSpec],
               final_endpoint: Text = 'Mixed_5c',
               dropout_rate: float = 0.0,
               **kwargs):
"""Constructor.
Args:
backbone: S3D backbone Keras Model.
num_classes: `int` number of possible classes for video classification.
input_specs: input_specs: `tf.keras.layers.InputSpec` specs of the input
tensor.
final_endpoint: Specifies the endpoint to construct the network up to.
dropout_rate: `float` between 0 and 1. Fraction of the input units to
drop. Note that dropout_rate = 1.0 - dropout_keep_prob.
**kwargs: keyword arguments to be passed.
"""
    self._self_setattr_tracking = False
    self._backbone = backbone
    self._num_classes = num_classes
    self._input_specs = input_specs
    self._final_endpoint = final_endpoint
    self._dropout_rate = dropout_rate
    self._config_dict = {
        'backbone': backbone,
        'num_classes': num_classes,
        'input_specs': input_specs,
        'final_endpoint': final_endpoint,
        'dropout_rate': dropout_rate,
    }

    inputs = {
        k: tf.keras.Input(shape=v.shape[1:]) for k, v in input_specs.items()
    }
    streams = self._backbone(inputs['image'])
    pool = tf.math.reduce_mean(streams[self._final_endpoint], axis=[1, 2, 3])
    fc = tf.keras.layers.Dropout(dropout_rate)(pool)
    logits = tf.keras.layers.Dense(**self._build_dense_layer_params())(fc)

    super(S3DModel, self).__init__(inputs=inputs, outputs=logits, **kwargs)

  @property
  def checkpoint_items(self):
    """Returns a dictionary of items to be additionally checkpointed."""
    return dict(backbone=self.backbone)

  @property
  def backbone(self):
    return self._backbone

  def get_config(self):
    return self._config_dict

  @classmethod
  def from_config(cls, config, custom_objects=None):
    return cls(**config)

  def _build_dense_layer_params(self):
    return dict(units=self._num_classes, kernel_regularizer='l2')


@backbone_factory.register_backbone_builder('s3d')
def build_s3d(
    input_specs: tf.keras.layers.InputSpec,
    backbone_config: hyperparams.Config,
    norm_activation_config: hyperparams.Config,
    l2_regularizer: tf.keras.regularizers.Regularizer = None
) -> tf.keras.Model:  # pytype: disable=annotation-type-mismatch  # typed-keras
  """Builds S3D backbone."""
  backbone_type = backbone_config.type
  backbone_cfg = backbone_config.get()
  assert backbone_type == 's3d'
  del norm_activation_config

  backbone = S3D(
      input_specs=input_specs,
      final_endpoint=backbone_cfg.final_endpoint,
      first_temporal_kernel_size=backbone_cfg.first_temporal_kernel_size,
      temporal_conv_start_at=backbone_cfg.temporal_conv_start_at,
      gating_start_at=backbone_cfg.gating_start_at,
      swap_pool_and_1x1x1=backbone_cfg.swap_pool_and_1x1x1,
      gating_style=backbone_cfg.gating_style,
      use_sync_bn=backbone_cfg.use_sync_bn,
      norm_momentum=backbone_cfg.norm_momentum,
      norm_epsilon=backbone_cfg.norm_epsilon,
      temporal_conv_type=backbone_cfg.temporal_conv_type,
      kernel_regularizer=l2_regularizer,
      depth_multiplier=backbone_cfg.depth_multiplier)
  return backbone


@model_factory.register_model_builder('s3d')
def build_s3d_model(
    input_specs: tf.keras.layers.InputSpec,
    model_config: cfg.S3DModel,
    num_classes: int,
    l2_regularizer: tf.keras.regularizers.Regularizer = None
) -> tf.keras.Model:  # pytype: disable=annotation-type-mismatch  # typed-keras
  """Builds S3D model with classification layer."""
  input_specs_dict = {'image': input_specs}
  backbone = build_s3d(input_specs, model_config.backbone,
                       model_config.norm_activation, l2_regularizer)

  model = S3DModel(
      backbone,
      num_classes=num_classes,
      input_specs=input_specs_dict,
      dropout_rate=model_config.dropout_rate)
  return model
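
A minimal sketch (not part of the commit) of driving the backbone defined above, assuming a TF2 environment with the Model Garden `official` package importable; the endpoint shapes follow the defaults in this file:

# Illustrative only: build the default S3D backbone and inspect its endpoints.
import tensorflow as tf
from official.projects.s3d.modeling import s3d

input_specs = tf.keras.layers.InputSpec(shape=[None, 32, 224, 224, 3])
backbone = s3d.S3D(input_specs=input_specs)
endpoints = backbone(tf.ones([1, 32, 224, 224, 3]))
for name, tensor in endpoints.items():
  print(name, tensor.shape)  # e.g. Mixed_5c -> (1, 4, 7, 7, 1024)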

official/projects/s3d/modeling/s3d_test.py  (new file, mode 100644)

# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Tests for S3D model."""
from absl.testing import parameterized
import tensorflow as tf
from official.projects.s3d.modeling import s3d


class S3dTest(parameterized.TestCase, tf.test.TestCase):

  @parameterized.parameters(
      (7, 224, 224, 3),
      (7, 128, 128, 3),
      (7, 256, 256, 3),
      (7, 192, 192, 3),
      (64, 224, 224, 3),
      (32, 224, 224, 3),
      (64, 224, 224, 11),
      (32, 224, 224, 11),
  )
  def test_build(self, num_frames, height, width, first_temporal_kernel_size):
    batch_size = 5
    input_shape = [batch_size, num_frames, height, width, 3]

    input_specs = tf.keras.layers.InputSpec(shape=input_shape)
    network = s3d.S3D(input_specs=input_specs)
    inputs = tf.keras.Input(shape=input_shape[1:], batch_size=input_shape[0])
    endpoints = network(inputs)

    temporal_1a = (num_frames - 1) // 2 + 1
    expected_shapes = {
        'Conv2d_1a_7x7': [5, temporal_1a, height // 2, width // 2, 64],
        'Conv2d_2b_1x1': [5, temporal_1a, height // 4, width // 4, 64],
        'Conv2d_2c_3x3': [5, temporal_1a, height // 4, height // 4, 192],
        'MaxPool_2a_3x3': [5, temporal_1a, height // 4, height // 4, 64],
        'MaxPool_3a_3x3': [5, temporal_1a, height // 8, width // 8, 192],
        'Mixed_3b': [5, temporal_1a, height // 8, width // 8, 256],
        'Mixed_3c': [5, temporal_1a, height // 8, width // 8, 480],
        'MaxPool_4a_3x3': [5, temporal_1a // 2, height // 16, width // 16, 480],
        'Mixed_4b': [5, temporal_1a // 2, height // 16, width // 16, 512],
        'Mixed_4c': [5, temporal_1a // 2, height // 16, width // 16, 512],
        'Mixed_4d': [5, temporal_1a // 2, height // 16, width // 16, 512],
        'Mixed_4e': [5, temporal_1a // 2, height // 16, width // 16, 528],
        'Mixed_4f': [5, temporal_1a // 2, height // 16, width // 16, 832],
        'MaxPool_5a_2x2': [5, temporal_1a // 4, height // 32, width // 32, 832],
        'Mixed_5b': [5, temporal_1a // 4, height // 32, width // 32, 832],
        'Mixed_5c': [5, temporal_1a // 4, height // 32, width // 32, 1024],
    }
    output_shapes = dict()
    for end_point, output_tensor in endpoints.items():
      output_shapes[end_point] = output_tensor.shape.as_list()
    self.assertDictEqual(output_shapes, expected_shapes)

  def test_serialize_deserialize(self):
    # Create a network object that sets all of its config options.
    kwargs = dict(
        input_specs=tf.keras.layers.InputSpec(shape=(5, 64, 224, 224, 3)),
        final_endpoint='Mixed_5c',
        first_temporal_kernel_size=3,
        temporal_conv_start_at='Conv2d_2c_3x3',
        gating_start_at='Conv2d_2c_3x3',
        swap_pool_and_1x1x1=True,
        gating_style='CELL',
        use_sync_bn=False,
        norm_momentum=0.999,
        norm_epsilon=0.001,
        temporal_conv_initializer=tf.keras.initializers.TruncatedNormal(
            mean=0.0, stddev=0.01),
        temporal_conv_type='2+1d',
        kernel_initializer='truncated_normal',
        kernel_regularizer='l2',
        depth_multiplier=1.0)
    network = s3d.S3D(**kwargs)

    expected_config = dict(kwargs)
    self.assertEqual(network.get_config(), expected_config)

    # Create another network object from the first object's config.
    new_network = s3d.S3D.from_config(network.get_config())

    # Validate that the config can be forced to JSON.
    _ = new_network.to_json()

    # If the serialization was successful, the new config should match the old.
    self.assertAllEqual(network.get_config(), new_network.get_config())


if __name__ == '__main__':
  tf.test.main()

official/projects/s3d/train.py  (new file, mode 100644)

# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""TensorFlow Model Garden Vision training driver for S3D."""
from absl import app

# pylint: disable=unused-import
from official.common import registry_imports
# pylint: enable=unused-import
from official.common import flags as tfm_flags
# pylint: disable=unused-import
from official.projects.s3d.configs.google import s3d as s3d_config
from official.projects.s3d.modeling import s3d
from official.projects.s3d.tasks.google import automl_video_classification
# pylint: enable=unused-import
from official.vision.beta import train

if __name__ == '__main__':
  tfm_flags.define_flags()
  app.run(train.main)