Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
ResNet50_tensorflow
Commits
5ffcc5b6
Unverified
Commit
5ffcc5b6
authored
Jul 21, 2021
by
Anirudh Vegesana
Committed by
GitHub
Jul 21, 2021
Browse files
Merge branch 'purdue-yolo' into detection_generator_pr
parents
0b81a843
76e0c014
Changes
190
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
976 additions
and
53 deletions
+976
-53
official/vision/beta/dataloaders/tfds_factory_test.py
official/vision/beta/dataloaders/tfds_factory_test.py
+78
-0
official/vision/beta/dataloaders/tfexample_utils.py
official/vision/beta/dataloaders/tfexample_utils.py
+21
-0
official/vision/beta/modeling/backbones/spinenet.py
official/vision/beta/modeling/backbones/spinenet.py
+4
-2
official/vision/beta/modeling/backbones/spinenet_mobile.py
official/vision/beta/modeling/backbones/spinenet_mobile.py
+17
-15
official/vision/beta/modeling/backbones/spinenet_mobile_test.py
...al/vision/beta/modeling/backbones/spinenet_mobile_test.py
+1
-0
official/vision/beta/modeling/backbones/spinenet_test.py
official/vision/beta/modeling/backbones/spinenet_test.py
+7
-8
official/vision/beta/modeling/layers/nn_layers.py
official/vision/beta/modeling/layers/nn_layers.py
+42
-14
official/vision/beta/modeling/layers/nn_layers_test.py
official/vision/beta/modeling/layers/nn_layers_test.py
+89
-8
official/vision/beta/modeling/maskrcnn_model.py
official/vision/beta/modeling/maskrcnn_model.py
+6
-3
official/vision/beta/ops/augment.py
official/vision/beta/ops/augment.py
+2
-1
official/vision/beta/ops/spatial_transform_ops.py
official/vision/beta/ops/spatial_transform_ops.py
+13
-2
official/vision/beta/projects/example/example_config.py
official/vision/beta/projects/example/example_config.py
+117
-0
official/vision/beta/projects/example/example_config_local.yaml
...al/vision/beta/projects/example/example_config_local.yaml
+32
-0
official/vision/beta/projects/example/example_config_tpu.yaml
...cial/vision/beta/projects/example/example_config_tpu.yaml
+35
-0
official/vision/beta/projects/example/example_input.py
official/vision/beta/projects/example/example_input.py
+137
-0
official/vision/beta/projects/example/example_model.py
official/vision/beta/projects/example/example_model.py
+102
-0
official/vision/beta/projects/example/example_task.py
official/vision/beta/projects/example/example_task.py
+209
-0
official/vision/beta/projects/example/registry_imports.py
official/vision/beta/projects/example/registry_imports.py
+27
-0
official/vision/beta/projects/example/train.py
official/vision/beta/projects/example/train.py
+30
-0
official/vision/beta/projects/movinet/configs/movinet.py
official/vision/beta/projects/movinet/configs/movinet.py
+7
-0
No files found.
official/vision/beta/dataloaders/tfds_factory_test.py
0 → 100644
View file @
5ffcc5b6
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for tfds factory functions."""
from
absl.testing
import
parameterized
import
tensorflow
as
tf
from
official.vision.beta.dataloaders
import
decoder
as
base_decoder
from
official.vision.beta.dataloaders
import
tfds_factory
class
TFDSFactoryTest
(
tf
.
test
.
TestCase
,
parameterized
.
TestCase
):
@
parameterized
.
parameters
(
(
'imagenet2012'
),
(
'cifar10'
),
(
'cifar100'
),
)
def
test_classification_decoder
(
self
,
tfds_name
):
decoder
=
tfds_factory
.
get_classification_decoder
(
tfds_name
)
self
.
assertIsInstance
(
decoder
,
base_decoder
.
Decoder
)
@
parameterized
.
parameters
(
(
'flowers'
),
(
'coco'
),
)
def
test_doesnt_exit_classification_decoder
(
self
,
tfds_name
):
with
self
.
assertRaises
(
ValueError
):
_
=
tfds_factory
.
get_classification_decoder
(
tfds_name
)
@
parameterized
.
parameters
(
(
'coco'
),
(
'coco/2014'
),
(
'coco/2017'
),
)
def
test_detection_decoder
(
self
,
tfds_name
):
decoder
=
tfds_factory
.
get_detection_decoder
(
tfds_name
)
self
.
assertIsInstance
(
decoder
,
base_decoder
.
Decoder
)
@
parameterized
.
parameters
(
(
'pascal'
),
(
'cityscapes'
),
)
def
test_doesnt_exit_detection_decoder
(
self
,
tfds_name
):
with
self
.
assertRaises
(
ValueError
):
_
=
tfds_factory
.
get_detection_decoder
(
tfds_name
)
@
parameterized
.
parameters
(
(
'cityscapes'
),
(
'cityscapes/semantic_segmentation'
),
(
'cityscapes/semantic_segmentation_extra'
),
)
def
test_segmentation_decoder
(
self
,
tfds_name
):
decoder
=
tfds_factory
.
get_segmentation_decoder
(
tfds_name
)
self
.
assertIsInstance
(
decoder
,
base_decoder
.
Decoder
)
@
parameterized
.
parameters
(
(
'coco'
),
(
'imagenet'
),
)
def
test_doesnt_exit_segmentation_decoder
(
self
,
tfds_name
):
with
self
.
assertRaises
(
ValueError
):
_
=
tfds_factory
.
get_segmentation_decoder
(
tfds_name
)
if
__name__
==
'__main__'
:
tf
.
test
.
main
()
official/vision/beta/dataloaders/tfexample_utils.py
View file @
5ffcc5b6
...
...
@@ -143,3 +143,24 @@ def create_classification_example(
int64_list
=
tf
.
train
.
Int64List
(
value
=
labels
))),
})).
SerializeToString
()
return
serialized_example
def
create_3d_image_test_example
(
image_height
:
int
,
image_width
:
int
,
image_volume
:
int
,
image_channel
:
int
)
->
tf
.
train
.
Example
:
"""Creates 3D image and label."""
images
=
np
.
random
.
rand
(
image_height
,
image_width
,
image_volume
,
image_channel
)
images
=
images
.
astype
(
np
.
float32
)
labels
=
np
.
random
.
randint
(
low
=
2
,
size
=
(
image_height
,
image_width
,
image_volume
,
image_channel
))
labels
=
labels
.
astype
(
np
.
float32
)
feature
=
{
IMAGE_KEY
:
(
tf
.
train
.
Feature
(
bytes_list
=
tf
.
train
.
BytesList
(
value
=
[
images
.
tobytes
()]))),
CLASSIFICATION_LABEL_KEY
:
(
tf
.
train
.
Feature
(
bytes_list
=
tf
.
train
.
BytesList
(
value
=
[
labels
.
tobytes
()])))
}
return
tf
.
train
.
Example
(
features
=
tf
.
train
.
Features
(
feature
=
feature
))
official/vision/beta/modeling/backbones/spinenet.py
View file @
5ffcc5b6
...
...
@@ -393,8 +393,10 @@ class SpineNet(tf.keras.Model):
block_spec
.
level
))
if
(
block_spec
.
level
<
self
.
_min_level
or
block_spec
.
level
>
self
.
_max_level
):
raise
ValueError
(
'Output level is out of range [{}, {}]'
.
format
(
self
.
_min_level
,
self
.
_max_level
))
logging
.
warning
(
'SpineNet output level out of range [min_level, max_level] = '
'[%s, %s] will not be used for further processing.'
,
self
.
_min_level
,
self
.
_max_level
)
endpoints
[
str
(
block_spec
.
level
)]
=
x
return
endpoints
...
...
official/vision/beta/modeling/backbones/spinenet_mobile.py
View file @
5ffcc5b6
...
...
@@ -152,6 +152,7 @@ class SpineNetMobile(tf.keras.Model):
use_sync_bn
:
bool
=
False
,
norm_momentum
:
float
=
0.99
,
norm_epsilon
:
float
=
0.001
,
use_keras_upsampling_2d
:
bool
=
False
,
**
kwargs
):
"""Initializes a Mobile SpineNet model.
...
...
@@ -181,6 +182,7 @@ class SpineNetMobile(tf.keras.Model):
use_sync_bn: If True, use synchronized batch normalization.
norm_momentum: A `float` of normalization momentum for the moving average.
norm_epsilon: A small `float` added to variance to avoid dividing by zero.
use_keras_upsampling_2d: If True, use keras UpSampling2D layer.
**kwargs: Additional keyword arguments to be passed.
"""
self
.
_input_specs
=
input_specs
...
...
@@ -200,12 +202,7 @@ class SpineNetMobile(tf.keras.Model):
self
.
_use_sync_bn
=
use_sync_bn
self
.
_norm_momentum
=
norm_momentum
self
.
_norm_epsilon
=
norm_epsilon
if
activation
==
'relu'
:
self
.
_activation_fn
=
tf
.
nn
.
relu
elif
activation
==
'swish'
:
self
.
_activation_fn
=
tf
.
nn
.
swish
else
:
raise
ValueError
(
'Activation {} not implemented.'
.
format
(
activation
))
self
.
_use_keras_upsampling_2d
=
use_keras_upsampling_2d
self
.
_num_init_blocks
=
2
if
use_sync_bn
:
...
...
@@ -271,7 +268,7 @@ class SpineNetMobile(tf.keras.Model):
norm_momentum
=
self
.
_norm_momentum
,
norm_epsilon
=
self
.
_norm_epsilon
)(
inputs
)
return
tf
.
identity
(
x
,
name
=
name
)
return
tf
.
keras
.
layers
.
Activation
(
'linear'
,
name
=
name
)
(
x
)
def
_build_stem
(
self
,
inputs
):
"""Builds SpineNet stem."""
...
...
@@ -290,7 +287,7 @@ class SpineNetMobile(tf.keras.Model):
momentum
=
self
.
_norm_momentum
,
epsilon
=
self
.
_norm_epsilon
)(
x
)
x
=
tf_utils
.
get_activation
(
self
.
_activation
_fn
)(
x
)
x
=
tf_utils
.
get_activation
(
self
.
_activation
,
use_keras_layer
=
True
)(
x
)
net
=
[]
stem_strides
=
[
1
,
2
]
...
...
@@ -365,14 +362,15 @@ class SpineNetMobile(tf.keras.Model):
parent_weights
=
[
tf
.
nn
.
relu
(
tf
.
cast
(
tf
.
Variable
(
1.0
,
name
=
'block{}_fusion{}'
.
format
(
i
,
j
)),
dtype
=
dtype
))
for
j
in
range
(
len
(
parents
))]
weights_sum
=
tf
.
add_n
(
parent_weights
)
weights_sum
=
layers
.
Add
()
(
parent_weights
)
parents
=
[
parents
[
i
]
*
parent_weights
[
i
]
/
(
weights_sum
+
0.0001
)
for
i
in
range
(
len
(
parents
))
]
# Fuse all parent nodes then build a new block.
x
=
tf_utils
.
get_activation
(
self
.
_activation_fn
)(
tf
.
add_n
(
parents
))
x
=
tf_utils
.
get_activation
(
self
.
_activation
,
use_keras_layer
=
True
)(
layers
.
Add
()(
parents
))
x
=
self
.
_block_group
(
inputs
=
x
,
in_filters
=
target_num_filters
,
...
...
@@ -421,7 +419,7 @@ class SpineNetMobile(tf.keras.Model):
momentum
=
self
.
_norm_momentum
,
epsilon
=
self
.
_norm_epsilon
)(
x
)
x
=
tf_utils
.
get_activation
(
self
.
_activation
_fn
)(
x
)
x
=
tf_utils
.
get_activation
(
self
.
_activation
,
use_keras_layer
=
True
)(
x
)
endpoints
[
str
(
level
)]
=
x
return
endpoints
...
...
@@ -446,11 +444,13 @@ class SpineNetMobile(tf.keras.Model):
momentum
=
self
.
_norm_momentum
,
epsilon
=
self
.
_norm_epsilon
)(
x
)
x
=
tf_utils
.
get_activation
(
self
.
_activation_fn
)(
x
)
x
=
tf_utils
.
get_activation
(
self
.
_activation
,
use_keras_layer
=
True
)(
x
)
input_width
/=
2
elif
input_width
<
target_width
:
scale
=
target_width
//
input_width
x
=
spatial_transform_ops
.
nearest_upsampling
(
x
,
scale
=
scale
)
x
=
spatial_transform_ops
.
nearest_upsampling
(
x
,
scale
=
scale
,
use_keras_layer
=
self
.
_use_keras_upsampling_2d
)
# Last 1x1 conv to match filter size.
x
=
layers
.
Conv2D
(
...
...
@@ -485,7 +485,8 @@ class SpineNetMobile(tf.keras.Model):
'activation'
:
self
.
_activation
,
'use_sync_bn'
:
self
.
_use_sync_bn
,
'norm_momentum'
:
self
.
_norm_momentum
,
'norm_epsilon'
:
self
.
_norm_epsilon
'norm_epsilon'
:
self
.
_norm_epsilon
,
'use_keras_upsampling_2d'
:
self
.
_use_keras_upsampling_2d
,
}
return
config_dict
...
...
@@ -531,4 +532,5 @@ def build_spinenet_mobile(
activation
=
norm_activation_config
.
activation
,
use_sync_bn
=
norm_activation_config
.
use_sync_bn
,
norm_momentum
=
norm_activation_config
.
norm_momentum
,
norm_epsilon
=
norm_activation_config
.
norm_epsilon
)
norm_epsilon
=
norm_activation_config
.
norm_epsilon
,
use_keras_upsampling_2d
=
backbone_cfg
.
use_keras_upsampling_2d
)
official/vision/beta/modeling/backbones/spinenet_mobile_test.py
View file @
5ffcc5b6
...
...
@@ -90,6 +90,7 @@ class SpineNetMobileTest(parameterized.TestCase, tf.test.TestCase):
kernel_initializer
=
'VarianceScaling'
,
kernel_regularizer
=
None
,
bias_regularizer
=
None
,
use_keras_upsampling_2d
=
False
,
)
network
=
spinenet_mobile
.
SpineNetMobile
(
**
kwargs
)
...
...
official/vision/beta/modeling/backbones/spinenet_test.py
View file @
5ffcc5b6
...
...
@@ -24,17 +24,16 @@ from official.vision.beta.modeling.backbones import spinenet
class
SpineNetTest
(
parameterized
.
TestCase
,
tf
.
test
.
TestCase
):
@
parameterized
.
parameters
(
(
128
,
0.65
,
1
,
0.5
,
128
),
(
256
,
1.0
,
1
,
0.5
,
256
),
(
384
,
1.0
,
2
,
0.5
,
256
),
(
512
,
1.0
,
3
,
1.0
,
256
),
(
640
,
1.3
,
4
,
1.0
,
384
),
(
128
,
0.65
,
1
,
0.5
,
128
,
4
,
6
),
(
256
,
1.0
,
1
,
0.5
,
256
,
3
,
6
),
(
384
,
1.0
,
2
,
0.5
,
256
,
4
,
7
),
(
512
,
1.0
,
3
,
1.0
,
256
,
3
,
7
),
(
640
,
1.3
,
4
,
1.0
,
384
,
3
,
7
),
)
def
test_network_creation
(
self
,
input_size
,
filter_size_scale
,
block_repeats
,
resample_alpha
,
endpoints_num_filters
):
resample_alpha
,
endpoints_num_filters
,
min_level
,
max_level
):
"""Test creation of SpineNet models."""
min_level
=
3
max_level
=
7
tf
.
keras
.
backend
.
set_image_data_format
(
'channels_last'
)
...
...
official/vision/beta/modeling/layers/nn_layers.py
View file @
5ffcc5b6
...
...
@@ -26,6 +26,10 @@ from official.modeling import tf_utils
States
=
Dict
[
str
,
tf
.
Tensor
]
Activation
=
Union
[
str
,
Callable
]
# TODO(dankondratyuk): keep legacy padding until new checkpoints are trained.
# Otherwise, accuracy will be affected.
LEGACY_PADDING
=
True
def
make_divisible
(
value
:
float
,
divisor
:
int
,
...
...
@@ -68,6 +72,23 @@ def round_filters(filters: int,
return
int
(
new_filters
)
def
hard_swish
(
x
:
tf
.
Tensor
)
->
tf
.
Tensor
:
"""A Swish6/H-Swish activation function.
Reference: Section 5.2 of Howard et al. "Searching for MobileNet V3."
https://arxiv.org/pdf/1905.02244.pdf
Args:
x: the input tensor.
Returns:
The activation output.
"""
return
x
*
tf
.
nn
.
relu6
(
x
+
3.
)
*
(
1.
/
6.
)
tf
.
keras
.
utils
.
get_custom_objects
().
update
({
'hard_swish'
:
hard_swish
})
@
tf
.
keras
.
utils
.
register_keras_serializable
(
package
=
'Vision'
)
class
SqueezeExcitation
(
tf
.
keras
.
layers
.
Layer
):
"""Creates a squeeze and excitation layer."""
...
...
@@ -706,9 +727,10 @@ class CausalConvMixin:
self
.
_use_buffered_input
=
variable
def
_compute_buffered_causal_padding
(
self
,
inputs
:
Optional
[
tf
.
Tensor
]
=
None
,
inputs
:
tf
.
Tensor
,
use_buffered_input
:
bool
=
False
,
time_axis
:
int
=
1
)
->
List
[
List
[
int
]]:
time_axis
:
int
=
1
,
)
->
List
[
List
[
int
]]:
"""Calculates padding for 'causal' option for conv layers.
Args:
...
...
@@ -720,7 +742,7 @@ class CausalConvMixin:
Returns:
A list of paddings for `tf.pad`.
"""
del
inputs
input_shape
=
tf
.
shape
(
inputs
)[
1
:
-
1
]
if
tf
.
keras
.
backend
.
image_data_format
()
==
'channels_first'
:
raise
ValueError
(
'"channels_first" mode is unsupported.'
)
...
...
@@ -730,7 +752,14 @@ class CausalConvMixin:
(
self
.
kernel_size
[
i
]
-
1
)
*
(
self
.
dilation_rate
[
i
]
-
1
))
for
i
in
range
(
self
.
rank
)
]
pad_total
=
[
kernel_size_effective
[
i
]
-
1
for
i
in
range
(
self
.
rank
)]
if
LEGACY_PADDING
:
# Apply legacy padding that does not take into account spatial strides
pad_total
=
[
kernel_size_effective
[
i
]
-
1
for
i
in
range
(
self
.
rank
)]
else
:
pad_total
=
[
kernel_size_effective
[
0
]
-
1
]
for
i
in
range
(
1
,
self
.
rank
):
overlap
=
(
input_shape
[
i
]
-
1
)
%
self
.
strides
[
i
]
+
1
pad_total
.
append
(
tf
.
maximum
(
kernel_size_effective
[
i
]
-
overlap
,
0
))
pad_beg
=
[
pad_total
[
i
]
//
2
for
i
in
range
(
self
.
rank
)]
pad_end
=
[
pad_total
[
i
]
-
pad_beg
[
i
]
for
i
in
range
(
self
.
rank
)]
padding
=
[[
pad_beg
[
i
],
pad_end
[
i
]]
for
i
in
range
(
self
.
rank
)]
...
...
@@ -763,7 +792,8 @@ class CausalConvMixin:
# across time should be the input shape minus any padding, assuming
# the stride across time is 1.
if
self
.
_use_buffered_input
and
spatial_output_shape
[
0
]
is
not
None
:
padding
=
self
.
_compute_buffered_causal_padding
(
use_buffered_input
=
False
)
padding
=
self
.
_compute_buffered_causal_padding
(
tf
.
zeros
([
1
]
+
spatial_output_shape
+
[
1
]),
use_buffered_input
=
False
)
spatial_output_shape
[
0
]
-=
sum
(
padding
[
1
])
return
spatial_output_shape
...
...
@@ -911,15 +941,13 @@ class Conv3D(tf.keras.layers.Conv3D, CausalConvMixin):
base_config
=
super
(
Conv3D
,
self
).
get_config
()
return
dict
(
list
(
base_config
.
items
())
+
list
(
config
.
items
()))
def
build
(
self
,
input_shape
):
"""Builds the layer with the given input shape."""
super
(
Conv3D
,
self
).
build
(
input_shape
)
# TODO(b/177662019): tf.nn.conv3d with depthwise kernels on CPU
# in eager mode may produce incorrect output or cause a segfault.
# To avoid this issue, compile the op to TF graph using tf.function.
self
.
_convolution_op
=
tf
.
function
(
self
.
_convolution_op
,
experimental_compile
=
True
)
def
call
(
self
,
inputs
):
"""Call the layer with the given inputs."""
# Note: tf.nn.conv3d with depthwise kernels on CPU is currently only
# supported when compiling with TF graph (XLA) using tf.function, so it
# is compiled by default here (b/186463870).
conv_fn
=
tf
.
function
(
super
(
Conv3D
,
self
).
call
,
jit_compile
=
True
)
return
conv_fn
(
inputs
)
def
_compute_causal_padding
(
self
,
inputs
):
"""Computes causal padding dimensions for the given inputs."""
...
...
official/vision/beta/modeling/layers/nn_layers_test.py
View file @
5ffcc5b6
...
...
@@ -24,6 +24,15 @@ from official.vision.beta.modeling.layers import nn_layers
class
NNLayersTest
(
parameterized
.
TestCase
,
tf
.
test
.
TestCase
):
def
setUp
(
self
):
super
().
setUp
()
nn_layers
.
LEGACY_PADDING
=
False
def
test_hard_swish
(
self
):
activation
=
tf
.
keras
.
layers
.
Activation
(
'hard_swish'
)
output
=
activation
(
tf
.
constant
([
-
3
,
-
1.5
,
0
,
3
]))
self
.
assertAllEqual
(
output
,
[
0.
,
-
0.375
,
0.
,
3.
])
def
test_scale
(
self
):
scale
=
nn_layers
.
Scale
(
initializer
=
tf
.
keras
.
initializers
.
constant
(
10.
))
output
=
scale
(
3.
)
...
...
@@ -274,14 +283,14 @@ class NNLayersTest(parameterized.TestCase, tf.test.TestCase):
predicted
=
conv3d
(
padded_inputs
)
expected
=
tf
.
constant
(
[[[[[
1
2.
,
1
2.
,
1
2.
],
[[[[[
2
7
.
,
2
7
.
,
2
7
.
],
[
18.
,
18.
,
18.
]],
[[
18.
,
18.
,
18.
],
[
2
7
.
,
2
7
.
,
2
7
.
]]],
[[[
2
4.
,
2
4.
,
2
4.
],
[
1
2.
,
1
2.
,
1
2.
]]],
[[[
5
4.
,
5
4.
,
5
4.
],
[
36.
,
36.
,
36.
]],
[[
36.
,
36.
,
36.
],
[
5
4.
,
5
4.
,
5
4.
]]]]])
[
2
4.
,
2
4.
,
2
4.
]]]]])
self
.
assertEqual
(
predicted
.
shape
,
expected
.
shape
)
self
.
assertAllClose
(
predicted
,
expected
)
...
...
@@ -311,14 +320,17 @@ class NNLayersTest(parameterized.TestCase, tf.test.TestCase):
predicted
=
conv3d
(
padded_inputs
)
expected
=
tf
.
constant
(
[[[[[
4
.0
,
4
.0
,
4
.0
],
[[[[[
9
.0
,
9
.0
,
9
.0
],
[
6.0
,
6.0
,
6.0
]],
[[
6.0
,
6.0
,
6.0
],
[
9
.0
,
9
.0
,
9
.0
]]],
[[[
8.0
,
8.0
,
8.0
],
[
4
.0
,
4
.0
,
4
.0
]]],
[[[
1
8.0
,
1
8.0
,
1
8.0
],
[
12.
,
12.
,
12.
]],
[[
12.
,
12.
,
12.
],
[
18.
,
18.
,
18.
]]]]])
[
8.
,
8.
,
8.
]]]]])
output_shape
=
conv3d
.
_spatial_output_shape
([
4
,
4
,
4
])
self
.
assertAllClose
(
output_shape
,
[
2
,
2
,
2
])
self
.
assertEqual
(
predicted
.
shape
,
expected
.
shape
)
self
.
assertAllClose
(
predicted
,
expected
)
...
...
@@ -329,5 +341,74 @@ class NNLayersTest(parameterized.TestCase, tf.test.TestCase):
self
.
assertEqual
(
predicted
.
shape
,
expected
.
shape
)
self
.
assertAllClose
(
predicted
,
expected
)
def
test_conv3d_causal_padding_2d
(
self
):
"""Test to ensure causal padding works like standard padding."""
conv3d
=
nn_layers
.
Conv3D
(
filters
=
1
,
kernel_size
=
(
1
,
3
,
3
),
strides
=
(
1
,
2
,
2
),
padding
=
'causal'
,
use_buffered_input
=
False
,
kernel_initializer
=
'ones'
,
use_bias
=
False
,
)
keras_conv3d
=
tf
.
keras
.
layers
.
Conv3D
(
filters
=
1
,
kernel_size
=
(
1
,
3
,
3
),
strides
=
(
1
,
2
,
2
),
padding
=
'same'
,
kernel_initializer
=
'ones'
,
use_bias
=
False
,
)
inputs
=
tf
.
ones
([
1
,
1
,
4
,
4
,
1
])
predicted
=
conv3d
(
inputs
)
expected
=
keras_conv3d
(
inputs
)
self
.
assertEqual
(
predicted
.
shape
,
expected
.
shape
)
self
.
assertAllClose
(
predicted
,
expected
)
self
.
assertAllClose
(
predicted
,
[[[[[
9.
],
[
6.
]],
[[
6.
],
[
4.
]]]]])
def
test_conv3d_causal_padding_1d
(
self
):
"""Test to ensure causal padding works like standard padding."""
conv3d
=
nn_layers
.
Conv3D
(
filters
=
1
,
kernel_size
=
(
3
,
1
,
1
),
strides
=
(
2
,
1
,
1
),
padding
=
'causal'
,
use_buffered_input
=
False
,
kernel_initializer
=
'ones'
,
use_bias
=
False
,
)
keras_conv1d
=
tf
.
keras
.
layers
.
Conv1D
(
filters
=
1
,
kernel_size
=
3
,
strides
=
2
,
padding
=
'causal'
,
kernel_initializer
=
'ones'
,
use_bias
=
False
,
)
inputs
=
tf
.
ones
([
1
,
4
,
1
,
1
,
1
])
predicted
=
conv3d
(
inputs
)
expected
=
keras_conv1d
(
tf
.
squeeze
(
inputs
,
axis
=
[
2
,
3
]))
expected
=
tf
.
reshape
(
expected
,
[
1
,
2
,
1
,
1
,
1
])
self
.
assertEqual
(
predicted
.
shape
,
expected
.
shape
)
self
.
assertAllClose
(
predicted
,
expected
)
self
.
assertAllClose
(
predicted
,
[[[[[
1.
]]],
[[[
3.
]]]]])
if
__name__
==
'__main__'
:
tf
.
test
.
main
()
official/vision/beta/modeling/maskrcnn_model.py
View file @
5ffcc5b6
...
...
@@ -16,7 +16,6 @@
from
typing
import
Any
,
List
,
Mapping
,
Optional
,
Union
# Import libraries
import
tensorflow
as
tf
from
official.vision.beta.ops
import
anchor
...
...
@@ -147,14 +146,18 @@ class MaskRCNNModel(tf.keras.Model):
model_outputs
=
{}
# Feature extraction.
features
=
self
.
backbone
(
images
)
backbone_
features
=
self
.
backbone
(
images
)
if
self
.
decoder
:
features
=
self
.
decoder
(
features
)
features
=
self
.
decoder
(
backbone_features
)
else
:
features
=
backbone_features
# Region proposal network.
rpn_scores
,
rpn_boxes
=
self
.
rpn_head
(
features
)
model_outputs
.
update
({
'backbone_features'
:
backbone_features
,
'decoder_features'
:
features
,
'rpn_boxes'
:
rpn_boxes
,
'rpn_scores'
:
rpn_scores
})
...
...
official/vision/beta/ops/augment.py
View file @
5ffcc5b6
...
...
@@ -1205,7 +1205,8 @@ class RandAugment(ImageAugment):
self
.
magnitude
=
float
(
magnitude
)
self
.
cutout_const
=
float
(
cutout_const
)
self
.
translate_const
=
float
(
translate_const
)
self
.
prob_to_apply
=
prob_to_apply
self
.
prob_to_apply
=
(
float
(
prob_to_apply
)
if
prob_to_apply
is
not
None
else
None
)
self
.
available_ops
=
[
'AutoContrast'
,
'Equalize'
,
'Invert'
,
'Rotate'
,
'Posterize'
,
'Solarize'
,
'Color'
,
'Contrast'
,
'Brightness'
,
'Sharpness'
,
'ShearX'
,
'ShearY'
,
...
...
official/vision/beta/ops/spatial_transform_ops.py
View file @
5ffcc5b6
...
...
@@ -198,7 +198,8 @@ def multilevel_crop_and_resize(features,
# Assigns boxes to the right level.
box_width
=
boxes
[:,
:,
3
]
-
boxes
[:,
:,
1
]
box_height
=
boxes
[:,
:,
2
]
-
boxes
[:,
:,
0
]
areas_sqrt
=
tf
.
cast
(
tf
.
sqrt
(
box_height
*
box_width
),
tf
.
float32
)
areas_sqrt
=
tf
.
sqrt
(
tf
.
cast
(
box_height
,
tf
.
float32
)
*
tf
.
cast
(
box_width
,
tf
.
float32
))
levels
=
tf
.
cast
(
tf
.
math
.
floordiv
(
tf
.
math
.
log
(
tf
.
divide
(
areas_sqrt
,
224.0
)),
...
...
@@ -456,6 +457,12 @@ def crop_mask_in_target_box(masks,
[batch_size, num_boxes, output_size, output_size].
"""
with
tf
.
name_scope
(
'crop_mask_in_target_box'
):
# Cast to float32, as the y_transform and other transform variables may
# overflow in float16
masks
=
tf
.
cast
(
masks
,
tf
.
float32
)
boxes
=
tf
.
cast
(
boxes
,
tf
.
float32
)
target_boxes
=
tf
.
cast
(
target_boxes
,
tf
.
float32
)
batch_size
,
num_masks
,
height
,
width
=
masks
.
get_shape
().
as_list
()
if
batch_size
is
None
:
batch_size
=
tf
.
shape
(
masks
)[
0
]
...
...
@@ -504,18 +511,22 @@ def crop_mask_in_target_box(masks,
return
cropped_masks
def
nearest_upsampling
(
data
,
scale
):
def
nearest_upsampling
(
data
,
scale
,
use_keras_layer
=
False
):
"""Nearest neighbor upsampling implementation.
Args:
data: A tensor with a shape of [batch, height_in, width_in, channels].
scale: An integer multiple to scale resolution of input data.
use_keras_layer: If True, use keras Upsampling2D layer.
Returns:
data_up: A tensor with a shape of
[batch, height_in*scale, width_in*scale, channels]. Same dtype as input
data.
"""
if
use_keras_layer
:
return
tf
.
keras
.
layers
.
UpSampling2D
(
size
=
(
scale
,
scale
),
interpolation
=
'nearest'
)(
data
)
with
tf
.
name_scope
(
'nearest_upsampling'
):
bs
,
_
,
_
,
c
=
data
.
get_shape
().
as_list
()
shape
=
tf
.
shape
(
input
=
data
)
...
...
official/vision/beta/projects/example/example_config.py
0 → 100644
View file @
5ffcc5b6
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Example experiment configuration definition."""
from
typing
import
List
import
dataclasses
from
official.core
import
config_definitions
as
cfg
from
official.core
import
exp_factory
from
official.modeling
import
hyperparams
from
official.modeling
import
optimization
@
dataclasses
.
dataclass
class
ExampleDataConfig
(
cfg
.
DataConfig
):
"""Input config for training. Add more fields as needed."""
input_path
:
str
=
''
global_batch_size
:
int
=
0
is_training
:
bool
=
True
dtype
:
str
=
'float32'
shuffle_buffer_size
:
int
=
10000
cycle_length
:
int
=
10
file_type
:
str
=
'tfrecord'
@
dataclasses
.
dataclass
class
ExampleModel
(
hyperparams
.
Config
):
"""The model config. Used by build_example_model function."""
num_classes
:
int
=
0
input_size
:
List
[
int
]
=
dataclasses
.
field
(
default_factory
=
list
)
@
dataclasses
.
dataclass
class
Losses
(
hyperparams
.
Config
):
l2_weight_decay
:
float
=
0.0
@
dataclasses
.
dataclass
class
Evaluation
(
hyperparams
.
Config
):
top_k
:
int
=
5
@
dataclasses
.
dataclass
class
ExampleTask
(
cfg
.
TaskConfig
):
"""The task config."""
model
:
ExampleModel
=
ExampleModel
()
train_data
:
ExampleDataConfig
=
ExampleDataConfig
(
is_training
=
True
)
validation_data
:
ExampleDataConfig
=
ExampleDataConfig
(
is_training
=
False
)
losses
:
Losses
=
Losses
()
evaluation
:
Evaluation
=
Evaluation
()
@
exp_factory
.
register_config_factory
(
'tf_vision_example_experiment'
)
def
tf_vision_example_experiment
()
->
cfg
.
ExperimentConfig
:
"""Definition of a full example experiment."""
train_batch_size
=
256
eval_batch_size
=
256
steps_per_epoch
=
10
config
=
cfg
.
ExperimentConfig
(
task
=
ExampleTask
(
model
=
ExampleModel
(
num_classes
=
10
,
input_size
=
[
128
,
128
,
3
]),
losses
=
Losses
(
l2_weight_decay
=
1e-4
),
train_data
=
ExampleDataConfig
(
input_path
=
'/path/to/train*'
,
is_training
=
True
,
global_batch_size
=
train_batch_size
),
validation_data
=
ExampleDataConfig
(
input_path
=
'/path/to/valid*'
,
is_training
=
False
,
global_batch_size
=
eval_batch_size
)),
trainer
=
cfg
.
TrainerConfig
(
steps_per_loop
=
steps_per_epoch
,
summary_interval
=
steps_per_epoch
,
checkpoint_interval
=
steps_per_epoch
,
train_steps
=
90
*
steps_per_epoch
,
validation_steps
=
steps_per_epoch
,
validation_interval
=
steps_per_epoch
,
optimizer_config
=
optimization
.
OptimizationConfig
({
'optimizer'
:
{
'type'
:
'sgd'
,
'sgd'
:
{
'momentum'
:
0.9
}
},
'learning_rate'
:
{
'type'
:
'cosine'
,
'cosine'
:
{
'initial_learning_rate'
:
1.6
,
'decay_steps'
:
350
*
steps_per_epoch
}
},
'warmup'
:
{
'type'
:
'linear'
,
'linear'
:
{
'warmup_steps'
:
5
*
steps_per_epoch
,
'warmup_learning_rate'
:
0
}
}
})),
restrictions
=
[
'task.train_data.is_training != None'
,
'task.validation_data.is_training != None'
])
return
config
official/vision/beta/projects/example/example_config_local.yaml
0 → 100644
View file @
5ffcc5b6
task
:
model
:
num_classes
:
1001
input_size
:
[
128
,
128
,
3
]
train_data
:
input_path
:
'
imagenet-2012-tfrecord/train*'
is_training
:
true
global_batch_size
:
64
dtype
:
'
bfloat16'
validation_data
:
input_path
:
'
imagenet-2012-tfrecord/valid*'
is_training
:
false
global_batch_size
:
64
dtype
:
'
bfloat16'
drop_remainder
:
false
trainer
:
train_steps
:
62400
validation_steps
:
13
validation_interval
:
312
steps_per_loop
:
312
summary_interval
:
312
checkpoint_interval
:
312
optimizer_config
:
optimizer
:
type
:
'
sgd'
sgd
:
momentum
:
0.9
learning_rate
:
type
:
'
stepwise'
stepwise
:
boundaries
:
[
18750
,
37500
,
50000
]
values
:
[
0.1
,
0.01
,
0.001
,
0.0001
]
official/vision/beta/projects/example/example_config_tpu.yaml
0 → 100644
View file @
5ffcc5b6
runtime
:
distribution_strategy
:
'
tpu'
mixed_precision_dtype
:
'
bfloat16'
task
:
model
:
num_classes
:
1001
input_size
:
[
128
,
128
,
3
]
train_data
:
input_path
:
'
imagenet-2012-tfrecord/train*'
is_training
:
true
global_batch_size
:
4096
dtype
:
'
bfloat16'
validation_data
:
input_path
:
'
imagenet-2012-tfrecord/valid*'
is_training
:
false
global_batch_size
:
4096
dtype
:
'
bfloat16'
drop_remainder
:
false
trainer
:
train_steps
:
62400
validation_steps
:
13
validation_interval
:
312
steps_per_loop
:
312
summary_interval
:
312
checkpoint_interval
:
312
optimizer_config
:
optimizer
:
type
:
'
sgd'
sgd
:
momentum
:
0.9
learning_rate
:
type
:
'
stepwise'
stepwise
:
boundaries
:
[
18750
,
37500
,
50000
]
values
:
[
0.1
,
0.01
,
0.001
,
0.0001
]
official/vision/beta/projects/example/example_input.py
0 → 100644
View file @
5ffcc5b6
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Example classification decoder and parser.
This file defines the Decoder and Parser to load data. The example is shown on
loading standard tf.Example data but non-standard tf.Example or other data
format can be supported by implementing proper decoder and parser.
"""
from
typing
import
Mapping
,
List
,
Tuple
# Import libraries
import
tensorflow
as
tf
from
official.vision.beta.dataloaders
import
decoder
from
official.vision.beta.dataloaders
import
parser
from
official.vision.beta.ops
import
preprocess_ops
MEAN_RGB
=
(
0.485
*
255
,
0.456
*
255
,
0.406
*
255
)
STDDEV_RGB
=
(
0.229
*
255
,
0.224
*
255
,
0.225
*
255
)
class Decoder(decoder.Decoder):
  """A tf.Example decoder for the classification task."""

  def __init__(self):
    """Initializes the decoder.

    The constructor defines the mapping between the field name and the value
    from an input tf.Example. Two fields are defined here, one for the image
    bytes and one for the label; there is no limit on the number of fields to
    decode.
    """
    # Schema: raw encoded image bytes plus an integer class label. Missing
    # fields fall back to an empty string / -1 respectively.
    image_feature = tf.io.FixedLenFeature((), tf.string, default_value='')
    label_feature = tf.io.FixedLenFeature((), tf.int64, default_value=-1)
    self._keys_to_features = {
        'image/encoded': image_feature,
        'image/class/label': label_feature,
    }

  def decode(self,
             serialized_example: tf.train.Example) -> Mapping[str, tf.Tensor]:
    """Decodes a serialized tf.Example into a dictionary of tensors.

    The output of this function is consumed by `_parse_train_data` and
    `_parse_validation_data` in Parser.

    Args:
      serialized_example: A serialized tf.Example.

    Returns:
      A dictionary mapping field key names to decoded tensors.
    """
    return tf.io.parse_single_example(serialized_example,
                                      self._keys_to_features)
class Parser(parser.Parser):
  """Parser to parse an image and its annotations.

  To define your own Parser, override the _parse_train_data and
  _parse_eval_data functions, where decoded tensors are parsed with optional
  pre-processing steps. The output from the two functions can be any structure
  like tuple, list or dictionary.
  """

  def __init__(self, output_size: List[int], num_classes: int):
    """Initializes parameters for parsing annotations in the dataset.

    This example only takes two arguments but one can freely add as many
    arguments as needed. For example, pre-processing and augmentations usually
    happen in Parser, and related parameters can be passed in by this
    constructor.

    Args:
      output_size: `list` of two ints, the [height, width] of the output
        image. A plain Python list is required (not a Tensor), because it is
        concatenated with `[3]` in `_parse_data` to form a static shape.
      num_classes: `int`, number of classes.
    """
    self._output_size = output_size
    self._num_classes = num_classes
    # Parsed images are always produced as float32.
    self._dtype = tf.float32

  def _parse_data(
      self, decoded_tensors: Mapping[str,
                                     tf.Tensor]) -> Tuple[tf.Tensor, tf.Tensor]:
    """Decodes, resizes and normalizes a single image/label pair."""
    label = tf.cast(decoded_tensors['image/class/label'], dtype=tf.int32)

    image_bytes = decoded_tensors['image/encoded']
    image = tf.io.decode_jpeg(image_bytes, channels=3)
    image = tf.image.resize(
        image, self._output_size, method=tf.image.ResizeMethod.BILINEAR)
    # Pin the static shape to [height, width, 3] so downstream layers that
    # require a fully known shape can rely on it.
    image = tf.ensure_shape(image, self._output_size + [3])

    # Normalizes image with mean and std pixel values.
    image = preprocess_ops.normalize_image(
        image, offset=MEAN_RGB, scale=STDDEV_RGB)
    image = tf.image.convert_image_dtype(image, self._dtype)
    return image, label

  def _parse_train_data(
      self, decoded_tensors: Mapping[str,
                                     tf.Tensor]) -> Tuple[tf.Tensor, tf.Tensor]:
    """Parses data for training.

    Args:
      decoded_tensors: A dictionary of field key name and decoded tensor
        mapping from Decoder.

    Returns:
      A tuple of (image, label) tensors.
    """
    return self._parse_data(decoded_tensors)

  def _parse_eval_data(
      self, decoded_tensors: Mapping[str,
                                     tf.Tensor]) -> Tuple[tf.Tensor, tf.Tensor]:
    """Parses data for evaluation.

    Args:
      decoded_tensors: A dictionary of field key name and decoded tensor
        mapping from Decoder.

    Returns:
      A tuple of (image, label) tensors.
    """
    return self._parse_data(decoded_tensors)
official/vision/beta/projects/example/example_model.py
0 → 100644
View file @
5ffcc5b6
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""A sample model implementation.
This is only a dummy example to showcase how a model is composed. It is usually
not needed to implement a model from scratch. Most SoTA models can be found and
directly used from `official/vision/beta/modeling` directory.
"""
from
typing
import
Any
,
Mapping
# Import libraries
import
tensorflow
as
tf
from
official.vision.beta.projects.example
import
example_config
as
example_cfg
@tf.keras.utils.register_keras_serializable(package='Vision')
class ExampleModel(tf.keras.Model):
  """An example model class.

  A model is a subclass of tf.keras.Model where layers are built in the
  constructor.
  """

  def __init__(
      self,
      num_classes: int,
      input_specs: tf.keras.layers.InputSpec = tf.keras.layers.InputSpec(
          shape=[None, None, None, 3]),
      **kwargs):
    """Initializes the example model.

    All layers are defined in the constructor, and config is recorded in the
    `_config_dict` object for serialization.

    Args:
      num_classes: The number of classes in classification task.
      input_specs: A `tf.keras.layers.InputSpec` spec of the input tensor.
      **kwargs: Additional keyword arguments to be passed.
    """
    inputs = tf.keras.Input(shape=input_specs.shape[1:], name=input_specs.name)

    # Three stride-2 3x3 convolutions progressively downsample the feature map
    # while widening the channel dimension.
    net = inputs
    for num_filters in (16, 32, 64):
      net = tf.keras.layers.Conv2D(
          filters=num_filters,
          kernel_size=3,
          strides=2,
          padding='same',
          use_bias=False)(
              net)

    # Pool spatial dimensions away, then classify with a two-layer head. The
    # final layer emits raw logits (no activation).
    net = tf.keras.layers.GlobalAveragePooling2D()(net)
    net = tf.keras.layers.Dense(1024, activation='relu')(net)
    outputs = tf.keras.layers.Dense(num_classes)(net)

    super().__init__(inputs=inputs, outputs=outputs, **kwargs)
    self._input_specs = input_specs
    self._config_dict = {
        'num_classes': num_classes,
        'input_specs': input_specs,
    }

  def get_config(self) -> Mapping[str, Any]:
    """Gets the config of this model."""
    return self._config_dict

  @classmethod
  def from_config(cls, config, custom_objects=None):
    """Constructs an instance of this model from input config."""
    return cls(**config)
def build_example_model(input_specs: tf.keras.layers.InputSpec,
                        model_config: example_cfg.ExampleModel,
                        **kwargs) -> tf.keras.Model:
  """Builds and returns the example model.

  This function is the main entry point to build a model. Commonly, it builds
  a model by building a backbone, decoder and head. An example of building a
  classification model is at
  third_party/tensorflow_models/official/vision/beta/modeling/backbones/resnet.py.
  However, it is not mandatory for all models to have these three pieces
  exactly. Depending on the task, a model can be as simple as the example
  model here or more complex, such as a multi-head architecture.

  Args:
    input_specs: The specs of the input layer that defines input size.
    model_config: The config containing parameters to build a model.
    **kwargs: Additional keyword arguments to be passed.

  Returns:
    A tf.keras.Model object.
  """
  model = ExampleModel(
      num_classes=model_config.num_classes, input_specs=input_specs, **kwargs)
  return model
official/vision/beta/projects/example/example_task.py
0 → 100644
View file @
5ffcc5b6
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""An example task definition for image classification."""
from
typing
import
Any
,
List
,
Optional
,
Tuple
,
Sequence
,
Mapping
import
tensorflow
as
tf
from
official.common
import
dataset_fn
from
official.core
import
base_task
from
official.core
import
task_factory
from
official.modeling
import
tf_utils
from
official.vision.beta.dataloaders
import
input_reader_factory
from
official.vision.beta.projects.example
import
example_config
as
exp_cfg
from
official.vision.beta.projects.example
import
example_input
from
official.vision.beta.projects.example
import
example_model
@task_factory.register_task_cls(exp_cfg.ExampleTask)
class ExampleTask(base_task.Task):
  """Class of an example task.

  A task is a subclass of base_task.Task that defines model, input, loss,
  metric and one training and evaluation step, etc.
  """

  def build_model(self) -> tf.keras.Model:
    """Builds a model."""
    # Prepend a batch dimension (None) to the per-example input size from the
    # model config.
    input_specs = tf.keras.layers.InputSpec(
        shape=[None] + self.task_config.model.input_size)

    model = example_model.build_example_model(
        input_specs=input_specs, model_config=self.task_config.model)
    return model

  def build_inputs(
      self,
      params: exp_cfg.ExampleDataConfig,
      input_context: Optional[tf.distribute.InputContext] = None
  ) -> tf.data.Dataset:
    """Builds input.

    The input from this function is a tf.data.Dataset that has gone through
    pre-processing steps, such as augmentation, batching, shuffling, etc.

    Args:
      params: The experiment config.
      input_context: An optional InputContext used by input reader.

    Returns:
      A tf.data.Dataset object.
    """
    num_classes = self.task_config.model.num_classes
    input_size = self.task_config.model.input_size

    decoder = example_input.Decoder()
    # Only [height, width] is passed to the parser; the channel dimension is
    # fixed inside the parser itself.
    parser = example_input.Parser(
        output_size=input_size[:2], num_classes=num_classes)

    reader = input_reader_factory.input_reader_generator(
        params,
        dataset_fn=dataset_fn.pick_dataset_fn(params.file_type),
        decoder_fn=decoder.decode,
        parser_fn=parser.parse_fn(params.is_training))

    dataset = reader.read(input_context=input_context)

    return dataset

  def build_losses(self,
                   labels: tf.Tensor,
                   model_outputs: tf.Tensor,
                   aux_losses: Optional[Any] = None) -> tf.Tensor:
    """Builds losses for training and validation.

    Args:
      labels: Input groundtruth labels.
      model_outputs: Output of the model.
      aux_losses: The auxiliary loss tensors, i.e. `losses` in tf.keras.Model.

    Returns:
      The total loss tensor.
    """
    # Model outputs are logits, hence from_logits=True.
    total_loss = tf.keras.losses.sparse_categorical_crossentropy(
        labels, model_outputs, from_logits=True)
    # Reduce the per-example losses to a scalar; `safe_mean` presumably guards
    # the empty-input case — see tf_utils for the exact contract.
    total_loss = tf_utils.safe_mean(total_loss)
    if aux_losses:
      total_loss += tf.add_n(aux_losses)
    return total_loss

  def build_metrics(self,
                    training: bool = True) -> Sequence[tf.keras.metrics.Metric]:
    """Gets streaming metrics for training/validation.

    This function builds and returns a list of metrics to compute during
    training and validation. The list contains objects of subclasses of
    tf.keras.metrics.Metric. Training and validation can have different
    metrics.

    Args:
      training: Whether the metric is for training or not.

    Returns:
      A list of tf.keras.metrics.Metric objects.
    """
    # NOTE: the same metrics are currently used for both training and
    # validation; `training` is accepted for interface compatibility only.
    k = self.task_config.evaluation.top_k
    metrics = [
        tf.keras.metrics.SparseCategoricalAccuracy(name='accuracy'),
        tf.keras.metrics.SparseTopKCategoricalAccuracy(
            k=k, name='top_{}_accuracy'.format(k))
    ]
    return metrics

  def train_step(self,
                 inputs: Tuple[Any, Any],
                 model: tf.keras.Model,
                 optimizer: tf.keras.optimizers.Optimizer,
                 metrics: Optional[List[Any]] = None) -> Mapping[str, Any]:
    """Does forward and backward.

    This example assumes input is a tuple of (features, labels), which follows
    the output from data loader, i.e., Parser. The output from Parser is fed
    into train_step to perform one step forward and backward pass. Other data
    structure, such as dictionary, can also be used, as long as it is
    consistent between output from Parser and input used here.

    Args:
      inputs: A tuple of input tensors of (features, labels).
      model: A tf.keras.Model instance.
      optimizer: The optimizer for this training step.
      metrics: A nested structure of metrics objects.

    Returns:
      A dictionary of logs.
    """
    features, labels = inputs
    num_replicas = tf.distribute.get_strategy().num_replicas_in_sync
    with tf.GradientTape() as tape:
      outputs = model(features, training=True)
      # Casting output layer as float32 is necessary when mixed_precision is
      # mixed_float16 or mixed_bfloat16 to ensure output is casted as float32.
      outputs = tf.nest.map_structure(
          lambda x: tf.cast(x, tf.float32), outputs)

      # Computes per-replica loss.
      loss = self.build_losses(
          model_outputs=outputs, labels=labels, aux_losses=model.losses)
      # Scales loss as the default gradients allreduce performs sum inside the
      # optimizer.
      scaled_loss = loss / num_replicas

      # For mixed_precision policy, when LossScaleOptimizer is used, loss is
      # scaled for numerical stability.
      if isinstance(optimizer, tf.keras.mixed_precision.LossScaleOptimizer):
        scaled_loss = optimizer.get_scaled_loss(scaled_loss)

    tvars = model.trainable_variables
    grads = tape.gradient(scaled_loss, tvars)
    # Scales back gradient before apply_gradients when LossScaleOptimizer is
    # used.
    if isinstance(optimizer, tf.keras.mixed_precision.LossScaleOptimizer):
      grads = optimizer.get_unscaled_gradients(grads)
    optimizer.apply_gradients(list(zip(grads, tvars)))

    # `self.loss` is the log key under which the (unscaled) loss is reported.
    logs = {self.loss: loss}
    if metrics:
      self.process_metrics(metrics, labels, outputs)
    return logs

  def validation_step(self,
                      inputs: Tuple[Any, Any],
                      model: tf.keras.Model,
                      metrics: Optional[List[Any]] = None) -> Mapping[str, Any]:
    """Runs validation step.

    Args:
      inputs: A tuple of input tensors of (features, labels).
      model: A tf.keras.Model instance.
      metrics: A nested structure of metrics objects.

    Returns:
      A dictionary of logs.
    """
    features, labels = inputs
    outputs = self.inference_step(features, model)
    # Cast outputs to float32 so loss/metrics are computed in full precision
    # under mixed-precision policies.
    outputs = tf.nest.map_structure(
        lambda x: tf.cast(x, tf.float32), outputs)
    loss = self.build_losses(
        model_outputs=outputs, labels=labels, aux_losses=model.losses)

    logs = {self.loss: loss}
    if metrics:
      self.process_metrics(metrics, labels, outputs)
    return logs

  def inference_step(self, inputs: tf.Tensor, model: tf.keras.Model) -> Any:
    """Performs the forward step. It is used in validation_step."""
    return model(inputs, training=False)
official/vision/beta/projects/example/registry_imports.py
0 → 100644
View file @
5ffcc5b6
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""All necessary imports for registration.
Custom models, tasks, configs, etc. need to be imported into the registry so
they can be picked up by the trainer. They can be included in this file so you
do not need to handle each file separately.
"""
# pylint: disable=unused-import
from
official.common
import
registry_imports
from
official.vision.beta.projects.example
import
example_config
from
official.vision.beta.projects.example
import
example_input
from
official.vision.beta.projects.example
import
example_model
from
official.vision.beta.projects.example
import
example_task
official/vision/beta/projects/example/train.py
0 → 100644
View file @
5ffcc5b6
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""TensorFlow Model Garden Vision trainer.
All custom registry are imported from registry_imports. Here we use default
trainer so we directly call train.main. If you need to customize the trainer,
branch from `official/vision/beta/train.py` and make changes.
"""
from
absl
import
app
from
official.common
import
flags
as
tfm_flags
from
official.vision.beta
import
train
from
official.vision.beta.projects.example
import
registry_imports
# pylint: disable=unused-import
if __name__ == '__main__':
  # Register the standard TF Model Garden command-line flags, then hand
  # control to the default vision trainer entry point.
  tfm_flags.define_flags()
  app.run(train.main)
official/vision/beta/projects/movinet/configs/movinet.py
View file @
5ffcc5b6
...
...
@@ -44,6 +44,13 @@ class Movinet(hyperparams.Config):
# 2plus1d: (2+1)D convolution with Conv2D (2D reshaping)
# 3d_2plus1d: (2+1)D convolution with Conv3D (no 2D reshaping)
conv_type
:
str
=
'3d'
# Choose from ['3d', '2d', '2plus3d']
# 3d: default 3D global average pooling.
# 2d: 2D global average pooling.
# 2plus3d: concatenation of 2D and 3D global average pooling.
se_type
:
str
=
'3d'
activation
:
str
=
'swish'
gating_activation
:
str
=
'sigmoid'
stochastic_depth_drop_rate
:
float
=
0.2
use_external_states
:
bool
=
False
...
...
Prev
1
2
3
4
5
6
7
8
…
10
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment