Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
ResNet50_tensorflow
Commits
bb124157
Commit
bb124157
authored
Mar 10, 2021
by
stephenwu
Browse files
Merge branch 'master' of
https://github.com/tensorflow/models
into RTESuperGLUE
parents
2e9bb539
0edeb7f6
Changes
386
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
561 additions
and
485 deletions
+561
-485
official/utils/misc/distribution_utils.py
official/utils/misc/distribution_utils.py
+2
-2
official/utils/misc/keras_utils.py
official/utils/misc/keras_utils.py
+2
-2
official/utils/misc/model_helpers.py
official/utils/misc/model_helpers.py
+2
-6
official/utils/misc/model_helpers_test.py
official/utils/misc/model_helpers_test.py
+2
-6
official/utils/testing/__init__.py
official/utils/testing/__init__.py
+14
-0
official/utils/testing/integration.py
official/utils/testing/integration.py
+2
-6
official/utils/testing/mock_task.py
official/utils/testing/mock_task.py
+3
-4
official/vision/beta/MODEL_GARDEN.md
official/vision/beta/MODEL_GARDEN.md
+3
-3
official/vision/beta/dataloaders/video_input.py
official/vision/beta/dataloaders/video_input.py
+31
-33
official/vision/beta/modeling/__init__.py
official/vision/beta/modeling/__init__.py
+19
-0
official/vision/beta/modeling/backbones/efficientnet.py
official/vision/beta/modeling/backbones/efficientnet.py
+30
-24
official/vision/beta/modeling/backbones/factory.py
official/vision/beta/modeling/backbones/factory.py
+7
-6
official/vision/beta/modeling/backbones/mobilenet.py
official/vision/beta/modeling/backbones/mobilenet.py
+97
-95
official/vision/beta/modeling/backbones/resnet.py
official/vision/beta/modeling/backbones/resnet.py
+43
-40
official/vision/beta/modeling/backbones/resnet_3d.py
official/vision/beta/modeling/backbones/resnet_3d.py
+34
-31
official/vision/beta/modeling/backbones/resnet_deeplab.py
official/vision/beta/modeling/backbones/resnet_deeplab.py
+38
-35
official/vision/beta/modeling/backbones/revnet.py
official/vision/beta/modeling/backbones/revnet.py
+31
-27
official/vision/beta/modeling/backbones/spinenet.py
official/vision/beta/modeling/backbones/spinenet.py
+42
-13
official/vision/beta/modeling/layers/box_sampler.py
official/vision/beta/modeling/layers/box_sampler.py
+15
-15
official/vision/beta/modeling/layers/detection_generator.py
official/vision/beta/modeling/layers/detection_generator.py
+144
-137
No files found.
official/utils/misc/distribution_utils.py
View file @
bb124157
# Copyright 201
8
The TensorFlow Authors. All Rights Reserved.
# Copyright 20
2
1 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
...
...
@@ -11,7 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Helper functions for running models in a distributed setting."""
# pylint: disable=wildcard-import
from
official.common.distribute_utils
import
*
official/utils/misc/keras_utils.py
View file @
bb124157
# Copyright 201
8
The TensorFlow Authors. All Rights Reserved.
# Copyright 20
2
1 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
...
...
@@ -11,7 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Helper functions for the Keras implementations of models."""
import
multiprocessing
...
...
official/utils/misc/model_helpers.py
View file @
bb124157
# Copyright 201
8
The TensorFlow Authors. All Rights Reserved.
# Copyright 20
2
1 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
...
...
@@ -11,12 +11,8 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Miscellaneous functions that can be called by models."""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
"""Miscellaneous functions that can be called by models."""
import
numbers
...
...
official/utils/misc/model_helpers_test.py
View file @
bb124157
# Copyright 201
8
The TensorFlow Authors. All Rights Reserved.
# Copyright 20
2
1 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
...
...
@@ -11,12 +11,8 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for Model Helper functions."""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
"""Tests for Model Helper functions."""
import
tensorflow
as
tf
# pylint: disable=g-bad-import-order
...
...
official/utils/testing/__init__.py
View file @
bb124157
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
official/utils/testing/integration.py
View file @
bb124157
# Copyright 201
8
The TensorFlow Authors. All Rights Reserved.
# Copyright 20
2
1 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
...
...
@@ -11,12 +11,8 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Helper code to run complete models from within python."""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
"""Helper code to run complete models from within python."""
import
os
import
shutil
...
...
official/utils/testing/mock_task.py
View file @
bb124157
# Lint as: python3
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
...
...
@@ -12,7 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Mock task for testing."""
import
dataclasses
...
...
@@ -89,7 +88,7 @@ class MockTask(base_task.Task):
np
.
concatenate
([
np
.
expand_dims
(
v
.
numpy
(),
axis
=
0
)
for
v
in
value
]))
return
state
def
reduce_aggregated_logs
(
self
,
aggregated_logs
):
def
reduce_aggregated_logs
(
self
,
aggregated_logs
,
global_step
=
None
):
for
k
,
v
in
aggregated_logs
.
items
():
aggregated_logs
[
k
]
=
np
.
sum
(
np
.
stack
(
v
,
axis
=
0
))
return
aggregated_logs
...
...
official/vision/beta/MODEL_GARDEN.md
View file @
bb124157
...
...
@@ -65,9 +65,9 @@ ResNet-RS-350 | 320x320 | 164.3 | 84.2 | 96.9 | [config](https://github.c
| backbone | resolution | epochs | FLOPs (B) | params (M) | box AP | download |
| ------------ |:-------------:| ---------:|-----------:|--------:|---------:|-----------:|
| SpineNet-49 | 640x640 | 500 | 85.4| 28.5 | 44.2 |
[
config
](
https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/retinanet/coco_spinenet49_tpu.yaml
)
|
| SpineNet-96 | 1024x1024 | 500 | 265.4 | 43.0 | 48.5 |
[
config
](
https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/retinanet/coco_spinenet96_tpu.yaml
)
|
| SpineNet-143 | 1280x1280 | 500 | 524.0 | 67.0 | 50.0 |
[
config
](
https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/retinanet/coco_spinenet143_tpu.yaml
)
|
| SpineNet-49 | 640x640 | 500 | 85.4| 28.5 | 44.2 |
[
config
](
https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/retinanet/coco_spinenet49_tpu.yaml
)
[
TB.dev
]
(https://tensorboard.dev/experiment/n2UN83TkTdyKZn3slCWulg/#scalars&_smoothingWeight=0)
|
| SpineNet-96 | 1024x1024 | 500 | 265.4 | 43.0 | 48.5 |
[
config
](
https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/retinanet/coco_spinenet96_tpu.yaml
)
[
TB.dev
]
(https://tensorboard.dev/experiment/n2UN83TkTdyKZn3slCWulg/#scalars&_smoothingWeight=0)
|
| SpineNet-143 | 1280x1280 | 500 | 524.0 | 67.0 | 50.0 |
[
config
](
https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/retinanet/coco_spinenet143_tpu.yaml
)
[
TB.dev
]
(https://tensorboard.dev/experiment/n2UN83TkTdyKZn3slCWulg/#scalars&_smoothingWeight=0)
|
### Instance Segmentation Baselines
...
...
official/vision/beta/dataloaders/video_input.py
View file @
bb124157
...
...
@@ -29,7 +29,7 @@ IMAGE_KEY = 'image/encoded'
LABEL_KEY
=
'clip/label/index'
def
_
process_image
(
image
:
tf
.
Tensor
,
def
process_image
(
image
:
tf
.
Tensor
,
is_training
:
bool
=
True
,
num_frames
:
int
=
32
,
stride
:
int
=
1
,
...
...
@@ -112,7 +112,7 @@ def _process_image(image: tf.Tensor,
return
preprocess_ops_3d
.
normalize_image
(
image
,
zero_centering_image
)
def
_
postprocess_image
(
image
:
tf
.
Tensor
,
def
postprocess_image
(
image
:
tf
.
Tensor
,
is_training
:
bool
=
True
,
num_frames
:
int
=
32
,
num_test_clips
:
int
=
1
,
...
...
@@ -147,7 +147,7 @@ def _postprocess_image(image: tf.Tensor,
return
image
def
_
process_label
(
label
:
tf
.
Tensor
,
def
process_label
(
label
:
tf
.
Tensor
,
one_hot_label
:
bool
=
True
,
num_classes
:
Optional
[
int
]
=
None
)
->
tf
.
Tensor
:
"""Processes label Tensor."""
...
...
@@ -175,15 +175,13 @@ class Decoder(decoder.Decoder):
"""A tf.Example decoder for classification task."""
def
__init__
(
self
,
image_key
:
str
=
IMAGE_KEY
,
label_key
:
str
=
LABEL_KEY
):
self
.
_image_key
=
image_key
self
.
_label_key
=
label_key
self
.
_context_description
=
{
# One integer stored in context.
self
.
_
label_key
:
tf
.
io
.
VarLenFeature
(
tf
.
int64
),
label_key
:
tf
.
io
.
VarLenFeature
(
tf
.
int64
),
}
self
.
_sequence_description
=
{
# Each image is a string encoding JPEG.
self
.
_
image_key
:
tf
.
io
.
FixedLenSequenceFeature
((),
tf
.
string
),
image_key
:
tf
.
io
.
FixedLenSequenceFeature
((),
tf
.
string
),
}
def
add_feature
(
self
,
feature_name
:
str
,
...
...
@@ -245,7 +243,7 @@ class Parser(parser.Parser):
"""Parses data for training."""
# Process image and label.
image
=
decoded_tensors
[
self
.
_image_key
]
image
=
_
process_image
(
image
=
process_image
(
image
=
image
,
is_training
=
True
,
num_frames
=
self
.
_num_frames
,
...
...
@@ -261,7 +259,7 @@ class Parser(parser.Parser):
features
=
{
'image'
:
image
}
label
=
decoded_tensors
[
self
.
_label_key
]
label
=
_
process_label
(
label
,
self
.
_one_hot_label
,
self
.
_num_classes
)
label
=
process_label
(
label
,
self
.
_one_hot_label
,
self
.
_num_classes
)
if
self
.
_output_audio
:
audio
=
decoded_tensors
[
self
.
_audio_feature
]
...
...
@@ -279,7 +277,7 @@ class Parser(parser.Parser):
)
->
Tuple
[
Dict
[
str
,
tf
.
Tensor
],
tf
.
Tensor
]:
"""Parses data for evaluation."""
image
=
decoded_tensors
[
self
.
_image_key
]
image
=
_
process_image
(
image
=
process_image
(
image
=
image
,
is_training
=
False
,
num_frames
=
self
.
_num_frames
,
...
...
@@ -292,14 +290,14 @@ class Parser(parser.Parser):
features
=
{
'image'
:
image
}
label
=
decoded_tensors
[
self
.
_label_key
]
label
=
_
process_label
(
label
,
self
.
_one_hot_label
,
self
.
_num_classes
)
label
=
process_label
(
label
,
self
.
_one_hot_label
,
self
.
_num_classes
)
if
self
.
_output_audio
:
audio
=
decoded_tensors
[
self
.
_audio_feature
]
audio
=
tf
.
cast
(
audio
,
dtype
=
self
.
_dtype
)
audio
=
preprocess_ops_3d
.
sample_sequence
(
audio
,
20
,
random
=
False
,
stride
=
1
)
audio
=
tf
.
ensure_shape
(
audio
,
[
20
,
2048
]
)
audio
=
tf
.
ensure_shape
(
audio
,
self
.
_audio_shape
)
features
[
'audio'
]
=
audio
return
features
,
label
...
...
@@ -318,9 +316,9 @@ class PostBatchProcessor(object):
def
__call__
(
self
,
features
:
Dict
[
str
,
tf
.
Tensor
],
label
:
tf
.
Tensor
)
->
Tuple
[
Dict
[
str
,
tf
.
Tensor
],
tf
.
Tensor
]:
"""Parses a single tf.Example into image and label tensors."""
for
key
in
[
'image'
,
'audio'
]:
for
key
in
[
'image'
]:
if
key
in
features
:
features
[
key
]
=
_
postprocess_image
(
features
[
key
]
=
postprocess_image
(
image
=
features
[
key
],
is_training
=
self
.
_is_training
,
num_frames
=
self
.
_num_frames
,
...
...
official/vision/beta/modeling/__init__.py
View file @
bb124157
# Lint as: python3
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Modeling package definition."""
from
official.vision.beta.modeling
import
backbones
from
official.vision.beta.modeling
import
decoders
official/vision/beta/modeling/backbones/efficientnet.py
View file @
bb124157
...
...
@@ -51,14 +51,14 @@ SCALING_MAP = {
def
round_repeats
(
repeats
,
multiplier
,
skip
=
False
):
"""Round number of filters based on depth multiplier."""
"""R
eturns r
ound
ed
number of filters based on depth multiplier."""
if
skip
or
not
multiplier
:
return
repeats
return
int
(
math
.
ceil
(
multiplier
*
repeats
))
def
block_spec_decoder
(
specs
,
width_scale
,
depth_scale
):
"""Decode specs for a block."""
"""Decode
s and returns
specs for a block."""
decoded_specs
=
[]
for
s
in
specs
:
s
=
s
+
(
...
...
@@ -87,7 +87,13 @@ class BlockSpec(object):
@
tf
.
keras
.
utils
.
register_keras_serializable
(
package
=
'Vision'
)
class
EfficientNet
(
tf
.
keras
.
Model
):
"""Class to build EfficientNet family model."""
"""Creates an EfficientNet family model.
This implements the EfficientNet model from:
Mingxing Tan, Quoc V. Le.
EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks.
(https://arxiv.org/pdf/1905.11946)
"""
def
__init__
(
self
,
model_id
,
...
...
@@ -102,25 +108,25 @@ class EfficientNet(tf.keras.Model):
norm_momentum
=
0.99
,
norm_epsilon
=
0.001
,
**
kwargs
):
"""
EfficientNet initialization function
.
"""
Initializes an EfficientNet model
.
Args:
model_id: `str` model id of EfficientNet.
input_specs: `tf.keras.layers.InputSpec` specs of the input tensor.
se_ratio: `float` squeeze and excitation ratio for inverted bottleneck
blocks.
stochastic_depth_drop_rate: `float` drop rate for drop connect layer.
kernel_initializer: kernel_initializer for convolutional layers.
kernel_regularizer: tf.keras.regularizers.Regularizer object for Conv2D.
Default to None.
bias_regularizer: tf.keras.regularizers.Regularizer object for Conv2d.
model_id: A `str` of model ID of EfficientNet.
input_specs: A `tf.keras.layers.InputSpec` of the input tensor.
se_ratio: A `float` of squeeze and excitation ratio for inverted
bottleneck blocks.
stochastic_depth_drop_rate: A `float` of drop rate for drop connect layer.
kernel_initializer: A `str` for kernel initializer of convolutional
layers.
kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
Conv2D. Default to None.
bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D.
Default to None.
activation: `str` name of the activation function.
use_sync_bn: if True, use synchronized batch normalization.
norm_momentum: `float` normalization omentum for the moving average.
norm_epsilon: `float` small float added to variance to avoid dividing by
zero.
**kwargs: keyword arguments to be passed.
activation: A `str` of name of the activation function.
use_sync_bn: If True, use synchronized batch normalization.
norm_momentum: A `float` of normalization momentum for the moving average.
norm_epsilon: A `float` added to variance to avoid dividing by zero.
**kwargs: Additional keyword arguments to be passed.
"""
self
.
_model_id
=
model_id
self
.
_input_specs
=
input_specs
...
...
@@ -203,12 +209,12 @@ class EfficientNet(tf.keras.Model):
"""Creates one group of blocks for the EfficientNet model.
Args:
inputs:
`
Tensor` of size `[batch, channels, height, width]`.
specs: specifications for one inverted bottleneck block group.
name: `str`name for the block.
inputs:
A `tf.
Tensor` of size `[batch, channels, height, width]`.
specs:
The
specifications for one inverted bottleneck block group.
name:
A
`str`
name for the block.
Returns:
The output `Tensor` of the block layer.
The output `
tf.
Tensor` of the block layer.
"""
if
specs
.
block_fn
==
'mbconv'
:
block_fn
=
nn_blocks
.
InvertedBottleneckBlock
...
...
@@ -282,7 +288,7 @@ def build_efficientnet(
input_specs
:
tf
.
keras
.
layers
.
InputSpec
,
model_config
,
l2_regularizer
:
tf
.
keras
.
regularizers
.
Regularizer
=
None
)
->
tf
.
keras
.
Model
:
"""Builds
ResNet 3d
backbone from a config."""
"""Builds
EfficientNet
backbone from a config."""
backbone_type
=
model_config
.
backbone
.
type
backbone_cfg
=
model_config
.
backbone
.
get
()
norm_activation_config
=
model_config
.
norm_activation
...
...
official/vision/beta/modeling/backbones/factory.py
View file @
bb124157
...
...
@@ -70,10 +70,10 @@ def register_backbone_builder(key: str):
```
Args:
key:
the
key to look up the builder.
key:
A `str` of
key to look up the builder.
Returns:
A callable for us
e
as class decorator that registers the decorated class
A callable for us
ing
as class decorator that registers the decorated class
for creation from an instance of task_config_cls.
"""
return
registry
.
register
(
_REGISTERED_BACKBONE_CLS
,
key
)
...
...
@@ -85,12 +85,13 @@ def build_backbone(input_specs: tf.keras.layers.InputSpec,
"""Builds backbone from a config.
Args:
input_specs: tf.keras.layers.InputSpec.
model_config: a OneOfConfig. Model config.
l2_regularizer: tf.keras.regularizers.Regularizer instance. Default to None.
input_specs: A `tf.keras.layers.InputSpec` of input.
model_config: A `OneOfConfig` of model config.
l2_regularizer: A `tf.keras.regularizers.Regularizer` object. Default to
None.
Returns:
tf.keras.Model instance of the backbone.
A `
tf.keras.Model
`
instance of the backbone.
"""
backbone_builder
=
registry
.
lookup
(
_REGISTERED_BACKBONE_CLS
,
model_config
.
backbone
.
type
)
...
...
official/vision/beta/modeling/backbones/mobilenet.py
View file @
bb124157
...
...
@@ -12,9 +12,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains definitions of Mobile
n
et Networks."""
"""Contains definitions of Mobile
N
et Networks."""
from
typing
import
Text
,
Optional
,
Dict
,
Any
,
Tuple
from
typing
import
Optional
,
Dict
,
Any
,
Tuple
# Import libraries
import
dataclasses
...
...
@@ -41,8 +41,8 @@ class Conv2DBNBlock(tf.keras.layers.Layer):
kernel_size
:
int
=
3
,
strides
:
int
=
1
,
use_bias
:
bool
=
False
,
activation
:
Text
=
'relu6'
,
kernel_initializer
:
Text
=
'VarianceScaling'
,
activation
:
str
=
'relu6'
,
kernel_initializer
:
str
=
'VarianceScaling'
,
kernel_regularizer
:
Optional
[
tf
.
keras
.
regularizers
.
Regularizer
]
=
None
,
bias_regularizer
:
Optional
[
tf
.
keras
.
regularizers
.
Regularizer
]
=
None
,
use_normalization
:
bool
=
True
,
...
...
@@ -53,25 +53,25 @@ class Conv2DBNBlock(tf.keras.layers.Layer):
"""A convolution block with batch normalization.
Args:
filters: `int` number of filters for the first two convolutions. Note that
the third and final convolution will use 4 times as many filters.
kernel_size: `int` an integer specifying the height and width of the
2D convolution window.
strides: `int` block stride. If greater than 1, this block will ultimately
downsample the input.
use_bias: if True, use biase in the convolution layer.
activation: `str` name of the activation function.
kernel_initializer: kernel_initializer for convolutional layers.
kernel_regularizer: tf.keras.regularizers.Regularizer object for Conv2D.
filters: An `int` number of filters for the first two convolutions. Note
that the third and final convolution will use 4 times as many filters.
kernel_size: An `int` specifying the height and width of the 2D
convolution window.
strides: An `int` of block stride. If greater than 1, this block will
ultimately downsample the input.
use_bias: If True, use bias in the convolution layer.
activation: A `str` name of the activation function.
kernel_initializer: A `str` for kernel initializer of convolutional
layers.
kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
Conv2D. Default to None.
bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D.
Default to None.
bias_regularizer: tf.keras.regularizers.Regularizer object for Conv2d.
Default to None.
use_normalization: if True, use batch normalization.
use_sync_bn: if True, use synchronized batch normalization.
norm_momentum: `float` normalization momentum for the moving average.
norm_epsilon: `float` small float added to variance to avoid dividing by
zero.
**kwargs: keyword arguments to be passed.
use_normalization: If True, use batch normalization.
use_sync_bn: If True, use synchronized batch normalization.
norm_momentum: A `float` of normalization momentum for the moving average.
norm_epsilon: A `float` added to variance to avoid dividing by zero.
**kwargs: Additional keyword arguments to be passed.
"""
super
(
Conv2DBNBlock
,
self
).
__init__
(
**
kwargs
)
self
.
_filters
=
filters
...
...
@@ -375,13 +375,13 @@ SUPPORTED_SPECS_MAP = {
class
BlockSpec
(
hyperparams
.
Config
):
"""A container class that specifies the block configuration for MobileNet."""
block_fn
:
Text
=
'convbn'
block_fn
:
str
=
'convbn'
kernel_size
:
int
=
3
strides
:
int
=
1
filters
:
int
=
32
use_bias
:
bool
=
False
use_normalization
:
bool
=
True
activation
:
Text
=
'relu6'
activation
:
str
=
'relu6'
# used for block type InvertedResConv
expand_ratio
:
Optional
[
float
]
=
6.
# used for block type InvertedResConv with SE
...
...
@@ -395,22 +395,22 @@ def block_spec_decoder(specs: Dict[Any, Any],
# set to 1 for mobilenetv1
divisible_by
:
int
=
8
,
finegrain_classification_mode
:
bool
=
True
):
"""Decode specs for a block.
"""Decode
s
specs for a block.
Args:
specs: `dict` specification of block specs of a mobilenet version.
filter_size_scale: `float` multiplier for the filter size
for all
convolution ops. The value must be greater than zero. Typical
usage will
be to set this value in (0, 1) to reduce the number of
parameters or
computation cost of the model.
divisible_by: `int` ensures all inner dimensions are divisible by
specs:
A
`dict` specification of block specs of a mobilenet version.
filter_size_scale:
A
`float` multiplier for the filter size
for all
convolution ops. The value must be greater than zero. Typical
usage will
be to set this value in (0, 1) to reduce the number of
parameters or
computation cost of the model.
divisible_by:
An
`int`
that
ensures all inner dimensions are divisible by
this number.
finegrain_classification_mode:
i
f True, the model
will keep the last layer
large even for small multipliers
. F
ollowing
https://arxiv.org/abs/1801.04381
finegrain_classification_mode:
I
f True, the model
will keep the last layer
large even for small multipliers
, f
ollowing
https://arxiv.org/abs/1801.04381
.
Returns:
List[
BlockSpec
]
` defines structure of the base network.
A list of `
BlockSpec`
that
defines structure of the base network.
"""
spec_name
=
specs
[
'spec_name'
]
...
...
@@ -449,17 +449,18 @@ def block_spec_decoder(specs: Dict[Any, Any],
@
tf
.
keras
.
utils
.
register_keras_serializable
(
package
=
'Vision'
)
class
MobileNet
(
tf
.
keras
.
Model
):
"""C
lass to build
MobileNet family model."""
"""C
reates a
MobileNet family model."""
def
__init__
(
self
,
model_id
:
Text
=
'MobileNetV2'
,
def
__init__
(
self
,
model_id
:
str
=
'MobileNetV2'
,
filter_size_scale
:
float
=
1.0
,
input_specs
:
layers
.
InputSpec
=
layers
.
InputSpec
(
shape
=
[
None
,
None
,
None
,
3
]),
# The followings are for hyper-parameter tuning
norm_momentum
:
float
=
0.99
,
norm_epsilon
:
float
=
0.001
,
kernel_initializer
:
Text
=
'VarianceScaling'
,
kernel_initializer
:
str
=
'VarianceScaling'
,
kernel_regularizer
:
Optional
[
regularizers
.
Regularizer
]
=
None
,
bias_regularizer
:
Optional
[
regularizers
.
Regularizer
]
=
None
,
# The followings should be kept the same most of the times
...
...
@@ -473,42 +474,43 @@ class MobileNet(tf.keras.Model):
# finegrain is not used in MobileNetV1
finegrain_classification_mode
:
bool
=
True
,
**
kwargs
):
"""
MobileNet initializer
.
"""
Initializes a MobileNet model
.
Args:
model_id: `str`
version
of MobileNet. The supported values are
'
MobileNetV1
'
,
'
MobileNetV2
'
,
'
MobileNetV3Large
'
,
'
MobileNetV3Small
'
,
and
'
MobileNetV3EdgeTPU
'
.
filter_size_scale: `float` multiplier for the filters (number of
channels)
for all convolution ops. The value must be greater than zero.
Typical
usage will be to set this value in (0, 1) to reduce the number
of
parameters or computation cost of the model.
input_specs: `tf.keras.layers.InputSpec` specs of the input tensor.
norm_momentum: `float` normalization omentum for the moving average.
norm_epsilon: `float`
small float
added to variance to avoid dividing by
zero.
kernel_initializer: `str` kernel_initializer for convolutional
layers.
kernel_regularizer: tf.keras.regularizers.Regularizer object for
Conv2D.
Default to None.
bias_regularizer: tf.keras.regularizers.Regularizer object for Conv2
d
.
model_id:
A
`str` of MobileNet
version
. The supported values are
`
MobileNetV1
`
,
`
MobileNetV2
`
,
`
MobileNetV3Large
`
,
`
MobileNetV3Small
`
,
and
`
MobileNetV3EdgeTPU
`
.
filter_size_scale:
A
`float`
of
multiplier for the filters (number of
channels)
for all convolution ops. The value must be greater than zero.
Typical
usage will be to set this value in (0, 1) to reduce the number
of
parameters or computation cost of the model.
input_specs:
A
`tf.keras.layers.InputSpec`
of
specs of the input tensor.
norm_momentum:
A
`float`
of
normalization
m
omentum for the moving average.
norm_epsilon:
A
`float` added to variance to avoid dividing by
zero.
kernel_initializer: A `str` for kernel initializer of convolutional
layers.
kernel_regularizer:
A `
tf.keras.regularizers.Regularizer
`
object for
Conv2D.
Default to None.
bias_regularizer:
A `
tf.keras.regularizers.Regularizer
`
object for Conv2
D
.
Default to None.
output_stride: `int` specifies the requested ratio of input to output
spatial resolution. If not None, then we invoke atrous convolution
if necessary to prevent the network from reducing the spatial resolution
of activation maps. Allowed values are 8 (accurate fully convolutional
mode), 16 (fast fully convolutional mode), 32 (classification mode).
min_depth: `int` minimum depth (number of channels) for all conv ops.
Enforced when filter_size_scale < 1, and not an active constraint when
filter_size_scale >= 1.
divisible_by: `int` ensures all inner dimensions are divisible by
output_stride: An `int` that specifies the requested ratio of input to
output spatial resolution. If not None, then we invoke atrous
convolution if necessary to prevent the network from reducing the
spatial resolution of activation maps. Allowed values are 8 (accurate
fully convolutional mode), 16 (fast fully convolutional mode), 32
(classification mode).
min_depth: An `int` of minimum depth (number of channels) for all
convolution ops. Enforced when filter_size_scale < 1, and not an active
constraint when filter_size_scale >= 1.
divisible_by: An `int` that ensures all inner dimensions are divisible by
this number.
stochastic_depth_drop_rate: `float` drop rate for drop connect layer.
regularize_depthwise:
i
f Ture, apply regularization on depthwise.
use_sync_bn:
i
f True, use synchronized batch normalization.
finegrain_classification_mode:
i
f True, the model
will keep the last layer
large even for small multipliers
. F
ollowing
https://arxiv.org/abs/1801.04381
**kwargs: keyword arguments to be passed.
stochastic_depth_drop_rate:
A
`float`
of
drop rate for drop connect layer.
regularize_depthwise:
I
f Ture, apply regularization on depthwise.
use_sync_bn:
I
f True, use synchronized batch normalization.
finegrain_classification_mode:
I
f True, the model
will keep the last layer
large even for small multipliers
, f
ollowing
https://arxiv.org/abs/1801.04381
.
**kwargs:
Additional
keyword arguments to be passed.
"""
if
model_id
not
in
SUPPORTED_SPECS_MAP
:
raise
ValueError
(
'The MobileNet version {} '
...
...
@@ -567,10 +569,10 @@ class MobileNet(tf.keras.Model):
def
_mobilenet_base
(
self
,
inputs
:
tf
.
Tensor
)
->
Tuple
[
tf
.
Tensor
,
Dict
[
int
,
tf
.
Tensor
]]:
"""Build the base MobileNet architecture.
"""Build
s
the base MobileNet architecture.
Args:
inputs:
Input t
ensor of shape [batch_size, height, width, channels].
inputs:
A `tf.T
ensor
`
of shape
`
[batch_size, height, width, channels]
`
.
Returns:
A tuple of output Tensor and dictionary that collects endpoints.
...
...
@@ -725,7 +727,7 @@ def build_mobilenet(
input_specs
:
tf
.
keras
.
layers
.
InputSpec
,
model_config
,
l2_regularizer
:
tf
.
keras
.
regularizers
.
Regularizer
=
None
)
->
tf
.
keras
.
Model
:
"""Builds MobileNet
3d
backbone from a config."""
"""Builds MobileNet backbone from a config."""
backbone_type
=
model_config
.
backbone
.
type
backbone_cfg
=
model_config
.
backbone
.
get
()
norm_activation_config
=
model_config
.
norm_activation
...
...
official/vision/beta/modeling/backbones/resnet.py
View file @
bb124157
...
...
@@ -12,12 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains definitions of Residual Networks.
Residual networks (ResNets) were proposed in:
[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
Deep Residual Learning for Image Recognition. arXiv:1512.03385
"""
"""Contains definitions of Residual Networks."""
# Import libraries
import
tensorflow
as
tf
...
...
@@ -92,7 +87,13 @@ RESNET_SPECS = {
@
tf
.
keras
.
utils
.
register_keras_serializable
(
package
=
'Vision'
)
class
ResNet
(
tf
.
keras
.
Model
):
"""Class to build ResNet family model."""
"""Creates a ResNet family model.
This implements the Deep Residual Network from:
Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun.
Deep Residual Learning for Image Recognition.
(https://arxiv.org/pdf/1512.03385)
"""
def
__init__
(
self
,
model_id
,
...
...
@@ -111,32 +112,31 @@ class ResNet(tf.keras.Model):
kernel_regularizer
=
None
,
bias_regularizer
=
None
,
**
kwargs
):
"""
ResNet initialization function
.
"""
Initializes a ResNet model
.
Args:
model_id: `int` depth of ResNet backbone model.
input_specs: `tf.keras.layers.InputSpec` specs of the input tensor.
depth_multiplier: `float` a depth multiplier to uniformaly scale up all
layers in channel size in ResNet.
stem_type: `str` stem type of ResNet. Default to `v0`. If set to `v1`,
use ResNet-D type stem (https://arxiv.org/abs/1812.01187).
resnetd_shortcut: `bool` whether to use ResNet-D shortcut in downsampling
blocks.
replace_stem_max_pool: `bool` if True, replace the max pool in stem with
a stride-2 conv,
se_ratio: `float` or None. Ratio of the Squeeze-and-Excitation layer.
init_stochastic_depth_rate: `float` initial stochastic depth rate.
activation: `str` name of the activation function.
use_sync_bn: if True, use synchronized batch normalization.
norm_momentum: `float` normalization omentum for the moving average.
norm_epsilon: `float` small float added to variance to avoid dividing by
zero.
kernel_initializer: kernel_initializer for convolutional layers.
kernel_regularizer: tf.keras.regularizers.Regularizer object for Conv2D.
Default to None.
bias_regularizer: tf.keras.regularizers.Regularizer object for Conv2d.
model_id: An `int` of the depth of ResNet backbone model.
input_specs: A `tf.keras.layers.InputSpec` of the input tensor.
depth_multiplier: A `float` of the depth multiplier to uniformaly scale up
all layers in channel size in ResNet.
stem_type: A `str` of stem type of ResNet. Default to `v0`. If set to
`v1`, use ResNet-D type stem (https://arxiv.org/abs/1812.01187).
resnetd_shortcut: A `bool` of whether to use ResNet-D shortcut in
downsampling blocks.
replace_stem_max_pool: A `bool` of whether to replace the max pool in stem
with a stride-2 conv,
se_ratio: A `float` or None. Ratio of the Squeeze-and-Excitation layer.
init_stochastic_depth_rate: A `float` of initial stochastic depth rate.
activation: A `str` name of the activation function.
use_sync_bn: If True, use synchronized batch normalization.
norm_momentum: A `float` of normalization momentum for the moving average.
norm_epsilon: A small `float` added to variance to avoid dividing by zero.
kernel_initializer: A str for kernel initializer of convolutional layers.
kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
Conv2D. Default to None.
bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D.
Default to None.
**kwargs: keyword arguments to be passed.
**kwargs:
Additional
keyword arguments to be passed.
"""
self
.
_model_id
=
model_id
self
.
_input_specs
=
input_specs
...
...
@@ -279,17 +279,20 @@ class ResNet(tf.keras.Model):
"""Creates one group of blocks for the ResNet model.
Args:
inputs: `Tensor` of size `[batch, channels, height, width]`.
filters: `int` number of filters for the first convolution of the layer.
strides: `int` stride to use for the first convolution of the layer. If
greater than 1, this layer will downsample the input.
block_fn: Either `nn_blocks.ResidualBlock` or `nn_blocks.BottleneckBlock`.
block_repeats: `int` number of blocks contained in the layer.
stochastic_depth_drop_rate: `float` drop rate of the current block group.
name: `str`name for the block.
inputs: A `tf.Tensor` of size `[batch, channels, height, width]`.
filters: An `int` number of filters for the first convolution of the
layer.
strides: An `int` stride to use for the first convolution of the layer.
If greater than 1, this layer will downsample the input.
block_fn: The type of block group. Either `nn_blocks.ResidualBlock` or
`nn_blocks.BottleneckBlock`.
block_repeats: An `int` number of blocks contained in the layer.
stochastic_depth_drop_rate: A `float` of drop rate of the current block
group.
name: A `str` name for the block.
Returns:
The output `Tensor` of the block layer.
The output `
tf.
Tensor` of the block layer.
"""
x
=
block_fn
(
filters
=
filters
,
...
...
official/vision/beta/modeling/backbones/resnet_3d.py
View file @
bb124157
...
...
@@ -41,7 +41,7 @@ RESNET_SPECS = {
@
tf
.
keras
.
utils
.
register_keras_serializable
(
package
=
'Vision'
)
class
ResNet3D
(
tf
.
keras
.
Model
):
"""C
lass to build
3D ResNet family model."""
"""C
reates a
3D ResNet family model."""
def
__init__
(
self
,
model_id
:
int
,
...
...
@@ -60,32 +60,33 @@ class ResNet3D(tf.keras.Model):
kernel_regularizer
=
None
,
bias_regularizer
=
None
,
**
kwargs
):
"""
ResNet3D initialization function
.
"""
Initializes a 3D ResNet model
.
Args:
model_id: `int` depth of ResNet backbone model.
temporal_strides:
a
list of integers that specifies the temporal strides
model_id:
An
`int`
of
depth of ResNet backbone model.
temporal_strides:
A
list of integers that specifies the temporal strides
for all 3d blocks.
temporal_kernel_sizes:
a
list of tuples that specifies the temporal kernel
temporal_kernel_sizes:
A
list of tuples that specifies the temporal kernel
sizes for all 3d blocks in different block groups.
use_self_gating:
a
list of booleans to specify applying self-gating module
use_self_gating:
A
list of booleans to specify applying self-gating module
or not in each block group. If None, self-gating is not applied.
input_specs: `tf.keras.layers.InputSpec` specs of the input tensor.
stem_conv_temporal_kernel_size: `int` temporal kernel size for the first
conv layer.
stem_conv_temporal_stride: `int` temporal stride for the first conv layer.
stem_pool_temporal_stride: `int` temporal stride for the first pool layer.
activation: `str` name of the activation function.
use_sync_bn: if True, use synchronized batch normalization.
norm_momentum: `float` normalization omentum for the moving average.
norm_epsilon: `float` small float added to variance to avoid dividing by
zero.
kernel_initializer: kernel_initializer for convolutional layers.
kernel_regularizer: tf.keras.regularizers.Regularizer object for Conv2D.
input_specs: A `tf.keras.layers.InputSpec` of the input tensor.
stem_conv_temporal_kernel_size: An `int` of temporal kernel size for the
first conv layer.
stem_conv_temporal_stride: An `int` of temporal stride for the first conv
layer.
stem_pool_temporal_stride: An `int` of temporal stride for the first pool
layer.
activation: A `str` of name of the activation function.
use_sync_bn: If True, use synchronized batch normalization.
norm_momentum: A `float` of normalization momentum for the moving average.
norm_epsilon: A `float` added to variance to avoid dividing by zero.
kernel_initializer: A str for kernel initializer of convolutional layers.
kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
Conv2D. Default to None.
bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D.
Default to None.
bias_regularizer: tf.keras.regularizers.Regularizer object for Conv2d.
Default to None.
**kwargs: keyword arguments to be passed.
**kwargs: Additional keyword arguments to be passed.
"""
self
.
_model_id
=
model_id
self
.
_temporal_strides
=
temporal_strides
...
...
@@ -181,21 +182,23 @@ class ResNet3D(tf.keras.Model):
"""Creates one group of blocks for the ResNet3D model.
Args:
inputs: `Tensor` of size `[batch, channels, height, width]`.
filters: `int` number of filters for the first convolution of the layer.
temporal_kernel_sizes: a tuple that specifies the temporal kernel sizes
inputs: A `tf.Tensor` of size `[batch, channels, height, width]`.
filters: An `int` of number of filters for the first convolution of the
layer.
temporal_kernel_sizes: A tuple that specifies the temporal kernel sizes
for each block in the current group.
temporal_strides: `int` temporal strides for the first convolution
in this
group.
spatial_strides: `int` stride to use for the first convolution of the
temporal_strides:
An
`int`
of
temporal strides for the first convolution
in this
group.
spatial_strides:
An
`int` stride to use for the first convolution of the
layer. If greater than 1, this layer will downsample the input.
block_fn: Either `nn_blocks.ResidualBlock` or `nn_blocks.BottleneckBlock`.
block_repeats: `int` number of blocks contained in the layer.
use_self_gating: `bool` apply self-gating module or not.
name: `str`name for the block.
block_repeats: An `int` of number of blocks contained in the layer.
use_self_gating: A `bool` that specifies whether to apply self-gating
module or not.
name: A `str` name for the block.
Returns:
The output `Tensor` of the block layer.
The output `
tf.
Tensor` of the block layer.
"""
if
len
(
temporal_kernel_sizes
)
!=
block_repeats
:
raise
ValueError
(
...
...
official/vision/beta/modeling/backbones/resnet_deeplab.py
View file @
bb124157
...
...
@@ -45,12 +45,12 @@ RESNET_SPECS = {
@
tf
.
keras
.
utils
.
register_keras_serializable
(
package
=
'Vision'
)
class
DilatedResNet
(
tf
.
keras
.
Model
):
"""C
lass to build
ResNet model with Deeplabv3 modifications.
"""C
reates a
ResNet model with Deeplabv3 modifications.
This backbone is suitable for semantic segmentation.
It was proposed in:
[1]
Liang-Chieh Chen, George Papandreou, Florian Schroff, Hartwig Adam
This backbone is suitable for semantic segmentation.
This implements
Liang-Chieh Chen, George Papandreou, Florian Schroff, Hartwig Adam
.
Rethinking Atrous Convolution for Semantic Image Segmentation.
ar
X
iv
:
1706.05587
(https://
ar
x
iv
.org/pdf/
1706.05587
)
"""
def
__init__
(
self
,
...
...
@@ -70,30 +70,31 @@ class DilatedResNet(tf.keras.Model):
kernel_regularizer
=
None
,
bias_regularizer
=
None
,
**
kwargs
):
"""
ResNet with DeepLab modification initialization func
tion.
"""
Initializes a ResNet model with DeepLab modifica
tion.
Args:
model_id: `int` depth of ResNet backbone model.
output_stride: `int` output stride, ratio of input to output resolution.
input_specs: `tf.keras.layers.InputSpec` specs of the input tensor.
stem_type: `standard` or `deeplab`, deeplab replaces 7x7 conv by 3 3x3
convs.
se_ratio: `float` or None. Ratio of the Squeeze-and-Excitation layer.
init_stochastic_depth_rate: `float` initial stochastic depth rate.
multigrid: `Tuple` of the same length as the number of blocks in the last
model_id: An `int` specifies depth of ResNet backbone model.
output_stride: An `int` of output stride, ratio of input to output
resolution.
input_specs: A `tf.keras.layers.InputSpec` of the input tensor.
stem_type: A `str` of stem type. Can be `standard` or `deeplab`. `deeplab`
replaces 7x7 conv by 3 3x3 convs.
se_ratio: A `float` or None. Ratio of the Squeeze-and-Excitation layer.
init_stochastic_depth_rate: A `float` of initial stochastic depth rate.
multigrid: A tuple of the same length as the number of blocks in the last
resnet stage.
last_stage_repeats: `int`, how many times last stage is repeated.
activation: `str` name of the activation function.
use_sync_bn: if True, use synchronized batch normalization.
norm_momentum: `float` normalization omentum for the moving average.
norm_epsilon: `float` small float added to variance to avoid dividing by
zero.
kernel_initializer: kernel_initializer for convolutional layers.
kernel_regularizer: tf.keras.regularizers.Regularizer object for Conv2D.
last_stage_repeats: An `int` that specifies how many times last stage is
repeated.
activation: A `str` name of the activation function.
use_sync_bn: If True, use synchronized batch normalization.
norm_momentum: A `float` of normalization momentum for the moving average.
norm_epsilon: A `float` added to variance to avoid dividing by zero.
kernel_initializer: A str for kernel initializer of convolutional layers.
kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
Conv2D. Default to None.
bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D.
Default to None.
bias_regularizer: tf.keras.regularizers.Regularizer object for Conv2d.
Default to None.
**kwargs: keyword arguments to be passed.
**kwargs: Additional keyword arguments to be passed.
"""
self
.
_model_id
=
model_id
self
.
_output_stride
=
output_stride
...
...
@@ -247,20 +248,22 @@ class DilatedResNet(tf.keras.Model):
Deeplab applies strides at the last block.
Args:
inputs: `Tensor` of size `[batch, channels, height, width]`.
filters: `int` number of filters for the first convolution of the layer.
strides: `int` stride to use for the first convolution of the layer. If
greater than 1, this layer will downsample the input.
dilation_rate: `int`, diluted convolution rates.
inputs: A `tf.Tensor` of size `[batch, channels, height, width]`.
filters: An `int` off number of filters for the first convolution of the
layer.
strides: An `int` of stride to use for the first convolution of the layer.
If greater than 1, this layer will downsample the input.
dilation_rate: An `int` of diluted convolution rates.
block_fn: Either `nn_blocks.ResidualBlock` or `nn_blocks.BottleneckBlock`.
block_repeats: `int` number of blocks contained in the layer.
stochastic_depth_drop_rate: `float` drop rate of the current block group.
multigrid: List of ints or None, if specified, dilation rates for each
block_repeats: An `int` of number of blocks contained in the layer.
stochastic_depth_drop_rate: A `float` of drop rate of the current block
group.
multigrid: A list of `int` or None. If specified, dilation rates for each
block is scaled up by its corresponding factor in the multigrid.
name: `str`name for the block.
name:
A
`str`
name for the block.
Returns:
The output `Tensor` of the block layer.
The output `
tf.
Tensor` of the block layer.
"""
if
multigrid
is
not
None
and
len
(
multigrid
)
!=
block_repeats
:
raise
ValueError
(
'multigrid has to match number of block_repeats'
)
...
...
official/vision/beta/modeling/backbones/revnet.py
View file @
bb124157
...
...
@@ -13,12 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# =============================================================================="""
"""RevNet Implementation.
[1] Aidan N. Gomez, Mengye Ren, Raquel Urtasun, Roger B. Grosse
The Reversible Residual Network: Backpropagation Without Storing Activations
https://arxiv.org/pdf/1707.04585.pdf
"""
"""Contains definitions of RevNet."""
from
typing
import
Any
,
Callable
,
Dict
,
Optional
# Import libraries
...
...
@@ -55,7 +50,14 @@ REVNET_SPECS = {
@
tf
.
keras
.
utils
.
register_keras_serializable
(
package
=
'Vision'
)
class
RevNet
(
tf
.
keras
.
Model
):
"""Reversible ResNet, RevNet implementation."""
"""Creates a Reversible ResNet (RevNet) family model.
This implements:
Aidan N. Gomez, Mengye Ren, Raquel Urtasun, Roger B. Grosse.
The Reversible Residual Network: Backpropagation Without Storing
Activations.
(https://arxiv.org/pdf/1707.04585.pdf)
"""
def
__init__
(
self
,
model_id
:
int
,
...
...
@@ -68,19 +70,19 @@ class RevNet(tf.keras.Model):
kernel_initializer
:
str
=
'VarianceScaling'
,
kernel_regularizer
:
tf
.
keras
.
regularizers
.
Regularizer
=
None
,
**
kwargs
):
"""
RevNet initialization function
.
"""
Initializes a RevNet model
.
Args:
model_id: `int` depth/id of ResNet backbone model.
input_specs: `tf.keras.layers.InputSpec`
specs
of the input tensor.
activation: `str` name of the activation function.
use_sync_bn:
`bool` i
f True, use synchronized batch normalization.
norm_momentum: `float` normalization omentum for the moving average.
norm_epsilon: `float`
small float
added to variance to avoid dividing by
zero
.
kernel_
initializer: `str` kernel_initializer for convolutional layers.
kernel_regularizer: `tf.keras.regularizers.Regularizer` for Conv2D
.
**kwargs:
a
dditional keyword arguments to be passed.
model_id:
An
`int`
of
depth/id of ResNet backbone model.
input_specs:
A
`tf.keras.layers.InputSpec` of the input tensor.
activation:
A
`str` name of the activation function.
use_sync_bn:
I
f True, use synchronized batch normalization.
norm_momentum:
A
`float`
of
normalization
m
omentum for the moving average.
norm_epsilon:
A
`float` added to variance to avoid dividing by
zero.
kernel_initializer: A str for kernel initializer of convolutional layers
.
kernel_
regularizer: A `tf.keras.regularizers.Regularizer` object for
Conv2D. Default to None
.
**kwargs:
A
dditional keyword arguments to be passed.
"""
self
.
_model_id
=
model_id
self
.
_input_specs
=
input_specs
...
...
@@ -148,19 +150,21 @@ class RevNet(tf.keras.Model):
"""Creates one reversible block for RevNet model.
Args:
inputs: `Tensor` of size `[batch, channels, height, width]`.
filters: `int` number of filters for the first convolution of the layer.
strides: `int` stride to use for the first convolution of the layer. If
inputs: A `tf.Tensor` of size `[batch, channels, height, width]`.
filters: An `int` number of filters for the first convolution of the
layer.
strides: An `int` stride to use for the first convolution of the layer. If
greater than 1, this block group will downsample the input.
inner_block_fn: Either `nn_blocks.ResidualInner` or
`nn_blocks.BottleneckResidualInner`.
block_repeats: `int` number of blocks contained in this block group.
batch_norm_first: `bool` whether to apply BatchNormalization and
activation layer before feeding into convolution layers.
name: `str`name for the block.
block_repeats: An `int` number of blocks contained in this block group.
batch_norm_first: A `bool` that specifies whether to apply
BatchNormalization and activation layer before feeding into convolution
layers.
name: A `str` name for the block.
Returns:
The output `Tensor` of the block layer.
The output `
tf.
Tensor` of the block layer.
"""
x
=
inputs
for
i
in
range
(
block_repeats
):
...
...
@@ -210,7 +214,7 @@ def build_revnet(
input_specs
:
tf
.
keras
.
layers
.
InputSpec
,
model_config
,
l2_regularizer
:
tf
.
keras
.
regularizers
.
Regularizer
=
None
)
->
tf
.
keras
.
Model
:
"""Builds Re
s
Net
3d
backbone from a config."""
"""Builds Re
v
Net backbone from a config."""
backbone_type
=
model_config
.
backbone
.
type
backbone_cfg
=
model_config
.
backbone
.
get
()
norm_activation_config
=
model_config
.
norm_activation
...
...
official/vision/beta/modeling/backbones/spinenet.py
View file @
bb124157
...
...
@@ -13,12 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Implementation of SpineNet model.
X. Du, T-Y. Lin, P. Jin, G. Ghiasi, M. Tan, Y. Cui, Q. V. Le, X. Song
SpineNet: Learning Scale-Permuted Backbone for Recognition and Localization
https://arxiv.org/abs/1912.05027
"""
"""Contains definitions of SpineNet Networks."""
import
math
# Import libraries
...
...
@@ -117,7 +112,14 @@ def build_block_specs(block_specs=None):
@
tf
.
keras
.
utils
.
register_keras_serializable
(
package
=
'Vision'
)
class
SpineNet
(
tf
.
keras
.
Model
):
"""Class to build SpineNet models."""
"""Creates a SpineNet family model.
This implements:
Xianzhi Du, Tsung-Yi Lin, Pengchong Jin, Golnaz Ghiasi, Mingxing Tan,
Yin Cui, Quoc V. Le, Xiaodan Song.
SpineNet: Learning Scale-Permuted Backbone for Recognition and Localization.
(https://arxiv.org/abs/1912.05027)
"""
def
__init__
(
self
,
input_specs
=
tf
.
keras
.
layers
.
InputSpec
(
shape
=
[
None
,
640
,
640
,
3
]),
...
...
@@ -137,7 +139,34 @@ class SpineNet(tf.keras.Model):
norm_momentum
=
0.99
,
norm_epsilon
=
0.001
,
**
kwargs
):
"""SpineNet model."""
"""Initializes a SpineNet model.
Args:
input_specs: A `tf.keras.layers.InputSpec` of the input tensor.
min_level: An `int` of min level for output mutiscale features.
max_level: An `int` of max level for output mutiscale features.
block_specs: The block specifications for the SpineNet model discovered by
NAS.
endpoints_num_filters: An `int` of feature dimension for the output
endpoints.
resample_alpha: A `float` of resampling factor in cross-scale connections.
block_repeats: An `int` of number of blocks contained in the layer.
filter_size_scale: A `float` of multiplier for the filters (number of
channels) for all convolution ops. The value must be greater than zero.
Typical usage will be to set this value in (0, 1) to reduce the number
of parameters or computation cost of the model.
init_stochastic_depth_rate: A `float` of initial stochastic depth rate.
kernel_initializer: A str for kernel initializer of convolutional layers.
kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
Conv2D. Default to None.
bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D.
Default to None.
activation: A `str` name of the activation function.
use_sync_bn: If True, use synchronized batch normalization.
norm_momentum: A `float` of normalization momentum for the moving average.
norm_epsilon: A small `float` added to variance to avoid dividing by zero.
**kwargs: Additional keyword arguments to be passed.
"""
self
.
_input_specs
=
input_specs
self
.
_min_level
=
min_level
self
.
_max_level
=
max_level
...
...
@@ -235,7 +264,7 @@ class SpineNet(tf.keras.Model):
return
tf
.
identity
(
x
,
name
=
name
)
def
_build_stem
(
self
,
inputs
):
"""Build SpineNet stem."""
"""Build
s
SpineNet stem."""
x
=
layers
.
Conv2D
(
filters
=
64
,
kernel_size
=
7
,
...
...
@@ -271,7 +300,7 @@ class SpineNet(tf.keras.Model):
net
,
input_width
,
weighted_fusion
=
False
):
"""Build scale-permuted network."""
"""Build
s
scale-permuted network."""
net_sizes
=
[
int
(
math
.
ceil
(
input_width
/
2
**
2
))]
*
len
(
net
)
net_block_fns
=
[
self
.
_init_block_fn
]
*
len
(
net
)
num_outgoing_connections
=
[
0
]
*
len
(
net
)
...
...
@@ -363,7 +392,7 @@ class SpineNet(tf.keras.Model):
return
endpoints
def
_build_endpoints
(
self
,
net
):
"""Match filter size for endpoints before sharing conv layers."""
"""Match
es
filter size for endpoints before sharing conv layers."""
endpoints
=
{}
for
level
in
range
(
self
.
_min_level
,
self
.
_max_level
+
1
):
x
=
layers
.
Conv2D
(
...
...
@@ -392,7 +421,7 @@ class SpineNet(tf.keras.Model):
target_num_filters
,
target_block_fn
,
alpha
=
0.5
):
"""Match resolution and feature dimension."""
"""Match
es
resolution and feature dimension."""
_
,
_
,
_
,
input_num_filters
=
inputs
.
get_shape
().
as_list
()
if
input_block_fn
==
'bottleneck'
:
input_num_filters
/=
4
...
...
@@ -493,7 +522,7 @@ def build_spinenet(
input_specs
:
tf
.
keras
.
layers
.
InputSpec
,
model_config
,
l2_regularizer
:
tf
.
keras
.
regularizers
.
Regularizer
=
None
)
->
tf
.
keras
.
Model
:
"""Builds
ResNet 3d
backbone from a config."""
"""Builds
SpineNet
backbone from a config."""
backbone_type
=
model_config
.
backbone
.
type
backbone_cfg
=
model_config
.
backbone
.
get
()
norm_activation_config
=
model_config
.
norm_activation
...
...
official/vision/beta/modeling/layers/box_sampler.py
View file @
bb124157
...
...
@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""
B
ox sampler."""
"""
Contains definitions of b
ox sampler."""
# Import libraries
import
tensorflow
as
tf
...
...
@@ -22,19 +22,19 @@ from official.vision.beta.ops import sampling_ops
@
tf
.
keras
.
utils
.
register_keras_serializable
(
package
=
'Vision'
)
class
BoxSampler
(
tf
.
keras
.
layers
.
Layer
):
"""
S
ample positive and negative boxes."""
"""
Creates a BoxSampler to s
ample positive and negative boxes."""
def
__init__
(
self
,
num_samples
=
512
,
foreground_fraction
=
0.25
,
**
kwargs
):
"""Initializes a
ROI
sampler.
"""Initializes a
box
sampler.
Args:
num_samples:
int,
the number of sampled boxes per image.
foreground_fraction: float in [0, 1], what percentage of boxes should
be
sampled from the positive examples.
**kwargs:
other
key
word arguments passed to Layer.
num_samples:
An `int` of
the number of sampled boxes per image.
foreground_fraction:
A `
float
`
in [0, 1], what percentage of boxes should
be
sampled from the positive examples.
**kwargs:
Additional
keyword arguments passed to Layer.
"""
self
.
_config_dict
=
{
'num_samples'
:
num_samples
,
...
...
@@ -43,22 +43,22 @@ class BoxSampler(tf.keras.layers.Layer):
super
(
BoxSampler
,
self
).
__init__
(
**
kwargs
)
def
call
(
self
,
positive_matches
,
negative_matches
,
ignored_matches
):
"""Sample and select positive and negative instances.
"""Sample
s
and select
s
positive and negative instances.
Args:
positive_matches:
a
`bool` tensor of shape of [batch, N] where N is the
positive_matches:
A
`bool` tensor of shape of [batch, N] where N is the
number of instances. For each element, `True` means the instance
corresponds to a positive example.
negative_matches:
a
`bool` tensor of shape of [batch, N] where N is the
negative_matches:
A
`bool` tensor of shape of [batch, N] where N is the
number of instances. For each element, `True` means the instance
corresponds to a negative example.
ignored_matches:
a
`bool` tensor of shape of [batch, N] where N is the
number of instances. For each element, `True` means the instance
should
be ignored.
ignored_matches:
A
`bool` tensor of shape of [batch, N] where N is the
number of instances. For each element, `True` means the instance
should
be ignored.
Returns:
selected_indices: a
tensor of shape of [batch_size, K], storing the
indices of the
sampled examples, where K is `num_samples`.
A `tf.
tensor
`
of shape of [batch_size, K], storing the
indices of the
sampled examples, where K is `num_samples`.
"""
sample_candidates
=
tf
.
logical_and
(
tf
.
logical_or
(
positive_matches
,
negative_matches
),
...
...
official/vision/beta/modeling/layers/detection_generator.py
View file @
bb124157
...
...
@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""
G
enerators to generate the final detections."""
"""
Contains definitions of g
enerators to generate the final detections."""
# Import libraries
...
...
@@ -28,39 +28,41 @@ def _generate_detections_v1(boxes,
pre_nms_score_threshold
=
0.05
,
nms_iou_threshold
=
0.5
,
max_num_detections
=
100
):
"""Generate the final detections given the model outputs.
"""Generate
s
the final detections given the model outputs.
The implementation unrolls the batch dimension and process images one by one.
It required the batch dimension to be statically known and it is TPU
compatible.
Args:
boxes:
a t
ensor with shape [batch_size, N, num_classes, 4] or
[batch_size, N, 1, 4], which box predictions on all feature levels. The
N
is the number of total anchors on all levels.
scores:
a t
ensor with shape [batch_size, N, num_classes], which
boxes:
A `tf.T
ensor
`
with shape
`
[batch_size, N, num_classes, 4]
`
or
`
[batch_size, N, 1, 4]
`
, which box predictions on all feature levels. The
N
is the number of total anchors on all levels.
scores:
A `tf.T
ensor
`
with shape
`
[batch_size, N, num_classes]
`
, which
stacks class probability on all feature levels. The N is the number of
total anchors on all levels. The num_classes is the number of classes
predicted by the model. Note that the class_outputs here is the raw score.
pre_nms_top_k:
a
n int number of top candidate detections per class
before
NMS.
pre_nms_score_threshold:
a
float representing the threshold for deciding
pre_nms_top_k:
A
n
`
int
`
number of top candidate detections per class
before
NMS.
pre_nms_score_threshold:
A `
float
`
representing the threshold for deciding
when to remove boxes based on score.
nms_iou_threshold:
a
float representing the threshold for deciding whether
nms_iou_threshold:
A `
float
`
representing the threshold for deciding whether
boxes overlap too much with respect to IOU.
max_num_detections:
a
scalar representing maximum number of boxes retained
max_num_detections:
A
scalar representing maximum number of boxes retained
over all classes.
Returns:
nms_boxes: `float` Tensor of shape [batch_size, max_num_detections, 4]
representing top detected boxes in [y1, x1, y2, x2].
nms_scores: `float` Tensor of shape [batch_size, max_num_detections]
representing sorted confidence scores for detected boxes. The values are
between [0, 1].
nms_classes: `int` Tensor of shape [batch_size, max_num_detections]
representing classes for detected boxes.
valid_detections: `int` Tensor of shape [batch_size] only the top
`valid_detections` boxes are valid detections.
nms_boxes: A `float` type `tf.Tensor` of shape
`[batch_size, max_num_detections, 4]` representing top detected boxes in
`[y1, x1, y2, x2]`.
nms_scores: A `float` type `tf.Tensor` of shape
`[batch_size, max_num_detections]` representing sorted confidence scores
for detected boxes. The values are between `[0, 1]`.
nms_classes: An `int` type `tf.Tensor` of shape
`[batch_size, max_num_detections]` representing classes for detected
boxes.
valid_detections: An `int` type `tf.Tensor` of shape `[batch_size]` only the
top `valid_detections` boxes are valid detections.
"""
with
tf
.
name_scope
(
'generate_detections'
):
batch_size
=
scores
.
get_shape
().
as_list
()[
0
]
...
...
@@ -94,34 +96,35 @@ def _generate_detections_per_image(boxes,
pre_nms_score_threshold
=
0.05
,
nms_iou_threshold
=
0.5
,
max_num_detections
=
100
):
"""Generate the final detections per image given the model outputs.
"""Generate
s
the final detections per image given the model outputs.
Args:
boxes:
a t
ensor with shape [N, num_classes, 4] or [N, 1, 4], which
box
predictions on all feature levels. The N is the number of total
anchors on
all levels.
scores:
a t
ensor with shape [N, num_classes], which stacks class
probability
on all feature levels. The N is the number of total anchors on
all levels.
The num_classes is the number of classes predicted by the
model. Note that
the class_outputs here is the raw score.
pre_nms_top_k:
a
n int number of top candidate detections per class
before
NMS.
pre_nms_score_threshold:
a
float representing the threshold for deciding
boxes:
A `tf.T
ensor
`
with shape
`
[N, num_classes, 4]
`
or
`
[N, 1, 4]
`
, which
box
predictions on all feature levels. The N is the number of total
anchors on
all levels.
scores:
A `tf.T
ensor
`
with shape
`
[N, num_classes]
`
, which stacks class
probability
on all feature levels. The N is the number of total anchors on
all levels.
The num_classes is the number of classes predicted by the
model. Note that
the class_outputs here is the raw score.
pre_nms_top_k:
A
n
`
int
`
number of top candidate detections per class
before
NMS.
pre_nms_score_threshold:
A `
float
`
representing the threshold for deciding
when to remove boxes based on score.
nms_iou_threshold:
a
float representing the threshold for deciding whether
nms_iou_threshold:
A `
float
`
representing the threshold for deciding whether
boxes overlap too much with respect to IOU.
max_num_detections:
a
scalar representing maximum number of boxes retained
max_num_detections:
A `
scalar
`
representing maximum number of boxes retained
over all classes.
Returns:
nms_boxes: `float` Tensor of shape [max_num_detections, 4] representing top
detected boxes in [y1, x1, y2, x2].
nms_scores: `float` Tensor of shape [max_num_detections] representing sorted
confidence scores for detected boxes. The values are between [0, 1].
nms_classes: `int` Tensor of shape [max_num_detections] representing classes
for detected boxes.
valid_detections: `int` Tensor of shape [1] only the top `valid_detections`
boxes are valid detections.
nms_boxes: A `float` tf.Tensor of shape `[max_num_detections, 4]`
representing top detected boxes in `[y1, x1, y2, x2]`.
nms_scores: A `float` tf.Tensor of shape `[max_num_detections]` representing
sorted confidence scores for detected boxes. The values are between [0,
1].
nms_classes: An `int` tf.Tensor of shape `[max_num_detections]` representing
classes for detected boxes.
valid_detections: An `int` tf.Tensor of shape [1] only the top
`valid_detections` boxes are valid detections.
"""
nmsed_boxes
=
[]
nmsed_scores
=
[]
...
...
@@ -171,18 +174,18 @@ def _generate_detections_per_image(boxes,
def
_select_top_k_scores
(
scores_in
,
pre_nms_num_detections
):
"""Select top_k scores and indices for each class.
"""Select
s
top_k scores and indices for each class.
Args:
scores_in:
a
Tensor with shape [batch_size, N, num_classes], which
stacks
class logit outputs on all feature levels. The N is the number of
total
anchors on all levels. The num_classes is the number of classes
predicted
by the model.
scores_in:
A `tf.
Tensor
`
with shape
`
[batch_size, N, num_classes]
`
, which
stacks
class logit outputs on all feature levels. The N is the number of
total
anchors on all levels. The num_classes is the number of classes
predicted
by the model.
pre_nms_num_detections: Number of candidates before NMS.
Returns:
scores and indices: Tensor
s
with shape
[batch_size, pre_nms_num_detections,
num_classes].
scores and indices:
A `tf.
Tensor
`
with shape
`[batch_size, pre_nms_num_detections,
num_classes]
`
.
"""
batch_size
,
num_anchors
,
num_class
=
scores_in
.
get_shape
().
as_list
()
scores_trans
=
tf
.
transpose
(
scores_in
,
perm
=
[
0
,
2
,
1
])
...
...
@@ -206,7 +209,7 @@ def _generate_detections_v2(boxes,
pre_nms_score_threshold
=
0.05
,
nms_iou_threshold
=
0.5
,
max_num_detections
=
100
):
"""Generate the final detections given the model outputs.
"""Generate
s
the final detections given the model outputs.
This implementation unrolls classes dimension while using the tf.while_loop
to implement the batched NMS, so that it can be parallelized at the batch
...
...
@@ -214,31 +217,31 @@ def _generate_detections_v2(boxes,
It is TPU compatible.
Args:
boxes:
a t
ensor with shape [batch_size, N, num_classes, 4] or
[batch_size,
N, 1, 4], which stacks box predictions on all feature levels. The
N is the number
of total anchors on all levels.
scores:
a t
ensor with shape [batch_size, N, num_classes], which
stacks class
probability on all feature levels. The N is the number of
total anchors on
all levels. The num_classes is the number of classes
predicted by the
model. Note that the class_outputs here is the raw score.
pre_nms_top_k:
a
n int number of top candidate detections per class
before
NMS.
pre_nms_score_threshold:
a
float representing the threshold for deciding
boxes:
A `tf.T
ensor
`
with shape
`
[batch_size, N, num_classes, 4]
`
or
`[batch_size,
N, 1, 4]
`
, which stacks box predictions on all feature levels. The
N is the number
of total anchors on all levels.
scores:
A `tf.T
ensor
`
with shape
`
[batch_size, N, num_classes]
`
, which
stacks class
probability on all feature levels. The N is the number of
total anchors on
all levels. The num_classes is the number of classes
predicted by the
model. Note that the class_outputs here is the raw score.
pre_nms_top_k:
A
n
`
int
`
number of top candidate detections per class
before
NMS.
pre_nms_score_threshold:
A `
float
`
representing the threshold for deciding
when to remove boxes based on score.
nms_iou_threshold:
a
float representing the threshold for deciding whether
nms_iou_threshold:
A `
float
`
representing the threshold for deciding whether
boxes overlap too much with respect to IOU.
max_num_detections:
a
scalar representing maximum number of boxes retained
max_num_detections:
A `
scalar
`
representing maximum number of boxes retained
over all classes.
Returns:
nms_boxes: `float` Tensor of shape [batch_size, max_num_detections, 4]
nms_boxes:
A
`float`
tf.
Tensor of shape [batch_size, max_num_detections, 4]
representing top detected boxes in [y1, x1, y2, x2].
nms_scores: `float` Tensor of shape [batch_size, max_num_detections]
nms_scores:
A
`float`
tf.
Tensor of shape [batch_size, max_num_detections]
representing sorted confidence scores for detected boxes. The values are
between [0, 1].
nms_classes: `int` Tensor of shape [batch_size, max_num_detections]
nms_classes:
An
`int`
tf.
Tensor of shape [batch_size, max_num_detections]
representing classes for detected boxes.
valid_detections: `int` Tensor of shape [batch_size] only the top
valid_detections:
An
`int`
tf.
Tensor of shape [batch_size] only the top
`valid_detections` boxes are valid detections.
"""
with
tf
.
name_scope
(
'generate_detections'
):
...
...
@@ -294,29 +297,29 @@ def _generate_detections_batched(boxes,
supported on TPU currently.
Args:
boxes:
a t
ensor with shape [batch_size, N, num_classes, 4] or
[batch_size, N, 1, 4], which stacks box predictions on all feature levels. The
N
is the number of total anchors on all levels.
scores:
a t
ensor with shape [batch_size, N, num_classes], which
boxes:
A `tf.T
ensor
`
with shape
`
[batch_size, N, num_classes, 4]
`
or
`
[batch_size, N, 1, 4]
`
, which stacks box predictions on all feature levels. The
N
is the number of total anchors on all levels.
scores:
A `tf.T
ensor
`
with shape
`
[batch_size, N, num_classes]
`
, which
stacks class probability on all feature levels. The N is the number of
total anchors on all levels. The num_classes is the number of classes
predicted by the model. Note that the class_outputs here is the raw score.
pre_nms_score_threshold:
a
float representing the threshold for deciding
pre_nms_score_threshold:
A `
float
`
representing the threshold for deciding
when to remove boxes based on score.
nms_iou_threshold:
a
float representing the threshold for deciding whether
nms_iou_threshold:
A `
float
`
representing the threshold for deciding whether
boxes overlap too much with respect to IOU.
max_num_detections:
a
scalar representing maximum number of boxes retained
max_num_detections:
A `
scalar
`
representing maximum number of boxes retained
over all classes.
Returns:
nms_boxes: `float` Tensor of shape [batch_size, max_num_detections, 4]
nms_boxes:
A
`float`
tf.
Tensor of shape [batch_size, max_num_detections, 4]
representing top detected boxes in [y1, x1, y2, x2].
nms_scores: `float` Tensor of shape [batch_size, max_num_detections]
nms_scores:
A
`float`
tf.
Tensor of shape [batch_size, max_num_detections]
representing sorted confidence scores for detected boxes. The values are
between [0, 1].
nms_classes: `int` Tensor of shape [batch_size, max_num_detections]
nms_classes:
An
`int`
tf.
Tensor of shape [batch_size, max_num_detections]
representing classes for detected boxes.
valid_detections: `int` Tensor of shape [batch_size] only the top
valid_detections:
An
`int`
tf.
Tensor of shape [batch_size] only the top
`valid_detections` boxes are valid detections.
"""
with
tf
.
name_scope
(
'generate_detections'
):
...
...
@@ -348,18 +351,19 @@ class DetectionGenerator(tf.keras.layers.Layer):
"""Initializes a detection generator.
Args:
apply_nms: bool
,
whether or not apply non maximum suppression.
If False,
the decoded boxes and their scores are returned.
pre_nms_top_k:
int,
the number of top scores proposals to be kept
before
applying NMS.
pre_nms_score_threshold: float
,
the score threshold to apply before
apply_nms:
A `
bool
` of
whether or not to apply non-maximum suppression.
If False,
the decoded boxes and their scores are returned.
pre_nms_top_k:
An `int` of
the number of top scores proposals to be kept
before
applying NMS.
pre_nms_score_threshold:
A `
float
` of
the score threshold to apply before
applying NMS. Proposals whose scores are below this threshold are
thrown away.
nms_iou_threshold: float in [0, 1], the NMS IoU threshold.
max_num_detections: int, the final number of total detections to generate.
use_batched_nms: bool, whether or not use
nms_iou_threshold: A `float` in [0, 1], the NMS IoU threshold.
max_num_detections: An `int` of the final number of total detections to
generate.
use_batched_nms: A `bool` of whether or not to use
`tf.image.combined_non_max_suppression`.
**kwargs:
other
key
word arguments passed to Layer.
**kwargs:
Additional
keyword arguments passed to Layer.
"""
self
.
_config_dict
=
{
'apply_nms'
:
apply_nms
,
...
...
@@ -376,35 +380,36 @@ class DetectionGenerator(tf.keras.layers.Layer):
raw_scores
,
anchor_boxes
,
image_shape
):
"""Generate final detections.
"""Generate
s
final detections.
Args:
raw_boxes:
a t
ensor of shape of [batch_size, K, num_classes * 4]
raw_boxes:
A `tf.T
ensor
`
of shape of
`
[batch_size, K, num_classes * 4]
`
representing the class-specific box coordinates relative to anchors.
raw_scores:
a t
ensor of shape of [batch_size, K, num_classes]
raw_scores:
A `tf.T
ensor
`
of shape of
`
[batch_size, K, num_classes]
`
representing the class logits before applying score activation.
anchor_boxes:
a t
ensor of shape of [batch_size, K, 4] representing
the
corresponding anchor boxes w.r.t `box_outputs`.
image_shape:
a t
ensor of shape of [batch_size, 2] storing the image
height
and width w.r.t. the scaled image, i.e. the same image space as
anchor_boxes:
A `tf.T
ensor
`
of shape of
`
[batch_size, K, 4]
`
representing
the
corresponding anchor boxes w.r.t `box_outputs`.
image_shape:
A `tf.T
ensor
`
of shape of
`
[batch_size, 2]
`
storing the image
height
and width w.r.t. the scaled image, i.e. the same image space as
`box_outputs` and `anchor_boxes`.
Returns:
If `apply_nms` = True, the return is a dictionary with keys:
`detection_boxes`: float Tensor of shape [batch, max_num_detections, 4]
representing top detected boxes in [y1, x1, y2, x2].
`detection_scores`: float Tensor of shape [batch, max_num_detections]
representing sorted confidence scores for detected boxes. The values
are between [0, 1].
`detection_classes`: int Tensor of shape [batch, max_num_detections]
representing classes for detected boxes.
`num_detections`: int Tensor of shape [batch] only the first
`detection_boxes`: A `float` tf.Tensor of shape
[batch, max_num_detections, 4] representing top detected boxes in
[y1, x1, y2, x2].
`detection_scores`: A `float` `tf.Tensor` of shape
[batch, max_num_detections] representing sorted confidence scores for
detected boxes. The values are between [0, 1].
`detection_classes`: An `int` tf.Tensor of shape
[batch, max_num_detections] representing classes for detected boxes.
`num_detections`: An `int` tf.Tensor of shape [batch] only the first
`num_detections` boxes are valid detections
If `apply_nms` = False, the return is a dictionary with keys:
`decoded_boxes`: float
Tensor of shape [batch, num_raw_boxes, 4]
`decoded_boxes`:
A `
float
` tf.
Tensor of shape [batch, num_raw_boxes, 4]
representing all the decoded boxes.
`decoded_box_scores`: float
Tensor of shape
[batch, num_raw_boxes]
representing scores of all the decoded boxes.
`decoded_box_scores`:
A `
float
` tf.
Tensor of shape
[batch, num_raw_boxes]
representing scores of all the decoded boxes.
"""
box_scores
=
tf
.
nn
.
softmax
(
raw_scores
,
axis
=-
1
)
...
...
@@ -496,21 +501,22 @@ class MultilevelDetectionGenerator(tf.keras.layers.Layer):
max_num_detections
=
100
,
use_batched_nms
=
False
,
**
kwargs
):
"""Initializes a detection generator.
"""Initializes a
multi-level
detection generator.
Args:
apply_nms: bool, whether or not apply non maximum suppression. If False,
the decoded boxes and their scores are returned.
pre_nms_top_k: int, the number of top scores proposals to be kept before
applying NMS.
pre_nms_score_threshold: float, the score threshold to apply before
applying NMS. Proposals whose scores are below this threshold are
thrown away.
nms_iou_threshold: float in [0, 1], the NMS IoU threshold.
max_num_detections: int, the final number of total detections to generate.
use_batched_nms: bool, whether or not use
apply_nms: A `bool` of whether or not to apply non-maximum suppression. If
False, the decoded boxes and their scores are returned.
pre_nms_top_k: An `int` of the number of top scores proposals to be kept
before applying NMS.
pre_nms_score_threshold: A `float` of the score threshold to apply before
applying NMS. Proposals whose scores are below this threshold are thrown
away.
nms_iou_threshold: A `float` in [0, 1], the NMS IoU threshold.
max_num_detections: An `int` of the final number of total detections to
generate.
use_batched_nms: A `bool` of whether or not to use
`tf.image.combined_non_max_suppression`.
**kwargs:
other
key
word arguments passed to Layer.
**kwargs:
Additional
keyword arguments passed to Layer.
"""
self
.
_config_dict
=
{
'apply_nms'
:
apply_nms
,
...
...
@@ -527,37 +533,38 @@ class MultilevelDetectionGenerator(tf.keras.layers.Layer):
raw_scores
,
anchor_boxes
,
image_shape
):
"""Generate final detections.
"""Generate
s
final detections.
Args:
raw_boxes:
a
dict with keys representing FPN levels and values
representing box tensors of shape
[batch, feature_h, feature_w,
num_anchors * 4].
raw_scores:
a
dict with keys representing FPN levels and values
representing logit tensors of shape
[batch, feature_h, feature_w,
num_anchors].
anchor_boxes:
a t
ensor of shape of [batch_size, K, 4] representing
the
corresponding anchor boxes w.r.t `box_outputs`.
image_shape:
a t
ensor of shape of [batch_size, 2] storing the image
height
and width w.r.t. the scaled image, i.e. the same image space as
raw_boxes:
A `
dict
`
with keys representing FPN levels and values
representing box tensors of shape
`[batch, feature_h, feature_w,
num_anchors * 4]
`
.
raw_scores:
A `
dict
`
with keys representing FPN levels and values
representing logit tensors of shape
`[batch, feature_h, feature_w,
num_anchors]
`
.
anchor_boxes:
A `tf.T
ensor
`
of shape of [batch_size, K, 4] representing
the
corresponding anchor boxes w.r.t `box_outputs`.
image_shape:
A `tf.T
ensor
`
of shape of [batch_size, 2] storing the image
height
and width w.r.t. the scaled image, i.e. the same image space as
`box_outputs` and `anchor_boxes`.
Returns:
If `apply_nms` = True, the return is a dictionary with keys:
`detection_boxes`: float Tensor of shape [batch, max_num_detections, 4]
representing top detected boxes in [y1, x1, y2, x2].
`detection_scores`: float Tensor of shape [batch, max_num_detections]
representing sorted confidence scores for detected boxes. The values
are between [0, 1].
`detection_classes`: int Tensor of shape [batch, max_num_detections]
representing classes for detected boxes.
`num_detections`: int Tensor of shape [batch] only the first
`detection_boxes`: A `float` tf.Tensor of shape
[batch, max_num_detections, 4] representing top detected boxes in
[y1, x1, y2, x2].
`detection_scores`: A `float` tf.Tensor of shape
[batch, max_num_detections] representing sorted confidence scores for
detected boxes. The values are between [0, 1].
`detection_classes`: An `int` tf.Tensor of shape
[batch, max_num_detections] representing classes for detected boxes.
`num_detections`: An `int` tf.Tensor of shape [batch] only the first
`num_detections` boxes are valid detections
If `apply_nms` = False, the return is a dictionary with keys:
`decoded_boxes`: float
Tensor of shape [batch, num_raw_boxes, 4]
`decoded_boxes`:
A `
float
` tf.
Tensor of shape [batch, num_raw_boxes, 4]
representing all the decoded boxes.
`decoded_box_scores`: float
Tensor of shape
[batch, num_raw_boxes]
representing scores of all the decoded boxes.
`decoded_box_scores`:
A `
float
` tf.
Tensor of shape
[batch, num_raw_boxes]
representing scores of all the decoded boxes.
"""
# Collects outputs from all levels into a list.
boxes
=
[]
...
...
Prev
1
…
13
14
15
16
17
18
19
20
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment