Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
ResNet50_tensorflow
Commits
460890ed
Commit
460890ed
authored
Nov 01, 2021
by
A. Unique TensorFlower
Browse files
Internal change
PiperOrigin-RevId: 406888835
parent
f2bc366e
Changes
39
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
1861 additions
and
2 deletions
+1861
-2
official/vision/beta/modeling/backbones/mobilenet.py
official/vision/beta/modeling/backbones/mobilenet.py
+16
-1
official/vision/beta/modeling/layers/nn_blocks.py
official/vision/beta/modeling/layers/nn_blocks.py
+15
-1
official/vision/beta/modeling/layers/nn_layers.py
official/vision/beta/modeling/layers/nn_layers.py
+11
-0
official/vision/beta/projects/centernet/README.md
official/vision/beta/projects/centernet/README.md
+82
-0
official/vision/beta/projects/centernet/common/registry_imports.py
...vision/beta/projects/centernet/common/registry_imports.py
+22
-0
official/vision/beta/projects/centernet/configs/__init__.py
official/vision/beta/projects/centernet/configs/__init__.py
+14
-0
official/vision/beta/projects/centernet/configs/backbones.py
official/vision/beta/projects/centernet/configs/backbones.py
+35
-0
official/vision/beta/projects/centernet/configs/centernet.py
official/vision/beta/projects/centernet/configs/centernet.py
+226
-0
official/vision/beta/projects/centernet/configs/centernet_test.py
.../vision/beta/projects/centernet/configs/centernet_test.py
+41
-0
official/vision/beta/projects/centernet/configs/experiments/coco-centernet-hourglass-gpu.yaml
...net/configs/experiments/coco-centernet-hourglass-gpu.yaml
+85
-0
official/vision/beta/projects/centernet/configs/experiments/coco-centernet-hourglass-tpu.yaml
...net/configs/experiments/coco-centernet-hourglass-tpu.yaml
+84
-0
official/vision/beta/projects/centernet/dataloaders/centernet_input.py
...on/beta/projects/centernet/dataloaders/centernet_input.py
+343
-0
official/vision/beta/projects/centernet/losses/centernet_losses.py
...vision/beta/projects/centernet/losses/centernet_losses.py
+109
-0
official/vision/beta/projects/centernet/losses/centernet_losses_test.py
...n/beta/projects/centernet/losses/centernet_losses_test.py
+126
-0
official/vision/beta/projects/centernet/modeling/backbones/hourglass.py
...n/beta/projects/centernet/modeling/backbones/hourglass.py
+276
-0
official/vision/beta/projects/centernet/modeling/backbones/hourglass_test.py
...a/projects/centernet/modeling/backbones/hourglass_test.py
+42
-0
official/vision/beta/projects/centernet/modeling/centernet_model.py
...ision/beta/projects/centernet/modeling/centernet_model.py
+86
-0
official/vision/beta/projects/centernet/modeling/centernet_model_test.py
.../beta/projects/centernet/modeling/centernet_model_test.py
+72
-0
official/vision/beta/projects/centernet/modeling/heads/centernet_head.py
.../beta/projects/centernet/modeling/heads/centernet_head.py
+107
-0
official/vision/beta/projects/centernet/modeling/heads/centernet_head_test.py
.../projects/centernet/modeling/heads/centernet_head_test.py
+69
-0
No files found.
official/vision/beta/modeling/backbones/mobilenet.py
View file @
460890ed
...
@@ -41,6 +41,7 @@ class Conv2DBNBlock(tf.keras.layers.Layer):
...
@@ -41,6 +41,7 @@ class Conv2DBNBlock(tf.keras.layers.Layer):
kernel_size
:
int
=
3
,
kernel_size
:
int
=
3
,
strides
:
int
=
1
,
strides
:
int
=
1
,
use_bias
:
bool
=
False
,
use_bias
:
bool
=
False
,
use_explicit_padding
:
bool
=
False
,
activation
:
str
=
'relu6'
,
activation
:
str
=
'relu6'
,
kernel_initializer
:
str
=
'VarianceScaling'
,
kernel_initializer
:
str
=
'VarianceScaling'
,
kernel_regularizer
:
Optional
[
tf
.
keras
.
regularizers
.
Regularizer
]
=
None
,
kernel_regularizer
:
Optional
[
tf
.
keras
.
regularizers
.
Regularizer
]
=
None
,
...
@@ -60,6 +61,9 @@ class Conv2DBNBlock(tf.keras.layers.Layer):
...
@@ -60,6 +61,9 @@ class Conv2DBNBlock(tf.keras.layers.Layer):
strides: An `int` of block stride. If greater than 1, this block will
strides: An `int` of block stride. If greater than 1, this block will
ultimately downsample the input.
ultimately downsample the input.
use_bias: If True, use bias in the convolution layer.
use_bias: If True, use bias in the convolution layer.
use_explicit_padding: Use 'VALID' padding for convolutions, but prepad
inputs so that the output dimensions are the same as if 'SAME' padding
were used.
activation: A `str` name of the activation function.
activation: A `str` name of the activation function.
kernel_initializer: A `str` for kernel initializer of convolutional
kernel_initializer: A `str` for kernel initializer of convolutional
layers.
layers.
...
@@ -79,6 +83,7 @@ class Conv2DBNBlock(tf.keras.layers.Layer):
...
@@ -79,6 +83,7 @@ class Conv2DBNBlock(tf.keras.layers.Layer):
self
.
_strides
=
strides
self
.
_strides
=
strides
self
.
_activation
=
activation
self
.
_activation
=
activation
self
.
_use_bias
=
use_bias
self
.
_use_bias
=
use_bias
self
.
_use_explicit_padding
=
use_explicit_padding
self
.
_kernel_initializer
=
kernel_initializer
self
.
_kernel_initializer
=
kernel_initializer
self
.
_kernel_regularizer
=
kernel_regularizer
self
.
_kernel_regularizer
=
kernel_regularizer
self
.
_bias_regularizer
=
bias_regularizer
self
.
_bias_regularizer
=
bias_regularizer
...
@@ -87,6 +92,10 @@ class Conv2DBNBlock(tf.keras.layers.Layer):
...
@@ -87,6 +92,10 @@ class Conv2DBNBlock(tf.keras.layers.Layer):
self
.
_norm_momentum
=
norm_momentum
self
.
_norm_momentum
=
norm_momentum
self
.
_norm_epsilon
=
norm_epsilon
self
.
_norm_epsilon
=
norm_epsilon
if
use_explicit_padding
and
kernel_size
>
1
:
self
.
_padding
=
'valid'
else
:
self
.
_padding
=
'same'
if
use_sync_bn
:
if
use_sync_bn
:
self
.
_norm
=
tf
.
keras
.
layers
.
experimental
.
SyncBatchNormalization
self
.
_norm
=
tf
.
keras
.
layers
.
experimental
.
SyncBatchNormalization
else
:
else
:
...
@@ -102,6 +111,7 @@ class Conv2DBNBlock(tf.keras.layers.Layer):
...
@@ -102,6 +111,7 @@ class Conv2DBNBlock(tf.keras.layers.Layer):
'strides'
:
self
.
_strides
,
'strides'
:
self
.
_strides
,
'kernel_size'
:
self
.
_kernel_size
,
'kernel_size'
:
self
.
_kernel_size
,
'use_bias'
:
self
.
_use_bias
,
'use_bias'
:
self
.
_use_bias
,
'use_explicit_padding'
:
self
.
_use_explicit_padding
,
'kernel_initializer'
:
self
.
_kernel_initializer
,
'kernel_initializer'
:
self
.
_kernel_initializer
,
'kernel_regularizer'
:
self
.
_kernel_regularizer
,
'kernel_regularizer'
:
self
.
_kernel_regularizer
,
'bias_regularizer'
:
self
.
_bias_regularizer
,
'bias_regularizer'
:
self
.
_bias_regularizer
,
...
@@ -115,11 +125,14 @@ class Conv2DBNBlock(tf.keras.layers.Layer):
...
@@ -115,11 +125,14 @@ class Conv2DBNBlock(tf.keras.layers.Layer):
return
dict
(
list
(
base_config
.
items
())
+
list
(
config
.
items
()))
return
dict
(
list
(
base_config
.
items
())
+
list
(
config
.
items
()))
def
build
(
self
,
input_shape
):
def
build
(
self
,
input_shape
):
if
self
.
_use_explicit_padding
and
self
.
_kernel_size
>
1
:
padding_size
=
nn_layers
.
get_padding_for_kernel_size
(
self
.
_kernel_size
)
self
.
_pad
=
tf
.
keras
.
layers
.
ZeroPadding2D
(
padding_size
)
self
.
_conv0
=
tf
.
keras
.
layers
.
Conv2D
(
self
.
_conv0
=
tf
.
keras
.
layers
.
Conv2D
(
filters
=
self
.
_filters
,
filters
=
self
.
_filters
,
kernel_size
=
self
.
_kernel_size
,
kernel_size
=
self
.
_kernel_size
,
strides
=
self
.
_strides
,
strides
=
self
.
_strides
,
padding
=
'same'
,
padding
=
self
.
_padding
,
use_bias
=
self
.
_use_bias
,
use_bias
=
self
.
_use_bias
,
kernel_initializer
=
self
.
_kernel_initializer
,
kernel_initializer
=
self
.
_kernel_initializer
,
kernel_regularizer
=
self
.
_kernel_regularizer
,
kernel_regularizer
=
self
.
_kernel_regularizer
,
...
@@ -135,6 +148,8 @@ class Conv2DBNBlock(tf.keras.layers.Layer):
...
@@ -135,6 +148,8 @@ class Conv2DBNBlock(tf.keras.layers.Layer):
super
(
Conv2DBNBlock
,
self
).
build
(
input_shape
)
super
(
Conv2DBNBlock
,
self
).
build
(
input_shape
)
def
call
(
self
,
inputs
,
training
=
None
):
def
call
(
self
,
inputs
,
training
=
None
):
if
self
.
_use_explicit_padding
and
self
.
_kernel_size
>
1
:
inputs
=
self
.
_pad
(
inputs
)
x
=
self
.
_conv0
(
inputs
)
x
=
self
.
_conv0
(
inputs
)
if
self
.
_use_normalization
:
if
self
.
_use_normalization
:
x
=
self
.
_norm0
(
x
)
x
=
self
.
_norm0
(
x
)
...
...
official/vision/beta/modeling/layers/nn_blocks.py
View file @
460890ed
...
@@ -69,6 +69,7 @@ class ResidualBlock(tf.keras.layers.Layer):
...
@@ -69,6 +69,7 @@ class ResidualBlock(tf.keras.layers.Layer):
kernel_regularizer
=
None
,
kernel_regularizer
=
None
,
bias_regularizer
=
None
,
bias_regularizer
=
None
,
activation
=
'relu'
,
activation
=
'relu'
,
use_explicit_padding
:
bool
=
False
,
use_sync_bn
=
False
,
use_sync_bn
=
False
,
norm_momentum
=
0.99
,
norm_momentum
=
0.99
,
norm_epsilon
=
0.001
,
norm_epsilon
=
0.001
,
...
@@ -97,6 +98,9 @@ class ResidualBlock(tf.keras.layers.Layer):
...
@@ -97,6 +98,9 @@ class ResidualBlock(tf.keras.layers.Layer):
bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2d.
bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2d.
Default to None.
Default to None.
activation: A `str` name of the activation function.
activation: A `str` name of the activation function.
use_explicit_padding: Use 'VALID' padding for convolutions, but prepad
inputs so that the output dimensions are the same as if 'SAME' padding
were used.
use_sync_bn: A `bool`. If True, use synchronized batch normalization.
use_sync_bn: A `bool`. If True, use synchronized batch normalization.
norm_momentum: A `float` of normalization momentum for the moving average.
norm_momentum: A `float` of normalization momentum for the moving average.
norm_epsilon: A `float` added to variance to avoid dividing by zero.
norm_epsilon: A `float` added to variance to avoid dividing by zero.
...
@@ -111,6 +115,7 @@ class ResidualBlock(tf.keras.layers.Layer):
...
@@ -111,6 +115,7 @@ class ResidualBlock(tf.keras.layers.Layer):
self
.
_use_projection
=
use_projection
self
.
_use_projection
=
use_projection
self
.
_se_ratio
=
se_ratio
self
.
_se_ratio
=
se_ratio
self
.
_resnetd_shortcut
=
resnetd_shortcut
self
.
_resnetd_shortcut
=
resnetd_shortcut
self
.
_use_explicit_padding
=
use_explicit_padding
self
.
_use_sync_bn
=
use_sync_bn
self
.
_use_sync_bn
=
use_sync_bn
self
.
_activation
=
activation
self
.
_activation
=
activation
self
.
_stochastic_depth_drop_rate
=
stochastic_depth_drop_rate
self
.
_stochastic_depth_drop_rate
=
stochastic_depth_drop_rate
...
@@ -147,11 +152,17 @@ class ResidualBlock(tf.keras.layers.Layer):
...
@@ -147,11 +152,17 @@ class ResidualBlock(tf.keras.layers.Layer):
epsilon
=
self
.
_norm_epsilon
,
epsilon
=
self
.
_norm_epsilon
,
trainable
=
self
.
_bn_trainable
)
trainable
=
self
.
_bn_trainable
)
conv1_padding
=
'same'
# explicit padding here is added for centernet
if
self
.
_use_explicit_padding
:
self
.
_pad
=
tf
.
keras
.
layers
.
ZeroPadding2D
(
padding
=
(
1
,
1
))
conv1_padding
=
'valid'
self
.
_conv1
=
tf
.
keras
.
layers
.
Conv2D
(
self
.
_conv1
=
tf
.
keras
.
layers
.
Conv2D
(
filters
=
self
.
_filters
,
filters
=
self
.
_filters
,
kernel_size
=
3
,
kernel_size
=
3
,
strides
=
self
.
_strides
,
strides
=
self
.
_strides
,
padding
=
'same'
,
padding
=
conv1_padding
,
use_bias
=
False
,
use_bias
=
False
,
kernel_initializer
=
self
.
_kernel_initializer
,
kernel_initializer
=
self
.
_kernel_initializer
,
kernel_regularizer
=
self
.
_kernel_regularizer
,
kernel_regularizer
=
self
.
_kernel_regularizer
,
...
@@ -208,6 +219,7 @@ class ResidualBlock(tf.keras.layers.Layer):
...
@@ -208,6 +219,7 @@ class ResidualBlock(tf.keras.layers.Layer):
'kernel_regularizer'
:
self
.
_kernel_regularizer
,
'kernel_regularizer'
:
self
.
_kernel_regularizer
,
'bias_regularizer'
:
self
.
_bias_regularizer
,
'bias_regularizer'
:
self
.
_bias_regularizer
,
'activation'
:
self
.
_activation
,
'activation'
:
self
.
_activation
,
'use_explicit_padding'
:
self
.
_use_explicit_padding
,
'use_sync_bn'
:
self
.
_use_sync_bn
,
'use_sync_bn'
:
self
.
_use_sync_bn
,
'norm_momentum'
:
self
.
_norm_momentum
,
'norm_momentum'
:
self
.
_norm_momentum
,
'norm_epsilon'
:
self
.
_norm_epsilon
,
'norm_epsilon'
:
self
.
_norm_epsilon
,
...
@@ -222,6 +234,8 @@ class ResidualBlock(tf.keras.layers.Layer):
...
@@ -222,6 +234,8 @@ class ResidualBlock(tf.keras.layers.Layer):
shortcut
=
self
.
_shortcut
(
shortcut
)
shortcut
=
self
.
_shortcut
(
shortcut
)
shortcut
=
self
.
_norm0
(
shortcut
)
shortcut
=
self
.
_norm0
(
shortcut
)
if
self
.
_use_explicit_padding
:
inputs
=
self
.
_pad
(
inputs
)
x
=
self
.
_conv1
(
inputs
)
x
=
self
.
_conv1
(
inputs
)
x
=
self
.
_norm1
(
x
)
x
=
self
.
_norm1
(
x
)
x
=
self
.
_activation_fn
(
x
)
x
=
self
.
_activation_fn
(
x
)
...
...
official/vision/beta/modeling/layers/nn_layers.py
View file @
460890ed
...
@@ -69,6 +69,17 @@ def round_filters(filters: int,
...
@@ -69,6 +69,17 @@ def round_filters(filters: int,
return
int
(
new_filters
)
return
int
(
new_filters
)
def get_padding_for_kernel_size(kernel_size):
  """Compute the symmetric padding that emulates 'SAME' for an odd kernel.

  Generalized from a hard-coded lookup for kernel sizes 3 and 7 to any odd
  kernel size; results for 3 -> (1, 1) and 7 -> (3, 3) are unchanged.

  Args:
    kernel_size: An odd `int` convolution kernel size.

  Returns:
    A `(pad, pad)` tuple where `pad = (kernel_size - 1) // 2`.

  Raises:
    ValueError: If `kernel_size` is even — no symmetric padding reproduces
      'SAME' output sizes in that case.
  """
  if kernel_size % 2 == 0:
    raise ValueError(
        'Padding for kernel size {} not known.'.format(kernel_size))
  pad_total = (kernel_size - 1) // 2
  return (pad_total, pad_total)
def
hard_swish
(
x
:
tf
.
Tensor
)
->
tf
.
Tensor
:
def
hard_swish
(
x
:
tf
.
Tensor
)
->
tf
.
Tensor
:
"""A Swish6/H-Swish activation function.
"""A Swish6/H-Swish activation function.
...
...
official/vision/beta/projects/centernet/README.md
0 → 100644
View file @
460890ed
# Centernet
[

](https://arxiv.org/abs/1904.07850)
Centernet builds upon CornerNet, an anchor-free model for object detection.
Many other models, such as YOLO and RetinaNet, use anchor boxes. These anchor
boxes are predefined to be close to the aspect ratios and scales of the objects
in the training dataset. Anchor-based models do not predict the bounding boxes
of objects directly. They instead predict the location and size/shape
refinements to a predefined anchor box. The detection generator then computes
the final confidences, positions, and size of the detection.
CornerNet eliminates the need for anchor boxes. RetinaNet needs thousands of
anchor boxes in order to cover the most common ground truth boxes. This adds
unnecessary complexity to the model, which slows down training and creates
imbalances between positive and negative anchor boxes. Instead, CornerNet creates
heatmaps for each of the corners and pools them together in order to get the
final detection boxes for the objects. CenterNet removes even more complexity
by using the center instead of the corners, meaning that only one set of
heatmaps (one heatmap for each class) is needed to predict the object. CenterNet
proves that this can be done without a significant difference in accuracy.
## Environment setup
The code can be run on multiple GPUs or TPUs with different distribution
strategies. See the TensorFlow distributed training
[
guide
](
https://www.tensorflow.org/guide/distributed_training
)
for an overview
of
`tf.distribute`
.
The code is compatible with TensorFlow 2.5+. See requirements.txt for all
prerequisites, and you can also install them using the following command.
`pip
install -r ./official/requirements.txt`
## Training
To train the model on COCO, try the following command:
```
python3 -m official.vision.beta.projects.centernet.train \
--mode=train_and_eval \
--experiment=centernet_hourglass_coco \
--model_dir={MODEL_DIR} \
--config_file={CONFIG_FILE}
```
## Configurations
In the following table, we report the mAP measured on the
`coco-val2017`
set.
Backbone | Config name | mAP
:--------------- | :-----------------------------------------------| -------:
Hourglass-104 |
`coco-centernet-hourglass-gpu.yaml`
| 40.01
Hourglass-104 |
`coco-centernet-hourglass-tpu.yaml`
| 40.5
**Note:**
`float16`
(
`bfloat16`
for TPU) is used in the provided configurations.
## Cite
[
Centernet
](
https://arxiv.org/abs/1904.07850
)
:
```
@article{Zhou2019ObjectsAP,
title={Objects as Points},
author={Xingyi Zhou and Dequan Wang and Philipp Kr{\"a}henb{\"u}hl},
journal={ArXiv},
year={2019},
volume={abs/1904.07850}
}
```
[
CornerNet
](
https://arxiv.org/abs/1808.01244
)
:
```
@article{Law2019CornerNetDO,
title={CornerNet: Detecting Objects as Paired Keypoints},
author={Hei Law and J. Deng},
journal={International Journal of Computer Vision},
year={2019},
volume={128},
pages={642-656}
}
```
official/vision/beta/projects/centernet/common/registry_imports.py
0 → 100644
View file @
460890ed
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""All necessary imports for registration."""
# pylint: disable=unused-import
from
official.common
import
registry_imports
from
official.vision.beta.projects.centernet.configs
import
centernet
from
official.vision.beta.projects.centernet.modeling
import
centernet_model
from
official.vision.beta.projects.centernet.modeling.backbones
import
hourglass
from
official.vision.beta.projects.centernet.tasks
import
centernet
as
centernet_task
official/vision/beta/projects/centernet/configs/__init__.py
0 → 100644
View file @
460890ed
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
official/vision/beta/projects/centernet/configs/backbones.py
0 → 100644
View file @
460890ed
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Backbones configurations."""
import
dataclasses
from
official.modeling
import
hyperparams
from
official.vision.beta.configs
import
backbones
@dataclasses.dataclass
class Hourglass(hyperparams.Config):
  """Hourglass backbone config.

  Consumed by the hourglass backbone builder; fields mirror its constructor
  arguments.
  """
  # Identifier selecting the hourglass variant (defaults to 52; the
  # experiment YAMLs in this commit also use 52).
  model_id: int = 52
  # Number of channels produced by the input (stem) block.
  input_channel_dims: int = 128
  # Number of stacked hourglass modules.
  num_hourglasses: int = 2
  # Whether the stem downsamples the input before the hourglass stack.
  initial_downsample: bool = True
  # Name of the activation function used throughout the backbone.
  activation: str = 'relu'
@dataclasses.dataclass
class Backbone(backbones.Backbone):
  """Backbone one-of config extended with the hourglass option."""
  # Hourglass backbone parameters; selected when `type == 'hourglass'`.
  hourglass: Hourglass = Hourglass()
official/vision/beta/projects/centernet/configs/centernet.py
0 → 100644
View file @
460890ed
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""CenterNet configuration definition."""
import
dataclasses
import
os
from
typing
import
List
,
Optional
,
Tuple
from
official.core
import
exp_factory
from
official.modeling
import
hyperparams
from
official.modeling
import
optimization
from
official.modeling.hyperparams
import
config_definitions
as
cfg
from
official.vision.beta.configs
import
common
from
official.vision.beta.projects.centernet.configs
import
backbones
# Re-export the label-map decoder config from the shared vision configs so
# this module exposes all decoder variants under one namespace.
TfExampleDecoderLabelMap = common.TfExampleDecoderLabelMap
@dataclasses.dataclass
class TfExampleDecoder(hyperparams.Config):
  """Config for the plain tf.Example decoder."""
  # NOTE(review): presumably regenerates example source ids when True —
  # confirm against the decoder implementation.
  regenerate_source_id: bool = False
@dataclasses.dataclass
class DataDecoder(hyperparams.OneOfConfig):
  """One-of config selecting the input data decoder."""
  # Which decoder variant is active; defaults to the plain decoder.
  type: Optional[str] = 'simple_decoder'
  simple_decoder: TfExampleDecoder = TfExampleDecoder()
  label_map_decoder: TfExampleDecoderLabelMap = TfExampleDecoderLabelMap()
@dataclasses.dataclass
class Parser(hyperparams.Config):
  """Config for parser."""
  # If True, images are handled in BGR channel order.
  bgr_ordering: bool = True
  # Random horizontal-flip augmentation.
  aug_rand_hflip: bool = True
  # Random scale-jitter range; [1.0, 1.0] disables scale jitter.
  aug_scale_min: float = 1.0
  aug_scale_max: float = 1.0
  # Color augmentations, all disabled by default.
  aug_rand_saturation: bool = False
  aug_rand_brightness: bool = False
  aug_rand_hue: bool = False
  aug_rand_contrast: bool = False
  # NOTE(review): presumably switches to the Object Detection API style
  # augmentation pipeline — confirm against the input pipeline code.
  odapi_augmentation: bool = False
  # Per-channel normalization statistics applied to input images.
  channel_means: Tuple[float, float, float] = dataclasses.field(
      default_factory=lambda: (104.01362025, 114.03422265, 119.9165958))
  channel_stds: Tuple[float, float, float] = dataclasses.field(
      default_factory=lambda: (73.6027665, 69.89082075, 70.9150767))
@dataclasses.dataclass
class DataConfig(cfg.DataConfig):
  """Input config for training."""
  # Glob/path of the input TFRecords; empty means unset.
  input_path: str = ''
  global_batch_size: int = 32
  is_training: bool = True
  dtype: str = 'float16'
  decoder: DataDecoder = DataDecoder()
  parser: Parser = Parser()
  shuffle_buffer_size: int = 10000
  file_type: str = 'tfrecord'
  # Drop the final partial batch so every batch has the full size.
  drop_remainder: bool = True
@dataclasses.dataclass
class DetectionLoss(hyperparams.Config):
  """Relative weights of the three CenterNet detection loss terms."""
  # Weight for the center-heatmap (object center) loss.
  object_center_weight: float = 1.0
  # Weight for the center-offset regression loss.
  offset_weight: float = 1.0
  # Weight for the box size (scale) regression loss.
  scale_weight: float = 0.1
@dataclasses.dataclass
class Losses(hyperparams.Config):
  """Top-level loss config for the CenterNet task."""
  detection: DetectionLoss = DetectionLoss()
  # IoU threshold used when computing the Gaussian radius for ground-truth
  # heatmap rendering.
  gaussian_iou: float = 0.7
  # Offset applied to class ids (e.g. to skip a background class).
  class_offset: int = 1
@dataclasses.dataclass
class CenterNetHead(hyperparams.Config):
  """Config for the CenterNet prediction head."""
  # Initial bias for the heatmap output; a negative value biases the
  # pre-sigmoid logits towards "no object" at the start of training.
  heatmap_bias: float = -2.19
  # Names of backbone output levels the head consumes.
  input_levels: List[str] = dataclasses.field(
      default_factory=lambda: ['2_0', '2'])
@dataclasses.dataclass
class CenterNetDetectionGenerator(hyperparams.Config):
  """Config for decoding heatmaps/offsets/sizes into final detections."""
  # Maximum number of detections kept per image.
  max_detections: int = 100
  # Tolerance used when locating heatmap peaks.
  peak_error: float = 1e-6
  # Kernel size of the max-pool used for peak extraction.
  peak_extract_kernel_size: int = 3
  # Offset applied to class ids (matches Losses.class_offset).
  class_offset: int = 1
  # Optional NMS post-processing (off by default, per the paper).
  use_nms: bool = False
  nms_pre_thresh: float = 0.1
  nms_thresh: float = 0.4
  # NOTE(review): presumably selects sum-reduction in the loss — confirm
  # against the task implementation.
  use_reduction_sum: bool = True
@dataclasses.dataclass
class CenterNetModel(hyperparams.Config):
  """Config for centernet model."""
  num_classes: int = 90
  # Maximum number of object instances handled per image.
  max_num_instances: int = 128
  # [height, width, channels]; empty list means unset (filled by YAML).
  input_size: List[int] = dataclasses.field(default_factory=list)
  backbone: backbones.Backbone = backbones.Backbone(
      type='hourglass', hourglass=backbones.Hourglass(model_id=52))
  head: CenterNetHead = CenterNetHead()
  # pylint: disable=line-too-long
  detection_generator: CenterNetDetectionGenerator = CenterNetDetectionGenerator()
  norm_activation: common.NormActivation = common.NormActivation(
      norm_momentum=0.1, norm_epsilon=1e-5, use_sync_bn=True)
@dataclasses.dataclass
class CenterNetDetection(hyperparams.Config):
  """Config for the detection subtask."""
  # use_center is the only option implemented currently.
  use_centers: bool = True
@dataclasses.dataclass
class CenterNetSubTasks(hyperparams.Config):
  """Container for the CenterNet subtasks (currently detection only)."""
  detection: CenterNetDetection = CenterNetDetection()
@dataclasses.dataclass
class CenterNetTask(cfg.TaskConfig):
  """Config for centernet task."""
  model: CenterNetModel = CenterNetModel()
  train_data: DataConfig = DataConfig(is_training=True)
  validation_data: DataConfig = DataConfig(is_training=False)
  subtasks: CenterNetSubTasks = CenterNetSubTasks()
  losses: Losses = Losses()
  gradient_clip_norm: float = 10.0
  per_category_metrics: bool = False
  weight_decay: float = 5e-4

  # Load checkpoints
  init_checkpoint: Optional[str] = None
  init_checkpoint_modules: str = 'all'

  # COCO-format annotation file used for evaluation; None means unset.
  annotation_file: Optional[str] = None

  def get_output_length_dict(self):
    """Return a dict mapping head output names to their channel counts.

    Returns:
      A dict with 'ct_heatmaps' (one channel per class), 'ct_offset' (2:
      x/y center offsets) and 'ct_size' (2: box width/height).

    Raises:
      ValueError: If the detection subtask is disabled or configured
        without center points (the only implemented mode).
    """
    task_outputs = {}
    if self.subtasks.detection and self.subtasks.detection.use_centers:
      task_outputs.update({
          'ct_heatmaps': self.model.num_classes,
          'ct_offset': 2,
          'ct_size': 2
      })
    else:
      # Fixed: the original message was a truncated sentence
      # ('Detection with center point is only available ').
      raise ValueError('Detection with center points is the only option '
                       'currently available.')
    return task_outputs
# Base directory containing the COCO TFRecords and annotation JSON.
COCO_INPUT_PATH_BASE = 'coco'
# Example counts of the COCO train2017 / val2017 splits, used to derive
# steps per epoch and validation steps below.
COCO_TRAIN_EXAMPLES = 118287
COCO_VAL_EXAMPLES = 5000
@exp_factory.register_config_factory('centernet_hourglass_coco')
def centernet_hourglass_coco() -> cfg.ExperimentConfig:
  """COCO object detection with CenterNet.

  Registered under the experiment name 'centernet_hourglass_coco'.

  Returns:
    An `ExperimentConfig` with the CenterNet task, a 150-epoch cosine-decay
    Adam training schedule, and per-epoch evaluation on COCO val.
  """
  train_batch_size = 128
  eval_batch_size = 8
  # One "epoch" is one pass over the COCO train split at this batch size.
  steps_per_epoch = COCO_TRAIN_EXAMPLES // train_batch_size

  config = cfg.ExperimentConfig(
      task=CenterNetTask(
          annotation_file=os.path.join(COCO_INPUT_PATH_BASE,
                                       'instances_val2017.json'),
          model=CenterNetModel(),
          train_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=train_batch_size,
              parser=Parser(),
              # NOTE(review): buffer of 2 barely shuffles — presumably a
              # placeholder overridden by the experiment YAMLs.
              shuffle_buffer_size=2),
          validation_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'),
              is_training=False,
              global_batch_size=eval_batch_size,
              shuffle_buffer_size=2),
      ),
      trainer=cfg.TrainerConfig(
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          # Train for 150 epochs; cosine decay spans the full schedule.
          train_steps=150 * steps_per_epoch,
          validation_steps=COCO_VAL_EXAMPLES // eval_batch_size,
          validation_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'adam',
                  'adam': {
                      'epsilon': 1e-7
                  }
              },
              'learning_rate': {
                  'type': 'cosine',
                  'cosine': {
                      'initial_learning_rate': 0.001,
                      'decay_steps': 150 * steps_per_epoch
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 2000,
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config
official/vision/beta/projects/centernet/configs/centernet_test.py
0 → 100644
View file @
460890ed
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for centernet."""
from
absl.testing
import
parameterized
import
tensorflow
as
tf
from
official.core
import
config_definitions
as
cfg
from
official.core
import
exp_factory
from
official.vision.beta.projects.centernet.common
import
registry_imports
# pylint: disable=unused-import
from
official.vision.beta.projects.centernet.configs
import
centernet
as
exp_cfg
class CenterNetConfigTest(tf.test.TestCase, parameterized.TestCase):
  """Smoke tests for registered CenterNet experiment configs."""

  @parameterized.parameters(('centernet_hourglass_coco',))
  def test_centernet_configs(self, config_name):
    # Build the registered config and check the expected config types.
    config = exp_factory.get_exp_config(config_name)
    self.assertIsInstance(config, cfg.ExperimentConfig)
    self.assertIsInstance(config.task, exp_cfg.CenterNetTask)
    self.assertIsInstance(config.task.model, exp_cfg.CenterNetModel)
    self.assertIsInstance(config.task.train_data, exp_cfg.DataConfig)
    # The restrictions forbid is_training == None; validation must fail.
    config.task.train_data.is_training = None
    with self.assertRaises(KeyError):
      config.validate()
# Allows running the tests directly, e.g. `python centernet_test.py`.
if __name__ == '__main__':
  tf.test.main()
official/vision/beta/projects/centernet/configs/experiments/coco-centernet-hourglass-gpu.yaml
0 → 100644
View file @
460890ed
# COCO AP 40.01% for float16 precision is achieved with the configuration below.
runtime
:
distribution_strategy
:
'
mirrored'
mixed_precision_dtype
:
'
float16'
loss_scale
:
'
dynamic'
num_gpus
:
8
task
:
model
:
num_classes
:
90
max_num_instances
:
128
input_size
:
[
512
,
512
,
3
]
backbone
:
type
:
hourglass
hourglass
:
model_id
:
52
num_hourglasses
:
2
head
:
heatmap_bias
:
-2.19
input_levels
:
[
'
2_0'
,
'
2'
]
detection_generator
:
max_detections
:
100
peak_error
:
0.000001
peak_extract_kernel_size
:
3
use_nms
:
false
nms_pre_thresh
:
0.1
nms_thresh
:
0.4
class_offset
:
1
norm_activation
:
norm_epsilon
:
0.00001
norm_momentum
:
0.1
use_sync_bn
:
true
losses
:
detection
:
offset_weight
:
1.0
scale_weight
:
0.1
gaussian_iou
:
0.7
class_offset
:
1
per_category_metrics
:
false
weight_decay
:
0.0005
gradient_clip_norm
:
10.0
annotation_file
:
'
coco/instances_val2017.json'
init_checkpoint
:
'
/placer/prod/scratch/home/tf-model-garden-dev/vision/centernet/extremenet_hg104_512x512_coco17/2021-10-19'
init_checkpoint_modules
:
'
backbone'
train_data
:
input_path
:
'
coco/train*'
drop_remainder
:
true
dtype
:
'
float16'
global_batch_size
:
64
is_training
:
true
parser
:
aug_rand_hflip
:
true
aug_scale_min
:
0.6
aug_scale_max
:
1.3
aug_rand_saturation
:
true
aug_rand_brightness
:
true
aug_rand_hue
:
true
aug_rand_contrast
:
true
odapi_augmentation
:
true
validation_data
:
input_path
:
'
coco/val*'
drop_remainder
:
false
dtype
:
'
float16'
global_batch_size
:
16
is_training
:
false
trainer
:
train_steps
:
280000
validation_steps
:
312
# 5000 / 16
steps_per_loop
:
1848
# 118287 / 64 (train global_batch_size above is 64)
validation_interval
:
1848
summary_interval
:
1848
checkpoint_interval
:
1848
optimizer_config
:
learning_rate
:
type
:
'
cosine'
cosine
:
initial_learning_rate
:
0.0005
decay_steps
:
280000
optimizer
:
type
:
adam
adam
:
epsilon
:
0.0000001
warmup
:
type
:
'
linear'
linear
:
warmup_steps
:
2000
official/vision/beta/projects/centernet/configs/experiments/coco-centernet-hourglass-tpu.yaml
0 → 100644
View file @
460890ed
# COCO AP 40.6% for float16 precision is achieved with the configuration below.
# Expected COCO AP for float32 from OD API is 41.92 +/- 0.16.
runtime
:
distribution_strategy
:
'
tpu'
mixed_precision_dtype
:
'
bfloat16'
task
:
model
:
num_classes
:
90
max_num_instances
:
128
input_size
:
[
512
,
512
,
3
]
backbone
:
type
:
hourglass
hourglass
:
model_id
:
52
num_hourglasses
:
2
head
:
heatmap_bias
:
-2.19
input_levels
:
[
'
2_0'
,
'
2'
]
detection_generator
:
max_detections
:
100
peak_error
:
0.000001
peak_extract_kernel_size
:
3
use_nms
:
false
nms_pre_thresh
:
0.1
nms_thresh
:
0.4
class_offset
:
1
norm_activation
:
norm_epsilon
:
0.00001
norm_momentum
:
0.1
use_sync_bn
:
true
losses
:
detection
:
offset_weight
:
1.0
scale_weight
:
0.1
gaussian_iou
:
0.7
class_offset
:
1
per_category_metrics
:
false
weight_decay
:
0.0005
gradient_clip_norm
:
10.0
annotation_file
:
'
coco/instances_val2017.json'
init_checkpoint
:
'
/placer/prod/scratch/home/tf-model-garden-dev/vision/centernet/extremenet_hg104_512x512_coco17/2021-10-19'
init_checkpoint_modules
:
'
backbone'
train_data
:
input_path
:
'
coco/train*'
drop_remainder
:
true
dtype
:
'
bfloat16'
global_batch_size
:
128
is_training
:
true
parser
:
aug_rand_hflip
:
true
aug_scale_min
:
0.6
aug_scale_max
:
1.3
aug_rand_saturation
:
true
aug_rand_brightness
:
true
aug_rand_hue
:
true
aug_rand_contrast
:
true
odapi_augmentation
:
true
validation_data
:
input_path
:
'
coco/val*'
drop_remainder
:
false
dtype
:
'
bfloat16'
global_batch_size
:
16
is_training
:
false
trainer
:
train_steps
:
140000
validation_steps
:
78
# NOTE(review): 78 ~= 5000 / 64, but eval global_batch_size above is 16 (5000 / 16 = 312) — confirm intended
steps_per_loop
:
924
# 118287 / 128
validation_interval
:
924
summary_interval
:
924
checkpoint_interval
:
924
optimizer_config
:
learning_rate
:
type
:
'
cosine'
cosine
:
initial_learning_rate
:
0.001
decay_steps
:
140000
optimizer
:
type
:
adam
adam
:
epsilon
:
0.0000001
warmup
:
type
:
'
linear'
linear
:
warmup_steps
:
2000
official/vision/beta/projects/centernet/dataloaders/centernet_input.py
0 → 100644
View file @
460890ed
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Data parser and processing for Centernet."""
from
typing
import
Tuple
import
tensorflow
as
tf
from
official.vision.beta.dataloaders
import
parser
from
official.vision.beta.dataloaders
import
utils
from
official.vision.beta.ops
import
box_ops
from
official.vision.beta.ops
import
preprocess_ops
from
official.vision.beta.projects.centernet.ops
import
box_list
from
official.vision.beta.projects.centernet.ops
import
box_list_ops
from
official.vision.beta.projects.centernet.ops
import
preprocess_ops
as
cn_prep_ops
# Per-channel normalization constants subtracted from / divided into the
# image in `CenterNetParser` (see `preprocess_ops.normalize_image` calls).
# NOTE(review): channel ordering relative to `bgr_ordering` is not shown
# here — normalization is applied after the optional BGR swap; confirm the
# constants are stored in the post-swap order.
CHANNEL_MEANS = (104.01362025, 114.03422265, 119.9165958)
CHANNEL_STDS = (73.6027665, 69.89082075, 70.9150767)
class CenterNetParser(parser.Parser):
  """Parse an image and its annotations into a dictionary of tensors."""

  def __init__(self,
               output_width: int = 512,
               output_height: int = 512,
               max_num_instances: int = 128,
               bgr_ordering: bool = True,
               aug_rand_hflip=True,
               aug_scale_min=1.0,
               aug_scale_max=1.0,
               aug_rand_saturation=False,
               aug_rand_brightness=False,
               aug_rand_hue=False,
               aug_rand_contrast=False,
               odapi_augmentation=False,
               channel_means: Tuple[float, float, float] = CHANNEL_MEANS,
               channel_stds: Tuple[float, float, float] = CHANNEL_STDS,
               dtype: str = 'float32'):
    """Initializes parameters for parsing annotations in the dataset.

    Args:
      output_width: A `Tensor` or `int` for width of output image.
      output_height: A `Tensor` or `int` for height of output image.
      max_num_instances: An `int` number of maximum number of instances
        in an image.
      bgr_ordering: `bool`, if set will change the channel ordering to be in
        the [blue, green, red] order.
      aug_rand_hflip: `bool`, if True, augment training with random horizontal
        flip.
      aug_scale_min: `float`, the minimum scale applied to `output_size` for
        data augmentation during training.
      aug_scale_max: `float`, the maximum scale applied to `output_size` for
        data augmentation during training.
      aug_rand_saturation: `bool`, if True, augment training with random
        saturation.
      aug_rand_brightness: `bool`, if True, augment training with random
        brightness.
      aug_rand_hue: `bool`, if True, augment training with random hue.
      aug_rand_contrast: `bool`, if True, augment training with random
        contrast.
      odapi_augmentation: `bool`, if True, use OD API preprocessing.
      channel_means: A tuple of floats, denoting the mean of each channel
        which will be subtracted from it.
      channel_stds: A tuple of floats, denoting the standard deviation of each
        channel. Each channel will be divided by its standard deviation value.
      dtype: `str`, data type. One of {`bfloat16`, `float32`, `float16`}.

    Raises:
      Exception: if datatype is not supported.
    """
    self._output_width = output_width
    self._output_height = output_height
    self._max_num_instances = max_num_instances
    self._bgr_ordering = bgr_ordering
    self._channel_means = channel_means
    self._channel_stds = channel_stds

    # Map the string dtype name onto the tf dtype used for the final image
    # cast at the end of both parse paths.
    if dtype == 'float16':
      self._dtype = tf.float16
    elif dtype == 'bfloat16':
      self._dtype = tf.bfloat16
    elif dtype == 'float32':
      self._dtype = tf.float32
    else:
      raise Exception(
          'Unsupported datatype used in parser only '
          '{float16, bfloat16, or float32}')

    # Data augmentation.
    self._aug_rand_hflip = aug_rand_hflip
    self._aug_scale_min = aug_scale_min
    self._aug_scale_max = aug_scale_max
    self._aug_rand_saturation = aug_rand_saturation
    self._aug_rand_brightness = aug_rand_brightness
    self._aug_rand_hue = aug_rand_hue
    self._aug_rand_contrast = aug_rand_contrast
    self._odapi_augmentation = odapi_augmentation

  def _build_label(self, boxes, classes, image_info, unpad_image_shape, data):
    """Packs processed boxes/classes and COCO groundtruths into a label dict.

    Args:
      boxes: groundtruth boxes, already transformed to the output image frame.
      classes: groundtruth class ids aligned with `boxes`.
      image_info: tensor describing the resize transform (see parse methods).
      unpad_image_shape: shape of the image before padding.
      data: the decoded tensor dictionary from TfExampleDecoder.

    Returns:
      A dict with fixed-size `boxes`/`classes` (padded with -1), `image_info`,
      `unpad_image_shapes`, and a `groundtruths` sub-dict for COCO evaluation.
    """
    # Sets up groundtruth data for evaluation.
    groundtruths = {
        'source_id': data['source_id'],
        'height': data['height'],
        'width': data['width'],
        'num_detections': tf.shape(data['groundtruth_classes'])[0],
        'boxes': box_ops.denormalize_boxes(
            data['groundtruth_boxes'],
            tf.shape(input=data['image'])[0:2]),
        'classes': data['groundtruth_classes'],
        'areas': data['groundtruth_area'],
        'is_crowds': tf.cast(data['groundtruth_is_crowd'], tf.int32),
    }
    groundtruths['source_id'] = utils.process_source_id(
        groundtruths['source_id'])
    groundtruths = utils.pad_groundtruths_to_fixed_size(
        groundtruths, self._max_num_instances)

    # Clip or pad (with -1) to `max_num_instances` so batches have a static
    # shape.
    labels = {
        'boxes': preprocess_ops.clip_or_pad_to_fixed_size(
            boxes, self._max_num_instances, -1),
        'classes': preprocess_ops.clip_or_pad_to_fixed_size(
            classes, self._max_num_instances, -1),
        'image_info': image_info,
        'unpad_image_shapes': unpad_image_shape,
        'groundtruths': groundtruths
    }
    return labels

  def _parse_train_data(self, data):
    """Generates images and labels that are usable for model training.

    We use random flip, random scaling (between 0.6 to 1.3), cropping,
    and color jittering as data augmentation.

    Args:
      data: the decoded tensor dictionary from TfExampleDecoder.

    Returns:
      images: the image tensor.
      labels: a dict of Tensors that contains labels.
    """
    image = tf.cast(data['image'], dtype=tf.float32)
    boxes = data['groundtruth_boxes']
    classes = data['groundtruth_classes']
    image_shape = tf.shape(input=image)[0:2]

    if self._aug_rand_hflip:
      image, boxes, _ = preprocess_ops.random_horizontal_flip(image, boxes)

    # Image augmentation: either the native tf.image pipeline or the OD API
    # style pipeline, selected by `odapi_augmentation`.
    if not self._odapi_augmentation:
      # Color and lighting jittering
      if self._aug_rand_hue:
        image = tf.image.random_hue(
            image=image, max_delta=.02)
      if self._aug_rand_contrast:
        image = tf.image.random_contrast(
            image=image, lower=0.8, upper=1.25)
      if self._aug_rand_saturation:
        image = tf.image.random_saturation(
            image=image, lower=0.8, upper=1.25)
      if self._aug_rand_brightness:
        image = tf.image.random_brightness(
            image=image, max_delta=.2)
      # Jitter can push values outside the valid pixel range; clamp back.
      image = tf.clip_by_value(
          image, clip_value_min=0.0, clip_value_max=255.0)

      # Converts boxes from normalized coordinates to pixel coordinates.
      boxes = box_ops.denormalize_boxes(boxes, image_shape)

      # Resizes and crops image.
      image, image_info = preprocess_ops.resize_and_crop_image(
          image,
          [self._output_height, self._output_width],
          padded_size=[self._output_height, self._output_width],
          aug_scale_min=self._aug_scale_min,
          aug_scale_max=self._aug_scale_max)

      unpad_image_shape = tf.cast(tf.shape(image), tf.float32)

      # Resizes and crops boxes. Per usage below, image_info row 1 is the
      # output size, row 2 the scale and row 3 the offset of the transform.
      image_scale = image_info[2, :]
      offset = image_info[3, :]
      boxes = preprocess_ops.resize_and_crop_boxes(
          boxes, image_scale, image_info[1, :], offset)
    else:
      # Color and lighting jittering
      if self._aug_rand_hue:
        image = cn_prep_ops.random_adjust_hue(
            image=image, max_delta=.02)
      if self._aug_rand_contrast:
        image = cn_prep_ops.random_adjust_contrast(
            image=image, min_delta=0.8, max_delta=1.25)
      if self._aug_rand_saturation:
        image = cn_prep_ops.random_adjust_saturation(
            image=image, min_delta=0.8, max_delta=1.25)
      if self._aug_rand_brightness:
        image = cn_prep_ops.random_adjust_brightness(
            image=image, max_delta=.2)

      sc_image, sc_boxes, classes = cn_prep_ops.random_square_crop_by_scale(
          image=image,
          boxes=boxes,
          labels=classes,
          scale_min=self._aug_scale_min,
          scale_max=self._aug_scale_max)

      image, unpad_image_shape = cn_prep_ops.resize_to_range(
          image=sc_image,
          min_dimension=self._output_width,
          max_dimension=self._output_width,
          pad_to_max_dimension=True)
      preprocessed_shape = tf.cast(tf.shape(image), tf.float32)
      unpad_image_shape = tf.cast(unpad_image_shape, tf.float32)

      # Window of the unpadded content inside the padded image, used to
      # re-express the cropped boxes in the padded-image frame.
      im_box = tf.stack([
          0.0,
          0.0,
          preprocessed_shape[0] / unpad_image_shape[0],
          preprocessed_shape[1] / unpad_image_shape[1]
      ])
      realigned_bboxes = box_list_ops.change_coordinate_frame(
          boxlist=box_list.BoxList(sc_boxes),
          window=im_box)

      valid_boxes = box_list_ops.assert_or_prune_invalid_boxes(
          realigned_bboxes.get())

      boxes = box_list_ops.to_absolute_coordinates(
          boxlist=box_list.BoxList(valid_boxes),
          height=self._output_height,
          width=self._output_width).get()

      # Assemble image_info with the same row layout as the non-ODAPI branch:
      # [original shape, output shape, scale, offset].
      image_info = tf.stack([
          tf.cast(image_shape, dtype=tf.float32),
          tf.constant([self._output_height, self._output_width],
                      dtype=tf.float32),
          tf.cast(tf.shape(sc_image)[0:2] / image_shape, dtype=tf.float32),
          tf.constant([0., 0.])
      ])

    # Filters out ground truth boxes that are all zeros.
    indices = box_ops.get_non_empty_box_indices(boxes)
    boxes = tf.gather(boxes, indices)
    classes = tf.gather(classes, indices)

    labels = self._build_label(
        unpad_image_shape=unpad_image_shape,
        boxes=boxes,
        classes=classes,
        image_info=image_info,
        data=data)

    if self._bgr_ordering:
      red, green, blue = tf.unstack(image, num=3, axis=2)
      image = tf.stack([blue, green, red], axis=2)

    image = preprocess_ops.normalize_image(
        image=image,
        offset=self._channel_means,
        scale=self._channel_stds)

    image = tf.cast(image, self._dtype)

    return image, labels

  def _parse_eval_data(self, data):
    """Generates images and labels that are usable for model evaluation.

    Evaluation uses a deterministic resize (no scale jitter, no color
    augmentation).

    Args:
      data: the decoded tensor dictionary from TfExampleDecoder.

    Returns:
      images: the image tensor.
      labels: a dict of Tensors that contains labels.
    """
    image = tf.cast(data['image'], dtype=tf.float32)
    boxes = data['groundtruth_boxes']
    classes = data['groundtruth_classes']
    image_shape = tf.shape(input=image)[0:2]

    # Converts boxes from normalized coordinates to pixel coordinates.
    boxes = box_ops.denormalize_boxes(boxes, image_shape)

    # Resizes and crops image.
    image, image_info = preprocess_ops.resize_and_crop_image(
        image,
        [self._output_height, self._output_width],
        padded_size=[self._output_height, self._output_width],
        aug_scale_min=1.0,
        aug_scale_max=1.0)

    unpad_image_shape = tf.cast(tf.shape(image), tf.float32)

    # Resizes and crops boxes (row layout of image_info as in training).
    image_scale = image_info[2, :]
    offset = image_info[3, :]
    boxes = preprocess_ops.resize_and_crop_boxes(
        boxes, image_scale, image_info[1, :], offset)

    # Filters out ground truth boxes that are all zeros.
    indices = box_ops.get_non_empty_box_indices(boxes)
    boxes = tf.gather(boxes, indices)
    classes = tf.gather(classes, indices)

    labels = self._build_label(
        unpad_image_shape=unpad_image_shape,
        boxes=boxes,
        classes=classes,
        image_info=image_info,
        data=data)

    if self._bgr_ordering:
      red, green, blue = tf.unstack(image, num=3, axis=2)
      image = tf.stack([blue, green, red], axis=2)

    image = preprocess_ops.normalize_image(
        image=image,
        offset=self._channel_means,
        scale=self._channel_stds)

    image = tf.cast(image, self._dtype)

    return image, labels
official/vision/beta/projects/centernet/losses/centernet_losses.py
0 → 100644
View file @
460890ed
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Losses for centernet model."""
import
tensorflow
as
tf
class PenaltyReducedLogisticFocalLoss(object):
  """Penalty-reduced pixelwise logistic regression with focal loss.

  Implements Equation (1) of the Objects as Points paper
  (https://arxiv.org/abs/1904.07850). Although the loss is defined per-pixel
  in the output space, this class assumes that each pixel is an anchor to be
  compatible with the base class.
  """

  def __init__(self, alpha=2.0, beta=4.0, sigmoid_clip_value=1e-4):
    """Constructor.

    Args:
      alpha: Focussing parameter of the focal loss. Increasing this will
        decrease the loss contribution of the well classified examples.
      beta: The local penalty reduction factor. Increasing this will decrease
        the contribution of loss due to negative pixels near the keypoint.
      sigmoid_clip_value: The sigmoid operation used internally will be
        clipped between [sigmoid_clip_value, 1 - sigmoid_clip_value).
    """
    super().__init__()
    self._alpha = alpha
    self._beta = beta
    self._sigmoid_clip_value = sigmoid_clip_value

  def __call__(self, prediction_tensor, target_tensor, weights=1.0):
    """Compute loss function.

    In all input tensors, `num_anchors` is the total number of pixels in the
    the output space.

    Args:
      prediction_tensor: A float tensor of shape [batch_size, num_anchors,
        num_classes] representing the predicted unscaled logits for each
        class. The function will compute sigmoid on this tensor internally.
      target_tensor: A float tensor of shape [batch_size, num_anchors,
        num_classes] representing a tensor with the 'splatted' keypoints,
        possibly using a gaussian kernel. This function assumes that
        the target is bounded between [0, 1].
      weights: a float tensor of shape, either [batch_size, num_anchors,
        num_classes] or [batch_size, num_anchors, 1]. If the shape is
        [batch_size, num_anchors, 1], all the classes are equally weighted.

    Returns:
      loss: a float tensor of shape [batch_size, num_anchors, num_classes]
        representing the value of the loss function.
    """
    with tf.name_scope('prlf_loss'):
      # A pixel is a "positive" exactly where the target heatmap equals 1.
      is_peak = tf.math.equal(target_tensor, 1.0)

      # Clipped sigmoid keeps the log() terms below away from log(0).
      probs = tf.clip_by_value(
          tf.sigmoid(prediction_tensor),
          self._sigmoid_clip_value,
          1 - self._sigmoid_clip_value)

      # Focal term for positives: (1 - p)^alpha * log(p).
      peak_loss = (tf.math.pow((1 - probs), self._alpha) *
                   tf.math.log(probs))
      # Penalty-reduced term for negatives:
      # (1 - t)^beta * p^alpha * log(1 - p).
      background_loss = (tf.math.pow((1 - target_tensor), self._beta) *
                         tf.math.pow(probs, self._alpha) *
                         tf.math.log(1 - probs))

      per_pixel_loss = -tf.where(is_peak, peak_loss, background_loss)
      return per_pixel_loss * weights
class L1LocalizationLoss(object):
  """L1 loss or absolute difference."""

  def __call__(self, prediction_tensor, target_tensor, weights=1.0):
    """Compute loss function.

    When used in a per-pixel manner, each pixel should be given as an anchor.

    Args:
      prediction_tensor: A float tensor of shape [batch_size, num_anchors]
        representing the (encoded) predicted locations of objects.
      target_tensor: A float tensor of shape [batch_size, num_anchors]
        representing the regression targets.
      weights: a float tensor of shape [batch_size, num_anchors].

    Returns:
      loss: a float tensor of shape [batch_size, num_anchors] tensor
        representing the value of the loss function.
    """
    with tf.name_scope('l1l_loss'):
      # Unreduced so callers can apply their own normalization downstream.
      per_anchor_loss = tf.compat.v1.losses.absolute_difference(
          labels=target_tensor,
          predictions=prediction_tensor,
          weights=weights,
          reduction=tf.losses.Reduction.NONE)
      return per_anchor_loss
official/vision/beta/projects/centernet/losses/centernet_losses_test.py
0 → 100644
View file @
460890ed
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for losses of centernet model."""
import
numpy
as
np
import
tensorflow
as
tf
from
official.vision.beta.projects.centernet.losses
import
centernet_losses
# Natural-log constants reused by the expected-value arrays in the tests.
LOG_2 = np.log(2)
LOG_3 = np.log(3)
class L1LocalizationLossTest(tf.test.TestCase):
  """Tests for the element-wise L1 localization loss."""

  def test_returns_correct_loss(self):
    loss_fn = centernet_losses.L1LocalizationLoss()
    predictions = [[0.1, 0.2],
                   [0.7, 0.5]]
    targets = [[0.9, 1.0],
               [0.1, 0.4]]
    # The zero weight must mask out the corresponding element entirely.
    weights = [[1.0, 0.0],
               [1.0, 1.0]]
    computed_value = loss_fn(predictions, targets, weights=weights)
    self.assertAllClose(
        computed_value,
        [[0.8, 0.0],
         [0.6, 0.1]],
        rtol=1e-6)
class PenaltyReducedLogisticFocalLossTest(tf.test.TestCase):
  """Testing loss function."""

  def setUp(self):
    """Builds the shared prediction/target fixtures.

    Fixtures are created in `setUp` (the unittest idiom) instead of an
    overridden `__init__`: TestCase subclasses should not override the
    constructor, and `setUp` gives each test a fresh fixture.
    """
    super().setUp()
    self._prediction = np.array([
        # First batch
        [[1 / 2, 1 / 4, 3 / 4],
         [3 / 4, 1 / 3, 1 / 3]],
        # Second Batch
        [[0.0, 1.0, 1 / 2],
         [3 / 4, 2 / 3, 1 / 3]]], np.float32)
    # Convert probabilities to logits; the loss applies sigmoid internally.
    # (The 0.0 and 1.0 entries intentionally map to -inf/+inf logits.)
    self._prediction = np.log(self._prediction / (1 - self._prediction))
    self._target = np.array([
        # First batch
        [[1.0, 0.91, 1.0],
         [0.36, 0.84, 1.0]],
        # Second Batch
        [[0.01, 1.0, 0.75],
         [0.96, 1.0, 1.0]]], np.float32)

  def test_returns_correct_loss(self):
    """Uniform per-anchor weights: checks the raw loss values."""

    def graph_fn(prediction, target):
      # Broadcastable [batch, anchors, 1] weights: all classes weighted 1.
      weights = tf.constant([
          [[1.0], [1.0]],
          [[1.0], [1.0]],
      ])
      loss = centernet_losses.PenaltyReducedLogisticFocalLoss(
          alpha=2.0, beta=0.5)
      computed_value = loss(prediction, target, weights=weights)
      return computed_value

    computed_value = graph_fn(self._prediction, self._target)
    # Hand-computed from the loss definition with alpha=2.0, beta=0.5.
    expected_value = np.array([
        # First batch
        [[1 / 4 * LOG_2,
          0.3 * 0.0625 * (2 * LOG_2 - LOG_3),
          1 / 16 * (2 * LOG_2 - LOG_3)],
         [0.8 * 9 / 16 * 2 * LOG_2,
          0.4 * 1 / 9 * (LOG_3 - LOG_2),
          4 / 9 * LOG_3]],
        # Second Batch
        [[0.0,
          0.0,
          1 / 2 * 1 / 4 * LOG_2],
         [0.2 * 9 / 16 * 2 * LOG_2,
          1 / 9 * (LOG_3 - LOG_2),
          4 / 9 * LOG_3]]])
    self.assertAllClose(expected_value, computed_value, rtol=1e-3, atol=1e-3)

  def test_returns_correct_loss_weighted(self):
    """Per-class weights: zero-weighted entries must contribute nothing."""

    def graph_fn(prediction, target):
      weights = tf.constant([
          [[1.0, 0.0, 1.0], [0.0, 0.0, 1.0]],
          [[1.0, 1.0, 1.0], [0.0, 0.0, 0.0]],
      ])
      loss = centernet_losses.PenaltyReducedLogisticFocalLoss(
          alpha=2.0, beta=0.5)
      computed_value = loss(prediction, target, weights=weights)
      return computed_value

    computed_value = graph_fn(self._prediction, self._target)
    # Same values as test_returns_correct_loss, masked by the weights.
    expected_value = np.array([
        # First batch
        [[1 / 4 * LOG_2,
          0.0,
          1 / 16 * (2 * LOG_2 - LOG_3)],
         [0.0,
          0.0,
          4 / 9 * LOG_3]],
        # Second Batch
        [[0.0,
          0.0,
          1 / 2 * 1 / 4 * LOG_2],
         [0.0,
          0.0,
          0.0]]])
    self.assertAllClose(expected_value, computed_value, rtol=1e-3, atol=1e-3)
# Allows running the tests directly, e.g. `python centernet_losses_test.py`.
if __name__ == '__main__':
  tf.test.main()
official/vision/beta/projects/centernet/modeling/backbones/hourglass.py
0 → 100644
View file @
460890ed
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Build Hourglass backbone."""
from
typing
import
Optional
import
tensorflow
as
tf
from
official.modeling
import
hyperparams
from
official.vision.beta.modeling.backbones
import
factory
from
official.vision.beta.modeling.backbones
import
mobilenet
from
official.vision.beta.modeling.layers
import
nn_blocks
from
official.vision.beta.projects.centernet.modeling.layers
import
cn_nn_blocks
# Per-model-id architecture specs, keyed by the Hourglass depth.
# `channel_dims_per_stage` entries are multipliers of `input_channel_dims`
# (they are scaled by it in `Hourglass.__init__`), not absolute filter counts.
HOURGLASS_SPECS = {
    10: {
        'blocks_per_stage': [1, 1],
        'channel_dims_per_stage': [2, 2]
    },
    20: {
        'blocks_per_stage': [1, 2, 2],
        'channel_dims_per_stage': [2, 2, 3]
    },
    32: {
        'blocks_per_stage': [2, 2, 2, 2],
        'channel_dims_per_stage': [2, 2, 3, 3]
    },
    52: {
        'blocks_per_stage': [2, 2, 2, 2, 2, 4],
        'channel_dims_per_stage': [2, 2, 3, 3, 3, 4]
    },
    100: {
        'blocks_per_stage': [4, 4, 4, 4, 4, 8],
        'channel_dims_per_stage': [2, 2, 3, 3, 3, 4]
    },
}
@
tf
.
keras
.
utils
.
register_keras_serializable
(
package
=
'centernet'
)
class
Hourglass
(
tf
.
keras
.
Model
):
"""CenterNet Hourglass backbone."""
def
__init__
(
self
,
model_id
:
int
,
input_channel_dims
:
int
,
input_specs
=
tf
.
keras
.
layers
.
InputSpec
(
shape
=
[
None
,
None
,
None
,
3
]),
num_hourglasses
:
int
=
1
,
initial_downsample
:
bool
=
True
,
activation
:
str
=
'relu'
,
use_sync_bn
:
bool
=
True
,
norm_momentum
=
0.1
,
norm_epsilon
=
1e-5
,
kernel_initializer
:
str
=
'VarianceScaling'
,
kernel_regularizer
:
Optional
[
tf
.
keras
.
regularizers
.
Regularizer
]
=
None
,
bias_regularizer
:
Optional
[
tf
.
keras
.
regularizers
.
Regularizer
]
=
None
,
**
kwargs
):
"""Initialize Hourglass backbone.
Args:
model_id: An `int` of the scale of Hourglass backbone model.
input_channel_dims: `int`, number of filters used to downsample the
input image.
input_specs: A `tf.keras.layers.InputSpec` of specs of the input tensor.
num_hourglasses: `int``, number of hourglass blocks in backbone. For
example, hourglass-104 has two hourglass-52 modules.
initial_downsample: `bool`, whether or not to downsample the input.
activation: A `str` name of the activation function.
use_sync_bn: If True, use synchronized batch normalization.
norm_momentum: `float`, momentum for the batch normalization layers.
norm_epsilon: `float`, epsilon for the batch normalization layers.
kernel_initializer: A `str` for kernel initializer of conv layers.
kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
Conv2D. Default to None.
bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D.
Default to None.
**kwargs: Additional keyword arguments to be passed.
"""
self
.
_input_channel_dims
=
input_channel_dims
self
.
_model_id
=
model_id
self
.
_num_hourglasses
=
num_hourglasses
self
.
_initial_downsample
=
initial_downsample
self
.
_activation
=
activation
self
.
_kernel_initializer
=
kernel_initializer
self
.
_kernel_regularizer
=
kernel_regularizer
self
.
_bias_regularizer
=
bias_regularizer
self
.
_use_sync_bn
=
use_sync_bn
self
.
_norm_momentum
=
norm_momentum
self
.
_norm_epsilon
=
norm_epsilon
specs
=
HOURGLASS_SPECS
[
model_id
]
self
.
_blocks_per_stage
=
specs
[
'blocks_per_stage'
]
self
.
_channel_dims_per_stage
=
[
item
*
self
.
_input_channel_dims
for
item
in
specs
[
'channel_dims_per_stage'
]]
inputs
=
tf
.
keras
.
layers
.
Input
(
shape
=
input_specs
.
shape
[
1
:])
inp_filters
=
self
.
_channel_dims_per_stage
[
0
]
# Downsample the input
if
initial_downsample
:
prelayer_kernel_size
=
7
prelayer_strides
=
2
else
:
prelayer_kernel_size
=
3
prelayer_strides
=
1
x_downsampled
=
mobilenet
.
Conv2DBNBlock
(
filters
=
self
.
_input_channel_dims
,
kernel_size
=
prelayer_kernel_size
,
strides
=
prelayer_strides
,
use_explicit_padding
=
True
,
activation
=
self
.
_activation
,
bias_regularizer
=
self
.
_bias_regularizer
,
kernel_initializer
=
self
.
_kernel_initializer
,
kernel_regularizer
=
self
.
_kernel_regularizer
,
use_sync_bn
=
self
.
_use_sync_bn
,
norm_momentum
=
self
.
_norm_momentum
,
norm_epsilon
=
self
.
_norm_epsilon
)(
inputs
)
x_downsampled
=
nn_blocks
.
ResidualBlock
(
filters
=
inp_filters
,
use_projection
=
True
,
use_explicit_padding
=
True
,
strides
=
prelayer_strides
,
bias_regularizer
=
self
.
_bias_regularizer
,
kernel_initializer
=
self
.
_kernel_initializer
,
kernel_regularizer
=
self
.
_kernel_regularizer
,
use_sync_bn
=
self
.
_use_sync_bn
,
norm_momentum
=
self
.
_norm_momentum
,
norm_epsilon
=
self
.
_norm_epsilon
)(
x_downsampled
)
all_heatmaps
=
{}
for
i
in
range
(
num_hourglasses
):
# Create an hourglass stack
x_hg
=
cn_nn_blocks
.
HourglassBlock
(
channel_dims_per_stage
=
self
.
_channel_dims_per_stage
,
blocks_per_stage
=
self
.
_blocks_per_stage
,
)(
x_downsampled
)
x_hg
=
mobilenet
.
Conv2DBNBlock
(
filters
=
inp_filters
,
kernel_size
=
3
,
strides
=
1
,
use_explicit_padding
=
True
,
activation
=
self
.
_activation
,
bias_regularizer
=
self
.
_bias_regularizer
,
kernel_initializer
=
self
.
_kernel_initializer
,
kernel_regularizer
=
self
.
_kernel_regularizer
,
use_sync_bn
=
self
.
_use_sync_bn
,
norm_momentum
=
self
.
_norm_momentum
,
norm_epsilon
=
self
.
_norm_epsilon
)(
x_hg
)
# Given two down-sampling blocks above, the starting level is set to 2
# To make it compatible with implementation of remaining backbones, the
# output of hourglass backbones is organized as
# '2' -> the last layer of output
# '2_0' -> the first layer of output
# ......
# '2_{num_hourglasses-2}' -> the second to last layer of output
if
i
<
num_hourglasses
-
1
:
all_heatmaps
[
'2_{}'
.
format
(
i
)]
=
x_hg
else
:
all_heatmaps
[
'2'
]
=
x_hg
# Intermediate conv and residual layers between hourglasses
if
i
<
num_hourglasses
-
1
:
inter_hg_conv1
=
mobilenet
.
Conv2DBNBlock
(
filters
=
inp_filters
,
kernel_size
=
1
,
strides
=
1
,
activation
=
'identity'
,
bias_regularizer
=
self
.
_bias_regularizer
,
kernel_initializer
=
self
.
_kernel_initializer
,
kernel_regularizer
=
self
.
_kernel_regularizer
,
use_sync_bn
=
self
.
_use_sync_bn
,
norm_momentum
=
self
.
_norm_momentum
,
norm_epsilon
=
self
.
_norm_epsilon
)(
x_downsampled
)
inter_hg_conv2
=
mobilenet
.
Conv2DBNBlock
(
filters
=
inp_filters
,
kernel_size
=
1
,
strides
=
1
,
activation
=
'identity'
,
bias_regularizer
=
self
.
_bias_regularizer
,
kernel_initializer
=
self
.
_kernel_initializer
,
kernel_regularizer
=
self
.
_kernel_regularizer
,
use_sync_bn
=
self
.
_use_sync_bn
,
norm_momentum
=
self
.
_norm_momentum
,
norm_epsilon
=
self
.
_norm_epsilon
)(
x_hg
)
x_downsampled
=
tf
.
keras
.
layers
.
Add
()([
inter_hg_conv1
,
inter_hg_conv2
])
x_downsampled
=
tf
.
keras
.
layers
.
ReLU
()(
x_downsampled
)
x_downsampled
=
nn_blocks
.
ResidualBlock
(
filters
=
inp_filters
,
use_projection
=
False
,
use_explicit_padding
=
True
,
strides
=
1
,
bias_regularizer
=
self
.
_bias_regularizer
,
kernel_initializer
=
self
.
_kernel_initializer
,
kernel_regularizer
=
self
.
_kernel_regularizer
,
use_sync_bn
=
self
.
_use_sync_bn
,
norm_momentum
=
self
.
_norm_momentum
,
norm_epsilon
=
self
.
_norm_epsilon
)(
x_downsampled
)
self
.
_output_specs
=
{
l
:
all_heatmaps
[
l
].
get_shape
()
for
l
in
all_heatmaps
}
super
().
__init__
(
inputs
=
inputs
,
outputs
=
all_heatmaps
,
**
kwargs
)
def
get_config
(
self
):
config
=
{
'model_id'
:
self
.
_model_id
,
'input_channel_dims'
:
self
.
_input_channel_dims
,
'num_hourglasses'
:
self
.
_num_hourglasses
,
'initial_downsample'
:
self
.
_initial_downsample
,
'kernel_initializer'
:
self
.
_kernel_initializer
,
'kernel_regularizer'
:
self
.
_kernel_regularizer
,
'bias_regularizer'
:
self
.
_bias_regularizer
,
'use_sync_bn'
:
self
.
_use_sync_bn
,
'norm_momentum'
:
self
.
_norm_momentum
,
'norm_epsilon'
:
self
.
_norm_epsilon
}
config
.
update
(
super
(
Hourglass
,
self
).
get_config
())
return
config
@property
def num_hourglasses(self):
  # Number of stacked hourglass modules in this backbone (set at
  # construction time from the `num_hourglasses` argument).
  return self._num_hourglasses
@property
def output_specs(self):
  # A dict of {level: TensorShape} pairs describing the backbone outputs,
  # computed from the output tensors when the model graph was built.
  return self._output_specs
@factory.register_backbone_builder('hourglass')
def build_hourglass(
    input_specs: tf.keras.layers.InputSpec,
    backbone_config: hyperparams.Config,
    norm_activation_config: hyperparams.Config,
    l2_regularizer: Optional[tf.keras.regularizers.Regularizer] = None
) -> tf.keras.Model:
  """Builds Hourglass backbone from a configuration.

  Args:
    input_specs: `tf.keras.layers.InputSpec` describing the model input.
    backbone_config: backbone oneof config; `type` must be 'hourglass'.
    norm_activation_config: normalization and activation settings.
    l2_regularizer: optional kernel regularizer for the conv layers.

  Returns:
    An `Hourglass` backbone model instance.
  """
  cfg = backbone_config.get()
  backbone_type = backbone_config.type
  assert backbone_type == 'hourglass', (f'Inconsistent backbone type '
                                        f'{backbone_type}')

  return Hourglass(
      model_id=cfg.model_id,
      input_channel_dims=cfg.input_channel_dims,
      num_hourglasses=cfg.num_hourglasses,
      input_specs=input_specs,
      initial_downsample=cfg.initial_downsample,
      activation=norm_activation_config.activation,
      use_sync_bn=norm_activation_config.use_sync_bn,
      norm_momentum=norm_activation_config.norm_momentum,
      norm_epsilon=norm_activation_config.norm_epsilon,
      kernel_regularizer=l2_regularizer,
  )
official/vision/beta/projects/centernet/modeling/backbones/hourglass_test.py
0 → 100644
View file @
460890ed
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for hourglass module."""
from
absl.testing
import
parameterized
import
numpy
as
np
import
tensorflow
as
tf
from
official.vision.beta.configs
import
common
from
official.vision.beta.projects.centernet.common
import
registry_imports
# pylint: disable=unused-import
from
official.vision.beta.projects.centernet.configs
import
backbones
from
official.vision.beta.projects.centernet.modeling.backbones
import
hourglass
class HourglassTest(tf.test.TestCase, parameterized.TestCase):
  """Smoke test for the hourglass backbone builder."""

  def test_hourglass(self):
    """Builds an hourglass backbone and checks the output feature shapes."""
    model = hourglass.build_hourglass(
        input_specs=tf.keras.layers.InputSpec(shape=[None, 512, 512, 3]),
        backbone_config=backbones.Backbone(type='hourglass'),
        norm_activation_config=common.NormActivation(use_sync_bn=True))

    dummy_images = np.zeros((2, 512, 512, 3), dtype=np.float32)
    endpoints = model(dummy_images)

    # Both hourglass outputs are at 1/4 input resolution with 256 channels.
    expected_shape = (2, 128, 128, 256)
    for level in ('2_0', '2'):
      self.assertEqual(endpoints[level].shape, expected_shape)
if __name__ == '__main__':
  # Delegates to the TensorFlow test runner when invoked as a script.
  tf.test.main()
official/vision/beta/projects/centernet/modeling/centernet_model.py
0 → 100644
View file @
460890ed
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Centernet detection models."""
from
typing
import
Mapping
,
Union
,
Any
import
tensorflow
as
tf
class CenterNetModel(tf.keras.Model):
  """CenterNet detection model: backbone + head + detection generator."""

  def __init__(self,
               backbone: tf.keras.Model,
               head: tf.keras.Model,
               detection_generator: tf.keras.layers.Layer,
               **kwargs):
    """CenterNet Model.

    Args:
      backbone: a backbone network.
      head: a projection head for centernet.
      detection_generator: a detection generator for centernet.
      **kwargs: keyword arguments to be passed.
    """
    super().__init__(**kwargs)
    # Sub-networks composed by this model.
    self._backbone = backbone
    self._head = head
    self._detection_generator = detection_generator

  def call(self,
           inputs: tf.Tensor,
           training: bool = None,
           **kwargs) -> Mapping[str, tf.Tensor]:
    """Runs backbone and head; decodes detections when not training."""
    backbone_features = self._backbone(inputs)
    head_outputs = self._head(backbone_features)
    model_outputs = {'raw_output': head_outputs}
    if not training:
      # Post-process the raw head outputs into final detections at inference.
      model_outputs.update(self._detection_generator(head_outputs))
    return model_outputs

  @property
  def checkpoint_items(
      self) -> Mapping[str, Union[tf.keras.Model, tf.keras.layers.Layer]]:
    """Returns a dictionary of items to be additionally checkpointed."""
    return {'backbone': self.backbone, 'head': self.head}

  @property
  def backbone(self):
    # The feature-extraction network.
    return self._backbone

  @property
  def detection_generator(self):
    # The inference-time post-processing layer.
    return self._detection_generator

  @property
  def head(self):
    # The prediction head applied to backbone features.
    return self._head

  def get_config(self) -> Mapping[str, Any]:
    """Returns the components needed to re-create this model."""
    return {
        'backbone': self._backbone,
        'head': self._head,
        'detection_generator': self._detection_generator,
    }

  @classmethod
  def from_config(cls, config, custom_objects=None):
    """Re-creates the model from a `get_config` dictionary."""
    return cls(**config)
official/vision/beta/projects/centernet/modeling/centernet_model_test.py
0 → 100644
View file @
460890ed
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Test for centernet detection model."""
from
absl.testing
import
parameterized
import
tensorflow
as
tf
from
official.vision.beta.configs
import
common
from
official.vision.beta.projects.centernet.configs
import
backbones
from
official.vision.beta.projects.centernet.modeling
import
centernet_model
from
official.vision.beta.projects.centernet.modeling.backbones
import
hourglass
from
official.vision.beta.projects.centernet.modeling.heads
import
centernet_head
from
official.vision.beta.projects.centernet.modeling.layers
import
detection_generator
class CenterNetTest(parameterized.TestCase, tf.test.TestCase):
  """End-to-end construction test for the CenterNet detection model."""

  def testBuildCenterNet(self):
    backbone = hourglass.build_hourglass(
        input_specs=tf.keras.layers.InputSpec(shape=[None, 512, 512, 3]),
        backbone_config=backbones.Backbone(type='hourglass'),
        norm_activation_config=common.NormActivation(use_sync_bn=True))

    # Desired output channels per prediction task.
    head_channels = {
        'ct_heatmaps': 90,
        'ct_offset': 2,
        'ct_size': 2,
    }
    levels = ['2_0', '2']
    head = centernet_head.CenterNetHead(
        task_outputs=head_channels,
        input_specs=backbone.output_specs,
        input_levels=levels)
    generator = detection_generator.CenterNetDetectionGenerator()

    model = centernet_model.CenterNetModel(
        backbone=backbone, head=head, detection_generator=generator)
    outputs = model(tf.zeros((5, 512, 512, 3)))
    raw = outputs['raw_output']

    # One entry per task; each task has one prediction per input level,
    # produced at 1/4 of the input resolution.
    self.assertLen(raw, 3)
    for task, channels in head_channels.items():
      self.assertLen(raw[task], 2)
      self.assertEqual(raw[task][0].shape, (5, 128, 128, channels))
if __name__ == '__main__':
  # Delegates to the TensorFlow test runner when invoked as a script.
  tf.test.main()
official/vision/beta/projects/centernet/modeling/heads/centernet_head.py
0 → 100644
View file @
460890ed
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains the definitions of head for CenterNet."""
from
typing
import
Any
,
Mapping
,
Dict
,
List
import
tensorflow
as
tf
from
official.vision.beta.projects.centernet.modeling.layers
import
cn_nn_blocks
@tf.keras.utils.register_keras_serializable(package='centernet')
class CenterNetHead(tf.keras.Model):
  """CenterNet Head."""

  def __init__(self,
               input_specs: Dict[str, tf.TensorShape],
               task_outputs: Mapping[str, int],
               input_levels: List[str],
               heatmap_bias: float = -2.19,
               **kwargs):
    """CenterNet Head Initialization.

    Args:
      input_specs: A `dict` of input specifications, keyed by backbone
        output level.
      task_outputs: A `dict`, with key-value pairs denoting the names of the
        outputs and the desired channel depth of each output.
      input_levels: list of str representing the level used as input to the
        CenternetHead from the backbone. For example, ['2_0', '2'] should be
        set for hourglass-104 has two hourglass-52 modules, since the output
        of hourglass backbones is organized as:
        '2' -> the last layer of output
        '2_0' -> the first layer of output
        ......
        '2_{num_hourglasses-2}' -> the second to last layer of output.
      heatmap_bias: `float`, constant value to initialize the convolution
        layer bias vector if it is responsible for generating a heatmap (not
        for regressed predictions).
      **kwargs: Additional keyword arguments to be passed.
    """
    assert input_levels, f'Please specify input levels: {input_levels}'
    self._input_specs = input_specs
    self._task_outputs = task_outputs
    self._input_levels = input_levels
    self._heatmap_bias = heatmap_bias
    self._num_inputs = len(input_levels)

    # One functional-model input per backbone endpoint, so the head can be
    # called with the backbone's full output dictionary.
    inputs = {
        level: tf.keras.layers.Input(shape=self._input_specs[level][1:])
        for level in sorted(self._input_specs.keys())
    }

    outputs = {}
    for key in self._task_outputs:
      # pylint: disable=g-complex-comprehension
      # FIX: build one prediction conv per *requested* level. Previously this
      # iterated over all `input_specs` keys, silently ignoring
      # `input_levels` whenever the backbone exposed extra endpoints and
      # leaving `_output_specs` (sized by `_num_inputs`) inconsistent with
      # the built outputs. Sorting preserves the original deterministic
      # ordering for the common case where the two key sets coincide.
      outputs[key] = [
          cn_nn_blocks.CenterNetHeadConv(
              output_filters=self._task_outputs[key],
              # Heatmap convs get the prior bias; regression convs start at 0.
              bias_init=self._heatmap_bias if 'heatmaps' in key else 0,
              name=key + str(i),
          )(inputs[i])
          for i in sorted(self._input_levels)
      ]

    self._output_specs = {
        key: [value[i].get_shape() for i in range(self._num_inputs)]
        for key, value in outputs.items()
    }

    super().__init__(inputs=inputs, outputs=outputs,
                     name='CenterNetHead', **kwargs)

  def get_config(self) -> Mapping[str, Any]:
    """Returns constructor arguments so the head can be re-serialized."""
    config = {
        # FIX: the key was 'input_spec', which does not match the __init__
        # parameter name, so `from_config(cls.get_config())` raised
        # TypeError. Use the actual parameter name.
        'input_specs': self._input_specs,
        'task_outputs': self._task_outputs,
        'heatmap_bias': self._heatmap_bias,
        'input_levels': self._input_levels,
    }
    base_config = super(CenterNetHead, self).get_config()
    return dict(list(base_config.items()) + list(config.items()))

  @classmethod
  def from_config(cls, config, custom_objects=None):
    """Re-creates the head from a `get_config` dictionary."""
    return cls(**config)

  @property
  def output_specs(self) -> Mapping[str, tf.TensorShape]:
    """A dict of {level: TensorShape} pairs for the model output."""
    return self._output_specs
official/vision/beta/projects/centernet/modeling/heads/centernet_head_test.py
0 → 100644
View file @
460890ed
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for Centernet Head."""
from
absl.testing
import
parameterized
import
numpy
as
np
import
tensorflow
as
tf
from
official.vision.beta.projects.centernet.modeling.heads
import
centernet_head
class CenterNetHeadTest(tf.test.TestCase, parameterized.TestCase):
  """Tests output shapes and bias initialization of the CenterNet head."""

  def test_decoder_shape(self):
    task_channels = {
        'ct_heatmaps': 90,
        'ct_offset': 2,
        'ct_size': 2,
    }
    feature_shape = (None, 128, 128, 256)
    input_specs = {
        level: tf.keras.layers.InputSpec(shape=feature_shape).shape
        for level in ('2_0', '2')
    }
    head = centernet_head.CenterNetHead(
        task_outputs=task_channels,
        input_specs=input_specs,
        input_levels=['2', '2_0'])

    self.assertEqual(head.get_config()['heatmap_bias'], -2.19)

    # Output shape tests: one prediction per task per level.
    features = [
        np.zeros((2, 128, 128, 256), dtype=np.float32),
        np.zeros((2, 128, 128, 256), dtype=np.float32),
    ]
    outputs = head(features)
    self.assertLen(outputs, 3)
    for task, channels in task_channels.items():
      self.assertEqual(outputs[task][0].shape, (2, 128, 128, channels))

    # Weight initialization tests: heatmap convs start at the prior bias,
    # regression convs start at zero.
    hm_bias_vector = np.asarray(head.layers[2].weights[-1])
    off_bias_vector = np.asarray(head.layers[4].weights[-1])
    size_bias_vector = np.asarray(head.layers[6].weights[-1])
    self.assertArrayNear(
        hm_bias_vector, np.repeat(-2.19, repeats=90), err=1.00e-6)
    self.assertArrayNear(
        off_bias_vector, np.repeat(0, repeats=2), err=1.00e-6)
    self.assertArrayNear(
        size_bias_vector, np.repeat(0, repeats=2), err=1.00e-6)
if __name__ == '__main__':
  # Delegates to the TensorFlow test runner when invoked as a script.
  tf.test.main()
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment