Commit 460890ed authored by A. Unique TensorFlower's avatar A. Unique TensorFlower

Internal change

PiperOrigin-RevId: 406888835
parent f2bc366e
@@ -41,6 +41,7 @@ class Conv2DBNBlock(tf.keras.layers.Layer):
kernel_size: int = 3,
strides: int = 1,
use_bias: bool = False,
use_explicit_padding: bool = False,
activation: str = 'relu6',
kernel_initializer: str = 'VarianceScaling',
kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
@@ -60,6 +61,9 @@ class Conv2DBNBlock(tf.keras.layers.Layer):
strides: An `int` of block stride. If greater than 1, this block will
ultimately downsample the input.
use_bias: If True, use bias in the convolution layer.
use_explicit_padding: Use 'VALID' padding for convolutions, but prepad
inputs so that the output dimensions are the same as if 'SAME' padding
were used.
activation: A `str` name of the activation function.
kernel_initializer: A `str` for kernel initializer of convolutional
layers.
@@ -79,6 +83,7 @@ class Conv2DBNBlock(tf.keras.layers.Layer):
self._strides = strides
self._activation = activation
self._use_bias = use_bias
self._use_explicit_padding = use_explicit_padding
self._kernel_initializer = kernel_initializer
self._kernel_regularizer = kernel_regularizer
self._bias_regularizer = bias_regularizer
@@ -87,6 +92,10 @@ class Conv2DBNBlock(tf.keras.layers.Layer):
self._norm_momentum = norm_momentum
self._norm_epsilon = norm_epsilon
if use_explicit_padding and kernel_size > 1:
self._padding = 'valid'
else:
self._padding = 'same'
if use_sync_bn:
self._norm = tf.keras.layers.experimental.SyncBatchNormalization
else:
@@ -102,6 +111,7 @@ class Conv2DBNBlock(tf.keras.layers.Layer):
'strides': self._strides,
'kernel_size': self._kernel_size,
'use_bias': self._use_bias,
'use_explicit_padding': self._use_explicit_padding,
'kernel_initializer': self._kernel_initializer,
'kernel_regularizer': self._kernel_regularizer,
'bias_regularizer': self._bias_regularizer,
@@ -115,11 +125,14 @@ class Conv2DBNBlock(tf.keras.layers.Layer):
return dict(list(base_config.items()) + list(config.items()))
def build(self, input_shape):
if self._use_explicit_padding and self._kernel_size > 1:
padding_size = nn_layers.get_padding_for_kernel_size(self._kernel_size)
self._pad = tf.keras.layers.ZeroPadding2D(padding_size)
self._conv0 = tf.keras.layers.Conv2D(
filters=self._filters,
kernel_size=self._kernel_size,
strides=self._strides,
padding='same',
padding=self._padding,
use_bias=self._use_bias,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
@@ -135,6 +148,8 @@ class Conv2DBNBlock(tf.keras.layers.Layer):
super(Conv2DBNBlock, self).build(input_shape)
def call(self, inputs, training=None):
if self._use_explicit_padding and self._kernel_size > 1:
inputs = self._pad(inputs)
x = self._conv0(inputs)
if self._use_normalization:
x = self._norm0(x)
......
@@ -69,6 +69,7 @@ class ResidualBlock(tf.keras.layers.Layer):
kernel_regularizer=None,
bias_regularizer=None,
activation='relu',
use_explicit_padding: bool = False,
use_sync_bn=False,
norm_momentum=0.99,
norm_epsilon=0.001,
@@ -97,6 +98,9 @@ class ResidualBlock(tf.keras.layers.Layer):
bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2d.
Default to None.
activation: A `str` name of the activation function.
use_explicit_padding: Use 'VALID' padding for convolutions, but prepad
inputs so that the output dimensions are the same as if 'SAME' padding
were used.
use_sync_bn: A `bool`. If True, use synchronized batch normalization.
norm_momentum: A `float` of normalization momentum for the moving average.
norm_epsilon: A `float` added to variance to avoid dividing by zero.
@@ -111,6 +115,7 @@ class ResidualBlock(tf.keras.layers.Layer):
self._use_projection = use_projection
self._se_ratio = se_ratio
self._resnetd_shortcut = resnetd_shortcut
self._use_explicit_padding = use_explicit_padding
self._use_sync_bn = use_sync_bn
self._activation = activation
self._stochastic_depth_drop_rate = stochastic_depth_drop_rate
@@ -147,11 +152,17 @@ class ResidualBlock(tf.keras.layers.Layer):
epsilon=self._norm_epsilon,
trainable=self._bn_trainable)
conv1_padding = 'same'
# Explicit padding is added here for CenterNet.
if self._use_explicit_padding:
self._pad = tf.keras.layers.ZeroPadding2D(padding=(1, 1))
conv1_padding = 'valid'
self._conv1 = tf.keras.layers.Conv2D(
filters=self._filters,
kernel_size=3,
strides=self._strides,
padding='same',
padding=conv1_padding,
use_bias=False,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
@@ -208,6 +219,7 @@ class ResidualBlock(tf.keras.layers.Layer):
'kernel_regularizer': self._kernel_regularizer,
'bias_regularizer': self._bias_regularizer,
'activation': self._activation,
'use_explicit_padding': self._use_explicit_padding,
'use_sync_bn': self._use_sync_bn,
'norm_momentum': self._norm_momentum,
'norm_epsilon': self._norm_epsilon,
@@ -222,6 +234,8 @@ class ResidualBlock(tf.keras.layers.Layer):
shortcut = self._shortcut(shortcut)
shortcut = self._norm0(shortcut)
if self._use_explicit_padding:
inputs = self._pad(inputs)
x = self._conv1(inputs)
x = self._norm1(x)
x = self._activation_fn(x)
......
@@ -69,6 +69,17 @@ def round_filters(filters: int,
return int(new_filters)
def get_padding_for_kernel_size(kernel_size):
"""Compute padding size given kernel size."""
if kernel_size == 7:
return (3, 3)
elif kernel_size == 3:
return (1, 1)
else:
raise ValueError('Padding for kernel size {} not known.'.format(
kernel_size))
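As a sanity check (not part of this change), the padding sizes above make an explicit `ZeroPadding2D` followed by a VALID convolution preserve the spatial dimensions that SAME padding would produce. A minimal sketch; for strides greater than 1 the output shapes still match, though border values can differ because TF's SAME padding is asymmetric there:
```
import tensorflow as tf

x = tf.zeros([1, 512, 512, 3])

# SAME padding, as used when use_explicit_padding=False.
same_conv = tf.keras.layers.Conv2D(8, kernel_size=7, strides=2, padding='same')

# Explicit (3, 3) padding from get_padding_for_kernel_size(7), then VALID.
pad = tf.keras.layers.ZeroPadding2D(padding=(3, 3))
valid_conv = tf.keras.layers.Conv2D(8, kernel_size=7, strides=2, padding='valid')

assert same_conv(x).shape == valid_conv(pad(x)).shape  # both (1, 256, 256, 8)
```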
def hard_swish(x: tf.Tensor) -> tf.Tensor:
"""A Swish6/H-Swish activation function.
......
# CenterNet
[![Paper](http://img.shields.io/badge/Paper-arXiv.1904.07850-B3181B?logo=arXiv)](https://arxiv.org/abs/1904.07850)
CenterNet builds upon CornerNet, an anchor-free model for object detection.
Many other models, such as YOLO and RetinaNet, use anchor boxes, which are
predefined to be close to the aspect ratios and scales of the objects in the
training dataset. Anchor-based models do not predict the bounding boxes of
objects directly; instead, they predict location and size/shape refinements to
a predefined anchor box, and a detection generator then computes the final
confidences, positions, and sizes of the detections.
CornerNet eliminates the need for anchor boxes. A model such as RetinaNet needs
thousands of anchor boxes to cover the most common ground-truth boxes, which
adds complexity to the model, slows down training, and creates an imbalance
between positive and negative anchors. Instead, CornerNet predicts heatmaps for
the corners of each box and pools them together to obtain the final detection
boxes. CenterNet removes even more complexity by using the center instead of
the corners, so only one set of heatmaps (one heatmap per class) is needed to
predict the objects. CenterNet shows that this can be done without a
significant difference in accuracy.
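For intuition, the peak extraction that turns a center heatmap into candidate detections can be written as a small max-pooling trick. The sketch below is illustrative only, not the implementation used in this project; the pooling window corresponds to the `peak_extract_kernel_size` option in the configurations further down.
```
import tensorflow as tf

def extract_peaks(ct_heatmaps, kernel_size=3):
  """Keeps local maxima of a center heatmap and zeroes out everything else.

  ct_heatmaps: a [batch, height, width, num_classes] tensor of center scores.
  """
  local_max = tf.nn.max_pool2d(
      ct_heatmaps, ksize=kernel_size, strides=1, padding='SAME')
  is_peak = tf.cast(tf.equal(ct_heatmaps, local_max), ct_heatmaps.dtype)
  return ct_heatmaps * is_peak
```
The surviving peaks are then ranked by score, and the per-pixel offset and size predictions at each peak location yield the final boxes.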
## Environment setup
The code can be run on multiple GPUs or TPUs with different distribution
strategies. See the TensorFlow distributed training
[guide](https://www.tensorflow.org/guide/distributed_training) for an overview
of `tf.distribute`.
The code is compatible with TensorFlow 2.5+. See `requirements.txt` for all
prerequisites; you can install them with
`pip install -r ./official/requirements.txt`.
## Training
To train the model on COCO, try the following command:
```
python3 -m official.vision.beta.projects.centernet.train \
--mode=train_and_eval \
--experiment=centernet_hourglass_coco \
--model_dir={MODEL_DIR} \
--config_file={CONFIG_FILE}
```
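Individual config fields can also be overridden from the command line with the standard Model Garden `--params_override` flag (assuming the common training driver flags), for example `--params_override='task.train_data.global_batch_size=64'`.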
## Configurations
In the following table, we report the mAP measured on the `coco-val2017` set.
Backbone | Config name | mAP
:--------------- | :-----------------------------------------------| -------:
Hourglass-104 | `coco-centernet-hourglass-gpu.yaml` | 40.01
Hourglass-104 | `coco-centernet-hourglass-tpu.yaml` | 40.5
**Note:** `float16` (`bfloat16` for TPU) is used in the provided configurations.
## Cite
[Centernet](https://arxiv.org/abs/1904.07850):
```
@article{Zhou2019ObjectsAP,
title={Objects as Points},
author={Xingyi Zhou and Dequan Wang and Philipp Kr{\"a}henb{\"u}hl},
journal={ArXiv},
year={2019},
volume={abs/1904.07850}
}
```
[CornerNet](https://arxiv.org/abs/1808.01244):
```
@article{Law2019CornerNetDO,
title={CornerNet: Detecting Objects as Paired Keypoints},
author={Hei Law and J. Deng},
journal={International Journal of Computer Vision},
year={2019},
volume={128},
pages={642-656}
}
```
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""All necessary imports for registration."""
# pylint: disable=unused-import
from official.common import registry_imports
from official.vision.beta.projects.centernet.configs import centernet
from official.vision.beta.projects.centernet.modeling import centernet_model
from official.vision.beta.projects.centernet.modeling.backbones import hourglass
from official.vision.beta.projects.centernet.tasks import centernet as centernet_task
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Backbones configurations."""
import dataclasses
from official.modeling import hyperparams
from official.vision.beta.configs import backbones
@dataclasses.dataclass
class Hourglass(hyperparams.Config):
"""Hourglass config."""
model_id: int = 52
input_channel_dims: int = 128
num_hourglasses: int = 2
initial_downsample: bool = True
activation: str = 'relu'
@dataclasses.dataclass
class Backbone(backbones.Backbone):
hourglass: Hourglass = Hourglass()
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""CenterNet configuration definition."""
import dataclasses
import os
from typing import List, Optional, Tuple
from official.core import exp_factory
from official.modeling import hyperparams
from official.modeling import optimization
from official.modeling.hyperparams import config_definitions as cfg
from official.vision.beta.configs import common
from official.vision.beta.projects.centernet.configs import backbones
TfExampleDecoderLabelMap = common.TfExampleDecoderLabelMap
@dataclasses.dataclass
class TfExampleDecoder(hyperparams.Config):
regenerate_source_id: bool = False
@dataclasses.dataclass
class DataDecoder(hyperparams.OneOfConfig):
type: Optional[str] = 'simple_decoder'
simple_decoder: TfExampleDecoder = TfExampleDecoder()
label_map_decoder: TfExampleDecoderLabelMap = TfExampleDecoderLabelMap()
@dataclasses.dataclass
class Parser(hyperparams.Config):
"""Config for parser."""
bgr_ordering: bool = True
aug_rand_hflip: bool = True
aug_scale_min: float = 1.0
aug_scale_max: float = 1.0
aug_rand_saturation: bool = False
aug_rand_brightness: bool = False
aug_rand_hue: bool = False
aug_rand_contrast: bool = False
odapi_augmentation: bool = False
channel_means: Tuple[float, float, float] = dataclasses.field(
default_factory=lambda: (104.01362025, 114.03422265, 119.9165958))
channel_stds: Tuple[float, float, float] = dataclasses.field(
default_factory=lambda: (73.6027665, 69.89082075, 70.9150767))
@dataclasses.dataclass
class DataConfig(cfg.DataConfig):
"""Input config for training."""
input_path: str = ''
global_batch_size: int = 32
is_training: bool = True
dtype: str = 'float16'
decoder: DataDecoder = DataDecoder()
parser: Parser = Parser()
shuffle_buffer_size: int = 10000
file_type: str = 'tfrecord'
drop_remainder: bool = True
@dataclasses.dataclass
class DetectionLoss(hyperparams.Config):
object_center_weight: float = 1.0
offset_weight: float = 1.0
scale_weight: float = 0.1
@dataclasses.dataclass
class Losses(hyperparams.Config):
detection: DetectionLoss = DetectionLoss()
gaussian_iou: float = 0.7
class_offset: int = 1
@dataclasses.dataclass
class CenterNetHead(hyperparams.Config):
heatmap_bias: float = -2.19
input_levels: List[str] = dataclasses.field(
default_factory=lambda: ['2_0', '2'])
@dataclasses.dataclass
class CenterNetDetectionGenerator(hyperparams.Config):
max_detections: int = 100
peak_error: float = 1e-6
peak_extract_kernel_size: int = 3
class_offset: int = 1
use_nms: bool = False
nms_pre_thresh: float = 0.1
nms_thresh: float = 0.4
use_reduction_sum: bool = True
@dataclasses.dataclass
class CenterNetModel(hyperparams.Config):
"""Config for centernet model."""
num_classes: int = 90
max_num_instances: int = 128
input_size: List[int] = dataclasses.field(default_factory=list)
backbone: backbones.Backbone = backbones.Backbone(
type='hourglass', hourglass=backbones.Hourglass(model_id=52))
head: CenterNetHead = CenterNetHead()
# pylint: disable=line-too-long
detection_generator: CenterNetDetectionGenerator = CenterNetDetectionGenerator()
norm_activation: common.NormActivation = common.NormActivation(
norm_momentum=0.1, norm_epsilon=1e-5, use_sync_bn=True)
@dataclasses.dataclass
class CenterNetDetection(hyperparams.Config):
# use_centers is currently the only option implemented.
use_centers: bool = True
@dataclasses.dataclass
class CenterNetSubTasks(hyperparams.Config):
detection: CenterNetDetection = CenterNetDetection()
@dataclasses.dataclass
class CenterNetTask(cfg.TaskConfig):
"""Config for centernet task."""
model: CenterNetModel = CenterNetModel()
train_data: DataConfig = DataConfig(is_training=True)
validation_data: DataConfig = DataConfig(is_training=False)
subtasks: CenterNetSubTasks = CenterNetSubTasks()
losses: Losses = Losses()
gradient_clip_norm: float = 10.0
per_category_metrics: bool = False
weight_decay: float = 5e-4
# Load checkpoints
init_checkpoint: Optional[str] = None
init_checkpoint_modules: str = 'all'
annotation_file: Optional[str] = None
def get_output_length_dict(self):
task_outputs = {}
if self.subtasks.detection and self.subtasks.detection.use_centers:
task_outputs.update({
'ct_heatmaps': self.model.num_classes,
'ct_offset': 2,
'ct_size': 2
})
else:
raise ValueError('Detection with center points is the only option implemented.')
return task_outputs
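A brief usage sketch (not part of this file): the dictionary returned above is what gets passed as `task_outputs` when building the CenterNet head, mapping each output name to its channel depth.
```
task_config = CenterNetTask()
print(task_config.get_output_length_dict())
# With the default num_classes=90:
# {'ct_heatmaps': 90, 'ct_offset': 2, 'ct_size': 2}
```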
COCO_INPUT_PATH_BASE = 'coco'
COCO_TRAIN_EXAMPLES = 118287
COCO_VAL_EXAMPLES = 5000
@exp_factory.register_config_factory('centernet_hourglass_coco')
def centernet_hourglass_coco() -> cfg.ExperimentConfig:
"""COCO object detection with CenterNet."""
train_batch_size = 128
eval_batch_size = 8
steps_per_epoch = COCO_TRAIN_EXAMPLES // train_batch_size
config = cfg.ExperimentConfig(
task=CenterNetTask(
annotation_file=os.path.join(COCO_INPUT_PATH_BASE,
'instances_val2017.json'),
model=CenterNetModel(),
train_data=DataConfig(
input_path=os.path.join(COCO_INPUT_PATH_BASE, 'train*'),
is_training=True,
global_batch_size=train_batch_size,
parser=Parser(),
shuffle_buffer_size=2),
validation_data=DataConfig(
input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'),
is_training=False,
global_batch_size=eval_batch_size,
shuffle_buffer_size=2),
),
trainer=cfg.TrainerConfig(
steps_per_loop=steps_per_epoch,
summary_interval=steps_per_epoch,
checkpoint_interval=steps_per_epoch,
train_steps=150 * steps_per_epoch,
validation_steps=COCO_VAL_EXAMPLES // eval_batch_size,
validation_interval=steps_per_epoch,
optimizer_config=optimization.OptimizationConfig({
'optimizer': {
'type': 'adam',
'adam': {
'epsilon': 1e-7
}
},
'learning_rate': {
'type': 'cosine',
'cosine': {
'initial_learning_rate': 0.001,
'decay_steps': 150 * steps_per_epoch
}
},
'warmup': {
'type': 'linear',
'linear': {
'warmup_steps': 2000,
}
}
})),
restrictions=[
'task.train_data.is_training != None',
'task.validation_data.is_training != None'
])
return config
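For reference, a minimal sketch of consuming the registered experiment; it assumes the CenterNet registry imports have been loaded so the factory above is registered:
```
from official.core import exp_factory
# Registers 'centernet_hourglass_coco', among others.
from official.vision.beta.projects.centernet.common import registry_imports  # pylint: disable=unused-import

config = exp_factory.get_exp_config('centernet_hourglass_coco')
config.task.train_data.global_batch_size = 64  # override a field
config.validate()  # enforces the restrictions declared above
```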
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for centernet."""
from absl.testing import parameterized
import tensorflow as tf
from official.core import config_definitions as cfg
from official.core import exp_factory
from official.vision.beta.projects.centernet.common import registry_imports # pylint: disable=unused-import
from official.vision.beta.projects.centernet.configs import centernet as exp_cfg
class CenterNetConfigTest(tf.test.TestCase, parameterized.TestCase):
@parameterized.parameters(('centernet_hourglass_coco',))
def test_centernet_configs(self, config_name):
config = exp_factory.get_exp_config(config_name)
self.assertIsInstance(config, cfg.ExperimentConfig)
self.assertIsInstance(config.task, exp_cfg.CenterNetTask)
self.assertIsInstance(config.task.model, exp_cfg.CenterNetModel)
self.assertIsInstance(config.task.train_data, exp_cfg.DataConfig)
config.task.train_data.is_training = None
with self.assertRaises(KeyError):
config.validate()
if __name__ == '__main__':
tf.test.main()
# COCO AP 40.01% for float16 precision is achieved with the configuration below.
runtime:
distribution_strategy: 'mirrored'
mixed_precision_dtype: 'float16'
loss_scale: 'dynamic'
num_gpus: 8
task:
model:
num_classes: 90
max_num_instances: 128
input_size: [512, 512, 3]
backbone:
type: hourglass
hourglass:
model_id: 52
num_hourglasses: 2
head:
heatmap_bias: -2.19
input_levels: ['2_0', '2']
detection_generator:
max_detections: 100
peak_error: 0.000001
peak_extract_kernel_size: 3
use_nms: false
nms_pre_thresh: 0.1
nms_thresh: 0.4
class_offset: 1
norm_activation:
norm_epsilon: 0.00001
norm_momentum: 0.1
use_sync_bn: true
losses:
detection:
offset_weight: 1.0
scale_weight: 0.1
gaussian_iou: 0.7
class_offset: 1
per_category_metrics: false
weight_decay: 0.0005
gradient_clip_norm: 10.0
annotation_file: 'coco/instances_val2017.json'
init_checkpoint: '/placer/prod/scratch/home/tf-model-garden-dev/vision/centernet/extremenet_hg104_512x512_coco17/2021-10-19'
init_checkpoint_modules: 'backbone'
train_data:
input_path: 'coco/train*'
drop_remainder: true
dtype: 'float16'
global_batch_size: 64
is_training: true
parser:
aug_rand_hflip: true
aug_scale_min: 0.6
aug_scale_max: 1.3
aug_rand_saturation: true
aug_rand_brightness: true
aug_rand_hue: true
aug_rand_contrast: true
odapi_augmentation: true
validation_data:
input_path: 'coco/val*'
drop_remainder: false
dtype: 'float16'
global_batch_size: 16
is_training: false
trainer:
train_steps: 280000
validation_steps: 312 # 5000 / 16
steps_per_loop: 1848 # 118287 / 64
validation_interval: 1848
summary_interval: 1848
checkpoint_interval: 1848
optimizer_config:
learning_rate:
type: 'cosine'
cosine:
initial_learning_rate: 0.0005
decay_steps: 280000
optimizer:
type: adam
adam:
epsilon: 0.0000001
warmup:
type: 'linear'
linear:
warmup_steps: 2000
# COCO AP 40.6% for bfloat16 precision is achieved with the configuration below.
# Expected COCO AP for float32 from OD API is 41.92 +/- 0.16.
runtime:
distribution_strategy: 'tpu'
mixed_precision_dtype: 'bfloat16'
task:
model:
num_classes: 90
max_num_instances: 128
input_size: [512, 512, 3]
backbone:
type: hourglass
hourglass:
model_id: 52
num_hourglasses: 2
head:
heatmap_bias: -2.19
input_levels: ['2_0', '2']
detection_generator:
max_detections: 100
peak_error: 0.000001
peak_extract_kernel_size: 3
use_nms: false
nms_pre_thresh: 0.1
nms_thresh: 0.4
class_offset: 1
norm_activation:
norm_epsilon: 0.00001
norm_momentum: 0.1
use_sync_bn: true
losses:
detection:
offset_weight: 1.0
scale_weight: 0.1
gaussian_iou: 0.7
class_offset: 1
per_category_metrics: false
weight_decay: 0.0005
gradient_clip_norm: 10.0
annotation_file: 'coco/instances_val2017.json'
init_checkpoint: '/placer/prod/scratch/home/tf-model-garden-dev/vision/centernet/extremenet_hg104_512x512_coco17/2021-10-19'
init_checkpoint_modules: 'backbone'
train_data:
input_path: 'coco/train*'
drop_remainder: true
dtype: 'bfloat16'
global_batch_size: 128
is_training: true
parser:
aug_rand_hflip: true
aug_scale_min: 0.6
aug_scale_max: 1.3
aug_rand_saturation: true
aug_rand_brightness: true
aug_rand_hue: true
aug_rand_contrast: true
odapi_augmentation: true
validation_data:
input_path: 'coco/val*'
drop_remainder: false
dtype: 'bfloat16'
global_batch_size: 16
is_training: false
trainer:
train_steps: 140000
validation_steps: 78
steps_per_loop: 924 # 118287 / 128
validation_interval: 924
summary_interval: 924
checkpoint_interval: 924
optimizer_config:
learning_rate:
type: 'cosine'
cosine:
initial_learning_rate: 0.001
decay_steps: 140000
optimizer:
type: adam
adam:
epsilon: 0.0000001
warmup:
type: 'linear'
linear:
warmup_steps: 2000
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Data parser and processing for Centernet."""
from typing import Tuple
import tensorflow as tf
from official.vision.beta.dataloaders import parser
from official.vision.beta.dataloaders import utils
from official.vision.beta.ops import box_ops
from official.vision.beta.ops import preprocess_ops
from official.vision.beta.projects.centernet.ops import box_list
from official.vision.beta.projects.centernet.ops import box_list_ops
from official.vision.beta.projects.centernet.ops import preprocess_ops as cn_prep_ops
CHANNEL_MEANS = (104.01362025, 114.03422265, 119.9165958)
CHANNEL_STDS = (73.6027665, 69.89082075, 70.9150767)
class CenterNetParser(parser.Parser):
"""Parse an image and its annotations into a dictionary of tensors."""
def __init__(self,
output_width: int = 512,
output_height: int = 512,
max_num_instances: int = 128,
bgr_ordering: bool = True,
aug_rand_hflip=True,
aug_scale_min=1.0,
aug_scale_max=1.0,
aug_rand_saturation=False,
aug_rand_brightness=False,
aug_rand_hue=False,
aug_rand_contrast=False,
odapi_augmentation=False,
channel_means: Tuple[float, float, float] = CHANNEL_MEANS,
channel_stds: Tuple[float, float, float] = CHANNEL_STDS,
dtype: str = 'float32'):
"""Initializes parameters for parsing annotations in the dataset.
Args:
output_width: A `Tensor` or `int` for width of output image.
output_height: A `Tensor` or `int` for height of output image.
max_num_instances: An `int` for the maximum number of instances
in an image.
bgr_ordering: `bool`, if set will change the channel ordering to be in the
[blue, green, red] order.
aug_rand_hflip: `bool`, if True, augment training with random horizontal
flip.
aug_scale_min: `float`, the minimum scale applied to `output_size` for
data augmentation during training.
aug_scale_max: `float`, the maximum scale applied to `output_size` for
data augmentation during training.
aug_rand_saturation: `bool`, if True, augment training with random
saturation.
aug_rand_brightness: `bool`, if True, augment training with random
brightness.
aug_rand_hue: `bool`, if True, augment training with random hue.
aug_rand_contrast: `bool`, if True, augment training with random contrast.
odapi_augmentation: `bool`, if True, use the OD API preprocessing.
channel_means: A tuple of floats, denoting the mean of each channel
which will be subtracted from it.
channel_stds: A tuple of floats, denoting the standard deviation of each
channel. Each channel will be divided by its standard deviation value.
dtype: `str`, data type. One of {`bfloat16`, `float32`, `float16`}.
Raises:
Exception: if datatype is not supported.
"""
self._output_width = output_width
self._output_height = output_height
self._max_num_instances = max_num_instances
self._bgr_ordering = bgr_ordering
self._channel_means = channel_means
self._channel_stds = channel_stds
if dtype == 'float16':
self._dtype = tf.float16
elif dtype == 'bfloat16':
self._dtype = tf.bfloat16
elif dtype == 'float32':
self._dtype = tf.float32
else:
raise Exception(
'Unsupported datatype used in parser. Only '
'{float16, bfloat16, float32} are supported.')
# Data augmentation.
self._aug_rand_hflip = aug_rand_hflip
self._aug_scale_min = aug_scale_min
self._aug_scale_max = aug_scale_max
self._aug_rand_saturation = aug_rand_saturation
self._aug_rand_brightness = aug_rand_brightness
self._aug_rand_hue = aug_rand_hue
self._aug_rand_contrast = aug_rand_contrast
self._odapi_augmentation = odapi_augmentation
def _build_label(self,
boxes,
classes,
image_info,
unpad_image_shape,
data):
# Sets up groundtruth data for evaluation.
groundtruths = {
'source_id': data['source_id'],
'height': data['height'],
'width': data['width'],
'num_detections': tf.shape(data['groundtruth_classes'])[0],
'boxes': box_ops.denormalize_boxes(
data['groundtruth_boxes'], tf.shape(input=data['image'])[0:2]),
'classes': data['groundtruth_classes'],
'areas': data['groundtruth_area'],
'is_crowds': tf.cast(data['groundtruth_is_crowd'], tf.int32),
}
groundtruths['source_id'] = utils.process_source_id(
groundtruths['source_id'])
groundtruths = utils.pad_groundtruths_to_fixed_size(
groundtruths, self._max_num_instances)
labels = {
'boxes': preprocess_ops.clip_or_pad_to_fixed_size(
boxes, self._max_num_instances, -1),
'classes': preprocess_ops.clip_or_pad_to_fixed_size(
classes, self._max_num_instances, -1),
'image_info': image_info,
'unpad_image_shapes': unpad_image_shape,
'groundtruths': groundtruths
}
return labels
def _parse_train_data(self, data):
"""Generates images and labels that are usable for model training.
We use random flip, random scaling (between 0.6 and 1.3), cropping,
and color jittering as data augmentation.
Args:
data: the decoded tensor dictionary from TfExampleDecoder.
Returns:
images: the image tensor.
labels: a dict of Tensors that contains labels.
"""
image = tf.cast(data['image'], dtype=tf.float32)
boxes = data['groundtruth_boxes']
classes = data['groundtruth_classes']
image_shape = tf.shape(input=image)[0:2]
if self._aug_rand_hflip:
image, boxes, _ = preprocess_ops.random_horizontal_flip(image, boxes)
# Image augmentation
if not self._odapi_augmentation:
# Color and lighting jittering
if self._aug_rand_hue:
image = tf.image.random_hue(
image=image, max_delta=.02)
if self._aug_rand_contrast:
image = tf.image.random_contrast(
image=image, lower=0.8, upper=1.25)
if self._aug_rand_saturation:
image = tf.image.random_saturation(
image=image, lower=0.8, upper=1.25)
if self._aug_rand_brightness:
image = tf.image.random_brightness(
image=image, max_delta=.2)
image = tf.clip_by_value(image, clip_value_min=0.0, clip_value_max=255.0)
# Converts boxes from normalized coordinates to pixel coordinates.
boxes = box_ops.denormalize_boxes(boxes, image_shape)
# Resizes and crops image.
image, image_info = preprocess_ops.resize_and_crop_image(
image,
[self._output_height, self._output_width],
padded_size=[self._output_height, self._output_width],
aug_scale_min=self._aug_scale_min,
aug_scale_max=self._aug_scale_max)
unpad_image_shape = tf.cast(tf.shape(image), tf.float32)
# Resizes and crops boxes.
image_scale = image_info[2, :]
offset = image_info[3, :]
boxes = preprocess_ops.resize_and_crop_boxes(boxes, image_scale,
image_info[1, :], offset)
else:
# Color and lighting jittering
if self._aug_rand_hue:
image = cn_prep_ops.random_adjust_hue(
image=image, max_delta=.02)
if self._aug_rand_contrast:
image = cn_prep_ops.random_adjust_contrast(
image=image, min_delta=0.8, max_delta=1.25)
if self._aug_rand_saturation:
image = cn_prep_ops.random_adjust_saturation(
image=image, min_delta=0.8, max_delta=1.25)
if self._aug_rand_brightness:
image = cn_prep_ops.random_adjust_brightness(
image=image, max_delta=.2)
sc_image, sc_boxes, classes = cn_prep_ops.random_square_crop_by_scale(
image=image,
boxes=boxes,
labels=classes,
scale_min=self._aug_scale_min,
scale_max=self._aug_scale_max)
image, unpad_image_shape = cn_prep_ops.resize_to_range(
image=sc_image,
min_dimension=self._output_width,
max_dimension=self._output_width,
pad_to_max_dimension=True)
preprocessed_shape = tf.cast(tf.shape(image), tf.float32)
unpad_image_shape = tf.cast(unpad_image_shape, tf.float32)
im_box = tf.stack([
0.0,
0.0,
preprocessed_shape[0] / unpad_image_shape[0],
preprocessed_shape[1] / unpad_image_shape[1]
])
realigned_bboxes = box_list_ops.change_coordinate_frame(
boxlist=box_list.BoxList(sc_boxes),
window=im_box)
valid_boxes = box_list_ops.assert_or_prune_invalid_boxes(
realigned_bboxes.get())
boxes = box_list_ops.to_absolute_coordinates(
boxlist=box_list.BoxList(valid_boxes),
height=self._output_height,
width=self._output_width).get()
image_info = tf.stack([
tf.cast(image_shape, dtype=tf.float32),
tf.constant([self._output_height, self._output_width],
dtype=tf.float32),
tf.cast(tf.shape(sc_image)[0:2] / image_shape, dtype=tf.float32),
tf.constant([0., 0.])
])
# Filters out ground truth boxes that are all zeros.
indices = box_ops.get_non_empty_box_indices(boxes)
boxes = tf.gather(boxes, indices)
classes = tf.gather(classes, indices)
labels = self._build_label(
unpad_image_shape=unpad_image_shape,
boxes=boxes,
classes=classes,
image_info=image_info,
data=data)
if self._bgr_ordering:
red, green, blue = tf.unstack(image, num=3, axis=2)
image = tf.stack([blue, green, red], axis=2)
image = preprocess_ops.normalize_image(
image=image,
offset=self._channel_means,
scale=self._channel_stds)
image = tf.cast(image, self._dtype)
return image, labels
def _parse_eval_data(self, data):
"""Generates images and labels that are usable for model evaluation.
Args:
data: the decoded tensor dictionary from TfExampleDecoder.
Returns:
images: the image tensor.
labels: a dict of Tensors that contains labels.
"""
image = tf.cast(data['image'], dtype=tf.float32)
boxes = data['groundtruth_boxes']
classes = data['groundtruth_classes']
image_shape = tf.shape(input=image)[0:2]
# Converts boxes from normalized coordinates to pixel coordinates.
boxes = box_ops.denormalize_boxes(boxes, image_shape)
# Resizes and crops image.
image, image_info = preprocess_ops.resize_and_crop_image(
image,
[self._output_height, self._output_width],
padded_size=[self._output_height, self._output_width],
aug_scale_min=1.0,
aug_scale_max=1.0)
unpad_image_shape = tf.cast(tf.shape(image), tf.float32)
# Resizes and crops boxes.
image_scale = image_info[2, :]
offset = image_info[3, :]
boxes = preprocess_ops.resize_and_crop_boxes(boxes, image_scale,
image_info[1, :], offset)
# Filters out ground truth boxes that are all zeros.
indices = box_ops.get_non_empty_box_indices(boxes)
boxes = tf.gather(boxes, indices)
classes = tf.gather(classes, indices)
labels = self._build_label(
unpad_image_shape=unpad_image_shape,
boxes=boxes,
classes=classes,
image_info=image_info,
data=data)
if self._bgr_ordering:
red, green, blue = tf.unstack(image, num=3, axis=2)
image = tf.stack([blue, green, red], axis=2)
image = preprocess_ops.normalize_image(
image=image,
offset=self._channel_means,
scale=self._channel_stds)
image = tf.cast(image, self._dtype)
return image, labels
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Losses for centernet model."""
import tensorflow as tf
class PenaltyReducedLogisticFocalLoss(object):
"""Penalty-reduced pixelwise logistic regression with focal loss."""
def __init__(self, alpha=2.0, beta=4.0, sigmoid_clip_value=1e-4):
"""Constructor.
The loss is defined in Equation (1) of the Objects as Points[1] paper.
Although the loss is defined per-pixel in the output space, this class
assumes that each pixel is an anchor to be compatible with the base class.
[1]: https://arxiv.org/abs/1904.07850
Args:
alpha: Focussing parameter of the focal loss. Increasing this will
decrease the loss contribution of the well classified examples.
beta: The local penalty reduction factor. Increasing this will decrease
the contribution of loss due to negative pixels near the keypoint.
sigmoid_clip_value: The sigmoid operation used internally will be clipped
to the range [sigmoid_clip_value, 1 - sigmoid_clip_value].
"""
self._alpha = alpha
self._beta = beta
self._sigmoid_clip_value = sigmoid_clip_value
super(PenaltyReducedLogisticFocalLoss, self).__init__()
def __call__(self, prediction_tensor, target_tensor, weights=1.0):
"""Compute loss function.
In all input tensors, `num_anchors` is the total number of pixels in the
output space.
Args:
prediction_tensor: A float tensor of shape [batch_size, num_anchors,
num_classes] representing the predicted unscaled logits for each class.
The function will compute sigmoid on this tensor internally.
target_tensor: A float tensor of shape [batch_size, num_anchors,
num_classes] representing a tensor with the 'splatted' keypoints,
possibly using a gaussian kernel. This function assumes that
the target is bounded between [0, 1].
weights: a float tensor of shape either [batch_size, num_anchors,
num_classes] or [batch_size, num_anchors, 1]. If the shape is
[batch_size, num_anchors, 1], all the classes are equally weighted.
Returns:
loss: a float tensor of shape [batch_size, num_anchors, num_classes]
representing the value of the loss function.
"""
with tf.name_scope('prlf_loss'):
is_present_tensor = tf.math.equal(target_tensor, 1.0)
prediction_tensor = tf.clip_by_value(tf.sigmoid(prediction_tensor),
self._sigmoid_clip_value,
1 - self._sigmoid_clip_value)
positive_loss = (tf.math.pow((1 - prediction_tensor), self._alpha) *
tf.math.log(prediction_tensor))
negative_loss = (tf.math.pow((1 - target_tensor), self._beta) *
tf.math.pow(prediction_tensor, self._alpha) *
tf.math.log(1 - prediction_tensor))
loss = -tf.where(is_present_tensor, positive_loss, negative_loss)
return loss * weights
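For reference, a toy invocation (not part of the commit). Per pixel, the code above computes `-(1 - p)^alpha * log(p)` where the target equals 1, and `-(1 - y)^beta * p^alpha * log(1 - p)` elsewhere, with `p` the clipped sigmoid of the logits; this matches Eq. (1) of the paper up to the normalization applied by the caller.
```
loss_fn = PenaltyReducedLogisticFocalLoss(alpha=2.0, beta=4.0)
# [batch, num_pixels, num_classes]; zero logits give p = 0.5 everywhere.
logits = tf.zeros([2, 128 * 128, 90])
targets = tf.zeros([2, 128 * 128, 90])  # gaussian-splatted keypoints in [0, 1]
per_pixel_loss = loss_fn(logits, targets)  # shape [2, 16384, 90]
```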
class L1LocalizationLoss(object):
"""L1 loss or absolute difference."""
def __call__(self, prediction_tensor, target_tensor, weights=1.0):
"""Compute loss function.
When used in a per-pixel manner, each pixel should be given as an anchor.
Args:
prediction_tensor: A float tensor of shape [batch_size, num_anchors]
representing the (encoded) predicted locations of objects.
target_tensor: A float tensor of shape [batch_size, num_anchors]
representing the regression targets
weights: a float tensor of shape [batch_size, num_anchors]
Returns:
loss: a float tensor of shape [batch_size, num_anchors] tensor
representing the value of the loss function.
"""
with tf.name_scope('l1l_loss'):
return tf.compat.v1.losses.absolute_difference(
labels=target_tensor,
predictions=prediction_tensor,
weights=weights,
reduction=tf.losses.Reduction.NONE
)
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for losses of centernet model."""
import numpy as np
import tensorflow as tf
from official.vision.beta.projects.centernet.losses import centernet_losses
LOG_2 = np.log(2)
LOG_3 = np.log(3)
class L1LocalizationLossTest(tf.test.TestCase):
def test_returns_correct_loss(self):
def graph_fn():
loss = centernet_losses.L1LocalizationLoss()
pred = [[0.1, 0.2], [0.7, 0.5]]
target = [[0.9, 1.0], [0.1, 0.4]]
weights = [[1.0, 0.0], [1.0, 1.0]]
return loss(pred, target, weights=weights)
computed_value = graph_fn()
self.assertAllClose(computed_value, [[0.8, 0.0], [0.6, 0.1]], rtol=1e-6)
class PenaltyReducedLogisticFocalLossTest(tf.test.TestCase):
"""Testing loss function."""
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self._prediction = np.array([
# First batch
[[1 / 2, 1 / 4, 3 / 4],
[3 / 4, 1 / 3, 1 / 3]],
# Second Batch
[[0.0, 1.0, 1 / 2],
[3 / 4, 2 / 3, 1 / 3]]], np.float32)
self._prediction = np.log(self._prediction / (1 - self._prediction))
self._target = np.array([
# First batch
[[1.0, 0.91, 1.0],
[0.36, 0.84, 1.0]],
# Second Batch
[[0.01, 1.0, 0.75],
[0.96, 1.0, 1.0]]], np.float32)
def test_returns_correct_loss(self):
def graph_fn(prediction, target):
weights = tf.constant([
[[1.0], [1.0]],
[[1.0], [1.0]],
])
loss = centernet_losses.PenaltyReducedLogisticFocalLoss(
alpha=2.0, beta=0.5)
computed_value = loss(prediction, target, weights=weights)
return computed_value
computed_value = graph_fn(self._prediction, self._target)
expected_value = np.array([
# First batch
[[1 / 4 * LOG_2,
0.3 * 0.0625 * (2 * LOG_2 - LOG_3),
1 / 16 * (2 * LOG_2 - LOG_3)],
[0.8 * 9 / 16 * 2 * LOG_2,
0.4 * 1 / 9 * (LOG_3 - LOG_2),
4 / 9 * LOG_3]],
# Second Batch
[[0.0,
0.0,
1 / 2 * 1 / 4 * LOG_2],
[0.2 * 9 / 16 * 2 * LOG_2,
1 / 9 * (LOG_3 - LOG_2),
4 / 9 * LOG_3]]])
self.assertAllClose(expected_value, computed_value, rtol=1e-3, atol=1e-3)
def test_returns_correct_loss_weighted(self):
def graph_fn(prediction, target):
weights = tf.constant([
[[1.0, 0.0, 1.0], [0.0, 0.0, 1.0]],
[[1.0, 1.0, 1.0], [0.0, 0.0, 0.0]],
])
loss = centernet_losses.PenaltyReducedLogisticFocalLoss(
alpha=2.0, beta=0.5)
computed_value = loss(prediction, target, weights=weights)
return computed_value
computed_value = graph_fn(self._prediction, self._target)
expected_value = np.array([
# First batch
[[1 / 4 * LOG_2,
0.0,
1 / 16 * (2 * LOG_2 - LOG_3)],
[0.0,
0.0,
4 / 9 * LOG_3]],
# Second Batch
[[0.0,
0.0,
1 / 2 * 1 / 4 * LOG_2],
[0.0,
0.0,
0.0]]])
self.assertAllClose(expected_value, computed_value, rtol=1e-3, atol=1e-3)
if __name__ == '__main__':
tf.test.main()
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Build Hourglass backbone."""
from typing import Optional
import tensorflow as tf
from official.modeling import hyperparams
from official.vision.beta.modeling.backbones import factory
from official.vision.beta.modeling.backbones import mobilenet
from official.vision.beta.modeling.layers import nn_blocks
from official.vision.beta.projects.centernet.modeling.layers import cn_nn_blocks
HOURGLASS_SPECS = {
10: {
'blocks_per_stage': [1, 1],
'channel_dims_per_stage': [2, 2]
},
20: {
'blocks_per_stage': [1, 2, 2],
'channel_dims_per_stage': [2, 2, 3]
},
32: {
'blocks_per_stage': [2, 2, 2, 2],
'channel_dims_per_stage': [2, 2, 3, 3]
},
52: {
'blocks_per_stage': [2, 2, 2, 2, 2, 4],
'channel_dims_per_stage': [2, 2, 3, 3, 3, 4]
},
100: {
'blocks_per_stage': [4, 4, 4, 4, 4, 8],
'channel_dims_per_stage': [2, 2, 3, 3, 3, 4]
},
}
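The multipliers above are scaled by `input_channel_dims` when the model is built (see the constructor below). A quick sketch for the hourglass-52 spec with the default `input_channel_dims` of 128:
```
spec = HOURGLASS_SPECS[52]
channel_dims = [m * 128 for m in spec['channel_dims_per_stage']]
# channel_dims == [256, 256, 384, 384, 384, 512]
```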
@tf.keras.utils.register_keras_serializable(package='centernet')
class Hourglass(tf.keras.Model):
"""CenterNet Hourglass backbone."""
def __init__(
self,
model_id: int,
input_channel_dims: int,
input_specs=tf.keras.layers.InputSpec(shape=[None, None, None, 3]),
num_hourglasses: int = 1,
initial_downsample: bool = True,
activation: str = 'relu',
use_sync_bn: bool = True,
norm_momentum=0.1,
norm_epsilon=1e-5,
kernel_initializer: str = 'VarianceScaling',
kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
**kwargs):
"""Initialize Hourglass backbone.
Args:
model_id: An `int` of the scale of Hourglass backbone model.
input_channel_dims: `int`, number of filters used to downsample the
input image.
input_specs: A `tf.keras.layers.InputSpec` of specs of the input tensor.
num_hourglasses: `int`, number of hourglass blocks in the backbone. For
example, hourglass-104 has two hourglass-52 modules.
initial_downsample: `bool`, whether or not to downsample the input.
activation: A `str` name of the activation function.
use_sync_bn: If True, use synchronized batch normalization.
norm_momentum: `float`, momentum for the batch normalization layers.
norm_epsilon: `float`, epsilon for the batch normalization layers.
kernel_initializer: A `str` for kernel initializer of conv layers.
kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
Conv2D. Default to None.
bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D.
Default to None.
**kwargs: Additional keyword arguments to be passed.
"""
self._input_channel_dims = input_channel_dims
self._model_id = model_id
self._num_hourglasses = num_hourglasses
self._initial_downsample = initial_downsample
self._activation = activation
self._kernel_initializer = kernel_initializer
self._kernel_regularizer = kernel_regularizer
self._bias_regularizer = bias_regularizer
self._use_sync_bn = use_sync_bn
self._norm_momentum = norm_momentum
self._norm_epsilon = norm_epsilon
specs = HOURGLASS_SPECS[model_id]
self._blocks_per_stage = specs['blocks_per_stage']
self._channel_dims_per_stage = [item * self._input_channel_dims
for item in specs['channel_dims_per_stage']]
inputs = tf.keras.layers.Input(shape=input_specs.shape[1:])
inp_filters = self._channel_dims_per_stage[0]
# Downsample the input
if initial_downsample:
prelayer_kernel_size = 7
prelayer_strides = 2
else:
prelayer_kernel_size = 3
prelayer_strides = 1
x_downsampled = mobilenet.Conv2DBNBlock(
filters=self._input_channel_dims,
kernel_size=prelayer_kernel_size,
strides=prelayer_strides,
use_explicit_padding=True,
activation=self._activation,
bias_regularizer=self._bias_regularizer,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
use_sync_bn=self._use_sync_bn,
norm_momentum=self._norm_momentum,
norm_epsilon=self._norm_epsilon)(inputs)
x_downsampled = nn_blocks.ResidualBlock(
filters=inp_filters,
use_projection=True,
use_explicit_padding=True,
strides=prelayer_strides,
bias_regularizer=self._bias_regularizer,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
use_sync_bn=self._use_sync_bn,
norm_momentum=self._norm_momentum,
norm_epsilon=self._norm_epsilon)(x_downsampled)
all_heatmaps = {}
for i in range(num_hourglasses):
# Create an hourglass stack
x_hg = cn_nn_blocks.HourglassBlock(
channel_dims_per_stage=self._channel_dims_per_stage,
blocks_per_stage=self._blocks_per_stage,
)(x_downsampled)
x_hg = mobilenet.Conv2DBNBlock(
filters=inp_filters,
kernel_size=3,
strides=1,
use_explicit_padding=True,
activation=self._activation,
bias_regularizer=self._bias_regularizer,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
use_sync_bn=self._use_sync_bn,
norm_momentum=self._norm_momentum,
norm_epsilon=self._norm_epsilon
)(x_hg)
# Given the two downsampling blocks above, the starting level is set to 2.
# To stay compatible with the other backbone implementations, the
# output of the hourglass backbone is organized as
# '2' -> the last layer of output
# '2_0' -> the first layer of output
# ......
# '2_{num_hourglasses-2}' -> the second to last layer of output
if i < num_hourglasses - 1:
all_heatmaps['2_{}'.format(i)] = x_hg
else:
all_heatmaps['2'] = x_hg
# Intermediate conv and residual layers between hourglasses
if i < num_hourglasses - 1:
inter_hg_conv1 = mobilenet.Conv2DBNBlock(
filters=inp_filters,
kernel_size=1,
strides=1,
activation='identity',
bias_regularizer=self._bias_regularizer,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
use_sync_bn=self._use_sync_bn,
norm_momentum=self._norm_momentum,
norm_epsilon=self._norm_epsilon
)(x_downsampled)
inter_hg_conv2 = mobilenet.Conv2DBNBlock(
filters=inp_filters,
kernel_size=1,
strides=1,
activation='identity',
bias_regularizer=self._bias_regularizer,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
use_sync_bn=self._use_sync_bn,
norm_momentum=self._norm_momentum,
norm_epsilon=self._norm_epsilon
)(x_hg)
x_downsampled = tf.keras.layers.Add()([inter_hg_conv1, inter_hg_conv2])
x_downsampled = tf.keras.layers.ReLU()(x_downsampled)
x_downsampled = nn_blocks.ResidualBlock(
filters=inp_filters,
use_projection=False,
use_explicit_padding=True,
strides=1,
bias_regularizer=self._bias_regularizer,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
use_sync_bn=self._use_sync_bn,
norm_momentum=self._norm_momentum,
norm_epsilon=self._norm_epsilon
)(x_downsampled)
self._output_specs = {l: all_heatmaps[l].get_shape() for l in all_heatmaps}
super().__init__(inputs=inputs, outputs=all_heatmaps, **kwargs)
def get_config(self):
config = {
'model_id': self._model_id,
'input_channel_dims': self._input_channel_dims,
'num_hourglasses': self._num_hourglasses,
'initial_downsample': self._initial_downsample,
'kernel_initializer': self._kernel_initializer,
'kernel_regularizer': self._kernel_regularizer,
'bias_regularizer': self._bias_regularizer,
'use_sync_bn': self._use_sync_bn,
'norm_momentum': self._norm_momentum,
'norm_epsilon': self._norm_epsilon
}
config.update(super(Hourglass, self).get_config())
return config
@property
def num_hourglasses(self):
return self._num_hourglasses
@property
def output_specs(self):
return self._output_specs
@factory.register_backbone_builder('hourglass')
def build_hourglass(
input_specs: tf.keras.layers.InputSpec,
backbone_config: hyperparams.Config,
norm_activation_config: hyperparams.Config,
l2_regularizer: Optional[tf.keras.regularizers.Regularizer] = None
) -> tf.keras.Model:
"""Builds Hourglass backbone from a configuration."""
backbone_type = backbone_config.type
backbone_cfg = backbone_config.get()
assert backbone_type == 'hourglass', (f'Inconsistent backbone type '
f'{backbone_type}')
return Hourglass(
model_id=backbone_cfg.model_id,
input_channel_dims=backbone_cfg.input_channel_dims,
num_hourglasses=backbone_cfg.num_hourglasses,
input_specs=input_specs,
initial_downsample=backbone_cfg.initial_downsample,
activation=norm_activation_config.activation,
use_sync_bn=norm_activation_config.use_sync_bn,
norm_momentum=norm_activation_config.norm_momentum,
norm_epsilon=norm_activation_config.norm_epsilon,
kernel_regularizer=l2_regularizer,
)
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for hourglass module."""
from absl.testing import parameterized
import numpy as np
import tensorflow as tf
from official.vision.beta.configs import common
from official.vision.beta.projects.centernet.common import registry_imports # pylint: disable=unused-import
from official.vision.beta.projects.centernet.configs import backbones
from official.vision.beta.projects.centernet.modeling.backbones import hourglass
class HourglassTest(tf.test.TestCase, parameterized.TestCase):
def test_hourglass(self):
backbone = hourglass.build_hourglass(
input_specs=tf.keras.layers.InputSpec(shape=[None, 512, 512, 3]),
backbone_config=backbones.Backbone(type='hourglass'),
norm_activation_config=common.NormActivation(use_sync_bn=True)
)
inputs = np.zeros((2, 512, 512, 3), dtype=np.float32)
outputs = backbone(inputs)
self.assertEqual(outputs['2_0'].shape, (2, 128, 128, 256))
self.assertEqual(outputs['2'].shape, (2, 128, 128, 256))
if __name__ == '__main__':
tf.test.main()
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Centernet detection models."""
from typing import Mapping, Union, Any
import tensorflow as tf
class CenterNetModel(tf.keras.Model):
"""CenterNet Model."""
def __init__(self,
backbone: tf.keras.Model,
head: tf.keras.Model,
detection_generator: tf.keras.layers.Layer,
**kwargs):
"""CenterNet Model.
Args:
backbone: a backbone network.
head: a projection head for centernet.
detection_generator: a detection generator for centernet.
**kwargs: keyword arguments to be passed.
"""
super(CenterNetModel, self).__init__(**kwargs)
# model components
self._backbone = backbone
self._detection_generator = detection_generator
self._head = head
def call(self,
inputs: tf.Tensor,
training: bool = None,
**kwargs) -> Mapping[str, tf.Tensor]:
features = self._backbone(inputs)
raw_outputs = self._head(features)
model_outputs = {'raw_output': raw_outputs}
if not training:
predictions = self._detection_generator(raw_outputs)
model_outputs.update(predictions)
return model_outputs
@property
def checkpoint_items(
self) -> Mapping[str, Union[tf.keras.Model, tf.keras.layers.Layer]]:
"""Returns a dictionary of items to be additionally checkpointed."""
items = dict(backbone=self.backbone, head=self.head)
return items
@property
def backbone(self):
return self._backbone
@property
def detection_generator(self):
return self._detection_generator
@property
def head(self):
return self._head
def get_config(self) -> Mapping[str, Any]:
config_dict = {
'backbone': self._backbone,
'head': self._head,
'detection_generator': self._detection_generator,
}
return config_dict
@classmethod
def from_config(cls, config, custom_objects=None):
return cls(**config)
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Test for centernet detection model."""
from absl.testing import parameterized
import tensorflow as tf
from official.vision.beta.configs import common
from official.vision.beta.projects.centernet.configs import backbones
from official.vision.beta.projects.centernet.modeling import centernet_model
from official.vision.beta.projects.centernet.modeling.backbones import hourglass
from official.vision.beta.projects.centernet.modeling.heads import centernet_head
from official.vision.beta.projects.centernet.modeling.layers import detection_generator
class CenterNetTest(parameterized.TestCase, tf.test.TestCase):
def testBuildCenterNet(self):
backbone = hourglass.build_hourglass(
input_specs=tf.keras.layers.InputSpec(shape=[None, 512, 512, 3]),
backbone_config=backbones.Backbone(type='hourglass'),
norm_activation_config=common.NormActivation(use_sync_bn=True)
)
task_config = {
'ct_heatmaps': 90,
'ct_offset': 2,
'ct_size': 2,
}
input_levels = ['2_0', '2']
head = centernet_head.CenterNetHead(
task_outputs=task_config,
input_specs=backbone.output_specs,
input_levels=input_levels)
detection_ge = detection_generator.CenterNetDetectionGenerator()
model = centernet_model.CenterNetModel(
backbone=backbone,
head=head,
detection_generator=detection_ge
)
outputs = model(tf.zeros((5, 512, 512, 3)))
self.assertLen(outputs['raw_output'], 3)
self.assertLen(outputs['raw_output']['ct_heatmaps'], 2)
self.assertLen(outputs['raw_output']['ct_offset'], 2)
self.assertLen(outputs['raw_output']['ct_size'], 2)
self.assertEqual(outputs['raw_output']['ct_heatmaps'][0].shape,
(5, 128, 128, 90))
self.assertEqual(outputs['raw_output']['ct_offset'][0].shape,
(5, 128, 128, 2))
self.assertEqual(outputs['raw_output']['ct_size'][0].shape,
(5, 128, 128, 2))
if __name__ == '__main__':
tf.test.main()
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains the definitions of head for CenterNet."""
from typing import Any, Mapping, Dict, List
import tensorflow as tf
from official.vision.beta.projects.centernet.modeling.layers import cn_nn_blocks
@tf.keras.utils.register_keras_serializable(package='centernet')
class CenterNetHead(tf.keras.Model):
"""CenterNet Head."""
def __init__(self,
input_specs: Dict[str, tf.TensorShape],
task_outputs: Mapping[str, int],
input_levels: List[str],
heatmap_bias: float = -2.19,
**kwargs):
"""CenterNet Head Initialization.
Args:
input_specs: A `dict` of input specifications.
task_outputs: A `dict`, with key-value pairs denoting the names of the
outputs and the desired channel depth of each output.
input_levels: list of str representing the levels used as inputs to the
CenterNetHead from the backbone. For example, ['2_0', '2'] should be
set for hourglass-104, which has two hourglass-52 modules, since the
output of hourglass backbones is organized as:
'2' -> the last layer of output
'2_0' -> the first layer of output
......
'2_{num_hourglasses-2}' -> the second to last layer of output.
heatmap_bias: `float`, constant value to initialize the convolution layer
bias vector if it is responsible for generating a heatmap (not for
regressed predictions).
**kwargs: Additional keyword arguments to be passed.
Returns:
dictionary where the key-value pairs denote the names of the outputs
and the respective output tensors.
"""
assert input_levels, f'Please specify input levels: {input_levels}'
self._input_specs = input_specs
self._task_outputs = task_outputs
self._input_levels = input_levels
self._heatmap_bias = heatmap_bias
self._num_inputs = len(input_levels)
input_levels = sorted(self._input_specs.keys())
inputs = {level: tf.keras.layers.Input(shape=self._input_specs[level][1:])
for level in input_levels}
outputs = {}
for key in self._task_outputs:
# pylint: disable=g-complex-comprehension
outputs[key] = [
cn_nn_blocks.CenterNetHeadConv(
output_filters=self._task_outputs[key],
bias_init=self._heatmap_bias if 'heatmaps' in key else 0,
name=key + str(i),
)(inputs[i])
for i in input_levels
]
self._output_specs = {
key: [value[i].get_shape() for i in range(self._num_inputs)]
for key, value in outputs.items()
}
super().__init__(inputs=inputs, outputs=outputs,
name='CenterNetHead', **kwargs)
def get_config(self) -> Mapping[str, Any]:
config = {
'input_specs': self._input_specs,
'task_outputs': self._task_outputs,
'heatmap_bias': self._heatmap_bias,
'input_levels': self._input_levels,
}
base_config = super(CenterNetHead, self).get_config()
return dict(list(base_config.items()) + list(config.items()))
@classmethod
def from_config(cls, config, custom_objects=None):
return cls(**config)
@property
def output_specs(self) -> Mapping[str, tf.TensorShape]:
"""A dict of {level: TensorShape} pairs for the model output."""
return self._output_specs
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for Centernet Head."""
from absl.testing import parameterized
import numpy as np
import tensorflow as tf
from official.vision.beta.projects.centernet.modeling.heads import centernet_head
class CenterNetHeadTest(tf.test.TestCase, parameterized.TestCase):
def test_decoder_shape(self):
task_config = {
'ct_heatmaps': 90,
'ct_offset': 2,
'ct_size': 2,
}
input_specs = {
'2_0': tf.keras.layers.InputSpec(shape=(None, 128, 128, 256)).shape,
'2': tf.keras.layers.InputSpec(shape=(None, 128, 128, 256)).shape,
}
input_levels = ['2', '2_0']
head = centernet_head.CenterNetHead(
task_outputs=task_config,
input_specs=input_specs,
input_levels=input_levels)
config = head.get_config()
self.assertEqual(config['heatmap_bias'], -2.19)
# Output shape tests
outputs = head([np.zeros((2, 128, 128, 256), dtype=np.float32),
np.zeros((2, 128, 128, 256), dtype=np.float32)])
self.assertLen(outputs, 3)
self.assertEqual(outputs['ct_heatmaps'][0].shape, (2, 128, 128, 90))
self.assertEqual(outputs['ct_offset'][0].shape, (2, 128, 128, 2))
self.assertEqual(outputs['ct_size'][0].shape, (2, 128, 128, 2))
# Weight initialization tests
hm_bias_vector = np.asarray(head.layers[2].weights[-1])
off_bias_vector = np.asarray(head.layers[4].weights[-1])
size_bias_vector = np.asarray(head.layers[6].weights[-1])
self.assertArrayNear(hm_bias_vector,
np.repeat(-2.19, repeats=90), err=1.00e-6)
self.assertArrayNear(off_bias_vector,
np.repeat(0, repeats=2), err=1.00e-6)
self.assertArrayNear(size_bias_vector,
np.repeat(0, repeats=2), err=1.00e-6)
if __name__ == '__main__':
tf.test.main()