Commit c6d7d57d authored by A. Unique TensorFlower

Merge pull request #10251 from PurdueDualityLab:loss_fn_pr

PiperOrigin-RevId: 396512110
parents 31fb7a65 7f90664e
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
This diff is collapsed.
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for yolo heads."""
from absl.testing import parameterized
import tensorflow as tf
from official.vision.beta.projects.yolo.losses import yolo_loss
class YoloDecoderTest(parameterized.TestCase, tf.test.TestCase):
@parameterized.parameters(
(True),
(False),
)
def test_loss_init(self, scaled):
"""Test creation of YOLO family models."""
def inpdict(input_shape, dtype=tf.float32):
inputs = {}
for key in input_shape:
inputs[key] = tf.ones(input_shape[key], dtype=dtype)
return inputs
tf.keras.backend.set_image_data_format('channels_last')
input_shape = {
'3': [1, 52, 52, 255],
'4': [1, 26, 26, 255],
'5': [1, 13, 13, 255]
}
classes = 80
masks = {'3': [0, 1, 2], '4': [3, 4, 5], '5': [6, 7, 8]}
anchors = [[12.0, 19.0], [31.0, 46.0], [96.0, 54.0], [46.0, 114.0],
[133.0, 127.0], [79.0, 225.0], [301.0, 150.0], [172.0, 286.0],
[348.0, 340.0]]
keys = ['3', '4', '5']
path_strides = {key: 2**int(key) for key in keys}
loss = yolo_loss.YoloLoss(
keys,
classes,
anchors,
masks=masks,
path_strides=path_strides,
truth_thresholds={key: 1.0 for key in keys},
ignore_thresholds={key: 0.7 for key in keys},
loss_types={key: 'ciou' for key in keys},
iou_normalizers={key: 0.05 for key in keys},
cls_normalizers={key: 0.5 for key in keys},
obj_normalizers={key: 1.0 for key in keys},
objectness_smooths={key: 1.0 for key in keys},
box_types={key: 'scaled' for key in keys},
scale_xys={key: 2.0 for key in keys},
max_deltas={key: 30.0 for key in keys},
label_smoothing=0.0,
use_scaled_loss=scaled,
update_on_repeat=True)
count = inpdict({
'3': [1, 52, 52, 3, 1],
'4': [1, 26, 26, 3, 1],
'5': [1, 13, 13, 3, 1]
})
ind = inpdict({
'3': [1, 300, 3],
'4': [1, 300, 3],
'5': [1, 300, 3]
}, tf.int32)
truths = inpdict({'3': [1, 300, 8], '4': [1, 300, 8], '5': [1, 300, 8]})
boxes = tf.ones([1, 300, 4], dtype=tf.float32)
classes = tf.ones([1, 300], dtype=tf.float32)
gt = {
'true_conf': count,
'inds': ind,
'upds': truths,
'bbox': boxes,
'classes': classes
}
_, _, _ = loss(gt, inpdict(input_shape))
if __name__ == '__main__':
tf.test.main()
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# Lint as: python3
 """Contains definitions of Darknet Backbone Networks.
 The models are inspired by ResNet and CSPNet.
@@ -390,7 +389,7 @@ class Darknet(tf.keras.Model):
                norm_momentum=0.99,
                norm_epsilon=0.001,
                dilate=False,
-               kernel_initializer='glorot_uniform',
+               kernel_initializer='VarianceScaling',
                kernel_regularizer=None,
                bias_regularizer=None,
                **kwargs):
@@ -507,10 +506,12 @@ class Darknet(tf.keras.Model):
       self._default_dict['name'] = f'{name}_csp_down'
       if self._dilate:
         self._default_dict['dilation_rate'] = config.dilation_rate
+        degrid = int(tf.math.log(float(config.dilation_rate)) / tf.math.log(2.))
       else:
         self._default_dict['dilation_rate'] = 1
+        degrid = 0
 
       # swap/add dilation
       x, x_route = nn_blocks.CSPRoute(
           filters=config.filters,
           filter_scale=csp_filter_scale,
@@ -518,7 +519,7 @@ class Darknet(tf.keras.Model):
           **self._default_dict)(
               inputs)
 
-      dilated_reps = config.repetitions - self._default_dict['dilation_rate'] // 2
+      dilated_reps = config.repetitions - degrid
       for i in range(dilated_reps):
         self._default_dict['name'] = f'{name}_{i}'
         x = nn_blocks.DarkResidual(
@@ -528,8 +529,8 @@ class Darknet(tf.keras.Model):
                 x)
 
       for i in range(dilated_reps, config.repetitions):
-        self._default_dict[
-            'dilation_rate'] = self._default_dict['dilation_rate'] // 2
+        self._default_dict['dilation_rate'] = max(
+            1, self._default_dict['dilation_rate'] // 2)
         self._default_dict[
             'name'] = f"{name}_{i}_degridded_{self._default_dict['dilation_rate']}"
         x = nn_blocks.DarkResidual(
@@ -592,8 +593,8 @@ class Darknet(tf.keras.Model):
           filters=config.filters, downsample=True, **self._default_dict)(
              inputs)
 
-      dilated_reps = config.repetitions - (
-          self._default_dict['dilation_rate'] // 2) - 1
+      dilated_reps = config.repetitions - self._default_dict[
+          'dilation_rate'] // 2 - 1
       for i in range(dilated_reps):
         self._default_dict['name'] = f'{name}_{i}'
         x = nn_blocks.DarkResidual(
@@ -661,12 +662,13 @@ class Darknet(tf.keras.Model):
 @factory.register_backbone_builder('darknet')
 def build_darknet(
     input_specs: tf.keras.layers.InputSpec,
-    backbone_config: hyperparams.Config,
+    backbone_cfg: hyperparams.Config,
     norm_activation_config: hyperparams.Config,
     l2_regularizer: tf.keras.regularizers.Regularizer = None) -> tf.keras.Model:
   """Builds darknet."""
-  backbone_cfg = backbone_config.get()
+  backbone_cfg = backbone_cfg.get()
   model = Darknet(
       model_id=backbone_cfg.model_id,
       min_level=backbone_cfg.min_level,
......
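A note on the degridding change in the hunk above: the old code shrank the dilated portion of each stack by `dilation_rate // 2` blocks, which grows linearly with the dilation rate, while the new code computes `degrid = log2(dilation_rate)` so one block is spent per halving of the rate on the way back down to 1. A minimal sketch of the two counts (the helper names below are illustrative, not from the commit):

import math

def dilated_reps_old(repetitions, dilation_rate):
  # old behaviour: linear in the dilation rate
  return repetitions - dilation_rate // 2

def dilated_reps_new(repetitions, dilation_rate):
  # new behaviour: one degridding block per power of two
  degrid = int(math.log2(dilation_rate))
  return repetitions - degrid

# For repetitions=8 and dilation_rate=8 the old code kept 8 - 4 = 4 dilated
# blocks; the new code keeps 8 - 3 = 5 and degrids over the remaining 3,
# halving the rate 8 -> 4 -> 2 -> 1 (with max(1, ...) guarding the floor).
print(dilated_reps_old(8, 8), dilated_reps_new(8, 8))  # 4 5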
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# Lint as: python3
 """Feature Pyramid Network and Path Aggregation variants used in YOLO."""
 import tensorflow as tf
@@ -39,7 +38,7 @@ class YoloFPN(tf.keras.layers.Layer):
                use_sync_bn=False,
                norm_momentum=0.99,
                norm_epsilon=0.001,
-               kernel_initializer='glorot_uniform',
+               kernel_initializer='VarianceScaling',
                kernel_regularizer=None,
                bias_regularizer=None,
                **kwargs):
@@ -184,7 +183,7 @@ class YoloPAN(tf.keras.layers.Layer):
                use_sync_bn=False,
                norm_momentum=0.99,
                norm_epsilon=0.001,
-               kernel_initializer='glorot_uniform',
+               kernel_initializer='VarianceScaling',
                kernel_regularizer=None,
                bias_regularizer=None,
                fpn_input=True,
@@ -206,7 +205,7 @@ class YoloPAN(tf.keras.layers.Layer):
         by zero.
       kernel_initializer: kernel_initializer for convolutional layers.
       kernel_regularizer: tf.keras.regularizers.Regularizer object for Conv2D.
-      bias_regularizer: tf.keras.regularizers.Regularizer object for Conv2d.
+      bias_regularizer: tf.keras.regularizers.Regularizer object for Conv2D.
       fpn_input: `bool`, for whether the input into this function is an FPN or
         a backbone.
       fpn_filter_scale: `int`, scaling factor for the FPN filters.
@@ -374,7 +373,7 @@ class YoloDecoder(tf.keras.Model):
                use_sync_bn=False,
                norm_momentum=0.99,
                norm_epsilon=0.001,
-               kernel_initializer='glorot_uniform',
+               kernel_initializer='VarianceScaling',
                kernel_regularizer=None,
                bias_regularizer=None,
                **kwargs):
@@ -389,8 +388,8 @@ class YoloDecoder(tf.keras.Model):
       use_fpn: `bool`, use the FPN found in the YoloV4 model.
       use_spatial_attention: `bool`, use the spatial attention module.
       csp_stack: `bool`, CSPize the FPN.
-      fpn_depth: `int`, number of layers to use in each FPN path
-        if you choose to use an FPN.
+      fpn_depth: `int`, number of layers to use in each FPN path if you choose
+        to use an FPN.
       fpn_filter_scale: `int`, scaling factor for the FPN filters.
       path_process_len: `int`, number of layers to use in each Decoder path.
       max_level_process_len: `int`, number of layers to use in the largest
......
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# Lint as: python3
 """Yolo heads."""
 import tensorflow as tf
@@ -30,10 +29,11 @@ class YoloHead(tf.keras.layers.Layer):
                output_extras=0,
                norm_momentum=0.99,
                norm_epsilon=0.001,
-               kernel_initializer='glorot_uniform',
+               kernel_initializer='VarianceScaling',
                kernel_regularizer=None,
                bias_regularizer=None,
                activation=None,
+               smart_bias=False,
                **kwargs):
     """Yolo Prediction Head initialization function.
@@ -52,6 +52,7 @@ class YoloHead(tf.keras.layers.Layer):
       kernel_regularizer: tf.keras.regularizers.Regularizer object for Conv2D.
       bias_regularizer: tf.keras.regularizers.Regularizer object for Conv2D.
       activation: `str`, the activation function to use, typically leaky or mish.
+      smart_bias: `bool`, whether or not to use smart bias.
       **kwargs: keyword arguments to be passed.
     """
@@ -68,6 +69,7 @@ class YoloHead(tf.keras.layers.Layer):
     self._output_extras = output_extras
     self._output_conv = (classes + output_extras + 5) * boxes_per_level
+    self._smart_bias = smart_bias
 
     self._base_config = dict(
         activation=activation,
@@ -85,10 +87,29 @@ class YoloHead(tf.keras.layers.Layer):
         use_bn=False,
         **self._base_config)
 
+  def bias_init(self, scale, inshape, isize=640, no_per_conf=8):
+
+    def bias(shape, dtype):
+      init = tf.keras.initializers.Zeros()
+      base = init(shape, dtype=dtype)
+      if self._smart_bias:
+        base = tf.reshape(base, [self._boxes_per_level, -1])
+        box, conf, classes = tf.split(base, [4, 1, -1], axis=-1)
+        conf += tf.math.log(no_per_conf / ((isize / scale)**2))
+        classes += tf.math.log(0.6 / (self._classes - 0.99))
+        base = tf.concat([box, conf, classes], axis=-1)
+        base = tf.reshape(base, [-1])
+      return base
+
+    return bias
+
   def build(self, input_shape):
     self._head = dict()
     for key in self._key_list:
-      self._head[key] = nn_blocks.ConvBN(**self._conv_config)
+      scale = 2**int(key)
+      self._head[key] = nn_blocks.ConvBN(
+          bias_initializer=self.bias_init(scale, input_shape[key][-1]),
+          **self._conv_config)
 
   def call(self, inputs):
     outputs = dict()
@@ -107,6 +128,10 @@ class YoloHead(tf.keras.layers.Layer):
           'Model has to be built before number of boxes can be determined.')
     return (self._max_level - self._min_level + 1) * self._boxes_per_level
 
+  @property
+  def num_heads(self):
+    return self._max_level - self._min_level + 1
+
   def get_config(self):
     config = dict(
         min_level=self._min_level,
......
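The new `bias_init` above appears to follow the prior-based "smart bias" initialization popularized by the focal-loss paper and the YOLOv5 training recipe: the objectness bias is set so the initial detection probability roughly matches the expected number of objects per grid cell at that level, and the class bias so each class starts near a small, roughly uniform prior. A rough numeric sketch of the priors it encodes, using the function's defaults (isize=640, no_per_conf=8) and an 80-class head at stride 8; the helper below is illustrative, not part of the commit:

import math

def smart_bias_priors(scale, classes, isize=640, no_per_conf=8):
  # Objectness: log of (expected positives per level / grid cells at this
  # level), so sigmoid(bias) starts near no_per_conf / (isize / scale)**2.
  obj_bias = math.log(no_per_conf / ((isize / scale) ** 2))
  # Classes: sigmoid(bias) starts near 0.6 / (classes - 0.99) per class.
  cls_bias = math.log(0.6 / (classes - 0.99))
  return obj_bias, cls_bias

obj_b, cls_b = smart_bias_priors(scale=8, classes=80)
sigmoid = lambda x: 1.0 / (1.0 + math.exp(-x))
print(sigmoid(obj_b))  # ~0.00125: about 8 positives over an 80x80 grid
print(sigmoid(cls_b))  # ~0.0075: a weak, roughly uniform class prior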
@@ -15,7 +15,10 @@
 """Contains common building blocks for yolo layer (detection layer)."""
 import tensorflow as tf
+from official.vision.beta.modeling.layers import detection_generator
+from official.vision.beta.projects.yolo.losses import yolo_loss
 from official.vision.beta.projects.yolo.ops import box_ops
+from official.vision.beta.projects.yolo.ops import loss_utils
 
 @tf.keras.utils.register_keras_serializable(package='yolo')
@@ -36,11 +39,11 @@ class YoloLayer(tf.keras.Model):
                cls_normalizer=1.0,
                obj_normalizer=1.0,
                use_scaled_loss=False,
-               darknet=None,
+               update_on_repeat=False,
                pre_nms_points=5000,
                label_smoothing=0.0,
                max_boxes=200,
-               new_cords=False,
+               box_type='original',
                path_scale=None,
                scale_xy=None,
                nms_type='greedy',
@@ -70,14 +73,25 @@ class YoloLayer(tf.keras.Model):
       obj_normalizer: `float` for how much to scale loss on the detection map.
       use_scaled_loss: `bool` for whether to use the scaled loss
         or the traditional loss.
-      darknet: `bool` for whether to use the DarkNet or PyTorch loss function
-        implementation.
+      update_on_repeat: `bool` indicating how you would like to handle repeated
+        indexes in a given [j, i] index. Setting this to True will give more
+        consistent mAP; setting it to False will improve recall by 1-2% but
+        will sacrifice some mAP.
       pre_nms_points: `int` number of top candidate detections per class before
         NMS.
       label_smoothing: `float` for how much to smooth the loss on the classes.
       max_boxes: `int` for the maximum number of boxes retained over all
         classes.
-      new_cords: `bool` for using the ScaledYOLOv4 coordinates.
+      box_type: `str`, there are 3 different box types that will affect
+        training differently {original, scaled and anchor_free}. The original
+        method decodes the boxes by applying an exponential to the model width
+        and height maps, then scaling the maps by the anchor boxes. This
+        method is used in Yolo-v4, Yolo-v3, and all their counterparts. The
+        scaled method squares the width and height and scales both by a fixed
+        factor of 4. This method is used in the Scaled Yolo models, as well as
+        Yolov4-CSP. Finally, anchor_free is like the original method but will
+        not apply an activation function to the boxes; this is used for some
+        of the newer anchor free versions of YOLO.
       path_scale: `dict` for the size of the input tensors. Defaults to
         precalculated values from the `mask`.
       scale_xy: dictionary of `float` values indicating how far each pixel can see
@@ -91,18 +105,6 @@ class YoloLayer(tf.keras.Model):
       objectness_smooth: `float` for how much to smooth the loss on the
         detection map.
       **kwargs: Additional keyword arguments.
-
-    Return:
-      loss: `float` for the actual loss.
-      box_loss: `float` loss on the boxes used for metrics.
-      conf_loss: `float` loss on the confidence used for metrics.
-      class_loss: `float` loss on the classes used for metrics.
-      avg_iou: `float` metric for the average iou between predictions
-        and ground truth.
-      avg_obj: `float` metric for the average confidence of the model
-        for predictions.
-      recall50: `float` metric for how accurate the model is.
-      precision50: `float` metric for how precise the model is.
     """
     super().__init__(**kwargs)
     self._masks = masks
@@ -121,29 +123,18 @@ class YoloLayer(tf.keras.Model):
     self._loss_type = loss_type
     self._use_scaled_loss = use_scaled_loss
-    self._darknet = darknet
+    self._update_on_repeat = update_on_repeat
 
     self._pre_nms_points = pre_nms_points
     self._label_smoothing = label_smoothing
     self._keys = list(masks.keys())
     self._len_keys = len(self._keys)
-    self._new_cords = new_cords
+    self._box_type = box_type
     self._path_scale = path_scale or {
         key: 2**int(key) for key, _ in masks.items()
     }
-    self._nms_types = {
-        'greedy': 1,
-        'iou': 2,
-        'giou': 3,
-        'ciou': 4,
-        'diou': 5,
-        'class_independent': 6,
-        'weighted_diou': 7
-    }
-    self._nms_type = self._nms_types[nms_type]
+    self._nms_type = nms_type
 
     self._scale_xy = scale_xy or {key: 1.0 for key, _ in masks.items()}
     self._generator = {}
@@ -156,27 +147,33 @@ class YoloLayer(tf.keras.Model):
     return
 
   def get_generators(self, anchors, path_scale, path_key):
-    return None
-
-  def rm_nan_inf(self, x, val=0.0):
-    x = tf.where(tf.math.is_nan(x), tf.cast(val, dtype=x.dtype), x)
-    x = tf.where(tf.math.is_inf(x), tf.cast(val, dtype=x.dtype), x)
-    return x
+    anchor_generator = loss_utils.GridGenerator(
+        anchors, scale_anchors=path_scale)
+    return anchor_generator
 
   def parse_prediction_path(self, key, inputs):
+    shape_ = tf.shape(inputs)
     shape = inputs.get_shape().as_list()
-    height, width = shape[1], shape[2]
+    batchsize, height, width = shape_[0], shape[1], shape[2]
+
+    if height is None or width is None:
+      height, width = shape_[1], shape_[2]
 
+    generator = self._generator[key]
     len_mask = self._len_mask[key]
+    scale_xy = self._scale_xy[key]
 
     # reshape the yolo output to (batchsize,
     #                             width,
     #                             height,
     #                             number_anchors,
     #                             remaining_points)
     data = tf.reshape(inputs, [-1, height, width, len_mask, self._classes + 5])
 
+    # use the grid generator to get the formatted anchor boxes and grid points
+    # in shape [1, height, width, 2]
+    centers, anchors = generator(height, width, batchsize, dtype=data.dtype)
+
     # split the yolo detections into boxes, object score map, classes
     boxes, obns_scores, class_scores = tf.split(
         data, [4, 1, self._classes], axis=-1)
@@ -184,25 +181,32 @@ class YoloLayer(tf.keras.Model):
     # determine the number of classes
     classes = class_scores.get_shape().as_list()[-1]
 
-    # configurable to use the new coordinates in scaled Yolo v4 or not
+    _, _, boxes = loss_utils.get_predicted_box(
+        tf.cast(height, data.dtype),
+        tf.cast(width, data.dtype),
+        boxes,
+        anchors,
+        centers,
+        scale_xy,
+        stride=self._path_scale[key],
+        darknet=False,
+        box_type=self._box_type[key])
 
     # convert boxes from yolo(x, y, w, h) to tensorflow(ymin, xmin, ymax, xmax)
     boxes = box_ops.xcycwh_to_yxyx(boxes)
 
     # activate the detection map
     obns_scores = tf.math.sigmoid(obns_scores)
-
-    # threshold the detection map
-    obns_mask = tf.cast(obns_scores > self._thresh, obns_scores.dtype)
 
     # convert detection map to class detection probabilities
-    class_scores = tf.math.sigmoid(class_scores) * obns_mask * obns_scores
+    class_scores = tf.math.sigmoid(class_scores) * obns_scores
+    class_scores *= tf.cast(class_scores > self._thresh, class_scores.dtype)
 
-    fill = height * width * len_mask
     # flatten predictions to [batchsize, N, -1] for non max suppression
+    fill = height * width * len_mask
     boxes = tf.reshape(boxes, [-1, fill, 4])
     class_scores = tf.reshape(class_scores, [-1, fill, classes])
     obns_scores = tf.reshape(obns_scores, [-1, fill])
 
     return obns_scores, boxes, class_scores
 
   def call(self, inputs):
@@ -224,26 +228,49 @@ class YoloLayer(tf.keras.Model):
     # collate all predictions
     boxes = tf.concat(boxes, axis=1)
-    object_scores = tf.keras.backend.concatenate(object_scores, axis=1)
-    class_scores = tf.keras.backend.concatenate(class_scores, axis=1)
-
-    # greedy NMS
-    boxes = tf.cast(boxes, dtype=tf.float32)
-    class_scores = tf.cast(class_scores, dtype=tf.float32)
-    nms_items = tf.image.combined_non_max_suppression(
-        tf.expand_dims(boxes, axis=-2),
-        class_scores,
-        self._pre_nms_points,
-        self._max_boxes,
-        iou_threshold=self._nms_thresh,
-        score_threshold=self._thresh)
-
-    # cast the boxes and predictions back to original datatype
-    boxes = tf.cast(nms_items.nmsed_boxes, object_scores.dtype)
-    class_scores = tf.cast(nms_items.nmsed_classes, object_scores.dtype)
-    object_scores = tf.cast(nms_items.nmsed_scores, object_scores.dtype)
-
-    # compute the number of valid detections
-    num_detections = tf.math.reduce_sum(tf.math.ceil(object_scores), axis=-1)
+    object_scores = tf.concat(object_scores, axis=1)
+    class_scores = tf.concat(class_scores, axis=1)
+
+    # get masks to threshold all the predictions
+    object_mask = tf.cast(object_scores > self._thresh, object_scores.dtype)
+    class_mask = tf.cast(class_scores > self._thresh, class_scores.dtype)
+
+    # apply the threshold masks to all the predictions
+    object_scores *= object_mask
+    class_scores *= (tf.expand_dims(object_mask, axis=-1) * class_mask)
+
+    # apply nms
+    if self._nms_type == 'greedy':
+      # greedy NMS
+      boxes = tf.cast(boxes, dtype=tf.float32)
+      class_scores = tf.cast(class_scores, dtype=tf.float32)
+      boxes, object_scores_, class_scores, num_detections = (
+          tf.image.combined_non_max_suppression(
+              tf.expand_dims(boxes, axis=-2),
+              class_scores,
+              self._pre_nms_points,
+              self._max_boxes,
+              iou_threshold=self._nms_thresh,
+              score_threshold=self._thresh))
+
+      # cast the boxes and predictions back to original datatype
+      boxes = tf.cast(boxes, object_scores.dtype)
+      class_scores = tf.cast(class_scores, object_scores.dtype)
+      object_scores = tf.cast(object_scores_, object_scores.dtype)
+    else:
+      # TPU NMS
+      boxes = tf.cast(boxes, dtype=tf.float32)
+      class_scores = tf.cast(class_scores, dtype=tf.float32)
+      (boxes, confidence, classes,
+       num_detections) = detection_generator._generate_detections_v2(  # pylint:disable=protected-access
+           tf.expand_dims(boxes, axis=-2),
+           class_scores,
+           pre_nms_top_k=self._pre_nms_points,
+           max_num_detections=self._max_boxes,
+           nms_iou_threshold=self._nms_thresh,
+           pre_nms_score_threshold=self._thresh)
+      boxes = tf.cast(boxes, object_scores.dtype)
+      class_scores = tf.cast(classes, object_scores.dtype)
+      object_scores = tf.cast(confidence, object_scores.dtype)
 
     # format and return
     return {
@@ -258,9 +285,28 @@ class YoloLayer(tf.keras.Model):
     """Generates a dictionary of losses to apply to each path.
 
     Done in the detection generator because all parameters are the same
    across both loss and detection generator.
     """
-    return None
+    loss = yolo_loss.YoloLoss(
+        keys=self._keys,
+        classes=self._classes,
+        anchors=self._anchors,
+        masks=self._masks,
+        path_strides=self._path_scale,
+        truth_thresholds=self._truth_thresh,
+        ignore_thresholds=self._ignore_thresh,
+        loss_types=self._loss_type,
+        iou_normalizers=self._iou_normalizer,
+        cls_normalizers=self._cls_normalizer,
+        obj_normalizers=self._obj_normalizer,
+        objectness_smooths=self._objectness_smooth,
+        box_types=self._box_type,
+        max_deltas=self._max_delta,
+        scale_xys=self._scale_xy,
+        use_scaled_loss=self._use_scaled_loss,
+        update_on_repeat=self._update_on_repeat,
+        label_smoothing=self._label_smoothing)
+    return loss
 
   def get_config(self):
     return {
......
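For reference, the three `box_type` decodings described in the docstring above roughly correspond to the following width/height math (a hedged sketch only; the actual decoding lives in `loss_utils.get_predicted_box`, whose diff is not shown on this page):

import tensorflow as tf

def decode_wh(raw_wh, anchors, box_type):
  # 'original' (Yolo-v3/v4): exponential of the raw map, scaled by anchors.
  if box_type == 'original':
    return tf.math.exp(raw_wh) * anchors
  # 'scaled' (Scaled-Yolo / Yolov4-CSP): squared sigmoid, capped at a fixed
  # factor of 4 times the anchor.
  elif box_type == 'scaled':
    return (2.0 * tf.math.sigmoid(raw_wh)) ** 2 * anchors
  # 'anchor_free': like 'original' but with no activation on the raw map.
  elif box_type == 'anchor_free':
    return raw_wh * anchors
  raise ValueError(f'unknown box_type: {box_type}')

raw = tf.zeros([1, 2])
anchor = tf.constant([[12.0, 19.0]])
print(decode_wh(raw, anchor, 'original'))  # exp(0) * anchor = the anchor
print(decode_wh(raw, anchor, 'scaled'))    # (2 * 0.5)^2 * anchor = the anchor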
@@ -39,7 +39,10 @@ class YoloDecoderTest(parameterized.TestCase, tf.test.TestCase):
     anchors = [[12.0, 19.0], [31.0, 46.0], [96.0, 54.0], [46.0, 114.0],
                [133.0, 127.0], [79.0, 225.0], [301.0, 150.0], [172.0, 286.0],
                [348.0, 340.0]]
-    layer = dg.YoloLayer(masks, anchors, classes, max_boxes=10)
+    box_type = {key: 'scaled' for key in masks.keys()}
+    layer = dg.YoloLayer(
+        masks, anchors, classes, box_type=box_type, max_boxes=10)
 
     inputs = {}
     for key in input_shape:
......
@@ -12,9 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# Lint as: python3
 """Contains common building blocks for yolo neural networks."""
-from typing import Callable, List
 import tensorflow as tf
 from official.modeling import tf_utils
 from official.vision.beta.ops import spatial_transform_ops
@@ -48,7 +46,7 @@ class ConvBN(tf.keras.layers.Layer):
                strides=(1, 1),
                padding='same',
                dilation_rate=(1, 1),
-               kernel_initializer='glorot_uniform',
+               kernel_initializer='VarianceScaling',
                bias_initializer='zeros',
                bias_regularizer=None,
                kernel_regularizer=None,
@@ -97,7 +95,14 @@ class ConvBN(tf.keras.layers.Layer):
     self._strides = strides
     self._padding = padding
     self._dilation_rate = dilation_rate
-    self._kernel_initializer = kernel_initializer
+
+    if kernel_initializer == 'VarianceScaling':
+      # to match pytorch initialization method
+      self._kernel_initializer = tf.keras.initializers.VarianceScaling(
+          scale=1 / 3, mode='fan_in', distribution='uniform')
+    else:
+      self._kernel_initializer = kernel_initializer
+
     self._bias_initializer = bias_initializer
     self._kernel_regularizer = kernel_regularizer
@@ -194,7 +199,7 @@ class DarkResidual(tf.keras.layers.Layer):
                filters=1,
                filter_scale=2,
                dilation_rate=1,
-               kernel_initializer='glorot_uniform',
+               kernel_initializer='VarianceScaling',
                bias_initializer='zeros',
                kernel_regularizer=None,
                bias_regularizer=None,
@@ -366,7 +371,7 @@ class CSPTiny(tf.keras.layers.Layer):
   def __init__(self,
                filters=1,
-               kernel_initializer='glorot_uniform',
+               kernel_initializer='VarianceScaling',
               bias_initializer='zeros',
                bias_regularizer=None,
                kernel_regularizer=None,
@@ -532,7 +537,7 @@ class CSPRoute(tf.keras.layers.Layer):
                filters,
                filter_scale=2,
                activation='mish',
-               kernel_initializer='glorot_uniform',
+               kernel_initializer='VarianceScaling',
                bias_initializer='zeros',
                bias_regularizer=None,
                kernel_regularizer=None,
@@ -661,7 +666,7 @@ class CSPConnect(tf.keras.layers.Layer):
                drop_first=False,
                activation='mish',
                kernel_size=(1, 1),
-               kernel_initializer='glorot_uniform',
+               kernel_initializer='VarianceScaling',
                bias_initializer='zeros',
                bias_regularizer=None,
                kernel_regularizer=None,
@@ -761,122 +766,6 @@ class CSPConnect(tf.keras.layers.Layer):
     return x
 
-class CSPStack(tf.keras.layers.Layer):
-  """CSP Stack layer.
-
-  CSP full stack, combines the route and the connect in case you don't want to
-  just quickly wrap an existing callable or list of layers to
-  make it a cross stage partial. Added for ease of use. You should be able
-  to wrap any layer stack with a CSP independent of whether it belongs
-  to the Darknet family. If filter_scale = 2, then the blocks in the stack
-  passed into the CSP stack should also have filters = filters/filter_scale.
-
-  Cross Stage Partial networks (CSPNets) were proposed in:
-  [1] Chien-Yao Wang, Hong-Yuan Mark Liao, I-Hau Yeh, Yueh-Hua Wu,
-      Ping-Yang Chen, Jun-Wei Hsieh
-      CSPNet: A New Backbone that can Enhance Learning Capability of CNN.
-      arXiv:1911.11929
-  """
-
-  def __init__(self,
-               filters,
-               model_to_wrap=None,
-               filter_scale=2,
-               activation='mish',
-               kernel_initializer='glorot_uniform',
-               bias_initializer='zeros',
-               bias_regularizer=None,
-               kernel_regularizer=None,
-               downsample=True,
-               use_bn=True,
-               use_sync_bn=False,
-               norm_momentum=0.99,
-               norm_epsilon=0.001,
-               **kwargs):
-    """CSPStack layer initializer.
-
-    Args:
-      filters: integer for output depth, or the number of features to learn.
-      model_to_wrap: callable Model or a list of callable objects that will
-        process the output of CSPRoute, and be input into CSPConnect.
-        The list will be called sequentially.
-      filter_scale: integer dictating (filters//2) or the number of filters in
-        the partial feature stack.
-      activation: string for activation function to use in layer.
-      kernel_initializer: string to indicate which function to use to
-        initialize weights.
-      bias_initializer: string to indicate which function to use to initialize
-        bias.
-      bias_regularizer: string to indicate which function to use to regularize
-        bias.
-      kernel_regularizer: string to indicate which function to use to
-        regularize weights.
-      downsample: down_sample the input.
-      use_bn: boolean for whether to use batch normalization.
-      use_sync_bn: boolean for whether to sync batch normalization statistics
-        of all batch norm layers to the model's global statistics
-        (across all input batches).
-      norm_momentum: float for moment to use for batch normalization.
-      norm_epsilon: float for batch normalization epsilon.
-      **kwargs: Keyword Arguments.
-
-    Raises:
-      TypeError: model_to_wrap is not a layer or a list of layers
-    """
-    super().__init__(**kwargs)
-    # layer params
-    self._filters = filters
-    self._filter_scale = filter_scale
-    self._activation = activation
-    self._downsample = downsample
-
-    # convolution params
-    self._kernel_initializer = kernel_initializer
-    self._bias_initializer = bias_initializer
-    self._kernel_regularizer = kernel_regularizer
-    self._bias_regularizer = bias_regularizer
-    self._use_bn = use_bn
-    self._use_sync_bn = use_sync_bn
-    self._norm_momentum = norm_momentum
-    self._norm_epsilon = norm_epsilon
-
-    if model_to_wrap is None:
-      self._model_to_wrap = []
-    elif isinstance(model_to_wrap, Callable):
-      self._model_to_wrap = [model_to_wrap]
-    elif isinstance(model_to_wrap, List):
-      self._model_to_wrap = model_to_wrap
-    else:
-      raise TypeError(
-          'the input to the CSPStack must be a list of layers that we can' +
-          'iterate through, or \n a callable')
-
-  def build(self, input_shape):
-    dark_conv_args = {
-        'filters': self._filters,
-        'filter_scale': self._filter_scale,
-        'activation': self._activation,
-        'kernel_initializer': self._kernel_initializer,
-        'bias_initializer': self._bias_initializer,
-        'bias_regularizer': self._bias_regularizer,
-        'use_bn': self._use_bn,
-        'use_sync_bn': self._use_sync_bn,
-        'norm_momentum': self._norm_momentum,
-        'norm_epsilon': self._norm_epsilon,
-        'kernel_regularizer': self._kernel_regularizer,
-    }
-    self._route = CSPRoute(downsample=self._downsample, **dark_conv_args)
-    self._connect = CSPConnect(**dark_conv_args)
-
-  def call(self, inputs, training=None):
-    x, x_route = self._route(inputs)
-    for layer in self._model_to_wrap:
-      x = layer(x)
-    x = self._connect([x, x_route])
-    return x
-
-
 @tf.keras.utils.register_keras_serializable(package='yolo')
 class PathAggregationBlock(tf.keras.layers.Layer):
   """Path Aggregation block."""
@@ -884,7 +773,7 @@ class PathAggregationBlock(tf.keras.layers.Layer):
   def __init__(self,
                filters=1,
                drop_final=True,
-               kernel_initializer='glorot_uniform',
+               kernel_initializer='VarianceScaling',
                bias_initializer='zeros',
                bias_regularizer=None,
                kernel_regularizer=None,
@@ -1120,7 +1009,7 @@ class SAM(tf.keras.layers.Layer):
                strides=(1, 1),
                padding='same',
                dilation_rate=(1, 1),
-               kernel_initializer='glorot_uniform',
+               kernel_initializer='VarianceScaling',
                bias_initializer='zeros',
                bias_regularizer=None,
                kernel_regularizer=None,
@@ -1192,7 +1081,7 @@ class CAM(tf.keras.layers.Layer):
   def __init__(self,
                reduction_ratio=1.0,
-               kernel_initializer='glorot_uniform',
+               kernel_initializer='VarianceScaling',
                bias_initializer='zeros',
                bias_regularizer=None,
                kernel_regularizer=None,
@@ -1285,7 +1174,7 @@ class CBAM(tf.keras.layers.Layer):
                strides=(1, 1),
                padding='same',
                dilation_rate=(1, 1),
-               kernel_initializer='glorot_uniform',
+               kernel_initializer='VarianceScaling',
                bias_initializer='zeros',
                bias_regularizer=None,
                kernel_regularizer=None,
@@ -1354,8 +1243,7 @@ class DarkRouteProcess(tf.keras.layers.Layer):
                           insert_spp=False)(x)
   """
 
-  def __init__(
-      self,
+  def __init__(self,
                filters=2,
                repetitions=2,
                insert_spp=False,
@@ -1363,7 +1251,7 @@ class DarkRouteProcess(tf.keras.layers.Layer):
                insert_cbam=False,
                csp_stack=0,
                csp_scale=2,
-               kernel_initializer='glorot_uniform',
+               kernel_initializer='VarianceScaling',
                bias_initializer='zeros',
                bias_regularizer=None,
                kernel_regularizer=None,
......
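A quick check on the `VarianceScaling(scale=1/3, mode='fan_in', distribution='uniform')` default that the hunk above introduces in `ConvBN`: Keras samples from U(-limit, limit) with limit = sqrt(3 * scale / fan_in), so with scale = 1/3 the bound reduces to sqrt(1 / fan_in), which is the same bound PyTorch's default kaiming_uniform_(a=sqrt(5)) yields for Conv2d kernels. A minimal sketch verifying the bound (kernel shape chosen for illustration):

import math
import tensorflow as tf

# A 3x3 conv kernel over 64 input channels, 128 filters; fan_in = 3*3*64.
init = tf.keras.initializers.VarianceScaling(
    scale=1 / 3, mode='fan_in', distribution='uniform')
weights = init(shape=(3, 3, 64, 128))

fan_in = 3 * 3 * 64
limit = math.sqrt(3 * (1 / 3) / fan_in)  # = sqrt(1 / fan_in) = 1/24 here
assert float(tf.reduce_max(tf.abs(weights))) <= limit
print(limit)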
@@ -106,86 +106,6 @@ class CSPRouteTest(tf.test.TestCase, parameterized.TestCase):
     self.assertNotIn(None, grad)
 
-class CSPStackTest(tf.test.TestCase, parameterized.TestCase):
-
-  def build_layer(self, layer_type, filters, filter_scale, count, stack_type,
-                  downsample):
-    if stack_type is not None:
-      layers = []
-      if layer_type == 'residual':
-        for _ in range(count):
-          layers.append(
-              nn_blocks.DarkResidual(
-                  filters=filters // filter_scale, filter_scale=filter_scale))
-      else:
-        for _ in range(count):
-          layers.append(nn_blocks.ConvBN(filters=filters))
-
-      if stack_type == 'model':
-        layers = tf.keras.Sequential(layers=layers)
-    else:
-      layers = None
-
-    stack = nn_blocks.CSPStack(
-        filters=filters,
-        filter_scale=filter_scale,
-        downsample=downsample,
-        model_to_wrap=layers)
-    return stack
-
-  @parameterized.named_parameters(
-      ('no_stack', 224, 224, 64, 2, 'residual', None, 0, True),
-      ('residual_stack', 224, 224, 64, 2, 'residual', 'list', 2, True),
-      ('conv_stack', 224, 224, 64, 2, 'conv', 'list', 3, False),
-      ('callable_no_scale', 224, 224, 64, 1, 'residual', 'model', 5, False))
-  def test_pass_through(self, width, height, filters, mod, layer_type,
-                        stack_type, count, downsample):
-    x = tf.keras.Input(shape=(width, height, filters))
-    test_layer = self.build_layer(layer_type, filters, mod, count, stack_type,
-                                  downsample)
-    outx = test_layer(x)
-    print(outx)
-    print(outx.shape.as_list())
-    if downsample:
-      self.assertAllEqual(outx.shape.as_list(),
-                          [None, width // 2, height // 2, filters])
-    else:
-      self.assertAllEqual(outx.shape.as_list(), [None, width, height, filters])
-
-  @parameterized.named_parameters(
-      ('no_stack', 224, 224, 64, 2, 'residual', None, 0, True),
-      ('residual_stack', 224, 224, 64, 2, 'residual', 'list', 2, True),
-      ('conv_stack', 224, 224, 64, 2, 'conv', 'list', 3, False),
-      ('callable_no_scale', 224, 224, 64, 1, 'residual', 'model', 5, False))
-  def test_gradient_pass_though(self, width, height, filters, mod, layer_type,
-                                stack_type, count, downsample):
-    loss = tf.keras.losses.MeanSquaredError()
-    optimizer = tf.keras.optimizers.SGD()
-    init = tf.random_normal_initializer()
-    x = tf.Variable(
-        initial_value=init(shape=(1, width, height, filters), dtype=tf.float32))
-    if not downsample:
-      y = tf.Variable(
-          initial_value=init(
-              shape=(1, width, height, filters), dtype=tf.float32))
-    else:
-      y = tf.Variable(
-          initial_value=init(
-              shape=(1, width // 2, height // 2, filters), dtype=tf.float32))
-
-    test_layer = self.build_layer(layer_type, filters, mod, count, stack_type,
-                                  downsample)
-
-    with tf.GradientTape() as tape:
-      x_hat = test_layer(x)
-      grad_loss = loss(x_hat, y)
-    grad = tape.gradient(grad_loss, test_layer.trainable_variables)
-    optimizer.apply_gradients(zip(grad, test_layer.trainable_variables))
-
-    self.assertNotIn(None, grad)
-
-
 class ConvBNTest(tf.test.TestCase, parameterized.TestCase):
 
   @parameterized.named_parameters(
......
@@ -17,7 +17,7 @@
 import tensorflow as tf
 
-# Static base Yolo Models that do not require configuration
+# static base Yolo Models that do not require configuration
 # similar to a backbone model id.
 # this is done to greatly simplify the model config
@@ -85,26 +85,27 @@ class Yolo(tf.keras.Model):
     """Detection initialization function.
 
     Args:
-      backbone: `tf.keras.Model`, a backbone network.
-      decoder: `tf.keras.Model`, a decoder network.
-      head: `YoloHead`, the YOLO head.
-      detection_generator: `tf.keras.Model`, the detection generator.
+      backbone: `tf.keras.Model` a backbone network.
+      decoder: `tf.keras.Model` a decoder network.
+      head: `RetinaNetHead`, the RetinaNet head.
+      detection_generator: the detection generator.
       **kwargs: keyword arguments to be passed.
     """
-    super().__init__(**kwargs)
+    super(Yolo, self).__init__(**kwargs)
     self._config_dict = {
         "backbone": backbone,
         "decoder": decoder,
         "head": head,
-        "detection_generator": detection_generator
+        "filter": detection_generator
     }
 
     # model components
     self._backbone = backbone
     self._decoder = decoder
     self._head = head
-    self._detection_generator = detection_generator
+    self._filter = detection_generator
+    return
 
   def call(self, inputs, training=False):
     maps = self._backbone(inputs)
@@ -114,7 +115,7 @@ class Yolo(tf.keras.Model):
       return {"raw_output": raw_predictions}
     else:
       # Post-processing.
-      predictions = self._detection_generator(raw_predictions)
+      predictions = self._filter(raw_predictions)
       predictions.update({"raw_output": raw_predictions})
       return predictions
@@ -131,8 +132,8 @@ class Yolo(tf.keras.Model):
     return self._head
 
   @property
-  def detection_generator(self):
-    return self._detection_generator
+  def filter(self):
+    return self._filter
 
   def get_config(self):
     return self._config_dict
@@ -140,3 +141,29 @@ class Yolo(tf.keras.Model):
   @classmethod
   def from_config(cls, config):
     return cls(**config)
+
+  def get_weight_groups(self, train_vars):
+    """Sorts the list of trainable variables into groups for optimization.
+
+    Args:
+      train_vars: a list of tf.Variables that need to get sorted into their
+        respective groups.
+
+    Returns:
+      weights: a list of tf.Variables for the weights.
+      bias: a list of tf.Variables for the bias.
+      other: a list of tf.Variables for the other operations.
+    """
+    bias = []
+    weights = []
+    other = []
+    for var in train_vars:
+      if "bias" in var.name:
+        bias.append(var)
+      elif "beta" in var.name:
+        bias.append(var)
+      elif "kernel" in var.name or "weight" in var.name:
+        weights.append(var)
+      else:
+        other.append(var)
+    return weights, bias, other
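The new `get_weight_groups` splits trainable variables by name so an optimizer can treat them differently; a common Darknet/PyTorch convention is to apply weight decay only to convolution kernels, not to biases or batch-norm parameters. A self-contained sketch of the same grouping logic under that assumption (the stand-in variables and the 5e-4 decay rate are illustrative, not from the commit):

import tensorflow as tf

# Hypothetical stand-in variables; any model's trainable_variables would do.
# The names mirror what Conv2D / BatchNormalization layers produce.
train_vars = [
    tf.Variable(tf.ones([3, 3, 3, 8]), name='conv/kernel'),
    tf.Variable(tf.zeros([8]), name='conv/bias'),
    tf.Variable(tf.ones([8]), name='bn/gamma'),
    tf.Variable(tf.zeros([8]), name='bn/beta'),
]

weights, bias, other = [], [], []
for var in train_vars:
  if 'bias' in var.name or 'beta' in var.name:
    bias.append(var)      # no weight decay
  elif 'kernel' in var.name or 'weight' in var.name:
    weights.append(var)   # weight decay applies here
  else:
    other.append(var)     # e.g. batch-norm gamma

# Apply L2 decay only to the kernel group, Darknet-style.
l2 = 5e-4 * tf.add_n([tf.nn.l2_loss(w) for w in weights])
print(len(weights), len(bias), len(other), float(l2))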
@@ -38,51 +38,26 @@ def yxyx_to_xcycwh(box: tf.Tensor):
   return box
 
-@tf.custom_gradient
-def _xcycwh_to_yxyx(box: tf.Tensor, scale):
-  """Private function to allow custom gradients with defaults."""
-  with tf.name_scope('xcycwh_to_yxyx'):
-    xy, wh = tf.split(box, 2, axis=-1)
-    xy_min = xy - wh / 2
-    xy_max = xy + wh / 2
-    x_min, y_min = tf.split(xy_min, 2, axis=-1)
-    x_max, y_max = tf.split(xy_max, 2, axis=-1)
-    box = tf.concat([y_min, x_min, y_max, x_max], axis=-1)
-
-  def delta(dbox):
-    # y_min = top, x_min = left, y_max = bottom, x_max = right
-    dt, dl, db, dr = tf.split(dbox, 4, axis=-1)
-    dx = dl + dr
-    dy = dt + db
-    dw = (dr - dl) / scale
-    dh = (db - dt) / scale
-    dbox = tf.concat([dx, dy, dw, dh], axis=-1)
-    return dbox, 0.0
-
-  return box, delta
-
-
-def xcycwh_to_yxyx(box: tf.Tensor, darknet=False):
+def xcycwh_to_yxyx(box: tf.Tensor):
   """Converts boxes from x_center, y_center, width, height to yxyx format.
 
   Args:
     box: any `Tensor` whose last dimension is 4 representing the coordinates of
       boxes in x_center, y_center, width, height.
-    darknet: `bool`, if True a scale of 1.0 is used.
 
   Returns:
     box: a `Tensor` whose shape is the same as `box` in new format.
   """
-  if darknet:
-    scale = 1.0
-  else:
-    scale = 2.0
-  box = _xcycwh_to_yxyx(box, scale)
+  with tf.name_scope('xcycwh_to_yxyx'):
+    xy, wh = tf.split(box, 2, axis=-1)
+    xy_min = xy - wh / 2
+    xy_max = xy + wh / 2
+    x_min, y_min = tf.split(xy_min, 2, axis=-1)
+    x_max, y_max = tf.split(xy_max, 2, axis=-1)
+    box = tf.concat([y_min, x_min, y_max, x_max], axis=-1)
   return box
 
-# IOU
 def intersect_and_union(box1, box2, yxyx=False):
   """Calculates the intersection and union between box1 and box2.
@@ -98,8 +73,9 @@ def intersect_and_union(box1, box2, yxyx=False):
     intersection: a `Tensor` which represents the intersection.
     union: a `Tensor` which represents the union.
   """
   if not yxyx:
+    box1_area = tf.reduce_prod(tf.split(box1, 2, axis=-1)[-1], axis=-1)
+    box2_area = tf.reduce_prod(tf.split(box2, 2, axis=-1)[-1], axis=-1)
     box1 = xcycwh_to_yxyx(box1)
     box2 = xcycwh_to_yxyx(box2)
@@ -110,13 +86,14 @@ def intersect_and_union(box1, box2, yxyx=False):
   intersect_wh = tf.math.maximum(intersect_maxes - intersect_mins, 0.0)
   intersection = tf.reduce_prod(intersect_wh, axis=-1)
 
-  box1_area = tf.reduce_prod(b1ma - b1mi, axis=-1)
-  box2_area = tf.reduce_prod(b2ma - b2mi, axis=-1)
+  if yxyx:
+    box1_area = tf.reduce_prod(b1ma - b1mi, axis=-1)
+    box2_area = tf.reduce_prod(b2ma - b2mi, axis=-1)
   union = box1_area + box2_area - intersection
 
   return intersection, union
 
-def smallest_encompassing_box(box1, box2, yxyx=False):
+def smallest_encompassing_box(box1, box2, yxyx=False, clip=False):
   """Calculates the smallest box that encompasses box1 and box2.
 
   Args:
@@ -126,6 +103,7 @@ def smallest_encompassing_box(box1, box2, yxyx=False):
       boxes.
     yxyx: a `bool` indicating whether the input box is of the format x_center
      y_center, width, height or y_min, x_min, y_max, x_max.
+    clip: a `bool`, whether or not to clip boxes.
 
   Returns:
     box_c: a `Tensor` whose last dimension is 4 representing the coordinates of
@@ -141,15 +119,15 @@ def smallest_encompassing_box(box1, box2, yxyx=False, clip=False):
   bcmi = tf.math.minimum(b1mi, b2mi)
   bcma = tf.math.maximum(b1ma, b2ma)
 
-  bca = tf.reduce_prod(bcma - bcmi, keepdims=True, axis=-1)
-
   box_c = tf.concat([bcmi, bcma], axis=-1)
   if not yxyx:
     box_c = yxyx_to_xcycwh(box_c)
 
-  box_c = tf.where(bca == 0.0, tf.zeros_like(box_c), box_c)
-  return box_c
+  if clip:
+    bca = tf.reduce_prod(bcma - bcmi, keepdims=True, axis=-1)
+    box_c = tf.where(bca <= 0.0, tf.zeros_like(box_c), box_c)
+
+  return bcmi, bcma, box_c
 
 def compute_iou(box1, box2, yxyx=False):
@@ -166,15 +144,13 @@ def compute_iou(box1, box2, yxyx=False):
   Returns:
     iou: a `Tensor` which represents the intersection over union.
   """
-  # get box corners
   with tf.name_scope('iou'):
     intersection, union = intersect_and_union(box1, box2, yxyx=yxyx)
     iou = math_ops.divide_no_nan(intersection, union)
-    iou = math_ops.rm_nan_inf(iou, val=0.0)
   return iou
 
-def compute_giou(box1, box2, yxyx=False, darknet=False):
+def compute_giou(box1, box2, yxyx=False):
   """Calculates the General intersection over union between box1 and box2.
 
   Args:
@@ -184,38 +160,30 @@ def compute_giou(box1, box2, yxyx=False):
      boxes.
    yxyx: a `bool` indicating whether the input box is of the format x_center
      y_center, width, height or y_min, x_min, y_max, x_max.
-    darknet: a `bool` indicating whether the calling function is the YOLO
-      darknet loss.
 
   Returns:
     giou: a `Tensor` which represents the General intersection over union.
   """
   with tf.name_scope('giou'):
-    # get IOU
     if not yxyx:
-      box1 = xcycwh_to_yxyx(box1, darknet=darknet)
-      box2 = xcycwh_to_yxyx(box2, darknet=darknet)
-      yxyx = True
+      yxyx1 = xcycwh_to_yxyx(box1)
+      yxyx2 = xcycwh_to_yxyx(box2)
+    else:
+      yxyx1, yxyx2 = box1, box2
 
-    intersection, union = intersect_and_union(box1, box2, yxyx=yxyx)
+    cmi, cma, _ = smallest_encompassing_box(yxyx1, yxyx2, yxyx=True)
+    intersection, union = intersect_and_union(yxyx1, yxyx2, yxyx=True)
     iou = math_ops.divide_no_nan(intersection, union)
-    iou = math_ops.rm_nan_inf(iou, val=0.0)
 
-    # find the smallest box to encompass both box1 and box2
-    boxc = smallest_encompassing_box(box1, box2, yxyx=yxyx)
-    if yxyx:
-      boxc = yxyx_to_xcycwh(boxc)
-    _, cwch = tf.split(boxc, 2, axis=-1)
-    c = tf.math.reduce_prod(cwch, axis=-1)
+    bcwh = cma - cmi
+    c = tf.math.reduce_prod(bcwh, axis=-1)
 
-    # compute giou
     regularization = math_ops.divide_no_nan((c - union), c)
     giou = iou - regularization
-    giou = tf.clip_by_value(giou, clip_value_min=-1.0, clip_value_max=1.0)
   return iou, giou
 
-def compute_diou(box1, box2, beta=1.0, yxyx=False, darknet=False):
+def compute_diou(box1, box2, beta=1.0, yxyx=False):
   """Calculates the distance intersection over union between box1 and box2.
 
   Args:
@@ -227,8 +195,6 @@ def compute_diou(box1, box2, beta=1.0, yxyx=False):
       regularization term.
     yxyx: a `bool` indicating whether the input box is of the format x_center
      y_center, width, height or y_min, x_min, y_max, x_max.
-    darknet: a `bool` indicating whether the calling function is the YOLO
-      darknet loss.
 
   Returns:
     diou: a `Tensor` which represents the distance intersection over union.
@@ -236,30 +202,27 @@ def compute_diou(box1, box2, beta=1.0, yxyx=False):
   with tf.name_scope('diou'):
     # compute center distance
     if not yxyx:
-      box1 = xcycwh_to_yxyx(box1, darknet=darknet)
-      box2 = xcycwh_to_yxyx(box2, darknet=darknet)
-      yxyx = True
+      xycc1, xycc2 = box1, box2
+      yxyx1 = xcycwh_to_yxyx(box1)
+      yxyx2 = xcycwh_to_yxyx(box2)
+    else:
+      yxyx1, yxyx2 = box1, box2
+      xycc1 = yxyx_to_xcycwh(box1)
+      xycc2 = yxyx_to_xcycwh(box2)
 
-    intersection, union = intersect_and_union(box1, box2, yxyx=yxyx)
-    boxc = smallest_encompassing_box(box1, box2, yxyx=yxyx)
+    cmi, cma, _ = smallest_encompassing_box(yxyx1, yxyx2, yxyx=True)
+    intersection, union = intersect_and_union(yxyx1, yxyx2, yxyx=True)
     iou = math_ops.divide_no_nan(intersection, union)
-    iou = math_ops.rm_nan_inf(iou, val=0.0)
 
-    if yxyx:
-      boxc = yxyx_to_xcycwh(boxc)
-      box1 = yxyx_to_xcycwh(box1)
-      box2 = yxyx_to_xcycwh(box2)
-
-    b1xy, _ = tf.split(box1, 2, axis=-1)
-    b2xy, _ = tf.split(box2, 2, axis=-1)
-    _, bcwh = tf.split(boxc, 2, axis=-1)
+    b1xy, _ = tf.split(xycc1, 2, axis=-1)
+    b2xy, _ = tf.split(xycc2, 2, axis=-1)
+    bcwh = cma - cmi
 
     center_dist = tf.reduce_sum((b1xy - b2xy)**2, axis=-1)
     c_diag = tf.reduce_sum(bcwh**2, axis=-1)
 
     regularization = math_ops.divide_no_nan(center_dist, c_diag)
     diou = iou - regularization**beta
-    diou = tf.clip_by_value(diou, clip_value_min=-1.0, clip_value_max=1.0)
   return iou, diou
 
@@ -280,33 +243,48 @@ def compute_ciou(box1, box2, yxyx=False, darknet=False):
     ciou: a `Tensor` which represents the complete intersection over union.
   """
   with tf.name_scope('ciou'):
-    # compute DIOU and IOU
-    iou, diou = compute_diou(box1, box2, yxyx=yxyx, darknet=darknet)
+    if not yxyx:
+      xycc1, xycc2 = box1, box2
+      yxyx1 = xcycwh_to_yxyx(box1)
+      yxyx2 = xcycwh_to_yxyx(box2)
+    else:
+      yxyx1, yxyx2 = box1, box2
+      xycc1 = yxyx_to_xcycwh(box1)
+      xycc2 = yxyx_to_xcycwh(box2)
 
-    if yxyx:
-      box1 = yxyx_to_xcycwh(box1)
-      box2 = yxyx_to_xcycwh(box2)
+    # Build the smallest encompassing box.
+    cmi, cma, _ = smallest_encompassing_box(yxyx1, yxyx2, yxyx=True)
+    intersection, union = intersect_and_union(yxyx1, yxyx2, yxyx=True)
+    iou = math_ops.divide_no_nan(intersection, union)
 
-    _, _, b1w, b1h = tf.split(box1, 4, axis=-1)
-    _, _, b2w, b2h = tf.split(box1, 4, axis=-1)
+    b1xy, b1w, b1h = tf.split(xycc1, [2, 1, 1], axis=-1)
+    b2xy, b2w, b2h = tf.split(xycc2, [2, 1, 1], axis=-1)
+    bchw = cma - cmi
 
-    # compute aspect ratio consistency
-    terma = tf.cast(math_ops.divide_no_nan(b1w, b1h), tf.float32)
-    termb = tf.cast(math_ops.divide_no_nan(b2w, b2h), tf.float32)
-    arcterm = tf.square(tf.math.atan(terma) - tf.math.atan(termb))
-    v = tf.squeeze(4 * arcterm / (math.pi**2), axis=-1)
-    v = tf.cast(v, b1w.dtype)
+    # Center regularization
+    center_dist = tf.reduce_sum((b1xy - b2xy)**2, axis=-1)
+    c_diag = tf.reduce_sum(bchw**2, axis=-1)
+    regularization = math_ops.divide_no_nan(center_dist, c_diag)
 
-    a = tf.stop_gradient(math_ops.divide_no_nan(v, ((1 - iou) + v)))
-    ciou = diou - (v * a)
-    ciou = tf.clip_by_value(ciou, clip_value_min=-1.0, clip_value_max=1.0)
+    # Compute aspect ratio consistency
+    terma = math_ops.divide_no_nan(b1w, b1h)  # gt
+    termb = math_ops.divide_no_nan(b2w, b2h)  # pred
+    arcterm = tf.squeeze(
+        tf.math.pow(tf.math.atan(termb) - tf.math.atan(terma), 2), axis=-1)
+    v = (4 / math.pi**2) * arcterm
+
+    # Compute the aspect ratio weight, should be treated as a constant
+    a = tf.stop_gradient(math_ops.divide_no_nan(v, 1 - iou + v))
+
+    if darknet:
+      grad_scale = tf.stop_gradient(tf.square(b2w) + tf.square(b2h))
+      v *= tf.squeeze(grad_scale, axis=-1)
+
+    ciou = iou - regularization - (v * a)
   return iou, ciou
 
-def aggregated_comparitive_iou(boxes1,
-                               boxes2=None,
-                               iou_type=0,
-                               beta=0.6):
+def aggregated_comparitive_iou(boxes1, boxes2=None, iou_type=0, beta=0.6):
   """Calculates the IOU between two sets of boxes.
 
   Similar to bbox_overlap but far more versatile.
@@ -333,11 +311,11 @@ def aggregated_comparitive_iou(boxes1, boxes2=None, iou_type=0, beta=0.6):
   else:
     boxes2 = tf.transpose(boxes1, perm=(0, 2, 1, 3))
 
-  if iou_type == 0:  # diou
+  if iou_type == 0 or iou_type == 'diou':  # diou
     _, iou = compute_diou(boxes1, boxes2, beta=beta, yxyx=True)
-  elif iou_type == 1:  # giou
+  elif iou_type == 1 or iou_type == 'giou':  # giou
     _, iou = compute_giou(boxes1, boxes2, yxyx=True)
-  elif iou_type == 2:  # ciou
+  elif iou_type == 2 or iou_type == 'ciou':  # ciou
     _, iou = compute_ciou(boxes1, boxes2, yxyx=True)
   else:
     iou = compute_iou(boxes1, boxes2, yxyx=True)
......
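The reworked `compute_ciou` above matches the standard complete-IoU of Zheng et al. (arXiv:1911.08287): CIoU = IoU - rho^2(b1, b2) / c^2 - alpha * v, where rho is the distance between the box centers, c is the diagonal of the smallest enclosing box, v = (4 / pi^2) * (atan(w1/h1) - atan(w2/h2))^2 is the aspect-ratio consistency term, and alpha = v / (1 - IoU + v) is treated as a constant via stop_gradient. A small usage sketch on toy boxes in (x_center, y_center, width, height) format (the values are illustrative only):

import tensorflow as tf
from official.vision.beta.projects.yolo.ops import box_ops

box1 = tf.constant([[0.5, 0.5, 0.4, 0.4]])  # ground truth
box2 = tf.constant([[0.6, 0.6, 0.4, 0.2]])  # prediction
iou, ciou = box_ops.compute_ciou(box1, box2, yxyx=False)
# ciou <= iou: the center-distance and aspect-ratio penalties only subtract.
print(float(iou[0]), float(ciou[0]))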
This diff is collapsed.
@@ -58,25 +58,4 @@ def divide_no_nan(a, b):
   Returns:
     a `Tensor` representing a divided by b, with all nan values removed.
   """
-  zero = tf.cast(0.0, b.dtype)
-  return tf.where(b == zero, zero, a / b)
-
-
-def mul_no_nan(x, y):
-  """Nan safe multiply operation.
-
-  Built to allow model compilation in tflite and
-  to allow one tensor to mask another. Wherever x is zero the
-  multiplication is not computed and the value is replaced with a zero. This is
-  required because 0 * nan = nan. This can make computation unstable in some
-  cases where the intended behavior is for zero to mean ignore.
-
-  Args:
-    x: any `Tensor` of any type.
-    y: any `Tensor` of any type with the same shape as tensor x.
-
-  Returns:
-    a `Tensor` representing x times y, where x is used to safely mask the
-    tensor y.
-  """
-  return tf.where(x == 0, tf.cast(0, x.dtype), x * y)
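The simplified `divide_no_nan` above trades the exact `tf.where` masking for a small epsilon in the denominator, which (judging by the removed docstring's mention of TFLite) compiles more easily and also sidesteps the classic 0 * inf = NaN pitfall a single `tf.where` can hit in the backward pass. A sketch contrasting the two variants; the function names here are illustrative:

import tensorflow as tf

def divide_no_nan_where(a, b):
  # Old behaviour: an exact zero wherever b == 0, via a select. In the
  # backward pass, a/b is still evaluated at b == 0, so the gradient can
  # become 0 * inf = NaN.
  zero = tf.cast(0.0, b.dtype)
  return tf.where(b == zero, zero, a / b)

def divide_no_nan_eps(a, b):
  # New behaviour: a small epsilon keeps the op branch-free and its
  # gradients finite everywhere, at the cost of a tiny bias and a large
  # (but finite) output where b == 0.
  return a / (b + 1e-9)

a = tf.constant([1.0, 2.0, 3.0])
b = tf.constant([0.0, 4.0, 0.0])
print(divide_no_nan_where(a, b).numpy())  # [0.  0.5 0. ]
print(divide_no_nan_eps(a, b).numpy())    # [1e9  0.5  3e9]: large, but finite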