Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
ResNet50_tensorflow
Commits
c6d7d57d
Commit
c6d7d57d
authored
Sep 13, 2021
by
A. Unique TensorFlower
Browse files
Merge pull request #10251 from PurdueDualityLab:loss_fn_pr
PiperOrigin-RevId: 396512110
parents
31fb7a65
7f90664e
Changes
14
Hide whitespace changes
Inline
Side-by-side
Showing
14 changed files
with
1790 additions
and
457 deletions
+1790
-457
official/vision/beta/projects/yolo/losses/__init__.py
official/vision/beta/projects/yolo/losses/__init__.py
+14
-0
official/vision/beta/projects/yolo/losses/yolo_loss.py
official/vision/beta/projects/yolo/losses/yolo_loss.py
+714
-0
official/vision/beta/projects/yolo/losses/yolo_loss_test.py
official/vision/beta/projects/yolo/losses/yolo_loss_test.py
+98
-0
official/vision/beta/projects/yolo/modeling/backbones/darknet.py
...l/vision/beta/projects/yolo/modeling/backbones/darknet.py
+12
-10
official/vision/beta/projects/yolo/modeling/decoders/yolo_decoder.py
...sion/beta/projects/yolo/modeling/decoders/yolo_decoder.py
+6
-7
official/vision/beta/projects/yolo/modeling/heads/yolo_head.py
...ial/vision/beta/projects/yolo/modeling/heads/yolo_head.py
+28
-3
official/vision/beta/projects/yolo/modeling/layers/detection_generator.py
...beta/projects/yolo/modeling/layers/detection_generator.py
+114
-68
official/vision/beta/projects/yolo/modeling/layers/detection_generator_test.py
...projects/yolo/modeling/layers/detection_generator_test.py
+4
-1
official/vision/beta/projects/yolo/modeling/layers/nn_blocks.py
...al/vision/beta/projects/yolo/modeling/layers/nn_blocks.py
+37
-149
official/vision/beta/projects/yolo/modeling/layers/nn_blocks_test.py
...sion/beta/projects/yolo/modeling/layers/nn_blocks_test.py
+0
-80
official/vision/beta/projects/yolo/modeling/yolo_model.py
official/vision/beta/projects/yolo/modeling/yolo_model.py
+38
-11
official/vision/beta/projects/yolo/ops/box_ops.py
official/vision/beta/projects/yolo/ops/box_ops.py
+84
-106
official/vision/beta/projects/yolo/ops/loss_utils.py
official/vision/beta/projects/yolo/ops/loss_utils.py
+640
-0
official/vision/beta/projects/yolo/ops/math_ops.py
official/vision/beta/projects/yolo/ops/math_ops.py
+1
-22
No files found.
official/vision/beta/projects/yolo/losses/__init__.py
0 → 100644
View file @
c6d7d57d
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
official/vision/beta/projects/yolo/losses/yolo_loss.py
0 → 100755
View file @
c6d7d57d
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Yolo Loss function."""
import
abc
import
collections
import
functools
import
tensorflow
as
tf
from
official.vision.beta.projects.yolo.ops
import
box_ops
from
official.vision.beta.projects.yolo.ops
import
loss_utils
from
official.vision.beta.projects.yolo.ops
import
math_ops
class YoloLossBase(object, metaclass=abc.ABCMeta):
  """Parameters for the YOLO loss functions used at each detection generator.

  This base class implements the base functionality required to implement a
  Yolo loss function.
  """

  def __init__(self,
               classes,
               mask,
               anchors,
               path_stride=1,
               ignore_thresh=0.7,
               truth_thresh=1.0,
               loss_type='ciou',
               iou_normalizer=1.0,
               cls_normalizer=1.0,
               obj_normalizer=1.0,
               label_smoothing=0.0,
               objectness_smooth=True,
               update_on_repeat=False,
               box_type='original',
               scale_x_y=1.0,
               max_delta=10):
    """Loss Function Initialization.

    Args:
      classes: `int` for the number of classes.
      mask: `List[int]` for the output level that this specific model output
        level.
      anchors: `List[List[int]]` for the anchor boxes that are used in the
        model at all levels. For anchor free prediction set the anchor list to
        be the same as the image resolution.
      path_stride: `int` for how much to scale this level to get the original
        input shape.
      ignore_thresh: `float` for the IOU value over which the loss is not
        propagated, and a detection is assumed to have been made.
      truth_thresh: `float` for the IOU value over which the loss is propagated
        despite a detection being made.
      loss_type: `str` for the type of iou loss to use with in {ciou, diou,
        giou, iou}.
      iou_normalizer: `float` for how much to scale the loss on the IOU or the
        boxes.
      cls_normalizer: `float` for how much to scale the loss on the classes.
      obj_normalizer: `float` for how much to scale loss on the detection map.
      label_smoothing: `float` for how much to smooth the loss on the classes.
      objectness_smooth: `float` for how much to smooth the loss on the
        detection map.
      update_on_repeat: `bool` for whether to replace with the newest or the
        best value when an index is consumed by multiple objects.
      box_type: `bool` for which scaling type to use.
      scale_x_y: dictionary `float` values indicating how far each pixel can
        see outside of its containment of 1.0. a value of 1.2 indicates there
        is a 20% extended radius around each pixel that this specific pixel can
        predict values for a center at. the center can range from 0 - value/2
        to 1 + value/2, this value is set in the yolo filter, and reused here.
        there should be one value for scale_xy for each level from min_level to
        max_level.
      max_delta: gradient clipping to apply to the box loss.
    """
    self._loss_type = loss_type
    self._classes = tf.constant(tf.cast(classes, dtype=tf.int32))
    self._num = tf.cast(len(mask), dtype=tf.int32)
    self._truth_thresh = truth_thresh
    self._ignore_thresh = ignore_thresh
    self._masks = mask
    self._anchors = anchors

    self._iou_normalizer = iou_normalizer
    self._cls_normalizer = cls_normalizer
    self._obj_normalizer = obj_normalizer
    self._scale_x_y = scale_x_y
    self._max_delta = max_delta

    self._label_smoothing = tf.cast(label_smoothing, tf.float32)
    self._objectness_smooth = float(objectness_smooth)
    self._update_on_repeat = update_on_repeat

    self._box_type = box_type
    self._path_stride = path_stride

    box_kwargs = dict(
        stride=self._path_stride,
        scale_xy=self._scale_x_y,
        box_type=self._box_type,
        max_delta=self._max_delta)

    self._decode_boxes = functools.partial(
        loss_utils.get_predicted_box, **box_kwargs)

    # Default pair-wise search is a no-op; subclasses replace it with a real
    # search object in _build_per_path_attributes when ignore_thresh > 0.
    self._search_pairs = (
        lambda pred_boxes, pred_classes, boxes, classes, scale, yxyx:
        (None, None, None, None))

    # BUG FIX: this hook was previously invoked twice in a row; a single call
    # is sufficient (and the second call only redid identical work).
    self._build_per_path_attributes()

  def box_loss(self, true_box, pred_box, darknet=False):
    """Call iou function and use it to compute the loss for the box maps."""
    if self._loss_type == 'giou':
      iou, liou = box_ops.compute_giou(true_box, pred_box)
    elif self._loss_type == 'ciou':
      iou, liou = box_ops.compute_ciou(true_box, pred_box, darknet=darknet)
    else:
      liou = iou = box_ops.compute_iou(true_box, pred_box)
    loss_box = 1 - liou
    return iou, liou, loss_box

  def _tiled_global_box_search(self,
                               pred_boxes,
                               pred_classes,
                               boxes,
                               classes,
                               true_conf,
                               smoothed,
                               scale=None):
    """Search of all groundtruths to associate groundtruths to predictions."""

    # Search all predictions against ground truths to find matching boxes for
    # each pixel.
    _, _, iou_max, _ = self._search_pairs(
        pred_boxes, pred_classes, boxes, classes, scale=scale, yxyx=True)

    # No search object was configured: nothing to ignore.
    if iou_max is None:
      return true_conf, tf.ones_like(true_conf)

    # Find the exact indexes to ignore and keep.
    ignore_mask = tf.cast(iou_max < self._ignore_thresh, pred_boxes.dtype)
    iou_mask = iou_max > self._ignore_thresh

    if not smoothed:
      # Ignore all pixels where a box was not supposed to be predicted but a
      # high confidence box was predicted.
      obj_mask = true_conf + (1 - true_conf) * ignore_mask
    else:
      # Replace pixels in the true confidence map with the max iou predicted
      # with in that cell.
      obj_mask = tf.ones_like(true_conf)
      iou_ = (1 - self._objectness_smooth) + self._objectness_smooth * iou_max
      iou_ = tf.where(iou_max > 0, iou_, tf.zeros_like(iou_))
      true_conf = tf.where(iou_mask, iou_, true_conf)

    # Stop gradient so while loop is not tracked.
    obj_mask = tf.stop_gradient(obj_mask)
    true_conf = tf.stop_gradient(true_conf)
    return true_conf, obj_mask

  def __call__(self, true_counts, inds, y_true, boxes, classes, y_pred):
    """Call function to compute the loss and a set of metrics per FPN level.

    Args:
      true_counts: `Tensor` of shape [batchsize, height, width, num_anchors]
        representing how many boxes are in a given pixel [j, i] in the output
        map.
      inds: `Tensor` of shape [batchsize, None, 3] indicating the location
        [j, i] that a given box is associated with in the FPN prediction map.
      y_true: `Tensor` of shape [batchsize, None, 8] indicating the actual box
        associated with each index in the inds tensor list.
      boxes: `Tensor` of shape [batchsize, None, 4] indicating the original
        ground truth boxes for each image as they came from the decoder used
        for bounding box search.
      classes: `Tensor` of shape [batchsize, None, 1] indicating the original
        ground truth classes for each image as they came from the decoder used
        for bounding box search.
      y_pred: `Tensor` of shape [batchsize, height, width, output_depth]
        holding the models output at a specific FPN level.

    Returns:
      loss: `float` for the actual loss.
      box_loss: `float` loss on the boxes used for metrics.
      conf_loss: `float` loss on the confidence used for metrics.
      class_loss: `float` loss on the classes used for metrics.
      mean_loss: `float` mean of the loss used for metrics.
      avg_iou: `float` metric for the average iou between predictions and
        ground truth.
      avg_obj: `float` metric for the average confidence of the model for
        predictions.
    """
    (loss, box_loss, conf_loss, class_loss, mean_loss, iou, pred_conf,
     ind_mask, grid_mask) = self._compute_loss(true_counts, inds, y_true,
                                               boxes, classes, y_pred)

    # Temporary metrics
    box_loss = tf.stop_gradient(0.05 * box_loss / self._iou_normalizer)

    # Metric compute using done here to save time and resources.
    sigmoid_conf = tf.stop_gradient(tf.sigmoid(pred_conf))
    iou = tf.stop_gradient(iou)
    avg_iou = loss_utils.average_iou(
        loss_utils.apply_mask(tf.squeeze(ind_mask, axis=-1), iou))
    avg_obj = loss_utils.average_iou(
        tf.squeeze(sigmoid_conf, axis=-1) * grid_mask)
    return (loss, box_loss, conf_loss, class_loss, mean_loss,
            tf.stop_gradient(avg_iou), tf.stop_gradient(avg_obj))

  @abc.abstractmethod
  def _build_per_path_attributes(self):
    """Additional initialization required for each YOLO loss version."""
    ...

  @abc.abstractmethod
  def _compute_loss(self, true_counts, inds, y_true, boxes, classes, y_pred):
    """The actual logic to apply to the raw model for optimization."""
    ...

  def post_path_aggregation(self, loss, ground_truths, predictions):  # pylint:disable=unused-argument
    """This method allows for post processing of a loss value.

    After the loss has been aggregated across all the FPN levels some post
    processing may need to occur to properly scale the loss. The default
    behavior is to pass the loss through with no alterations.

    Args:
      loss: `tf.float` scalar for the actual loss.
      ground_truths: `Dict` holding all the ground truth tensors.
      predictions: `Dict` holding all the predicted values.

    Returns:
      loss: `tf.float` scalar for the scaled loss.
    """
    return loss

  @abc.abstractmethod
  def cross_replica_aggregation(self, loss, num_replicas_in_sync):
    """This controls how the loss should be aggregated across replicas."""
    ...
@tf.custom_gradient
def grad_sigmoid(values):
  """Scales the gradient as if a sigmoid had been applied on the forward pass.

  This is used in the Darknet loss when the chosen box type is the scaled
  coordinate type, to match the propagated gradient to that of the Darknet
  Yolov4 model. The forward pass is an identity operation; the extra sigmoid
  step is only added to the back propagation.

  Args:
    values: A tensor of any shape.

  Returns:
    values: The unaltered input tensor.
    delta: A custom gradient function that adds the sigmoid step to the
      backpropagation.
  """

  def delta(dy):
    # d/dx sigmoid(x) = sigmoid(x) * (1 - sigmoid(x)).
    sig = tf.math.sigmoid(values)
    return dy * sig * (1 - sig)

  return values, delta
class DarknetLoss(YoloLossBase):
  """This class implements the full logic for the standard Yolo models."""

  def _build_per_path_attributes(self):
    """Parameterization of pair wise search and grid generators.

    Objects created here are used for box decoding and dynamic ground truth
    association.
    """
    self._anchor_generator = loss_utils.GridGenerator(
        masks=self._masks,
        anchors=self._anchors,
        scale_anchors=self._path_stride)

    # Only build a real search object when the ignore threshold is active.
    if self._ignore_thresh > 0.0:
      self._search_pairs = loss_utils.PairWiseSearch(
          iou_type='iou', any_match=True, min_conf=0.25)
    return

  def _compute_loss(self, true_counts, inds, y_true, boxes, classes, y_pred):
    """Per FPN path loss logic used for Yolov3, Yolov4, and Yolo-Tiny."""
    if self._box_type == 'scaled':
      # Darknet Model Propagates a sigmoid once in back prop so we replicate
      # that behaviour
      y_pred = grad_sigmoid(y_pred)

    # Generate and store constants and format output.
    shape = tf.shape(true_counts)
    batch_size, width, height, num = shape[0], shape[1], shape[2], shape[3]
    fwidth = tf.cast(width, tf.float32)
    fheight = tf.cast(height, tf.float32)
    grid_points, anchor_grid = self._anchor_generator(
        width, height, batch_size, dtype=tf.float32)

    # Cast all input components to float32 and stop gradient to save memory.
    boxes = tf.stop_gradient(tf.cast(boxes, tf.float32))
    classes = tf.stop_gradient(tf.cast(classes, tf.float32))
    y_true = tf.stop_gradient(tf.cast(y_true, tf.float32))
    true_counts = tf.stop_gradient(tf.cast(true_counts, tf.float32))
    true_conf = tf.stop_gradient(tf.clip_by_value(true_counts, 0.0, 1.0))
    grid_points = tf.stop_gradient(grid_points)
    anchor_grid = tf.stop_gradient(anchor_grid)

    # Split the ground truths into separate items for the loss computation.
    (true_box, ind_mask, true_class, _, _) = tf.split(
        y_true, [4, 1, 1, 1, 1], axis=-1)
    true_conf = tf.squeeze(true_conf, axis=-1)
    true_class = tf.squeeze(true_class, axis=-1)
    grid_mask = true_conf

    # Split all the predictions.
    y_pred = tf.cast(
        tf.reshape(y_pred, [batch_size, width, height, num, -1]), tf.float32)
    pred_box, pred_conf, pred_class = tf.split(y_pred, [4, 1, -1], axis=-1)

    # Decode the boxes to be used for loss compute.
    _, _, pred_box = self._decode_boxes(
        fwidth, fheight, pred_box, anchor_grid, grid_points, darknet=True)

    # If the ignore threshold is enabled, search all boxes and ignore all
    # IOU values larger than the ignore threshold that are not in the
    # noted ground truth list.
    if self._ignore_thresh != 0.0:
      (true_conf, obj_mask) = self._tiled_global_box_search(
          pred_box,
          tf.stop_gradient(tf.sigmoid(pred_class)),
          boxes,
          classes,
          true_conf,
          smoothed=self._objectness_smooth > 0)

    # Build the one hot class list that is used for the class loss.
    true_class = tf.one_hot(
        tf.cast(true_class, tf.int32),
        depth=tf.shape(pred_class)[-1],
        dtype=pred_class.dtype)
    true_classes = tf.stop_gradient(
        loss_utils.apply_mask(ind_mask, true_class))

    # Reorganize the one hot class list as a grid.
    true_class = loss_utils.build_grid(
        inds, true_classes, pred_class, ind_mask, update=False)
    true_class = tf.stop_gradient(true_class)

    # Use the class mask to find the number of objects located in each
    # predicted grid cell/pixel.
    counts = tf.reduce_sum(true_class, axis=-1, keepdims=True)
    reps = tf.gather_nd(counts, inds, batch_dims=1)
    reps = tf.squeeze(reps, axis=-1)
    # Guard against division by zero for cells with no objects.
    reps = tf.stop_gradient(
        tf.where(reps == 0.0, tf.ones_like(reps), reps))

    # Compute the loss for only the cells in which the boxes are located.
    pred_box = loss_utils.apply_mask(
        ind_mask, tf.gather_nd(pred_box, inds, batch_dims=1))
    iou, _, box_loss = self.box_loss(true_box, pred_box, darknet=True)
    box_loss = loss_utils.apply_mask(
        tf.squeeze(ind_mask, axis=-1), box_loss)
    box_loss = math_ops.divide_no_nan(box_loss, reps)
    box_loss = tf.cast(tf.reduce_sum(box_loss, axis=1), dtype=y_pred.dtype)

    # Compute the sigmoid binary cross entropy for the class maps.
    class_loss = tf.reduce_mean(
        loss_utils.sigmoid_bce(
            tf.expand_dims(true_class, axis=-1),
            tf.expand_dims(pred_class, axis=-1), self._label_smoothing),
        axis=-1)

    # Apply normalization to the class losses.
    if self._cls_normalizer < 1.0:
      # Build a mask based on the true class locations and apply the class
      # weight to class indexes where one_hot is one.
      cls_norm_mask = true_class
      class_loss *= (
          (1 - cls_norm_mask) + cls_norm_mask * self._cls_normalizer)

    # Mask the class loss and compute the sum over all the objects.
    class_loss = tf.reduce_sum(class_loss, axis=-1)
    class_loss = loss_utils.apply_mask(grid_mask, class_loss)
    class_loss = math_ops.rm_nan_inf(class_loss, val=0.0)
    class_loss = tf.cast(
        tf.reduce_sum(class_loss, axis=(1, 2, 3)), dtype=y_pred.dtype)

    # Compute the sigmoid binary cross entropy for the confidence maps.
    bce = tf.reduce_mean(
        loss_utils.sigmoid_bce(
            tf.expand_dims(true_conf, axis=-1), pred_conf, 0.0),
        axis=-1)

    # Mask the confidence loss and take the sum across all the grid cells.
    if self._ignore_thresh != 0.0:
      bce = loss_utils.apply_mask(obj_mask, bce)
    conf_loss = tf.cast(
        tf.reduce_sum(bce, axis=(1, 2, 3)), dtype=y_pred.dtype)

    # Apply the weights to each loss.
    box_loss *= self._iou_normalizer
    conf_loss *= self._obj_normalizer

    # Add all the losses together then take the mean over the batches.
    loss = box_loss + class_loss + conf_loss
    loss = tf.reduce_mean(loss)

    # Reduce the mean of the losses to use as a metric.
    box_loss = tf.reduce_mean(box_loss)
    conf_loss = tf.reduce_mean(conf_loss)
    class_loss = tf.reduce_mean(class_loss)

    return (loss, box_loss, conf_loss, class_loss, loss, iou, pred_conf,
            ind_mask, grid_mask)

  def cross_replica_aggregation(self, loss, num_replicas_in_sync):
    """This method is not specific to each loss path, but each loss type."""
    return loss / num_replicas_in_sync
class ScaledLoss(YoloLossBase):
  """This class implements the full logic for the scaled Yolo models."""

  def _build_per_path_attributes(self):
    """Parameterization of pair wise search and grid generators.

    Objects created here are used for box decoding and dynamic ground truth
    association.
    """
    self._anchor_generator = loss_utils.GridGenerator(
        masks=self._masks,
        anchors=self._anchors,
        scale_anchors=self._path_stride)

    # Only build a real search object when the ignore threshold is active.
    if self._ignore_thresh > 0.0:
      self._search_pairs = loss_utils.PairWiseSearch(
          iou_type=self._loss_type, any_match=False, min_conf=0.25)
    return

  def _compute_loss(self, true_counts, inds, y_true, boxes, classes, y_pred):
    """Per FPN path loss logic for Yolov4-csp, Yolov4-Large, and Yolov5."""
    # Generate shape constants.
    shape = tf.shape(true_counts)
    batch_size, width, height, num = shape[0], shape[1], shape[2], shape[3]
    fwidth = tf.cast(width, tf.float32)
    fheight = tf.cast(height, tf.float32)

    # Cast all input components to float32.
    y_true = tf.cast(y_true, tf.float32)
    true_counts = tf.cast(true_counts, tf.float32)
    true_conf = tf.clip_by_value(true_counts, 0.0, 1.0)
    grid_points, anchor_grid = self._anchor_generator(
        width, height, batch_size, dtype=tf.float32)

    # Split the y_true list.
    (true_box, ind_mask, true_class, _, _) = tf.split(
        y_true, [4, 1, 1, 1, 1], axis=-1)
    grid_mask = true_conf = tf.squeeze(true_conf, axis=-1)
    true_class = tf.squeeze(true_class, axis=-1)
    num_objs = tf.cast(tf.reduce_sum(ind_mask), dtype=y_pred.dtype)

    # Split up the predictions.
    y_pred = tf.cast(
        tf.reshape(y_pred, [batch_size, width, height, num, -1]), tf.float32)
    pred_box, pred_conf, pred_class = tf.split(y_pred, [4, 1, -1], axis=-1)

    # Decode the boxes for loss compute.
    scale, pred_box, _ = self._decode_boxes(
        fwidth, fheight, pred_box, anchor_grid, grid_points, darknet=False)

    # If the ignore threshold is enabled, search all boxes and ignore all
    # IOU values larger than the ignore threshold that are not in the
    # noted ground truth list.
    if self._ignore_thresh != 0.0:
      (_, obj_mask) = self._tiled_global_box_search(
          pred_box,
          tf.stop_gradient(tf.sigmoid(pred_class)),
          boxes,
          classes,
          true_conf,
          smoothed=False,
          scale=scale)

    # Scale and shift and select the ground truth boxes and predictions to
    # the prediction domain.
    offset = tf.cast(
        tf.gather_nd(grid_points, inds, batch_dims=1), true_box.dtype)
    offset = tf.concat([offset, tf.zeros_like(offset)], axis=-1)
    true_box = loss_utils.apply_mask(ind_mask, (scale * true_box) - offset)
    pred_box = loss_utils.apply_mask(
        ind_mask, tf.gather_nd(pred_box, inds, batch_dims=1))

    # Select the correct/used prediction classes.
    true_class = tf.one_hot(
        tf.cast(true_class, tf.int32),
        depth=tf.shape(pred_class)[-1],
        dtype=pred_class.dtype)
    true_class = loss_utils.apply_mask(ind_mask, true_class)
    pred_class = loss_utils.apply_mask(
        ind_mask, tf.gather_nd(pred_class, inds, batch_dims=1))

    # Compute the box loss.
    _, iou, box_loss = self.box_loss(true_box, pred_box, darknet=False)
    box_loss = loss_utils.apply_mask(
        tf.squeeze(ind_mask, axis=-1), box_loss)
    box_loss = math_ops.divide_no_nan(tf.reduce_sum(box_loss), num_objs)

    # Use the box IOU to build the map for confidence loss computation.
    iou = tf.maximum(tf.stop_gradient(iou), 0.0)
    smoothed_iou = (
        ((1 - self._objectness_smooth) * tf.cast(ind_mask, iou.dtype)) +
        self._objectness_smooth * tf.expand_dims(iou, axis=-1))
    smoothed_iou = loss_utils.apply_mask(ind_mask, smoothed_iou)
    true_conf = loss_utils.build_grid(
        inds, smoothed_iou, pred_conf, ind_mask,
        update=self._update_on_repeat)
    true_conf = tf.squeeze(true_conf, axis=-1)

    # Compute the cross entropy loss for the confidence map.
    bce = tf.keras.losses.binary_crossentropy(
        tf.expand_dims(true_conf, axis=-1), pred_conf, from_logits=True)
    if self._ignore_thresh != 0.0:
      bce = loss_utils.apply_mask(obj_mask, bce)
    conf_loss = tf.reduce_mean(bce)

    # Compute the cross entropy loss for the class maps.
    class_loss = tf.keras.losses.binary_crossentropy(
        true_class,
        pred_class,
        label_smoothing=self._label_smoothing,
        from_logits=True)
    class_loss = loss_utils.apply_mask(
        tf.squeeze(ind_mask, axis=-1), class_loss)
    class_loss = math_ops.divide_no_nan(
        tf.reduce_sum(class_loss), num_objs)

    # Apply the weights to each loss.
    box_loss *= self._iou_normalizer
    class_loss *= self._cls_normalizer
    conf_loss *= self._obj_normalizer

    # Add all the losses together then take the sum over the batches.
    mean_loss = box_loss + class_loss + conf_loss
    loss = mean_loss * tf.cast(batch_size, mean_loss.dtype)

    return (loss, box_loss, conf_loss, class_loss, mean_loss, iou, pred_conf,
            ind_mask, grid_mask)

  def post_path_aggregation(self, loss, ground_truths, predictions):
    """This method allows for post processing of a loss value.

    By default the model will have about 3 FPN levels {3, 4, 5}; on larger
    models that have more like 4 or 5 FPN levels the loss needs to be scaled
    such that the total update is scaled to the same effective magnitude as
    the model with 3 FPN levels. This helps to prevent gradient explosions.

    Args:
      loss: `tf.float` scalar for the actual loss.
      ground_truths: `Dict` holding all the ground truth tensors.
      predictions: `Dict` holding all the predicted values.

    Returns:
      loss: `tf.float` scalar for the scaled loss.
    """
    scale = tf.stop_gradient(3 / len(predictions))
    return loss * scale

  def cross_replica_aggregation(self, loss, num_replicas_in_sync):
    """In the scaled loss, take the sum of the loss across replicas."""
    return loss
class YoloLoss:
  """This class implements the aggregated loss across YOLO model FPN levels."""

  def __init__(self,
               keys,
               classes,
               anchors,
               masks=None,
               path_strides=None,
               truth_thresholds=None,
               ignore_thresholds=None,
               loss_types=None,
               iou_normalizers=None,
               cls_normalizers=None,
               obj_normalizers=None,
               objectness_smooths=None,
               box_types=None,
               scale_xys=None,
               max_deltas=None,
               label_smoothing=0.0,
               use_scaled_loss=False,
               update_on_repeat=True):
    """Loss Function Initialization.

    Args:
      keys: `List[str]` indicating the name of the FPN paths that need to be
        optimized.
      classes: `int` for the number of classes.
      anchors: `List[List[int]]` for the anchor boxes that are used in the
        model at all levels. For anchor free prediction set the anchor list to
        be the same as the image resolution.
      masks: `List[int]` for the output level that this specific model output
        level.
      path_strides: `Dict[int]` for how much to scale this level to get the
        original input shape for each FPN path.
      truth_thresholds: `Dict[float]` for the IOU value over which the loss is
        propagated despite a detection being made for each FPN path.
      ignore_thresholds: `Dict[float]` for the IOU value over which the loss
        is not propagated, and a detection is assumed to have been made for
        each FPN path.
      loss_types: `Dict[str]` for the type of iou loss to use with in {ciou,
        diou, giou, iou} for each FPN path.
      iou_normalizers: `Dict[float]` for how much to scale the loss on the IOU
        or the boxes for each FPN path.
      cls_normalizers: `Dict[float]` for how much to scale the loss on the
        classes for each FPN path.
      obj_normalizers: `Dict[float]` for how much to scale loss on the
        detection map for each FPN path.
      objectness_smooths: `Dict[float]` for how much to smooth the loss on the
        detection map for each FPN path.
      box_types: `Dict[bool]` for which scaling type to use for each FPN path.
      scale_xys: `Dict[float]` values indicating how far each pixel can see
        outside of its containment of 1.0. a value of 1.2 indicates there is a
        20% extended radius around each pixel that this specific pixel can
        predict values for a center at. the center can range from 0 - value/2
        to 1 + value/2, this value is set in the yolo filter, and reused here.
        there should be one value for scale_xy for each level from min_level
        to max_level. One for each FPN path.
      max_deltas: `Dict[float]` for gradient clipping to apply to the box loss
        for each FPN path.
      label_smoothing: `Dict[float]` for how much to smooth the loss on the
        classes for each FPN path.
      use_scaled_loss: `bool` for whether to use the scaled loss or the
        traditional loss.
      update_on_repeat: `bool` for whether to replace with the newest or the
        best value when an index is consumed by multiple objects.
    """
    losses = {'darknet': DarknetLoss, 'scaled': ScaledLoss}
    loss_type = 'scaled' if use_scaled_loss else 'darknet'

    # Build one per-path loss object for each FPN level key.
    self._loss_dict = {}
    for key in keys:
      self._loss_dict[key] = losses[loss_type](
          classes=classes,
          anchors=anchors,
          mask=masks[key],
          truth_thresh=truth_thresholds[key],
          ignore_thresh=ignore_thresholds[key],
          loss_type=loss_types[key],
          iou_normalizer=iou_normalizers[key],
          cls_normalizer=cls_normalizers[key],
          obj_normalizer=obj_normalizers[key],
          box_type=box_types[key],
          objectness_smooth=objectness_smooths[key],
          max_delta=max_deltas[key],
          path_stride=path_strides[key],
          scale_x_y=scale_xys[key],
          update_on_repeat=update_on_repeat,
          label_smoothing=label_smoothing)

  def __call__(self, ground_truth, predictions, use_reduced_logs=True):
    """Aggregate the per-FPN-level losses into one loss and a metric dict."""
    metric_dict = collections.defaultdict(dict)
    metric_dict['net']['box'] = 0
    metric_dict['net']['class'] = 0
    metric_dict['net']['conf'] = 0
    loss_val, metric_loss = 0, 0

    num_replicas_in_sync = tf.distribute.get_strategy().num_replicas_in_sync
    for key in predictions.keys():
      (loss, loss_box, loss_conf, loss_class, mean_loss, avg_iou,
       avg_obj) = self._loss_dict[key](ground_truth['true_conf'][key],
                                       ground_truth['inds'][key],
                                       ground_truth['upds'][key],
                                       ground_truth['bbox'],
                                       ground_truth['classes'],
                                       predictions[key])

      # After computing the loss, scale the loss as needed for aggregation
      # across FPN levels.
      loss = self._loss_dict[key].post_path_aggregation(
          loss, ground_truth, predictions)

      # After completing the scaling of the loss on each replica, handle
      # scaling the loss for merging the loss across replicas.
      loss = self._loss_dict[key].cross_replica_aggregation(
          loss, num_replicas_in_sync)
      loss_val += loss

      # Detach all the below gradients: none of them should make a
      # contribution to the gradient from this point forwards.
      metric_loss += tf.stop_gradient(mean_loss)
      metric_dict[key]['loss'] = tf.stop_gradient(mean_loss)
      metric_dict[key]['avg_iou'] = tf.stop_gradient(avg_iou)
      metric_dict[key]['avg_obj'] = tf.stop_gradient(avg_obj)

      if not use_reduced_logs:
        metric_dict[key]['conf_loss'] = tf.stop_gradient(loss_conf)
        metric_dict[key]['box_loss'] = tf.stop_gradient(loss_box)
        metric_dict[key]['class_loss'] = tf.stop_gradient(loss_class)

      metric_dict['net']['box'] += tf.stop_gradient(loss_box)
      metric_dict['net']['class'] += tf.stop_gradient(loss_class)
      metric_dict['net']['conf'] += tf.stop_gradient(loss_conf)

    return loss_val, metric_loss, metric_dict
official/vision/beta/projects/yolo/losses/yolo_loss_test.py
0 → 100755
View file @
c6d7d57d
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for yolo heads."""
from
absl.testing
import
parameterized
import
tensorflow
as
tf
from
official.vision.beta.projects.yolo.losses
import
yolo_loss
class YoloDecoderTest(parameterized.TestCase, tf.test.TestCase):
  # NOTE(review): the class name says "Decoder" but this file tests the YOLO
  # loss — presumably a copy-paste from the decoder test; confirm and rename
  # in a follow-up (renaming is avoided here to keep the public name stable).
  """Smoke test: construct YoloLoss and run one forward pass on dummy data."""

  @parameterized.parameters((True), (False))
  def test_loss_init(self, scaled):
    """Test creation of YOLO family models."""

    def make_ones(shapes, dtype=tf.float32):
      # Build a dict of all-ones tensors, one per FPN level key in `shapes`.
      return {level: tf.ones(shapes[level], dtype=dtype) for level in shapes}

    tf.keras.backend.set_image_data_format('channels_last')

    keys = ['3', '4', '5']

    def per_level(value):
      # Replicate a single hyperparameter value across every FPN level.
      return {level: value for level in keys}

    # Raw head outputs: 255 = 3 anchors * (80 classes + 5 box/obj channels).
    input_shape = {
        '3': [1, 52, 52, 255],
        '4': [1, 26, 26, 255],
        '5': [1, 13, 13, 255],
    }
    classes = 80
    masks = {'3': [0, 1, 2], '4': [3, 4, 5], '5': [6, 7, 8]}
    anchors = [[12.0, 19.0], [31.0, 46.0], [96.0, 54.0], [46.0, 114.0],
               [133.0, 127.0], [79.0, 225.0], [301.0, 150.0], [172.0, 286.0],
               [348.0, 340.0]]
    path_strides = {level: 2**int(level) for level in keys}

    loss = yolo_loss.YoloLoss(
        keys,
        classes,
        anchors,
        masks=masks,
        path_strides=path_strides,
        truth_thresholds=per_level(1.0),
        ignore_thresholds=per_level(0.7),
        loss_types=per_level('ciou'),
        iou_normalizers=per_level(0.05),
        cls_normalizers=per_level(0.5),
        obj_normalizers=per_level(1.0),
        objectness_smooths=per_level(1.0),
        box_types=per_level('scaled'),
        scale_xys=per_level(2.0),
        max_deltas=per_level(30.0),
        label_smoothing=0.0,
        use_scaled_loss=scaled,
        update_on_repeat=True)

    # Dummy ground truth in the layout the loss expects per level.
    count = make_ones({
        '3': [1, 52, 52, 3, 1],
        '4': [1, 26, 26, 3, 1],
        '5': [1, 13, 13, 3, 1],
    })
    ind = make_ones(
        {'3': [1, 300, 3], '4': [1, 300, 3], '5': [1, 300, 3]}, tf.int32)
    truths = make_ones(
        {'3': [1, 300, 8], '4': [1, 300, 8], '5': [1, 300, 8]})
    boxes = tf.ones([1, 300, 4], dtype=tf.float32)
    classes = tf.ones([1, 300], dtype=tf.float32)
    gt = {
        'true_conf': count,
        'inds': ind,
        'upds': truths,
        'bbox': boxes,
        'classes': classes,
    }
    # Only checks that the call runs; outputs are intentionally discarded.
    _, _, _ = loss(gt, make_ones(input_shape))
# Run the tests when this module is executed directly.
if __name__ == '__main__':
  tf.test.main()
official/vision/beta/projects/yolo/modeling/backbones/darknet.py
View file @
c6d7d57d
...
@@ -12,7 +12,6 @@
...
@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
# Lint as: python3
"""Contains definitions of Darknet Backbone Networks.
"""Contains definitions of Darknet Backbone Networks.
The models are inspired by ResNet and CSPNet.
The models are inspired by ResNet and CSPNet.
...
@@ -390,7 +389,7 @@ class Darknet(tf.keras.Model):
...
@@ -390,7 +389,7 @@ class Darknet(tf.keras.Model):
norm_momentum
=
0.99
,
norm_momentum
=
0.99
,
norm_epsilon
=
0.001
,
norm_epsilon
=
0.001
,
dilate
=
False
,
dilate
=
False
,
kernel_initializer
=
'
glorot_uniform
'
,
kernel_initializer
=
'
VarianceScaling
'
,
kernel_regularizer
=
None
,
kernel_regularizer
=
None
,
bias_regularizer
=
None
,
bias_regularizer
=
None
,
**
kwargs
):
**
kwargs
):
...
@@ -507,10 +506,12 @@ class Darknet(tf.keras.Model):
...
@@ -507,10 +506,12 @@ class Darknet(tf.keras.Model):
self
.
_default_dict
[
'name'
]
=
f
'
{
name
}
_csp_down'
self
.
_default_dict
[
'name'
]
=
f
'
{
name
}
_csp_down'
if
self
.
_dilate
:
if
self
.
_dilate
:
self
.
_default_dict
[
'dilation_rate'
]
=
config
.
dilation_rate
self
.
_default_dict
[
'dilation_rate'
]
=
config
.
dilation_rate
degrid
=
int
(
tf
.
math
.
log
(
float
(
config
.
dilation_rate
))
/
tf
.
math
.
log
(
2.
))
else
:
else
:
self
.
_default_dict
[
'dilation_rate'
]
=
1
self
.
_default_dict
[
'dilation_rate'
]
=
1
degrid
=
0
# swap/add dilation
# swap/add di
a
lation
x
,
x_route
=
nn_blocks
.
CSPRoute
(
x
,
x_route
=
nn_blocks
.
CSPRoute
(
filters
=
config
.
filters
,
filters
=
config
.
filters
,
filter_scale
=
csp_filter_scale
,
filter_scale
=
csp_filter_scale
,
...
@@ -518,7 +519,7 @@ class Darknet(tf.keras.Model):
...
@@ -518,7 +519,7 @@ class Darknet(tf.keras.Model):
**
self
.
_default_dict
)(
**
self
.
_default_dict
)(
inputs
)
inputs
)
dilated_reps
=
config
.
repetitions
-
self
.
_default_dict
[
'dilation_rate'
]
//
2
dilated_reps
=
config
.
repetitions
-
degrid
for
i
in
range
(
dilated_reps
):
for
i
in
range
(
dilated_reps
):
self
.
_default_dict
[
'name'
]
=
f
'
{
name
}
_
{
i
}
'
self
.
_default_dict
[
'name'
]
=
f
'
{
name
}
_
{
i
}
'
x
=
nn_blocks
.
DarkResidual
(
x
=
nn_blocks
.
DarkResidual
(
...
@@ -528,8 +529,8 @@ class Darknet(tf.keras.Model):
...
@@ -528,8 +529,8 @@ class Darknet(tf.keras.Model):
x
)
x
)
for
i
in
range
(
dilated_reps
,
config
.
repetitions
):
for
i
in
range
(
dilated_reps
,
config
.
repetitions
):
self
.
_default_dict
[
self
.
_default_dict
[
'dilation_rate'
]
=
max
(
'dilation_rate'
]
=
self
.
_default_dict
[
'dilation_rate'
]
//
2
1
,
self
.
_default_dict
[
'dilation_rate'
]
//
2
)
self
.
_default_dict
[
self
.
_default_dict
[
'name'
]
=
f
"
{
name
}
_
{
i
}
_degridded_
{
self
.
_default_dict
[
'dilation_rate'
]
}
"
'name'
]
=
f
"
{
name
}
_
{
i
}
_degridded_
{
self
.
_default_dict
[
'dilation_rate'
]
}
"
x
=
nn_blocks
.
DarkResidual
(
x
=
nn_blocks
.
DarkResidual
(
...
@@ -592,8 +593,8 @@ class Darknet(tf.keras.Model):
...
@@ -592,8 +593,8 @@ class Darknet(tf.keras.Model):
filters
=
config
.
filters
,
downsample
=
True
,
**
self
.
_default_dict
)(
filters
=
config
.
filters
,
downsample
=
True
,
**
self
.
_default_dict
)(
inputs
)
inputs
)
dilated_reps
=
config
.
repetitions
-
(
dilated_reps
=
config
.
repetitions
-
self
.
_default_dict
[
self
.
_default_dict
[
'dilation_rate'
]
//
2
)
-
1
'dilation_rate'
]
//
2
-
1
for
i
in
range
(
dilated_reps
):
for
i
in
range
(
dilated_reps
):
self
.
_default_dict
[
'name'
]
=
f
'
{
name
}
_
{
i
}
'
self
.
_default_dict
[
'name'
]
=
f
'
{
name
}
_
{
i
}
'
x
=
nn_blocks
.
DarkResidual
(
x
=
nn_blocks
.
DarkResidual
(
...
@@ -661,12 +662,13 @@ class Darknet(tf.keras.Model):
...
@@ -661,12 +662,13 @@ class Darknet(tf.keras.Model):
@
factory
.
register_backbone_builder
(
'darknet'
)
@
factory
.
register_backbone_builder
(
'darknet'
)
def
build_darknet
(
def
build_darknet
(
input_specs
:
tf
.
keras
.
layers
.
InputSpec
,
input_specs
:
tf
.
keras
.
layers
.
InputSpec
,
backbone_c
onfi
g
:
hyperparams
.
Config
,
backbone_c
f
g
:
hyperparams
.
Config
,
norm_activation_config
:
hyperparams
.
Config
,
norm_activation_config
:
hyperparams
.
Config
,
l2_regularizer
:
tf
.
keras
.
regularizers
.
Regularizer
=
None
)
->
tf
.
keras
.
Model
:
l2_regularizer
:
tf
.
keras
.
regularizers
.
Regularizer
=
None
)
->
tf
.
keras
.
Model
:
"""Builds darknet."""
"""Builds darknet."""
backbone_cfg
=
backbone_config
.
get
()
backbone_cfg
=
backbone_cfg
.
get
()
model
=
Darknet
(
model
=
Darknet
(
model_id
=
backbone_cfg
.
model_id
,
model_id
=
backbone_cfg
.
model_id
,
min_level
=
backbone_cfg
.
min_level
,
min_level
=
backbone_cfg
.
min_level
,
...
...
official/vision/beta/projects/yolo/modeling/decoders/yolo_decoder.py
View file @
c6d7d57d
...
@@ -12,7 +12,6 @@
...
@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
# Lint as: python3
"""Feature Pyramid Network and Path Aggregation variants used in YOLO."""
"""Feature Pyramid Network and Path Aggregation variants used in YOLO."""
import
tensorflow
as
tf
import
tensorflow
as
tf
...
@@ -39,7 +38,7 @@ class YoloFPN(tf.keras.layers.Layer):
...
@@ -39,7 +38,7 @@ class YoloFPN(tf.keras.layers.Layer):
use_sync_bn
=
False
,
use_sync_bn
=
False
,
norm_momentum
=
0.99
,
norm_momentum
=
0.99
,
norm_epsilon
=
0.001
,
norm_epsilon
=
0.001
,
kernel_initializer
=
'
glorot_uniform
'
,
kernel_initializer
=
'
VarianceScaling
'
,
kernel_regularizer
=
None
,
kernel_regularizer
=
None
,
bias_regularizer
=
None
,
bias_regularizer
=
None
,
**
kwargs
):
**
kwargs
):
...
@@ -184,7 +183,7 @@ class YoloPAN(tf.keras.layers.Layer):
...
@@ -184,7 +183,7 @@ class YoloPAN(tf.keras.layers.Layer):
use_sync_bn
=
False
,
use_sync_bn
=
False
,
norm_momentum
=
0.99
,
norm_momentum
=
0.99
,
norm_epsilon
=
0.001
,
norm_epsilon
=
0.001
,
kernel_initializer
=
'
glorot_uniform
'
,
kernel_initializer
=
'
VarianceScaling
'
,
kernel_regularizer
=
None
,
kernel_regularizer
=
None
,
bias_regularizer
=
None
,
bias_regularizer
=
None
,
fpn_input
=
True
,
fpn_input
=
True
,
...
@@ -206,7 +205,7 @@ class YoloPAN(tf.keras.layers.Layer):
...
@@ -206,7 +205,7 @@ class YoloPAN(tf.keras.layers.Layer):
by zero.
by zero.
kernel_initializer: kernel_initializer for convolutional layers.
kernel_initializer: kernel_initializer for convolutional layers.
kernel_regularizer: tf.keras.regularizers.Regularizer object for Conv2D.
kernel_regularizer: tf.keras.regularizers.Regularizer object for Conv2D.
bias_regularizer: tf.keras.regularizers.Regularizer object for Conv2
d
.
bias_regularizer: tf.keras.regularizers.Regularizer object for Conv2
D
.
fpn_input: `bool`, for whether the input into this function is an FPN or
fpn_input: `bool`, for whether the input into this function is an FPN or
a backbone.
a backbone.
fpn_filter_scale: `int`, scaling factor for the FPN filters.
fpn_filter_scale: `int`, scaling factor for the FPN filters.
...
@@ -374,7 +373,7 @@ class YoloDecoder(tf.keras.Model):
...
@@ -374,7 +373,7 @@ class YoloDecoder(tf.keras.Model):
use_sync_bn
=
False
,
use_sync_bn
=
False
,
norm_momentum
=
0.99
,
norm_momentum
=
0.99
,
norm_epsilon
=
0.001
,
norm_epsilon
=
0.001
,
kernel_initializer
=
'
glorot_uniform
'
,
kernel_initializer
=
'
VarianceScaling
'
,
kernel_regularizer
=
None
,
kernel_regularizer
=
None
,
bias_regularizer
=
None
,
bias_regularizer
=
None
,
**
kwargs
):
**
kwargs
):
...
@@ -389,8 +388,8 @@ class YoloDecoder(tf.keras.Model):
...
@@ -389,8 +388,8 @@ class YoloDecoder(tf.keras.Model):
use_fpn: `bool`, use the FPN found in the YoloV4 model.
use_fpn: `bool`, use the FPN found in the YoloV4 model.
use_spatial_attention: `bool`, use the spatial attention module.
use_spatial_attention: `bool`, use the spatial attention module.
csp_stack: `bool`, CSPize the FPN.
csp_stack: `bool`, CSPize the FPN.
fpn_depth: `int`, number of layers ot use in each FPN path
fpn_depth: `int`, number of layers ot use in each FPN path
if you choose
if you choose
to use an FPN.
to use an FPN.
fpn_filter_scale: `int`, scaling factor for the FPN filters.
fpn_filter_scale: `int`, scaling factor for the FPN filters.
path_process_len: `int`, number of layers ot use in each Decoder path.
path_process_len: `int`, number of layers ot use in each Decoder path.
max_level_process_len: `int`, number of layers ot use in the largest
max_level_process_len: `int`, number of layers ot use in the largest
...
...
official/vision/beta/projects/yolo/modeling/heads/yolo_head.py
View file @
c6d7d57d
...
@@ -12,7 +12,6 @@
...
@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
# Lint as: python3
"""Yolo heads."""
"""Yolo heads."""
import
tensorflow
as
tf
import
tensorflow
as
tf
...
@@ -30,10 +29,11 @@ class YoloHead(tf.keras.layers.Layer):
...
@@ -30,10 +29,11 @@ class YoloHead(tf.keras.layers.Layer):
output_extras
=
0
,
output_extras
=
0
,
norm_momentum
=
0.99
,
norm_momentum
=
0.99
,
norm_epsilon
=
0.001
,
norm_epsilon
=
0.001
,
kernel_initializer
=
'
glorot_uniform
'
,
kernel_initializer
=
'
VarianceScaling
'
,
kernel_regularizer
=
None
,
kernel_regularizer
=
None
,
bias_regularizer
=
None
,
bias_regularizer
=
None
,
activation
=
None
,
activation
=
None
,
smart_bias
=
False
,
**
kwargs
):
**
kwargs
):
"""Yolo Prediction Head initialization function.
"""Yolo Prediction Head initialization function.
...
@@ -52,6 +52,7 @@ class YoloHead(tf.keras.layers.Layer):
...
@@ -52,6 +52,7 @@ class YoloHead(tf.keras.layers.Layer):
kernel_regularizer: tf.keras.regularizers.Regularizer object for Conv2D.
kernel_regularizer: tf.keras.regularizers.Regularizer object for Conv2D.
bias_regularizer: tf.keras.regularizers.Regularizer object for Conv2d.
bias_regularizer: tf.keras.regularizers.Regularizer object for Conv2d.
activation: `str`, the activation function to use typically leaky or mish.
activation: `str`, the activation function to use typically leaky or mish.
smart_bias: `bool` whether or not use smart bias.
**kwargs: keyword arguments to be passed.
**kwargs: keyword arguments to be passed.
"""
"""
...
@@ -68,6 +69,7 @@ class YoloHead(tf.keras.layers.Layer):
...
@@ -68,6 +69,7 @@ class YoloHead(tf.keras.layers.Layer):
self
.
_output_extras
=
output_extras
self
.
_output_extras
=
output_extras
self
.
_output_conv
=
(
classes
+
output_extras
+
5
)
*
boxes_per_level
self
.
_output_conv
=
(
classes
+
output_extras
+
5
)
*
boxes_per_level
self
.
_smart_bias
=
smart_bias
self
.
_base_config
=
dict
(
self
.
_base_config
=
dict
(
activation
=
activation
,
activation
=
activation
,
...
@@ -85,10 +87,29 @@ class YoloHead(tf.keras.layers.Layer):
...
@@ -85,10 +87,29 @@ class YoloHead(tf.keras.layers.Layer):
use_bn
=
False
,
use_bn
=
False
,
**
self
.
_base_config
)
**
self
.
_base_config
)
def
bias_init
(
self
,
scale
,
inshape
,
isize
=
640
,
no_per_conf
=
8
):
def
bias
(
shape
,
dtype
):
init
=
tf
.
keras
.
initializers
.
Zeros
()
base
=
init
(
shape
,
dtype
=
dtype
)
if
self
.
_smart_bias
:
base
=
tf
.
reshape
(
base
,
[
self
.
_boxes_per_level
,
-
1
])
box
,
conf
,
classes
=
tf
.
split
(
base
,
[
4
,
1
,
-
1
],
axis
=-
1
)
conf
+=
tf
.
math
.
log
(
no_per_conf
/
((
isize
/
scale
)
**
2
))
classes
+=
tf
.
math
.
log
(
0.6
/
(
self
.
_classes
-
0.99
))
base
=
tf
.
concat
([
box
,
conf
,
classes
],
axis
=-
1
)
base
=
tf
.
reshape
(
base
,
[
-
1
])
return
base
return
bias
def
build
(
self
,
input_shape
):
def
build
(
self
,
input_shape
):
self
.
_head
=
dict
()
self
.
_head
=
dict
()
for
key
in
self
.
_key_list
:
for
key
in
self
.
_key_list
:
self
.
_head
[
key
]
=
nn_blocks
.
ConvBN
(
**
self
.
_conv_config
)
scale
=
2
**
int
(
key
)
self
.
_head
[
key
]
=
nn_blocks
.
ConvBN
(
bias_initializer
=
self
.
bias_init
(
scale
,
input_shape
[
key
][
-
1
]),
**
self
.
_conv_config
)
def
call
(
self
,
inputs
):
def
call
(
self
,
inputs
):
outputs
=
dict
()
outputs
=
dict
()
...
@@ -107,6 +128,10 @@ class YoloHead(tf.keras.layers.Layer):
...
@@ -107,6 +128,10 @@ class YoloHead(tf.keras.layers.Layer):
'Model has to be built before number of boxes can be determined.'
)
'Model has to be built before number of boxes can be determined.'
)
return
(
self
.
_max_level
-
self
.
_min_level
+
1
)
*
self
.
_boxes_per_level
return
(
self
.
_max_level
-
self
.
_min_level
+
1
)
*
self
.
_boxes_per_level
@
property
def
num_heads
(
self
):
return
self
.
_max_level
-
self
.
_min_level
+
1
def
get_config
(
self
):
def
get_config
(
self
):
config
=
dict
(
config
=
dict
(
min_level
=
self
.
_min_level
,
min_level
=
self
.
_min_level
,
...
...
official/vision/beta/projects/yolo/modeling/layers/detection_generator.py
View file @
c6d7d57d
...
@@ -15,7 +15,10 @@
...
@@ -15,7 +15,10 @@
"""Contains common building blocks for yolo layer (detection layer)."""
"""Contains common building blocks for yolo layer (detection layer)."""
import
tensorflow
as
tf
import
tensorflow
as
tf
from
official.vision.beta.modeling.layers
import
detection_generator
from
official.vision.beta.projects.yolo.losses
import
yolo_loss
from
official.vision.beta.projects.yolo.ops
import
box_ops
from
official.vision.beta.projects.yolo.ops
import
box_ops
from
official.vision.beta.projects.yolo.ops
import
loss_utils
@
tf
.
keras
.
utils
.
register_keras_serializable
(
package
=
'yolo'
)
@
tf
.
keras
.
utils
.
register_keras_serializable
(
package
=
'yolo'
)
...
@@ -36,11 +39,11 @@ class YoloLayer(tf.keras.Model):
...
@@ -36,11 +39,11 @@ class YoloLayer(tf.keras.Model):
cls_normalizer
=
1.0
,
cls_normalizer
=
1.0
,
obj_normalizer
=
1.0
,
obj_normalizer
=
1.0
,
use_scaled_loss
=
False
,
use_scaled_loss
=
False
,
darknet
=
Non
e
,
update_on_repeat
=
Fals
e
,
pre_nms_points
=
5000
,
pre_nms_points
=
5000
,
label_smoothing
=
0.0
,
label_smoothing
=
0.0
,
max_boxes
=
200
,
max_boxes
=
200
,
new_cords
=
False
,
box_type
=
'original'
,
path_scale
=
None
,
path_scale
=
None
,
scale_xy
=
None
,
scale_xy
=
None
,
nms_type
=
'greedy'
,
nms_type
=
'greedy'
,
...
@@ -70,14 +73,25 @@ class YoloLayer(tf.keras.Model):
...
@@ -70,14 +73,25 @@ class YoloLayer(tf.keras.Model):
obj_normalizer: `float` for how much to scale loss on the detection map.
obj_normalizer: `float` for how much to scale loss on the detection map.
use_scaled_loss: `bool` for whether to use the scaled loss
use_scaled_loss: `bool` for whether to use the scaled loss
or the traditional loss.
or the traditional loss.
darknet: `bool` for whether to use the DarkNet or PyTorch loss function
update_on_repeat: `bool` indicating how you would like to handle repeated
implementation.
indexes in a given [j, i] index. Setting this to True will give more
consistent MAP, setting it to falls will improve recall by 1-2% but will
sacrifice some MAP.
pre_nms_points: `int` number of top candidate detections per class before
pre_nms_points: `int` number of top candidate detections per class before
NMS.
NMS.
label_smoothing: `float` for how much to smooth the loss on the classes.
label_smoothing: `float` for how much to smooth the loss on the classes.
max_boxes: `int` for the maximum number of boxes retained over all
max_boxes: `int` for the maximum number of boxes retained over all
classes.
classes.
new_cords: `bool` for using the ScaledYOLOv4 coordinates.
box_type: `str`, there are 3 different box types that will affect training
differently {original, scaled and anchor_free}. The original method
decodes the boxes by applying an exponential to the model width and
height maps, then scaling the maps by the anchor boxes. This method is
used in Yolo-v4, Yolo-v3, and all its counterparts. The Scale method
squares the width and height and scales both by a fixed factor of 4.
This method is used in the Scale Yolo models, as well as Yolov4-CSP.
Finally, anchor_free is like the original method but will not apply an
activation function to the boxes, this is used for some of the newer
anchor free versions of YOLO.
path_scale: `dict` for the size of the input tensors. Defaults to
path_scale: `dict` for the size of the input tensors. Defaults to
precalulated values from the `mask`.
precalulated values from the `mask`.
scale_xy: dictionary `float` values indicating how far each pixel can see
scale_xy: dictionary `float` values indicating how far each pixel can see
...
@@ -91,18 +105,6 @@ class YoloLayer(tf.keras.Model):
...
@@ -91,18 +105,6 @@ class YoloLayer(tf.keras.Model):
objectness_smooth: `float` for how much to smooth the loss on the
objectness_smooth: `float` for how much to smooth the loss on the
detection map.
detection map.
**kwargs: Additional keyword arguments.
**kwargs: Additional keyword arguments.
Return:
loss: `float` for the actual loss.
box_loss: `float` loss on the boxes used for metrics.
conf_loss: `float` loss on the confidence used for metrics.
class_loss: `float` loss on the classes used for metrics.
avg_iou: `float` metric for the average iou between predictions
and ground truth.
avg_obj: `float` metric for the average confidence of the model
for predictions.
recall50: `float` metric for how accurate the model is.
precision50: `float` metric for how precise the model is.
"""
"""
super
().
__init__
(
**
kwargs
)
super
().
__init__
(
**
kwargs
)
self
.
_masks
=
masks
self
.
_masks
=
masks
...
@@ -121,29 +123,18 @@ class YoloLayer(tf.keras.Model):
...
@@ -121,29 +123,18 @@ class YoloLayer(tf.keras.Model):
self
.
_loss_type
=
loss_type
self
.
_loss_type
=
loss_type
self
.
_use_scaled_loss
=
use_scaled_loss
self
.
_use_scaled_loss
=
use_scaled_loss
self
.
_
darknet
=
darkne
t
self
.
_
update_on_repeat
=
update_on_repea
t
self
.
_pre_nms_points
=
pre_nms_points
self
.
_pre_nms_points
=
pre_nms_points
self
.
_label_smoothing
=
label_smoothing
self
.
_label_smoothing
=
label_smoothing
self
.
_keys
=
list
(
masks
.
keys
())
self
.
_keys
=
list
(
masks
.
keys
())
self
.
_len_keys
=
len
(
self
.
_keys
)
self
.
_len_keys
=
len
(
self
.
_keys
)
self
.
_
new_cords
=
new_cords
self
.
_
box_type
=
box_type
self
.
_path_scale
=
path_scale
or
{
self
.
_path_scale
=
path_scale
or
{
key
:
2
**
int
(
key
)
for
key
,
_
in
masks
.
items
()
key
:
2
**
int
(
key
)
for
key
,
_
in
masks
.
items
()
}
}
self
.
_nms_types
=
{
self
.
_nms_type
=
nms_type
'greedy'
:
1
,
'iou'
:
2
,
'giou'
:
3
,
'ciou'
:
4
,
'diou'
:
5
,
'class_independent'
:
6
,
'weighted_diou'
:
7
}
self
.
_nms_type
=
self
.
_nms_types
[
nms_type
]
self
.
_scale_xy
=
scale_xy
or
{
key
:
1.0
for
key
,
_
in
masks
.
items
()}
self
.
_scale_xy
=
scale_xy
or
{
key
:
1.0
for
key
,
_
in
masks
.
items
()}
self
.
_generator
=
{}
self
.
_generator
=
{}
...
@@ -156,27 +147,33 @@ class YoloLayer(tf.keras.Model):
...
@@ -156,27 +147,33 @@ class YoloLayer(tf.keras.Model):
return
return
def
get_generators
(
self
,
anchors
,
path_scale
,
path_key
):
def
get_generators
(
self
,
anchors
,
path_scale
,
path_key
):
return
None
anchor_generator
=
loss_utils
.
GridGenerator
(
anchors
,
scale_anchors
=
path_scale
)
def
rm_nan_inf
(
self
,
x
,
val
=
0.0
):
return
anchor_generator
x
=
tf
.
where
(
tf
.
math
.
is_nan
(
x
),
tf
.
cast
(
val
,
dtype
=
x
.
dtype
),
x
)
x
=
tf
.
where
(
tf
.
math
.
is_inf
(
x
),
tf
.
cast
(
val
,
dtype
=
x
.
dtype
),
x
)
return
x
def
parse_prediction_path
(
self
,
key
,
inputs
):
def
parse_prediction_path
(
self
,
key
,
inputs
):
shape_
=
tf
.
shape
(
inputs
)
shape
=
inputs
.
get_shape
().
as_list
()
shape
=
inputs
.
get_shape
().
as_list
()
height
,
width
=
shape
[
1
],
shape
[
2
]
batchsize
,
height
,
width
=
shape_
[
0
],
shape
[
1
],
shape
[
2
]
if
height
is
None
or
width
is
None
:
height
,
width
=
shape_
[
1
],
shape_
[
2
]
generator
=
self
.
_generator
[
key
]
len_mask
=
self
.
_len_mask
[
key
]
len_mask
=
self
.
_len_mask
[
key
]
scale_xy
=
self
.
_scale_xy
[
key
]
# reshape the yolo output to (batchsize,
# reshape the yolo output to (batchsize,
# width,
# width,
# height,
# height,
# number_anchors,
# number_anchors,
# remaining_points)
# remaining_points)
data
=
tf
.
reshape
(
inputs
,
[
-
1
,
height
,
width
,
len_mask
,
self
.
_classes
+
5
])
data
=
tf
.
reshape
(
inputs
,
[
-
1
,
height
,
width
,
len_mask
,
self
.
_classes
+
5
])
# use the grid generator to get the formatted anchor boxes and grid points
# in shape [1, height, width, 2]
centers
,
anchors
=
generator
(
height
,
width
,
batchsize
,
dtype
=
data
.
dtype
)
# split the yolo detections into boxes, object score map, classes
# split the yolo detections into boxes, object score map, classes
boxes
,
obns_scores
,
class_scores
=
tf
.
split
(
boxes
,
obns_scores
,
class_scores
=
tf
.
split
(
data
,
[
4
,
1
,
self
.
_classes
],
axis
=-
1
)
data
,
[
4
,
1
,
self
.
_classes
],
axis
=-
1
)
...
@@ -184,25 +181,32 @@ class YoloLayer(tf.keras.Model):
...
@@ -184,25 +181,32 @@ class YoloLayer(tf.keras.Model):
# determine the number of classes
# determine the number of classes
classes
=
class_scores
.
get_shape
().
as_list
()[
-
1
]
classes
=
class_scores
.
get_shape
().
as_list
()[
-
1
]
# configurable to use the new coordinates in scaled Yolo v4 or not
_
,
_
,
boxes
=
loss_utils
.
get_predicted_box
(
tf
.
cast
(
height
,
data
.
dtype
),
tf
.
cast
(
width
,
data
.
dtype
),
boxes
,
anchors
,
centers
,
scale_xy
,
stride
=
self
.
_path_scale
[
key
],
darknet
=
False
,
box_type
=
self
.
_box_type
[
key
])
# convert boxes from yolo(x, y, w. h) to tensorflow(ymin, xmin, ymax, xmax)
# convert boxes from yolo(x, y, w. h) to tensorflow(ymin, xmin, ymax, xmax)
boxes
=
box_ops
.
xcycwh_to_yxyx
(
boxes
)
boxes
=
box_ops
.
xcycwh_to_yxyx
(
boxes
)
# activate and detection map
# activate and detection map
obns_scores
=
tf
.
math
.
sigmoid
(
obns_scores
)
obns_scores
=
tf
.
math
.
sigmoid
(
obns_scores
)
# threshold the detection map
obns_mask
=
tf
.
cast
(
obns_scores
>
self
.
_thresh
,
obns_scores
.
dtype
)
# convert detection map to class detection probabailities
# convert detection map to class detection probabailities
class_scores
=
tf
.
math
.
sigmoid
(
class_scores
)
*
obns_mask
*
obns_scores
class_scores
=
tf
.
math
.
sigmoid
(
class_scores
)
*
obns_scores
class_scores
*=
tf
.
cast
(
class_scores
>
self
.
_thresh
,
class_scores
.
dtype
)
fill
=
height
*
width
*
len_mask
# platten predictions to [batchsize, N, -1] for non max supression
# platten predictions to [batchsize, N, -1] for non max supression
fill
=
height
*
width
*
len_mask
boxes
=
tf
.
reshape
(
boxes
,
[
-
1
,
fill
,
4
])
boxes
=
tf
.
reshape
(
boxes
,
[
-
1
,
fill
,
4
])
class_scores
=
tf
.
reshape
(
class_scores
,
[
-
1
,
fill
,
classes
])
class_scores
=
tf
.
reshape
(
class_scores
,
[
-
1
,
fill
,
classes
])
obns_scores
=
tf
.
reshape
(
obns_scores
,
[
-
1
,
fill
])
obns_scores
=
tf
.
reshape
(
obns_scores
,
[
-
1
,
fill
])
return
obns_scores
,
boxes
,
class_scores
return
obns_scores
,
boxes
,
class_scores
def
call
(
self
,
inputs
):
def
call
(
self
,
inputs
):
...
@@ -224,26 +228,49 @@ class YoloLayer(tf.keras.Model):
...
@@ -224,26 +228,49 @@ class YoloLayer(tf.keras.Model):
# colate all predicitons
# colate all predicitons
boxes
=
tf
.
concat
(
boxes
,
axis
=
1
)
boxes
=
tf
.
concat
(
boxes
,
axis
=
1
)
object_scores
=
tf
.
keras
.
backend
.
concatenate
(
object_scores
,
axis
=
1
)
object_scores
=
tf
.
concat
(
object_scores
,
axis
=
1
)
class_scores
=
tf
.
keras
.
backend
.
concatenate
(
class_scores
,
axis
=
1
)
class_scores
=
tf
.
concat
(
class_scores
,
axis
=
1
)
# greedy NMS
# get masks to threshold all the predicitons
boxes
=
tf
.
cast
(
boxes
,
dtype
=
tf
.
float32
)
object_mask
=
tf
.
cast
(
object_scores
>
self
.
_thresh
,
object_scores
.
dtype
)
class_scores
=
tf
.
cast
(
class_scores
,
dtype
=
tf
.
float32
)
class_mask
=
tf
.
cast
(
class_scores
>
self
.
_thresh
,
class_scores
.
dtype
)
nms_items
=
tf
.
image
.
combined_non_max_suppression
(
tf
.
expand_dims
(
boxes
,
axis
=-
2
),
# apply thresholds mask to all the predicitons
class_scores
,
object_scores
*=
object_mask
self
.
_pre_nms_points
,
class_scores
*=
(
tf
.
expand_dims
(
object_mask
,
axis
=-
1
)
*
class_mask
)
self
.
_max_boxes
,
iou_threshold
=
self
.
_nms_thresh
,
# apply nms
score_threshold
=
self
.
_thresh
)
if
self
.
_nms_type
==
'greedy'
:
# cast the boxes and predicitons abck to original datatype
# greedy NMS
boxes
=
tf
.
cast
(
nms_items
.
nmsed_boxes
,
object_scores
.
dtype
)
boxes
=
tf
.
cast
(
boxes
,
dtype
=
tf
.
float32
)
class_scores
=
tf
.
cast
(
nms_items
.
nmsed_classes
,
object_scores
.
dtype
)
class_scores
=
tf
.
cast
(
class_scores
,
dtype
=
tf
.
float32
)
object_scores
=
tf
.
cast
(
nms_items
.
nmsed_scores
,
object_scores
.
dtype
)
boxes
,
object_scores_
,
class_scores
,
num_detections
=
(
tf
.
image
.
combined_non_max_suppression
(
# compute the number of valid detections
tf
.
expand_dims
(
boxes
,
axis
=-
2
),
num_detections
=
tf
.
math
.
reduce_sum
(
tf
.
math
.
ceil
(
object_scores
),
axis
=-
1
)
class_scores
,
self
.
_pre_nms_points
,
self
.
_max_boxes
,
iou_threshold
=
self
.
_nms_thresh
,
score_threshold
=
self
.
_thresh
))
# cast the boxes and predicitons abck to original datatype
boxes
=
tf
.
cast
(
boxes
,
object_scores
.
dtype
)
class_scores
=
tf
.
cast
(
class_scores
,
object_scores
.
dtype
)
object_scores
=
tf
.
cast
(
object_scores_
,
object_scores
.
dtype
)
else
:
# TPU NMS
boxes
=
tf
.
cast
(
boxes
,
dtype
=
tf
.
float32
)
class_scores
=
tf
.
cast
(
class_scores
,
dtype
=
tf
.
float32
)
(
boxes
,
confidence
,
classes
,
num_detections
)
=
detection_generator
.
_generate_detections_v2
(
# pylint:disable=protected-access
tf
.
expand_dims
(
boxes
,
axis
=-
2
),
class_scores
,
pre_nms_top_k
=
self
.
_pre_nms_points
,
max_num_detections
=
self
.
_max_boxes
,
nms_iou_threshold
=
self
.
_nms_thresh
,
pre_nms_score_threshold
=
self
.
_thresh
)
boxes
=
tf
.
cast
(
boxes
,
object_scores
.
dtype
)
class_scores
=
tf
.
cast
(
classes
,
object_scores
.
dtype
)
object_scores
=
tf
.
cast
(
confidence
,
object_scores
.
dtype
)
# format and return
# format and return
return
{
return
{
...
@@ -258,9 +285,28 @@ class YoloLayer(tf.keras.Model):
...
@@ -258,9 +285,28 @@ class YoloLayer(tf.keras.Model):
"""Generates a dictionary of losses to apply to each path.
"""Generates a dictionary of losses to apply to each path.
Done in the detection generator because all parameters are the same
Done in the detection generator because all parameters are the same
across both loss and detection generator
.
across both loss and detection generator
"""
"""
return
None
loss
=
yolo_loss
.
YoloLoss
(
keys
=
self
.
_keys
,
classes
=
self
.
_classes
,
anchors
=
self
.
_anchors
,
masks
=
self
.
_masks
,
path_strides
=
self
.
_path_scale
,
truth_thresholds
=
self
.
_truth_thresh
,
ignore_thresholds
=
self
.
_ignore_thresh
,
loss_types
=
self
.
_loss_type
,
iou_normalizers
=
self
.
_iou_normalizer
,
cls_normalizers
=
self
.
_cls_normalizer
,
obj_normalizers
=
self
.
_obj_normalizer
,
objectness_smooths
=
self
.
_objectness_smooth
,
box_types
=
self
.
_box_type
,
max_deltas
=
self
.
_max_delta
,
scale_xys
=
self
.
_scale_xy
,
use_scaled_loss
=
self
.
_use_scaled_loss
,
update_on_repeat
=
self
.
_update_on_repeat
,
label_smoothing
=
self
.
_label_smoothing
)
return
loss
def
get_config
(
self
):
def
get_config
(
self
):
return
{
return
{
...
...
official/vision/beta/projects/yolo/modeling/layers/detection_generator_test.py
View file @
c6d7d57d
...
@@ -39,7 +39,10 @@ class YoloDecoderTest(parameterized.TestCase, tf.test.TestCase):
...
@@ -39,7 +39,10 @@ class YoloDecoderTest(parameterized.TestCase, tf.test.TestCase):
anchors
=
[[
12.0
,
19.0
],
[
31.0
,
46.0
],
[
96.0
,
54.0
],
[
46.0
,
114.0
],
anchors
=
[[
12.0
,
19.0
],
[
31.0
,
46.0
],
[
96.0
,
54.0
],
[
46.0
,
114.0
],
[
133.0
,
127.0
],
[
79.0
,
225.0
],
[
301.0
,
150.0
],
[
172.0
,
286.0
],
[
133.0
,
127.0
],
[
79.0
,
225.0
],
[
301.0
,
150.0
],
[
172.0
,
286.0
],
[
348.0
,
340.0
]]
[
348.0
,
340.0
]]
layer
=
dg
.
YoloLayer
(
masks
,
anchors
,
classes
,
max_boxes
=
10
)
box_type
=
{
key
:
'scaled'
for
key
in
masks
.
keys
()}
layer
=
dg
.
YoloLayer
(
masks
,
anchors
,
classes
,
box_type
=
box_type
,
max_boxes
=
10
)
inputs
=
{}
inputs
=
{}
for
key
in
input_shape
:
for
key
in
input_shape
:
...
...
official/vision/beta/projects/yolo/modeling/layers/nn_blocks.py
View file @
c6d7d57d
...
@@ -12,9 +12,7 @@
...
@@ -12,9 +12,7 @@
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
# Lint as: python3
"""Contains common building blocks for yolo neural networks."""
"""Contains common building blocks for yolo neural networks."""
from
typing
import
Callable
,
List
import
tensorflow
as
tf
import
tensorflow
as
tf
from
official.modeling
import
tf_utils
from
official.modeling
import
tf_utils
from
official.vision.beta.ops
import
spatial_transform_ops
from
official.vision.beta.ops
import
spatial_transform_ops
...
@@ -48,7 +46,7 @@ class ConvBN(tf.keras.layers.Layer):
...
@@ -48,7 +46,7 @@ class ConvBN(tf.keras.layers.Layer):
strides
=
(
1
,
1
),
strides
=
(
1
,
1
),
padding
=
'same'
,
padding
=
'same'
,
dilation_rate
=
(
1
,
1
),
dilation_rate
=
(
1
,
1
),
kernel_initializer
=
'
glorot_uniform
'
,
kernel_initializer
=
'
VarianceScaling
'
,
bias_initializer
=
'zeros'
,
bias_initializer
=
'zeros'
,
bias_regularizer
=
None
,
bias_regularizer
=
None
,
kernel_regularizer
=
None
,
kernel_regularizer
=
None
,
...
@@ -97,7 +95,14 @@ class ConvBN(tf.keras.layers.Layer):
...
@@ -97,7 +95,14 @@ class ConvBN(tf.keras.layers.Layer):
self
.
_strides
=
strides
self
.
_strides
=
strides
self
.
_padding
=
padding
self
.
_padding
=
padding
self
.
_dilation_rate
=
dilation_rate
self
.
_dilation_rate
=
dilation_rate
self
.
_kernel_initializer
=
kernel_initializer
if
kernel_initializer
==
'VarianceScaling'
:
# to match pytorch initialization method
self
.
_kernel_initializer
=
tf
.
keras
.
initializers
.
VarianceScaling
(
scale
=
1
/
3
,
mode
=
'fan_in'
,
distribution
=
'uniform'
)
else
:
self
.
_kernel_initializer
=
kernel_initializer
self
.
_bias_initializer
=
bias_initializer
self
.
_bias_initializer
=
bias_initializer
self
.
_kernel_regularizer
=
kernel_regularizer
self
.
_kernel_regularizer
=
kernel_regularizer
...
@@ -194,7 +199,7 @@ class DarkResidual(tf.keras.layers.Layer):
...
@@ -194,7 +199,7 @@ class DarkResidual(tf.keras.layers.Layer):
filters
=
1
,
filters
=
1
,
filter_scale
=
2
,
filter_scale
=
2
,
dilation_rate
=
1
,
dilation_rate
=
1
,
kernel_initializer
=
'
glorot_uniform
'
,
kernel_initializer
=
'
VarianceScaling
'
,
bias_initializer
=
'zeros'
,
bias_initializer
=
'zeros'
,
kernel_regularizer
=
None
,
kernel_regularizer
=
None
,
bias_regularizer
=
None
,
bias_regularizer
=
None
,
...
@@ -366,7 +371,7 @@ class CSPTiny(tf.keras.layers.Layer):
...
@@ -366,7 +371,7 @@ class CSPTiny(tf.keras.layers.Layer):
def
__init__
(
self
,
def
__init__
(
self
,
filters
=
1
,
filters
=
1
,
kernel_initializer
=
'
glorot_uniform
'
,
kernel_initializer
=
'
VarianceScaling
'
,
bias_initializer
=
'zeros'
,
bias_initializer
=
'zeros'
,
bias_regularizer
=
None
,
bias_regularizer
=
None
,
kernel_regularizer
=
None
,
kernel_regularizer
=
None
,
...
@@ -532,7 +537,7 @@ class CSPRoute(tf.keras.layers.Layer):
...
@@ -532,7 +537,7 @@ class CSPRoute(tf.keras.layers.Layer):
filters
,
filters
,
filter_scale
=
2
,
filter_scale
=
2
,
activation
=
'mish'
,
activation
=
'mish'
,
kernel_initializer
=
'
glorot_uniform
'
,
kernel_initializer
=
'
VarianceScaling
'
,
bias_initializer
=
'zeros'
,
bias_initializer
=
'zeros'
,
bias_regularizer
=
None
,
bias_regularizer
=
None
,
kernel_regularizer
=
None
,
kernel_regularizer
=
None
,
...
@@ -661,7 +666,7 @@ class CSPConnect(tf.keras.layers.Layer):
...
@@ -661,7 +666,7 @@ class CSPConnect(tf.keras.layers.Layer):
drop_first
=
False
,
drop_first
=
False
,
activation
=
'mish'
,
activation
=
'mish'
,
kernel_size
=
(
1
,
1
),
kernel_size
=
(
1
,
1
),
kernel_initializer
=
'
glorot_uniform
'
,
kernel_initializer
=
'
VarianceScaling
'
,
bias_initializer
=
'zeros'
,
bias_initializer
=
'zeros'
,
bias_regularizer
=
None
,
bias_regularizer
=
None
,
kernel_regularizer
=
None
,
kernel_regularizer
=
None
,
...
@@ -761,122 +766,6 @@ class CSPConnect(tf.keras.layers.Layer):
...
@@ -761,122 +766,6 @@ class CSPConnect(tf.keras.layers.Layer):
return
x
return
x
class
CSPStack
(
tf
.
keras
.
layers
.
Layer
):
"""CSP Stack layer.
CSP full stack, combines the route and the connect in case you dont want to
jsut quickly wrap an existing callable or list of layers to
make it a cross stage partial. Added for ease of use. you should be able
to wrap any layer stack with a CSP independent of wether it belongs
to the Darknet family. if filter_scale = 2, then the blocks in the stack
passed into the the CSP stack should also have filters = filters/filter_scale
Cross Stage Partial networks (CSPNets) were proposed in:
[1] Chien-Yao Wang, Hong-Yuan Mark Liao, I-Hau Yeh, Yueh-Hua Wu,
Ping-Yang Chen, Jun-Wei Hsieh
CSPNet: A New Backbone that can Enhance Learning Capability of CNN.
arXiv:1911.11929
"""
def
__init__
(
self
,
filters
,
model_to_wrap
=
None
,
filter_scale
=
2
,
activation
=
'mish'
,
kernel_initializer
=
'glorot_uniform'
,
bias_initializer
=
'zeros'
,
bias_regularizer
=
None
,
kernel_regularizer
=
None
,
downsample
=
True
,
use_bn
=
True
,
use_sync_bn
=
False
,
norm_momentum
=
0.99
,
norm_epsilon
=
0.001
,
**
kwargs
):
"""CSPStack layer initializer.
Args:
filters: integer for output depth, or the number of features to learn.
model_to_wrap: callable Model or a list of callable objects that will
process the output of CSPRoute, and be input into CSPConnect.
list will be called sequentially.
filter_scale: integer dictating (filters//2) or the number of filters in
the partial feature stack.
activation: string for activation function to use in layer.
kernel_initializer: string to indicate which function to use to initialize
weights.
bias_initializer: string to indicate which function to use to initialize
bias.
bias_regularizer: string to indicate which function to use to regularizer
bias.
kernel_regularizer: string to indicate which function to use to
regularizer weights.
downsample: down_sample the input.
use_bn: boolean for whether to use batch normalization.
use_sync_bn: boolean for whether sync batch normalization statistics
of all batch norm layers to the models global statistics
(across all input batches).
norm_momentum: float for moment to use for batch normalization.
norm_epsilon: float for batch normalization epsilon.
**kwargs: Keyword Arguments.
Raises:
TypeError: model_to_wrap is not a layer or a list of layers
"""
super
().
__init__
(
**
kwargs
)
# layer params
self
.
_filters
=
filters
self
.
_filter_scale
=
filter_scale
self
.
_activation
=
activation
self
.
_downsample
=
downsample
# convoultion params
self
.
_kernel_initializer
=
kernel_initializer
self
.
_bias_initializer
=
bias_initializer
self
.
_kernel_regularizer
=
kernel_regularizer
self
.
_bias_regularizer
=
bias_regularizer
self
.
_use_bn
=
use_bn
self
.
_use_sync_bn
=
use_sync_bn
self
.
_norm_momentum
=
norm_momentum
self
.
_norm_epsilon
=
norm_epsilon
if
model_to_wrap
is
None
:
self
.
_model_to_wrap
=
[]
elif
isinstance
(
model_to_wrap
,
Callable
):
self
.
_model_to_wrap
=
[
model_to_wrap
]
elif
isinstance
(
model_to_wrap
,
List
):
self
.
_model_to_wrap
=
model_to_wrap
else
:
raise
TypeError
(
'the input to the CSPStack must be a list of layers that we can'
+
'iterate through, or
\n
a callable'
)
def
build
(
self
,
input_shape
):
dark_conv_args
=
{
'filters'
:
self
.
_filters
,
'filter_scale'
:
self
.
_filter_scale
,
'activation'
:
self
.
_activation
,
'kernel_initializer'
:
self
.
_kernel_initializer
,
'bias_initializer'
:
self
.
_bias_initializer
,
'bias_regularizer'
:
self
.
_bias_regularizer
,
'use_bn'
:
self
.
_use_bn
,
'use_sync_bn'
:
self
.
_use_sync_bn
,
'norm_momentum'
:
self
.
_norm_momentum
,
'norm_epsilon'
:
self
.
_norm_epsilon
,
'kernel_regularizer'
:
self
.
_kernel_regularizer
,
}
self
.
_route
=
CSPRoute
(
downsample
=
self
.
_downsample
,
**
dark_conv_args
)
self
.
_connect
=
CSPConnect
(
**
dark_conv_args
)
def
call
(
self
,
inputs
,
training
=
None
):
x
,
x_route
=
self
.
_route
(
inputs
)
for
layer
in
self
.
_model_to_wrap
:
x
=
layer
(
x
)
x
=
self
.
_connect
([
x
,
x_route
])
return
x
@
tf
.
keras
.
utils
.
register_keras_serializable
(
package
=
'yolo'
)
@
tf
.
keras
.
utils
.
register_keras_serializable
(
package
=
'yolo'
)
class
PathAggregationBlock
(
tf
.
keras
.
layers
.
Layer
):
class
PathAggregationBlock
(
tf
.
keras
.
layers
.
Layer
):
"""Path Aggregation block."""
"""Path Aggregation block."""
...
@@ -884,7 +773,7 @@ class PathAggregationBlock(tf.keras.layers.Layer):
...
@@ -884,7 +773,7 @@ class PathAggregationBlock(tf.keras.layers.Layer):
def
__init__
(
self
,
def
__init__
(
self
,
filters
=
1
,
filters
=
1
,
drop_final
=
True
,
drop_final
=
True
,
kernel_initializer
=
'
glorot_uniform
'
,
kernel_initializer
=
'
VarianceScaling
'
,
bias_initializer
=
'zeros'
,
bias_initializer
=
'zeros'
,
bias_regularizer
=
None
,
bias_regularizer
=
None
,
kernel_regularizer
=
None
,
kernel_regularizer
=
None
,
...
@@ -1120,7 +1009,7 @@ class SAM(tf.keras.layers.Layer):
...
@@ -1120,7 +1009,7 @@ class SAM(tf.keras.layers.Layer):
strides
=
(
1
,
1
),
strides
=
(
1
,
1
),
padding
=
'same'
,
padding
=
'same'
,
dilation_rate
=
(
1
,
1
),
dilation_rate
=
(
1
,
1
),
kernel_initializer
=
'
glorot_uniform
'
,
kernel_initializer
=
'
VarianceScaling
'
,
bias_initializer
=
'zeros'
,
bias_initializer
=
'zeros'
,
bias_regularizer
=
None
,
bias_regularizer
=
None
,
kernel_regularizer
=
None
,
kernel_regularizer
=
None
,
...
@@ -1192,7 +1081,7 @@ class CAM(tf.keras.layers.Layer):
...
@@ -1192,7 +1081,7 @@ class CAM(tf.keras.layers.Layer):
def
__init__
(
self
,
def
__init__
(
self
,
reduction_ratio
=
1.0
,
reduction_ratio
=
1.0
,
kernel_initializer
=
'
glorot_uniform
'
,
kernel_initializer
=
'
VarianceScaling
'
,
bias_initializer
=
'zeros'
,
bias_initializer
=
'zeros'
,
bias_regularizer
=
None
,
bias_regularizer
=
None
,
kernel_regularizer
=
None
,
kernel_regularizer
=
None
,
...
@@ -1285,7 +1174,7 @@ class CBAM(tf.keras.layers.Layer):
...
@@ -1285,7 +1174,7 @@ class CBAM(tf.keras.layers.Layer):
strides
=
(
1
,
1
),
strides
=
(
1
,
1
),
padding
=
'same'
,
padding
=
'same'
,
dilation_rate
=
(
1
,
1
),
dilation_rate
=
(
1
,
1
),
kernel_initializer
=
'
glorot_uniform
'
,
kernel_initializer
=
'
VarianceScaling
'
,
bias_initializer
=
'zeros'
,
bias_initializer
=
'zeros'
,
bias_regularizer
=
None
,
bias_regularizer
=
None
,
kernel_regularizer
=
None
,
kernel_regularizer
=
None
,
...
@@ -1354,27 +1243,26 @@ class DarkRouteProcess(tf.keras.layers.Layer):
...
@@ -1354,27 +1243,26 @@ class DarkRouteProcess(tf.keras.layers.Layer):
insert_spp = False)(x)
insert_spp = False)(x)
"""
"""
def
__init__
(
def
__init__
(
self
,
self
,
filters
=
2
,
filters
=
2
,
repetitions
=
2
,
repetitions
=
2
,
insert_spp
=
False
,
insert_spp
=
False
,
insert_sam
=
False
,
insert_sam
=
False
,
insert_cbam
=
False
,
insert_cbam
=
False
,
csp_stack
=
0
,
csp_stack
=
0
,
csp_scale
=
2
,
csp_scale
=
2
,
kernel_initializer
=
'VarianceScaling'
,
kernel_initializer
=
'glorot_uniform'
,
bias_initializer
=
'zeros'
,
bias_initializer
=
'zeros'
,
bias_regularizer
=
None
,
bias_regularizer
=
None
,
kernel_regularizer
=
None
,
kernel_regularizer
=
None
,
use_sync_bn
=
False
,
use_sync_bn
=
False
,
norm_momentum
=
0.99
,
norm_momentum
=
0.99
,
norm_epsilon
=
0.001
,
norm_epsilon
=
0.001
,
block_invert
=
False
,
block_invert
=
False
,
activation
=
'leaky'
,
activation
=
'leaky'
,
leaky_alpha
=
0.1
,
leaky_alpha
=
0.1
,
spp_keys
=
None
,
spp_keys
=
None
,
**
kwargs
):
**
kwargs
):
"""DarkRouteProcess initializer.
"""DarkRouteProcess initializer.
Args:
Args:
...
...
official/vision/beta/projects/yolo/modeling/layers/nn_blocks_test.py
View file @
c6d7d57d
...
@@ -106,86 +106,6 @@ class CSPRouteTest(tf.test.TestCase, parameterized.TestCase):
...
@@ -106,86 +106,6 @@ class CSPRouteTest(tf.test.TestCase, parameterized.TestCase):
self
.
assertNotIn
(
None
,
grad
)
self
.
assertNotIn
(
None
,
grad
)
class
CSPStackTest
(
tf
.
test
.
TestCase
,
parameterized
.
TestCase
):
def
build_layer
(
self
,
layer_type
,
filters
,
filter_scale
,
count
,
stack_type
,
downsample
):
if
stack_type
is
not
None
:
layers
=
[]
if
layer_type
==
'residual'
:
for
_
in
range
(
count
):
layers
.
append
(
nn_blocks
.
DarkResidual
(
filters
=
filters
//
filter_scale
,
filter_scale
=
filter_scale
))
else
:
for
_
in
range
(
count
):
layers
.
append
(
nn_blocks
.
ConvBN
(
filters
=
filters
))
if
stack_type
==
'model'
:
layers
=
tf
.
keras
.
Sequential
(
layers
=
layers
)
else
:
layers
=
None
stack
=
nn_blocks
.
CSPStack
(
filters
=
filters
,
filter_scale
=
filter_scale
,
downsample
=
downsample
,
model_to_wrap
=
layers
)
return
stack
@
parameterized
.
named_parameters
(
(
'no_stack'
,
224
,
224
,
64
,
2
,
'residual'
,
None
,
0
,
True
),
(
'residual_stack'
,
224
,
224
,
64
,
2
,
'residual'
,
'list'
,
2
,
True
),
(
'conv_stack'
,
224
,
224
,
64
,
2
,
'conv'
,
'list'
,
3
,
False
),
(
'callable_no_scale'
,
224
,
224
,
64
,
1
,
'residual'
,
'model'
,
5
,
False
))
def
test_pass_through
(
self
,
width
,
height
,
filters
,
mod
,
layer_type
,
stack_type
,
count
,
downsample
):
x
=
tf
.
keras
.
Input
(
shape
=
(
width
,
height
,
filters
))
test_layer
=
self
.
build_layer
(
layer_type
,
filters
,
mod
,
count
,
stack_type
,
downsample
)
outx
=
test_layer
(
x
)
print
(
outx
)
print
(
outx
.
shape
.
as_list
())
if
downsample
:
self
.
assertAllEqual
(
outx
.
shape
.
as_list
(),
[
None
,
width
//
2
,
height
//
2
,
filters
])
else
:
self
.
assertAllEqual
(
outx
.
shape
.
as_list
(),
[
None
,
width
,
height
,
filters
])
@
parameterized
.
named_parameters
(
(
'no_stack'
,
224
,
224
,
64
,
2
,
'residual'
,
None
,
0
,
True
),
(
'residual_stack'
,
224
,
224
,
64
,
2
,
'residual'
,
'list'
,
2
,
True
),
(
'conv_stack'
,
224
,
224
,
64
,
2
,
'conv'
,
'list'
,
3
,
False
),
(
'callable_no_scale'
,
224
,
224
,
64
,
1
,
'residual'
,
'model'
,
5
,
False
))
def
test_gradient_pass_though
(
self
,
width
,
height
,
filters
,
mod
,
layer_type
,
stack_type
,
count
,
downsample
):
loss
=
tf
.
keras
.
losses
.
MeanSquaredError
()
optimizer
=
tf
.
keras
.
optimizers
.
SGD
()
init
=
tf
.
random_normal_initializer
()
x
=
tf
.
Variable
(
initial_value
=
init
(
shape
=
(
1
,
width
,
height
,
filters
),
dtype
=
tf
.
float32
))
if
not
downsample
:
y
=
tf
.
Variable
(
initial_value
=
init
(
shape
=
(
1
,
width
,
height
,
filters
),
dtype
=
tf
.
float32
))
else
:
y
=
tf
.
Variable
(
initial_value
=
init
(
shape
=
(
1
,
width
//
2
,
height
//
2
,
filters
),
dtype
=
tf
.
float32
))
test_layer
=
self
.
build_layer
(
layer_type
,
filters
,
mod
,
count
,
stack_type
,
downsample
)
with
tf
.
GradientTape
()
as
tape
:
x_hat
=
test_layer
(
x
)
grad_loss
=
loss
(
x_hat
,
y
)
grad
=
tape
.
gradient
(
grad_loss
,
test_layer
.
trainable_variables
)
optimizer
.
apply_gradients
(
zip
(
grad
,
test_layer
.
trainable_variables
))
self
.
assertNotIn
(
None
,
grad
)
class
ConvBNTest
(
tf
.
test
.
TestCase
,
parameterized
.
TestCase
):
class
ConvBNTest
(
tf
.
test
.
TestCase
,
parameterized
.
TestCase
):
@
parameterized
.
named_parameters
(
@
parameterized
.
named_parameters
(
...
...
official/vision/beta/projects/yolo/modeling/yolo_model.py
View file @
c6d7d57d
...
@@ -17,7 +17,7 @@
...
@@ -17,7 +17,7 @@
import
tensorflow
as
tf
import
tensorflow
as
tf
#
S
tatic base Yolo Models that do not require configuration
#
s
tatic base Yolo Models that do not require configuration
# similar to a backbone model id.
# similar to a backbone model id.
# this is done greatly simplify the model config
# this is done greatly simplify the model config
...
@@ -85,26 +85,27 @@ class Yolo(tf.keras.Model):
...
@@ -85,26 +85,27 @@ class Yolo(tf.keras.Model):
"""Detection initialization function.
"""Detection initialization function.
Args:
Args:
backbone: `tf.keras.Model`
,
a backbone network.
backbone: `tf.keras.Model` a backbone network.
decoder: `tf.keras.Model`
,
a decoder network.
decoder: `tf.keras.Model` a decoder network.
head: `
Yolo
Head`, the
YOLO
head.
head: `
RetinaNet
Head`, the
RetinaNet
head.
detection_generator:
`tf.keras.Model`,
the detection generator.
detection_generator: the detection generator.
**kwargs: keyword arguments to be passed.
**kwargs: keyword arguments to be passed.
"""
"""
super
().
__init__
(
**
kwargs
)
super
(
Yolo
,
self
).
__init__
(
**
kwargs
)
self
.
_config_dict
=
{
self
.
_config_dict
=
{
"backbone"
:
backbone
,
"backbone"
:
backbone
,
"decoder"
:
decoder
,
"decoder"
:
decoder
,
"head"
:
head
,
"head"
:
head
,
"
detection_generato
r"
:
detection_generator
"
filte
r"
:
detection_generator
}
}
# model components
# model components
self
.
_backbone
=
backbone
self
.
_backbone
=
backbone
self
.
_decoder
=
decoder
self
.
_decoder
=
decoder
self
.
_head
=
head
self
.
_head
=
head
self
.
_detection_generator
=
detection_generator
self
.
_filter
=
detection_generator
return
def
call
(
self
,
inputs
,
training
=
False
):
def
call
(
self
,
inputs
,
training
=
False
):
maps
=
self
.
_backbone
(
inputs
)
maps
=
self
.
_backbone
(
inputs
)
...
@@ -114,7 +115,7 @@ class Yolo(tf.keras.Model):
...
@@ -114,7 +115,7 @@ class Yolo(tf.keras.Model):
return
{
"raw_output"
:
raw_predictions
}
return
{
"raw_output"
:
raw_predictions
}
else
:
else
:
# Post-processing.
# Post-processing.
predictions
=
self
.
_
detection_generato
r
(
raw_predictions
)
predictions
=
self
.
_
filte
r
(
raw_predictions
)
predictions
.
update
({
"raw_output"
:
raw_predictions
})
predictions
.
update
({
"raw_output"
:
raw_predictions
})
return
predictions
return
predictions
...
@@ -131,8 +132,8 @@ class Yolo(tf.keras.Model):
...
@@ -131,8 +132,8 @@ class Yolo(tf.keras.Model):
return
self
.
_head
return
self
.
_head
@
property
@
property
def
detection_generato
r
(
self
):
def
filte
r
(
self
):
return
self
.
_
detection_generato
r
return
self
.
_
filte
r
def
get_config
(
self
):
def
get_config
(
self
):
return
self
.
_config_dict
return
self
.
_config_dict
...
@@ -140,3 +141,29 @@ class Yolo(tf.keras.Model):
...
@@ -140,3 +141,29 @@ class Yolo(tf.keras.Model):
@
classmethod
@
classmethod
def
from_config
(
cls
,
config
):
def
from_config
(
cls
,
config
):
return
cls
(
**
config
)
return
cls
(
**
config
)
def
get_weight_groups
(
self
,
train_vars
):
"""Sort the list of trainable variables into groups for optimization.
Args:
train_vars: a list of tf.Variables that need to get sorted into their
respective groups.
Returns:
weights: a list of tf.Variables for the weights.
bias: a list of tf.Variables for the bias.
other: a list of tf.Variables for the other operations.
"""
bias
=
[]
weights
=
[]
other
=
[]
for
var
in
train_vars
:
if
"bias"
in
var
.
name
:
bias
.
append
(
var
)
elif
"beta"
in
var
.
name
:
bias
.
append
(
var
)
elif
"kernel"
in
var
.
name
or
"weight"
in
var
.
name
:
weights
.
append
(
var
)
else
:
other
.
append
(
var
)
return
weights
,
bias
,
other
official/vision/beta/projects/yolo/ops/box_ops.py
View file @
c6d7d57d
...
@@ -38,51 +38,26 @@ def yxyx_to_xcycwh(box: tf.Tensor):
...
@@ -38,51 +38,26 @@ def yxyx_to_xcycwh(box: tf.Tensor):
return
box
return
box
@
tf
.
custom_gradient
def
xcycwh_to_yxyx
(
box
:
tf
.
Tensor
):
def
_xcycwh_to_yxyx
(
box
:
tf
.
Tensor
,
scale
):
"""Private function to allow custom gradients with defaults."""
with
tf
.
name_scope
(
'xcycwh_to_yxyx'
):
xy
,
wh
=
tf
.
split
(
box
,
2
,
axis
=-
1
)
xy_min
=
xy
-
wh
/
2
xy_max
=
xy
+
wh
/
2
x_min
,
y_min
=
tf
.
split
(
xy_min
,
2
,
axis
=-
1
)
x_max
,
y_max
=
tf
.
split
(
xy_max
,
2
,
axis
=-
1
)
box
=
tf
.
concat
([
y_min
,
x_min
,
y_max
,
x_max
],
axis
=-
1
)
def
delta
(
dbox
):
# y_min = top, x_min = left, y_max = bottom, x_max = right
dt
,
dl
,
db
,
dr
=
tf
.
split
(
dbox
,
4
,
axis
=-
1
)
dx
=
dl
+
dr
dy
=
dt
+
db
dw
=
(
dr
-
dl
)
/
scale
dh
=
(
db
-
dt
)
/
scale
dbox
=
tf
.
concat
([
dx
,
dy
,
dw
,
dh
],
axis
=-
1
)
return
dbox
,
0.0
return
box
,
delta
def
xcycwh_to_yxyx
(
box
:
tf
.
Tensor
,
darknet
=
False
):
"""Converts boxes from x_center, y_center, width, height to yxyx format.
"""Converts boxes from x_center, y_center, width, height to yxyx format.
Args:
Args:
box: any `Tensor` whose last dimension is 4 representing the coordinates of
box: any `Tensor` whose last dimension is 4 representing the coordinates of
boxes in x_center, y_center, width, height.
boxes in x_center, y_center, width, height.
darknet: `bool`, if True a scale of 1.0 is used.
Returns:
Returns:
box: a `Tensor` whose shape is the same as `box` in new format.
box: a `Tensor` whose shape is the same as `box` in new format.
"""
"""
if
darknet
:
with
tf
.
name_scope
(
'xcycwh_to_yxyx'
):
scale
=
1.0
xy
,
wh
=
tf
.
split
(
box
,
2
,
axis
=-
1
)
else
:
xy_min
=
xy
-
wh
/
2
scale
=
2.0
xy_max
=
xy
+
wh
/
2
box
=
_xcycwh_to_yxyx
(
box
,
scale
)
x_min
,
y_min
=
tf
.
split
(
xy_min
,
2
,
axis
=-
1
)
x_max
,
y_max
=
tf
.
split
(
xy_max
,
2
,
axis
=-
1
)
box
=
tf
.
concat
([
y_min
,
x_min
,
y_max
,
x_max
],
axis
=-
1
)
return
box
return
box
# IOU
def
intersect_and_union
(
box1
,
box2
,
yxyx
=
False
):
def
intersect_and_union
(
box1
,
box2
,
yxyx
=
False
):
"""Calculates the intersection and union between box1 and box2.
"""Calculates the intersection and union between box1 and box2.
...
@@ -98,8 +73,9 @@ def intersect_and_union(box1, box2, yxyx=False):
...
@@ -98,8 +73,9 @@ def intersect_and_union(box1, box2, yxyx=False):
intersection: a `Tensor` who represents the intersection.
intersection: a `Tensor` who represents the intersection.
union: a `Tensor` who represents the union.
union: a `Tensor` who represents the union.
"""
"""
if
not
yxyx
:
if
not
yxyx
:
box1_area
=
tf
.
reduce_prod
(
tf
.
split
(
box1
,
2
,
axis
=-
1
)[
-
1
],
axis
=-
1
)
box2_area
=
tf
.
reduce_prod
(
tf
.
split
(
box2
,
2
,
axis
=-
1
)[
-
1
],
axis
=-
1
)
box1
=
xcycwh_to_yxyx
(
box1
)
box1
=
xcycwh_to_yxyx
(
box1
)
box2
=
xcycwh_to_yxyx
(
box2
)
box2
=
xcycwh_to_yxyx
(
box2
)
...
@@ -110,13 +86,14 @@ def intersect_and_union(box1, box2, yxyx=False):
...
@@ -110,13 +86,14 @@ def intersect_and_union(box1, box2, yxyx=False):
intersect_wh
=
tf
.
math
.
maximum
(
intersect_maxes
-
intersect_mins
,
0.0
)
intersect_wh
=
tf
.
math
.
maximum
(
intersect_maxes
-
intersect_mins
,
0.0
)
intersection
=
tf
.
reduce_prod
(
intersect_wh
,
axis
=-
1
)
intersection
=
tf
.
reduce_prod
(
intersect_wh
,
axis
=-
1
)
box1_area
=
tf
.
reduce_prod
(
b1ma
-
b1mi
,
axis
=-
1
)
if
yxyx
:
box2_area
=
tf
.
reduce_prod
(
b2ma
-
b2mi
,
axis
=-
1
)
box1_area
=
tf
.
reduce_prod
(
b1ma
-
b1mi
,
axis
=-
1
)
box2_area
=
tf
.
reduce_prod
(
b2ma
-
b2mi
,
axis
=-
1
)
union
=
box1_area
+
box2_area
-
intersection
union
=
box1_area
+
box2_area
-
intersection
return
intersection
,
union
return
intersection
,
union
def
smallest_encompassing_box
(
box1
,
box2
,
yxyx
=
False
):
def
smallest_encompassing_box
(
box1
,
box2
,
yxyx
=
False
,
clip
=
False
):
"""Calculates the smallest box that encompasses box1 and box2.
"""Calculates the smallest box that encompasses box1 and box2.
Args:
Args:
...
@@ -126,6 +103,7 @@ def smallest_encompassing_box(box1, box2, yxyx=False):
...
@@ -126,6 +103,7 @@ def smallest_encompassing_box(box1, box2, yxyx=False):
boxes.
boxes.
yxyx: a `bool` indicating whether the input box is of the format x_center
yxyx: a `bool` indicating whether the input box is of the format x_center
y_center, width, height or y_min, x_min, y_max, x_max.
y_center, width, height or y_min, x_min, y_max, x_max.
clip: a `bool`, whether or not to clip boxes.
Returns:
Returns:
box_c: a `Tensor` whose last dimension is 4 representing the coordinates of
box_c: a `Tensor` whose last dimension is 4 representing the coordinates of
...
@@ -141,15 +119,15 @@ def smallest_encompassing_box(box1, box2, yxyx=False):
...
@@ -141,15 +119,15 @@ def smallest_encompassing_box(box1, box2, yxyx=False):
bcmi
=
tf
.
math
.
minimum
(
b1mi
,
b2mi
)
bcmi
=
tf
.
math
.
minimum
(
b1mi
,
b2mi
)
bcma
=
tf
.
math
.
maximum
(
b1ma
,
b2ma
)
bcma
=
tf
.
math
.
maximum
(
b1ma
,
b2ma
)
bca
=
tf
.
reduce_prod
(
bcma
-
bcmi
,
keepdims
=
True
,
axis
=-
1
)
box_c
=
tf
.
concat
([
bcmi
,
bcma
],
axis
=-
1
)
box_c
=
tf
.
concat
([
bcmi
,
bcma
],
axis
=-
1
)
if
not
yxyx
:
if
not
yxyx
:
box_c
=
yxyx_to_xcycwh
(
box_c
)
box_c
=
yxyx_to_xcycwh
(
box_c
)
box_c
=
tf
.
where
(
bca
==
0.0
,
tf
.
zeros_like
(
box_c
),
box_c
)
if
clip
:
return
box_c
bca
=
tf
.
reduce_prod
(
bcma
-
bcmi
,
keepdims
=
True
,
axis
=-
1
)
box_c
=
tf
.
where
(
bca
<=
0.0
,
tf
.
zeros_like
(
box_c
),
box_c
)
return
bcmi
,
bcma
,
box_c
def
compute_iou
(
box1
,
box2
,
yxyx
=
False
):
def
compute_iou
(
box1
,
box2
,
yxyx
=
False
):
...
@@ -166,15 +144,13 @@ def compute_iou(box1, box2, yxyx=False):
...
@@ -166,15 +144,13 @@ def compute_iou(box1, box2, yxyx=False):
Returns:
Returns:
iou: a `Tensor` who represents the intersection over union.
iou: a `Tensor` who represents the intersection over union.
"""
"""
# get box corners
with
tf
.
name_scope
(
'iou'
):
with
tf
.
name_scope
(
'iou'
):
intersection
,
union
=
intersect_and_union
(
box1
,
box2
,
yxyx
=
yxyx
)
intersection
,
union
=
intersect_and_union
(
box1
,
box2
,
yxyx
=
yxyx
)
iou
=
math_ops
.
divide_no_nan
(
intersection
,
union
)
iou
=
math_ops
.
divide_no_nan
(
intersection
,
union
)
iou
=
math_ops
.
rm_nan_inf
(
iou
,
val
=
0.0
)
return
iou
return
iou
def
compute_giou
(
box1
,
box2
,
yxyx
=
False
,
darknet
=
False
):
def
compute_giou
(
box1
,
box2
,
yxyx
=
False
):
"""Calculates the General intersection over union between box1 and box2.
"""Calculates the General intersection over union between box1 and box2.
Args:
Args:
...
@@ -184,38 +160,30 @@ def compute_giou(box1, box2, yxyx=False, darknet=False):
...
@@ -184,38 +160,30 @@ def compute_giou(box1, box2, yxyx=False, darknet=False):
boxes.
boxes.
yxyx: a `bool` indicating whether the input box is of the format x_center
yxyx: a `bool` indicating whether the input box is of the format x_center
y_center, width, height or y_min, x_min, y_max, x_max.
y_center, width, height or y_min, x_min, y_max, x_max.
darknet: a `bool` indicating whether the calling function is the YOLO
darknet loss.
Returns:
Returns:
giou: a `Tensor` who represents the General intersection over union.
giou: a `Tensor` who represents the General intersection over union.
"""
"""
with
tf
.
name_scope
(
'giou'
):
with
tf
.
name_scope
(
'giou'
):
# get IOU
if
not
yxyx
:
if
not
yxyx
:
box1
=
xcycwh_to_yxyx
(
box1
,
darknet
=
darknet
)
yxyx1
=
xcycwh_to_yxyx
(
box1
)
box2
=
xcycwh_to_yxyx
(
box2
,
darknet
=
darknet
)
yxyx2
=
xcycwh_to_yxyx
(
box2
)
yxyx
=
True
else
:
yxyx1
,
yxyx2
=
box1
,
box2
intersection
,
union
=
intersect_and_union
(
box1
,
box2
,
yxyx
=
yxyx
)
cmi
,
cma
,
_
=
smallest_encompassing_box
(
yxyx1
,
yxyx2
,
yxyx
=
True
)
intersection
,
union
=
intersect_and_union
(
yxyx1
,
yxyx2
,
yxyx
=
True
)
iou
=
math_ops
.
divide_no_nan
(
intersection
,
union
)
iou
=
math_ops
.
divide_no_nan
(
intersection
,
union
)
iou
=
math_ops
.
rm_nan_inf
(
iou
,
val
=
0.0
)
# find the smallest box to encompase both box1 and box2
bcwh
=
cma
-
cmi
boxc
=
smallest_encompassing_box
(
box1
,
box2
,
yxyx
=
yxyx
)
c
=
tf
.
math
.
reduce_prod
(
bcwh
,
axis
=-
1
)
if
yxyx
:
boxc
=
yxyx_to_xcycwh
(
boxc
)
_
,
cwch
=
tf
.
split
(
boxc
,
2
,
axis
=-
1
)
c
=
tf
.
math
.
reduce_prod
(
cwch
,
axis
=-
1
)
# compute giou
regularization
=
math_ops
.
divide_no_nan
((
c
-
union
),
c
)
regularization
=
math_ops
.
divide_no_nan
((
c
-
union
),
c
)
giou
=
iou
-
regularization
giou
=
iou
-
regularization
giou
=
tf
.
clip_by_value
(
giou
,
clip_value_min
=-
1.0
,
clip_value_max
=
1.0
)
return
iou
,
giou
return
iou
,
giou
def
compute_diou
(
box1
,
box2
,
beta
=
1.0
,
yxyx
=
False
,
darknet
=
False
):
def
compute_diou
(
box1
,
box2
,
beta
=
1.0
,
yxyx
=
False
):
"""Calculates the distance intersection over union between box1 and box2.
"""Calculates the distance intersection over union between box1 and box2.
Args:
Args:
...
@@ -227,8 +195,6 @@ def compute_diou(box1, box2, beta=1.0, yxyx=False, darknet=False):
...
@@ -227,8 +195,6 @@ def compute_diou(box1, box2, beta=1.0, yxyx=False, darknet=False):
regularization term.
regularization term.
yxyx: a `bool` indicating whether the input box is of the format x_center
yxyx: a `bool` indicating whether the input box is of the format x_center
y_center, width, height or y_min, x_min, y_max, x_max.
y_center, width, height or y_min, x_min, y_max, x_max.
darknet: a `bool` indicating whether the calling function is the YOLO
darknet loss.
Returns:
Returns:
diou: a `Tensor` who represents the distance intersection over union.
diou: a `Tensor` who represents the distance intersection over union.
...
@@ -236,30 +202,27 @@ def compute_diou(box1, box2, beta=1.0, yxyx=False, darknet=False):
...
@@ -236,30 +202,27 @@ def compute_diou(box1, box2, beta=1.0, yxyx=False, darknet=False):
with
tf
.
name_scope
(
'diou'
):
with
tf
.
name_scope
(
'diou'
):
# compute center distance
# compute center distance
if
not
yxyx
:
if
not
yxyx
:
box1
=
xcycwh_to_yxyx
(
box1
,
darknet
=
darknet
)
xycc1
,
xycc2
=
box1
,
box2
box2
=
xcycwh_to_yxyx
(
box2
,
darknet
=
darknet
)
yxyx1
=
xcycwh_to_yxyx
(
box1
)
yxyx
=
True
yxyx2
=
xcycwh_to_yxyx
(
box2
)
else
:
intersection
,
union
=
intersect_and_union
(
box1
,
box2
,
yxyx
=
yxyx
)
yxyx1
,
yxyx2
=
box1
,
box2
boxc
=
smallest_encompassing_box
(
box1
,
box2
,
yxyx
=
yxyx
)
xycc1
=
yxyx_to_xcycwh
(
box1
)
xycc2
=
yxyx_to_xcycwh
(
box2
)
cmi
,
cma
,
_
=
smallest_encompassing_box
(
yxyx1
,
yxyx2
,
yxyx
=
True
)
intersection
,
union
=
intersect_and_union
(
yxyx1
,
yxyx2
,
yxyx
=
True
)
iou
=
math_ops
.
divide_no_nan
(
intersection
,
union
)
iou
=
math_ops
.
divide_no_nan
(
intersection
,
union
)
iou
=
math_ops
.
rm_nan_inf
(
iou
,
val
=
0.0
)
if
yxyx
:
boxc
=
yxyx_to_xcycwh
(
boxc
)
box1
=
yxyx_to_xcycwh
(
box1
)
box2
=
yxyx_to_xcycwh
(
box2
)
b1xy
,
_
=
tf
.
split
(
box
1
,
2
,
axis
=-
1
)
b1xy
,
_
=
tf
.
split
(
xycc
1
,
2
,
axis
=-
1
)
b2xy
,
_
=
tf
.
split
(
box
2
,
2
,
axis
=-
1
)
b2xy
,
_
=
tf
.
split
(
xycc
2
,
2
,
axis
=-
1
)
_
,
bcwh
=
tf
.
split
(
boxc
,
2
,
axis
=-
1
)
bcwh
=
cma
-
cmi
center_dist
=
tf
.
reduce_sum
((
b1xy
-
b2xy
)
**
2
,
axis
=-
1
)
center_dist
=
tf
.
reduce_sum
((
b1xy
-
b2xy
)
**
2
,
axis
=-
1
)
c_diag
=
tf
.
reduce_sum
(
bcwh
**
2
,
axis
=-
1
)
c_diag
=
tf
.
reduce_sum
(
bcwh
**
2
,
axis
=-
1
)
regularization
=
math_ops
.
divide_no_nan
(
center_dist
,
c_diag
)
regularization
=
math_ops
.
divide_no_nan
(
center_dist
,
c_diag
)
diou
=
iou
-
regularization
**
beta
diou
=
iou
-
regularization
**
beta
diou
=
tf
.
clip_by_value
(
diou
,
clip_value_min
=-
1.0
,
clip_value_max
=
1.0
)
return
iou
,
diou
return
iou
,
diou
...
@@ -280,33 +243,48 @@ def compute_ciou(box1, box2, yxyx=False, darknet=False):
...
@@ -280,33 +243,48 @@ def compute_ciou(box1, box2, yxyx=False, darknet=False):
ciou: a `Tensor` who represents the complete intersection over union.
ciou: a `Tensor` who represents the complete intersection over union.
"""
"""
with
tf
.
name_scope
(
'ciou'
):
with
tf
.
name_scope
(
'ciou'
):
# compute DIOU and IOU
if
not
yxyx
:
iou
,
diou
=
compute_diou
(
box1
,
box2
,
yxyx
=
yxyx
,
darknet
=
darknet
)
xycc1
,
xycc2
=
box1
,
box2
yxyx1
=
xcycwh_to_yxyx
(
box1
)
if
yxyx
:
yxyx2
=
xcycwh_to_yxyx
(
box2
)
box1
=
yxyx_to_xcycwh
(
box1
)
else
:
box2
=
yxyx_to_xcycwh
(
box2
)
yxyx1
,
yxyx2
=
box1
,
box2
xycc1
=
yxyx_to_xcycwh
(
box1
)
_
,
_
,
b1w
,
b1h
=
tf
.
split
(
box1
,
4
,
axis
=-
1
)
xycc2
=
yxyx_to_xcycwh
(
box2
)
_
,
_
,
b2w
,
b2h
=
tf
.
split
(
box1
,
4
,
axis
=-
1
)
# Build the smallest encomapssing box.
# computer aspect ratio consistency
cmi
,
cma
,
_
=
smallest_encompassing_box
(
yxyx1
,
yxyx2
,
yxyx
=
True
)
terma
=
tf
.
cast
(
math_ops
.
divide_no_nan
(
b1w
,
b1h
),
tf
.
float32
)
intersection
,
union
=
intersect_and_union
(
yxyx1
,
yxyx2
,
yxyx
=
True
)
termb
=
tf
.
cast
(
math_ops
.
divide_no_nan
(
b2w
,
b2h
),
tf
.
float32
)
iou
=
math_ops
.
divide_no_nan
(
intersection
,
union
)
arcterm
=
tf
.
square
(
tf
.
math
.
atan
(
terma
)
-
tf
.
math
.
atan
(
termb
))
v
=
tf
.
squeeze
(
4
*
arcterm
/
(
math
.
pi
**
2
),
axis
=-
1
)
b1xy
,
b1w
,
b1h
=
tf
.
split
(
xycc1
,
[
2
,
1
,
1
],
axis
=-
1
)
v
=
tf
.
cast
(
v
,
b1w
.
dtype
)
b2xy
,
b2w
,
b2h
=
tf
.
split
(
xycc2
,
[
2
,
1
,
1
],
axis
=-
1
)
bchw
=
cma
-
cmi
a
=
tf
.
stop_gradient
(
math_ops
.
divide_no_nan
(
v
,
((
1
-
iou
)
+
v
)))
ciou
=
diou
-
(
v
*
a
)
# Center regularization
ciou
=
tf
.
clip_by_value
(
ciou
,
clip_value_min
=-
1.0
,
clip_value_max
=
1.0
)
center_dist
=
tf
.
reduce_sum
((
b1xy
-
b2xy
)
**
2
,
axis
=-
1
)
c_diag
=
tf
.
reduce_sum
(
bchw
**
2
,
axis
=-
1
)
regularization
=
math_ops
.
divide_no_nan
(
center_dist
,
c_diag
)
# Computer aspect ratio consistency
terma
=
math_ops
.
divide_no_nan
(
b1w
,
b1h
)
# gt
termb
=
math_ops
.
divide_no_nan
(
b2w
,
b2h
)
# pred
arcterm
=
tf
.
squeeze
(
tf
.
math
.
pow
(
tf
.
math
.
atan
(
termb
)
-
tf
.
math
.
atan
(
terma
),
2
),
axis
=-
1
)
v
=
(
4
/
math
.
pi
**
2
)
*
arcterm
# Compute the aspect ratio weight, should be treated as a constant
a
=
tf
.
stop_gradient
(
math_ops
.
divide_no_nan
(
v
,
1
-
iou
+
v
))
if
darknet
:
grad_scale
=
tf
.
stop_gradient
(
tf
.
square
(
b2w
)
+
tf
.
square
(
b2h
))
v
*=
tf
.
squeeze
(
grad_scale
,
axis
=-
1
)
ciou
=
iou
-
regularization
-
(
v
*
a
)
return
iou
,
ciou
return
iou
,
ciou
def
aggregated_comparitive_iou
(
boxes1
,
def
aggregated_comparitive_iou
(
boxes1
,
boxes2
=
None
,
iou_type
=
0
,
beta
=
0.6
):
boxes2
=
None
,
iou_type
=
0
,
beta
=
0.6
):
"""Calculates the IOU between two set of boxes.
"""Calculates the IOU between two set of boxes.
Similar to bbox_overlap but far more versitile.
Similar to bbox_overlap but far more versitile.
...
@@ -333,11 +311,11 @@ def aggregated_comparitive_iou(boxes1,
...
@@ -333,11 +311,11 @@ def aggregated_comparitive_iou(boxes1,
else
:
else
:
boxes2
=
tf
.
transpose
(
boxes1
,
perm
=
(
0
,
2
,
1
,
3
))
boxes2
=
tf
.
transpose
(
boxes1
,
perm
=
(
0
,
2
,
1
,
3
))
if
iou_type
==
0
:
# diou
if
iou_type
==
0
or
iou_type
==
'diou'
:
# diou
_
,
iou
=
compute_diou
(
boxes1
,
boxes2
,
beta
=
beta
,
yxyx
=
True
)
_
,
iou
=
compute_diou
(
boxes1
,
boxes2
,
beta
=
beta
,
yxyx
=
True
)
elif
iou_type
==
1
:
# giou
elif
iou_type
==
1
or
iou_type
==
'giou'
:
# giou
_
,
iou
=
compute_giou
(
boxes1
,
boxes2
,
yxyx
=
True
)
_
,
iou
=
compute_giou
(
boxes1
,
boxes2
,
yxyx
=
True
)
elif
iou_type
==
2
:
# ciou
elif
iou_type
==
2
or
iou_type
==
'ciou'
:
# ciou
_
,
iou
=
compute_ciou
(
boxes1
,
boxes2
,
yxyx
=
True
)
_
,
iou
=
compute_ciou
(
boxes1
,
boxes2
,
yxyx
=
True
)
else
:
else
:
iou
=
compute_iou
(
boxes1
,
boxes2
,
yxyx
=
True
)
iou
=
compute_iou
(
boxes1
,
boxes2
,
yxyx
=
True
)
...
...
official/vision/beta/projects/yolo/ops/loss_utils.py
0 → 100755
View file @
c6d7d57d
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Yolo loss utility functions."""
import
numpy
as
np
import
tensorflow
as
tf
from
official.vision.beta.projects.yolo.ops
import
box_ops
from
official.vision.beta.projects.yolo.ops
import
math_ops
@tf.custom_gradient
def sigmoid_bce(y, x_prime, label_smoothing):
  """Applies the Sigmoid Cross Entropy Loss with a Darknet-style gradient.

  Implements the same derivative as that found in the Darknet C library.
  The derivative of this method is not the same as the standard binary cross
  entropy with logits function.

  The BCE with logits function equation is as follows:
    x = 1 / (1 + exp(-x_prime))
    bce = -ylog(x) - (1 - y)log(1 - x)

  The standard BCE with logits function derivative is as follows:
    dloss = -y/x + (1-y)/(1-x)
    dsigmoid = x * (1 - x)
    dx = dloss * dsigmoid

  This derivative can be reduced simply to:
    dx = (-y + x)

  This simplification is used by the darknet library in order to improve
  training stability. The gradient is almost the same as
  tf.keras.losses.binary_crossentropy but varies slightly and yields
  different performance.

  Args:
    y: `Tensor` holding ground truth data.
    x_prime: `Tensor` holding the predictions prior to application of the
      sigmoid operation.
    label_smoothing: float value between 0.0 and 1.0 indicating the amount of
      smoothing to apply to the data.

  Returns:
    bce: `Tensor` of the computed loss values.
    delta: callable function indicating the custom gradient for this operation.
  """
  # epsilon keeps log() finite when the sigmoid saturates at 0 or 1
  eps = 1e-9
  x = tf.math.sigmoid(x_prime)
  # smooth the labels toward 0.5; stop_gradient because the smoothed label is
  # treated as a constant target, not a differentiable quantity
  y = tf.stop_gradient(y * (1 - label_smoothing) + 0.5 * label_smoothing)
  bce = -y * tf.math.log(x + eps) - (1 - y) * tf.math.log(1 - x + eps)

  def delta(dpass):
    # custom gradient: the simplified darknet derivative (x - y) w.r.t.
    # x_prime; labels and the label_smoothing scalar receive no gradient
    x = tf.math.sigmoid(x_prime)
    dx = (-y + x) * dpass
    dy = tf.zeros_like(y)
    return dy, dx, 0.0

  return bce, delta
def apply_mask(mask, x, value=0):
  """Gradient-safe masking helper.

  The YOLO loss function makes extensive use of dynamically shaped tensors.
  To allow this use case on the TPU while preserving the gradient correctly
  for back propagation, this helper uses a tf.where operation to hard set
  masked locations to a constant with a well-defined (zero) gradient.

  Args:
    mask: A `Tensor` with the same shape as x used to select values of
      importance.
    x: A `Tensor` with the same shape as mask that will be getting masked.
    value: `float` constant additive value.

  Returns:
    A masked `Tensor` with the same shape as x.
  """
  keep = tf.cast(mask, tf.bool)
  fill = tf.zeros_like(x) + value
  return tf.where(keep, x, fill)
def build_grid(indexes, truths, preds, ind_mask, update=False, grid=None):
  """This function is used to broadcast elements into the output shape.

  This function broadcasts a list of truths into the correct index in the
  output shape. This is used for the ground truth map construction in the
  scaled loss and the classification map in the darknet loss.

  Args:
    indexes: A `Tensor` for the indexes.
    truths: A `Tensor` for the ground truth.
    preds: A `Tensor` for the predictions.
    ind_mask: A `Tensor` for the index masks.
    update: A `bool`; if True overwrite grid cells, otherwise take the
      elementwise max with existing cell values.
    grid: A `Tensor` for the grid; if None a zero grid shaped like `preds`
      is created.

  Returns:
    grid: A `Tensor` representing the augmented grid.
  """
  # this function is used to broadcast all the indexes to the correct
  # ground truth mask, used for the iou detection map in the scaled loss
  # and the classification mask in the darknet loss
  num_flatten = tf.shape(preds)[-1]

  # NOTE(review): original comment asked "is there a way to verify that we
  # are not on the CPU?" -- device placement is not checked here.
  ind_mask = tf.cast(ind_mask, indexes.dtype)

  # find all the batch indexes using the cumulative sum of a ones tensor;
  # cumsum(ones) - 1 yields the zero indexed batches
  bhep = tf.reduce_max(tf.ones_like(indexes), axis=-1, keepdims=True)
  bhep = tf.math.cumsum(bhep, axis=0) - 1

  # concatenate the batch indexes to the indexes
  indexes = tf.concat([bhep, indexes], axis=-1)
  indexes = apply_mask(tf.cast(ind_mask, indexes.dtype), indexes)
  # masked-out slots become -1 so the scatter ignores/overwrites consistently
  indexes = (indexes + (ind_mask - 1))

  # reshape the indexes into the correct shape for the loss,
  # just flatten all indexes but the last
  indexes = tf.reshape(indexes, [-1, 4])

  # also flatten the ground truth value on all axis but the last
  truths = tf.reshape(truths, [-1, num_flatten])

  # build a zero grid in the same shape as the predictions
  if grid is None:
    grid = tf.zeros_like(preds)
  # remove invalid values from the truths that may have
  # come up from computation, invalid = nan and inf
  truths = math_ops.rm_nan_inf(truths)

  # scatter update the zero grid: overwrite when update=True, otherwise
  # take the max so repeated indexes keep the largest value
  if update:
    grid = tf.tensor_scatter_nd_update(grid, indexes, truths)
  else:
    grid = tf.tensor_scatter_nd_max(grid, indexes, truths)

  # return the scattered grid; no gradient stopping is applied here --
  # callers stop gradients where required
  return grid
class GridGenerator:
  """Grid generator that generates anchor grids for box decoding."""

  def __init__(self, anchors, masks=None, scale_anchors=None):
    """Initialize Grid Generator.

    Args:
      anchors: A `List[List[int]]` for the anchor boxes that are used in the
        model at all levels.
      masks: A `List[int]` for the output level that this specific model
        output level uses; selects the subset of anchors for this level.
      scale_anchors: An `int` for how much to scale this level to get the
        original input shape.
    """
    self.dtype = tf.keras.backend.floatx()
    # number of anchors at this level
    if masks is not None:
      self._num = len(masks)
    else:
      self._num = tf.shape(anchors)[0]

    # keep only the anchors assigned to this output level
    if masks is not None:
      anchors = [anchors[mask] for mask in masks]

    self._scale_anchors = scale_anchors
    self._anchors = tf.convert_to_tensor(anchors)
    return

  def _build_grid_points(self, lwidth, lheight, anchors, dtype):
    """Generate a grid of fixed grid edges for box center decoding."""
    with tf.name_scope('center_grid'):
      y = tf.range(0, lheight)
      x = tf.range(0, lwidth)
      num = tf.shape(anchors)[0]
      # NOTE(review): x_left is tiled from the y range and y_left from the x
      # range -- the naming is swapped relative to the ranges; verify against
      # consumers before renaming.
      x_left = tf.tile(
          tf.transpose(tf.expand_dims(y, axis=-1), perm=[1, 0]), [lwidth, 1])
      y_left = tf.tile(tf.expand_dims(x, axis=-1), [1, lheight])
      x_y = tf.stack([x_left, y_left], axis=-1)
      x_y = tf.cast(x_y, dtype=dtype)
      # replicate per anchor and add a leading batch dimension of 1
      x_y = tf.expand_dims(
          tf.tile(tf.expand_dims(x_y, axis=-2), [1, 1, num, 1]), axis=0)
    return x_y

  def _build_anchor_grid(self, anchors, dtype):
    """Get the transformed anchor boxes for each dimension."""
    with tf.name_scope('anchor_grid'):
      num = tf.shape(anchors)[0]
      anchors = tf.cast(anchors, dtype=dtype)
      # shape [1, 1, 1, num_anchors, 2] for broadcasting over the spatial grid
      anchors = tf.reshape(anchors, [1, 1, 1, num, 2])
    return anchors

  def _extend_batch(self, grid, batch_size):
    # tile the singleton batch dimension up to batch_size
    return tf.tile(grid, [batch_size, 1, 1, 1, 1])

  def __call__(self, width, height, batch_size, dtype=None):
    if dtype is None:
      self.dtype = tf.keras.backend.floatx()
    else:
      self.dtype = dtype
    grid_points = self._build_grid_points(width, height, self._anchors,
                                          self.dtype)
    # anchors are expressed relative to this level by dividing by the
    # level's stride (scale_anchors)
    anchor_grid = self._build_anchor_grid(
        tf.cast(self._anchors, self.dtype) /
        tf.cast(self._scale_anchors, self.dtype), self.dtype)

    grid_points = self._extend_batch(grid_points, batch_size)
    anchor_grid = self._extend_batch(anchor_grid, batch_size)
    return grid_points, anchor_grid
# Number of ground truth boxes consumed per while-loop iteration when
# searching for prediction/ground-truth overlaps; bounds peak memory use.
TILE_SIZE = 50
class PairWiseSearch:
  """Apply a pairwise search between the ground truth and the labels.

  The goal is to indicate the locations where the predictions overlap with
  ground truth for dynamic ground truth associations.
  """

  def __init__(self,
               iou_type='iou',
               any_match=True,
               min_conf=0.0,
               track_boxes=False,
               track_classes=False):
    """Initialization of Pair Wise Search.

    Args:
      iou_type: An `str` for the iou type to use ('iou', 'giou' or 'ciou').
      any_match: A `bool` for any match (no class match).
      min_conf: An `int` for minimum confidence threshold.
      track_boxes: A `bool` for dynamic box assignment.
      track_classes: A `bool` for dynamic class assignment.
    """
    self.iou_type = iou_type
    self._any = any_match
    self._min_conf = min_conf
    self._track_boxes = track_boxes
    self._track_classes = track_classes
    return

  def box_iou(self, true_box, pred_box):
    """Compute the configured IoU variant between two box sets."""
    # based on the type of loss, compute the iou loss for a box
    # compute_<name> indicates the type of iou to use
    if self.iou_type == 'giou':
      _, iou = box_ops.compute_giou(true_box, pred_box)
    elif self.iou_type == 'ciou':
      _, iou = box_ops.compute_ciou(true_box, pred_box)
    else:
      iou = box_ops.compute_iou(true_box, pred_box)
    return iou

  def _search_body(self, pred_box, pred_class, boxes, classes, running_boxes,
                   running_classes, max_iou, idx):
    """Main search fn; body of the while loop over ground-truth tiles."""
    # capture the batch size to be used, and gather a slice of
    # boxes from the ground truth. currently TILE_SIZE = 50, to
    # save memory
    batch_size = tf.shape(boxes)[0]
    box_slice = tf.slice(boxes, [0, idx * TILE_SIZE, 0],
                         [batch_size, TILE_SIZE, 4])

    # match the dimensions of the slice to the model predictions
    # shape: [batch_size, 1, 1, 1, TILE_SIZE, 4]
    box_slice = tf.expand_dims(box_slice, axis=1)
    box_slice = tf.expand_dims(box_slice, axis=1)
    box_slice = tf.expand_dims(box_slice, axis=1)
    box_grid = tf.expand_dims(pred_box, axis=-2)

    # capture the classes, broadcast the same way as the boxes
    class_slice = tf.slice(classes, [0, idx * TILE_SIZE],
                           [batch_size, TILE_SIZE])
    class_slice = tf.expand_dims(class_slice, axis=1)
    class_slice = tf.expand_dims(class_slice, axis=1)
    class_slice = tf.expand_dims(class_slice, axis=1)

    iou = self.box_iou(box_slice, box_grid)

    if self._min_conf > 0.0:
      if not self._any:
        # class-aware matching: zero the IoU where the predicted class set
        # does not contain the ground truth class
        class_grid = tf.expand_dims(pred_class, axis=-2)
        class_mask = tf.one_hot(
            tf.cast(class_slice, tf.int32),
            depth=tf.shape(pred_class)[-1],
            dtype=pred_class.dtype)
        class_mask = tf.reduce_any(tf.equal(class_mask, class_grid), axis=-1)
      else:
        # class-agnostic: only require some class confidence to be present
        class_mask = tf.reduce_max(pred_class, axis=-1, keepdims=True)
      class_mask = tf.cast(class_mask, iou.dtype)
      iou *= class_mask

    # running maximum IoU over all tiles seen so far; ind selects the
    # argmax within [previous_best, current_tile]
    max_iou_ = tf.concat([max_iou, iou], axis=-1)
    max_iou = tf.reduce_max(max_iou_, axis=-1, keepdims=True)
    ind = tf.expand_dims(tf.argmax(max_iou_, axis=-1), axis=-1)

    if self._track_boxes:
      # gather the box belonging to the current best match
      running_boxes = tf.expand_dims(running_boxes, axis=-2)
      box_slice = tf.zeros_like(running_boxes) + box_slice
      box_slice = tf.concat([running_boxes, box_slice], axis=-2)
      running_boxes = tf.gather_nd(box_slice, ind, batch_dims=4)

    if self._track_classes:
      # gather the class belonging to the current best match
      running_classes = tf.expand_dims(running_classes, axis=-1)
      class_slice = tf.zeros_like(running_classes) + class_slice
      class_slice = tf.concat([running_classes, class_slice], axis=-1)
      running_classes = tf.gather_nd(class_slice, ind, batch_dims=4)

    return (pred_box, pred_class, boxes, classes, running_boxes,
            running_classes, max_iou, idx + 1)

  def __call__(self,
               pred_boxes,
               pred_classes,
               boxes,
               classes,
               scale=None,
               yxyx=True,
               clip_thresh=0.0):
    num_boxes = tf.shape(boxes)[-2]
    num_tiles = (num_boxes // TILE_SIZE) - 1

    if yxyx:
      boxes = box_ops.yxyx_to_xcycwh(boxes)

    if scale is not None:
      boxes = boxes * tf.stop_gradient(scale)

    if self._min_conf > 0.0:
      # binarize class confidences at the threshold
      pred_classes = tf.cast(pred_classes > self._min_conf,
                             pred_classes.dtype)

    def _loop_cond(unused_pred_box, unused_pred_class, boxes, unused_classes,
                   unused_running_boxes, unused_running_classes,
                   unused_max_iou, idx):
      # stop early once the current tile contains only all-zero (padding)
      # boxes or all tiles were consumed
      batch_size = tf.shape(boxes)[0]
      box_slice = tf.slice(boxes, [0, idx * TILE_SIZE, 0],
                           [batch_size, TILE_SIZE, 4])
      return tf.logical_and(idx < num_tiles,
                            tf.math.greater(tf.reduce_sum(box_slice), 0))

    running_boxes = tf.zeros_like(pred_boxes)
    running_classes = tf.zeros_like(tf.reduce_sum(running_boxes, axis=-1))
    max_iou = tf.zeros_like(tf.reduce_sum(running_boxes, axis=-1))
    max_iou = tf.expand_dims(max_iou, axis=-1)

    (pred_boxes, pred_classes, boxes, classes, running_boxes, running_classes,
     max_iou, _) = tf.while_loop(_loop_cond, self._search_body, [
         pred_boxes, pred_classes, boxes, classes, running_boxes,
         running_classes, max_iou,
         tf.constant(0)
     ])

    # zero out associations whose best IoU does not exceed the threshold
    mask = tf.cast(max_iou > clip_thresh, running_boxes.dtype)
    running_boxes *= mask
    running_classes *= tf.squeeze(mask, axis=-1)
    max_iou *= mask
    max_iou = tf.squeeze(max_iou, axis=-1)
    mask = tf.squeeze(mask, axis=-1)

    # stop gradients: the association is a constant target for the loss
    return (tf.stop_gradient(running_boxes), tf.stop_gradient(running_classes),
            tf.stop_gradient(max_iou), tf.stop_gradient(mask))
def average_iou(iou):
  """Computes the average intersection over union over nonzero locations.

  Locations where the iou is zero do not contribute to the average.

  Args:
    iou: A `Tensor` representing the iou values.

  Returns:
    A `Tensor` (with gradients stopped) holding the average intersection
    over union.
  """
  # reduce over every axis except the batch axis (axis 0)
  reduce_axes = tf.range(1, tf.shape(tf.shape(iou))[0])
  iou_total = tf.reduce_sum(iou, axis=reduce_axes)
  nonzero_count = tf.cast(
      tf.math.count_nonzero(iou, axis=reduce_axes), iou.dtype)
  avg_iou = tf.reduce_mean(math_ops.divide_no_nan(iou_total, nonzero_count))
  return tf.stop_gradient(avg_iou)
def _scale_boxes(encoded_boxes, width, height, anchor_grid, grid_points,
                 scale_xy):
  """Decodes model boxes, applying an exponential to width and height maps.

  Args:
    encoded_boxes: A `Tensor` holding the raw box predictions; the last axis
      is split as [x, y, w, h].
    width: width of the prediction layer.
    height: height of the prediction layer.
    anchor_grid: A `Tensor` of anchor widths/heights for decoding.
    grid_points: A `Tensor` of per-pixel grid offsets for decoding centers.
    scale_xy: scalar controlling the range of each decoded center.

  Returns:
    A tuple (scaler, scaled_box, pred_box).
  """
  raw_xy = encoded_boxes[..., 0:2]
  raw_wh = encoded_boxes[..., 2:4]

  # scaling tensor to express boxes relative to the image
  scaler = tf.convert_to_tensor([height, width, height, width])
  alpha = tf.cast(scale_xy, encoded_boxes.dtype)

  # sigmoid the centers, then widen their range around the cell center
  offset_xy = tf.math.sigmoid(raw_xy) * alpha - 0.5 * (alpha - 1)

  # centers: per-pixel grid location plus the predicted offset
  box_xy = grid_points + offset_xy
  # sizes: exponential of the raw prediction, scaled by the anchors
  box_wh = tf.math.exp(raw_wh) * anchor_grid

  # normalized box, and the grid-relative (shifted) scaled box
  pred_box = tf.concat([box_xy, box_wh], axis=-1) / scaler
  scaled_box = tf.concat([offset_xy, box_wh], axis=-1)
  return (scaler, scaled_box, pred_box)
@tf.custom_gradient
def _darknet_boxes(encoded_boxes, width, height, anchor_grid, grid_points,
                   max_delta, scale_xy):
  """Wrapper for _scale_boxes to implement a custom gradient.

  The forward pass is identical to _scale_boxes; the backward pass folds the
  gradients of scaled_box and pred_box together, re-applies the exponential
  used on width/height, removes nan/inf, and clips to max_delta. Only
  encoded_boxes receives a gradient.
  """
  (scaler, scaled_box, pred_box) = _scale_boxes(encoded_boxes, width, height,
                                                anchor_grid, grid_points,
                                                scale_xy)

  def delta(unused_dy_scaler, dy_scaled, dy):
    dy_xy, dy_wh = tf.split(dy, 2, axis=-1)
    dy_xy_, dy_wh_ = tf.split(dy_scaled, 2, axis=-1)

    # add all the gradients that may have been applied to the
    # boxes and those that have been applied to the width and height
    dy_wh += dy_wh_
    dy_xy += dy_xy_

    # propagate the exponential applied to the width and height in
    # order to ensure the gradient propagated is of the correct
    # magnitude
    pred_wh = encoded_boxes[..., 2:4]
    dy_wh *= tf.math.exp(pred_wh)

    dbox = tf.concat([dy_xy, dy_wh], axis=-1)

    # apply the gradient clipping to xy and wh
    dbox = math_ops.rm_nan_inf(dbox)
    delta = tf.cast(max_delta, dbox.dtype)
    dbox = tf.clip_by_value(dbox, -delta, delta)
    # zeros for width, height, anchor_grid, grid_points, max_delta, scale_xy
    return dbox, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0

  return (scaler, scaled_box, pred_box), delta
def _new_coord_scale_boxes(encoded_boxes, width, height, anchor_grid,
                           grid_points, scale_xy):
  """Decodes model boxes by squaring and scaling the width and height maps.

  Args:
    encoded_boxes: A `Tensor` holding the raw box predictions; the last axis
      is split as [x, y, w, h].
    width: width of the prediction layer.
    height: height of the prediction layer.
    anchor_grid: A `Tensor` of anchor widths/heights for decoding.
    grid_points: A `Tensor` of per-pixel grid offsets for decoding centers.
    scale_xy: scalar controlling the range of each decoded center.

  Returns:
    A tuple (scaler, scaled_box, pred_box).
  """
  raw_xy = encoded_boxes[..., 0:2]
  raw_wh = encoded_boxes[..., 2:4]

  # scaling tensor to express boxes relative to the image
  scaler = tf.convert_to_tensor([height, width, height, width])
  alpha = tf.cast(scale_xy, raw_xy.dtype)

  # sigmoid both the centers and the sizes ("scaled" / new-coordinate style)
  offset_xy = tf.math.sigmoid(raw_xy) * alpha - 0.5 * (alpha - 1)
  squashed_wh = tf.math.sigmoid(raw_wh)

  # centers: per-pixel grid location plus the predicted offset
  box_xy = grid_points + offset_xy
  # sizes: (2 * sigmoid)^2 of the raw prediction, scaled by the anchors
  box_wh = (2 * squashed_wh)**2 * anchor_grid

  # normalized box, and the grid-relative (shifted) scaled box
  pred_box = tf.concat([box_xy, box_wh], axis=-1) / scaler
  scaled_box = tf.concat([offset_xy, box_wh], axis=-1)
  return (scaler, scaled_box, pred_box)
@tf.custom_gradient
def _darknet_new_coord_boxes(encoded_boxes, width, height, anchor_grid,
                             grid_points, max_delta, scale_xy):
  """Wrapper for _new_coord_scale_boxes to implement a custom gradient.

  The forward pass is identical to _new_coord_scale_boxes; the backward pass
  folds the gradients of scaled_box and pred_box together, removes nan/inf,
  and clips to max_delta. Only encoded_boxes receives a gradient.
  """
  (scaler, scaled_box,
   pred_box) = _new_coord_scale_boxes(encoded_boxes, width, height,
                                      anchor_grid, grid_points, scale_xy)

  def delta(unused_dy_scaler, dy_scaled, dy):
    dy_xy, dy_wh = tf.split(dy, 2, axis=-1)
    dy_xy_, dy_wh_ = tf.split(dy_scaled, 2, axis=-1)

    # add all the gradients that may have been applied to the
    # boxes and those that have been applied to the width and height
    dy_wh += dy_wh_
    dy_xy += dy_xy_

    dbox = tf.concat([dy_xy, dy_wh], axis=-1)

    # apply the gradient clipping to xy and wh
    dbox = math_ops.rm_nan_inf(dbox)
    delta = tf.cast(max_delta, dbox.dtype)
    dbox = tf.clip_by_value(dbox, -delta, delta)
    # zeros for width, height, anchor_grid, grid_points, max_delta, scale_xy
    return dbox, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0

  return (scaler, scaled_box, pred_box), delta
def _anchor_free_scale_boxes(encoded_boxes, width, height, stride,
                             grid_points, scale_xy):
  """Decode model boxes using the FPN stride under anchor free conditions.

  Args:
    encoded_boxes: A `Tensor` holding the raw box predictions; the last axis
      is split as [x, y, w, h].
    width: width of the prediction layer.
    height: height of the prediction layer.
    stride: the down-stride of this level relative to the input image.
    grid_points: A `Tensor` of per-pixel grid offsets for decoding centers.
    scale_xy: scalar controlling the range of each decoded center.

  Returns:
    A tuple (scaler, scaled_box, pred_box).
  """
  raw_xy = encoded_boxes[..., 0:2]
  raw_wh = encoded_boxes[..., 2:4]

  # scaling tensor to express boxes relative to the image
  scaler = tf.convert_to_tensor([height, width, height, width])
  alpha = tf.cast(scale_xy, encoded_boxes.dtype)

  # NOTE: unlike the anchor-based paths, no sigmoid is applied to the
  # raw centers here
  offset_xy = raw_xy * alpha - 0.5 * (alpha - 1)

  # centers and sizes are expressed in input-image pixels via the stride
  box_xy = (grid_points + offset_xy) * stride
  box_wh = tf.math.exp(raw_wh) * stride

  scaled_box = tf.concat([box_xy, box_wh], axis=-1)
  pred_box = scaled_box / scaler
  return (scaler, scaled_box, pred_box)
def get_predicted_box(width,
                      height,
                      encoded_boxes,
                      anchor_grid,
                      grid_points,
                      scale_xy,
                      stride,
                      darknet=False,
                      box_type='original',
                      max_delta=np.inf):
  """Decodes the predicted boxes from the model format to a usable format.

  This function decodes the model outputs into the [x, y, w, h] format for
  use in the loss function as well as for use within the detection generator.

  Args:
    width: A `float` scalar indicating the width of the prediction layer.
    height: A `float` scalar indicating the height of the prediction layer.
    encoded_boxes: A `Tensor` of shape [..., height, width, 4] holding encoded
      boxes.
    anchor_grid: A `Tensor` of shape [..., 1, 1, 2] holding the anchor boxes
      organized for box decoding, box width and height.
    grid_points: A `Tensor` of shape [..., height, width, 2] holding the
      anchor boxes for decoding the box centers.
    scale_xy: A `float` scaler used to indicate the range for each center
      outside of its given [..., i, j, 4] index, where i and j are indexing
      pixels along the width and height of the predicted output map.
    stride: An `int` defining the amount of down stride relative to the input
      image.
    darknet: A `bool` used to select between custom gradient and default
      autograd.
    box_type: An `str` indicating the type of box encoding that is being used
      ('anchor_free', 'scaled', or anything else for the original encoding).
    max_delta: A `float` scaler used for gradient clipping in back
      propagation.

  Returns:
    scaler: A `Tensor` of shape [4] returned to allow the scaling of the
      ground truth boxes to be of the same magnitude as the decoded predicted
      boxes.
    scaled_box: A `Tensor` of shape [..., height, width, 4] with the predicted
      boxes.
    pred_box: A `Tensor` of shape [..., height, width, 4] with the predicted
      boxes divided by the scaler parameter used to put all boxes in the
      [0, 1] range.
  """
  if box_type == 'anchor_free':
    # anchor free decoding ignores the anchor grid and uses the stride
    decoded = _anchor_free_scale_boxes(encoded_boxes, width, height, stride,
                                       grid_points, scale_xy)
  elif darknet:
    # pylint:disable=unbalanced-tuple-unpacking
    # darknet loss: use the custom-gradient wrappers so the box decoding
    # itself is not back-propagated through
    decode_fn = (
        _darknet_new_coord_boxes if box_type == 'scaled' else _darknet_boxes)
    decoded = decode_fn(encoded_boxes, width, height, anchor_grid,
                        grid_points, max_delta, scale_xy)
  else:
    # scaled loss: propagate gradients through the box decoding normally
    decode_fn = (
        _new_coord_scale_boxes if box_type == 'scaled' else _scale_boxes)
    decoded = decode_fn(encoded_boxes, width, height, anchor_grid,
                        grid_points, scale_xy)

  scaler, scaled_box, pred_box = decoded
  return (scaler, scaled_box, pred_box)
official/vision/beta/projects/yolo/ops/math_ops.py
View file @
c6d7d57d
...
@@ -58,25 +58,4 @@ def divide_no_nan(a, b):
...
@@ -58,25 +58,4 @@ def divide_no_nan(a, b):
Returns:
Returns:
a `Tensor` representing a divided by b, with all nan values removed.
a `Tensor` representing a divided by b, with all nan values removed.
"""
"""
zero
=
tf
.
cast
(
0.0
,
b
.
dtype
)
return
a
/
(
b
+
1e-9
)
return
tf
.
where
(
b
==
zero
,
zero
,
a
/
b
)
def mul_no_nan(x, y):
  """Nan safe multiply operation.

  Built to allow model compilation in tflite and to allow one tensor to mask
  another. Wherever x is zero the multiplication is not used and the value is
  replaced with a zero. This is required because 0 * nan = nan, which can
  make computation unstable when the intended behavior is for zero to mean
  ignore.

  Args:
    x: any `Tensor` of any type.
    y: any `Tensor` of any type with the same shape as tensor x.

  Returns:
    a `Tensor` representing x times y, where x is used to safely mask the
    tensor y.
  """
  zeros = tf.cast(0, x.dtype)
  product = x * y
  # select zero (never the possibly-nan product) wherever x is zero
  return tf.where(x == 0, zeros, product)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment