Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
ResNet50_tensorflow
Commits
6a4c0889
Commit
6a4c0889
authored
Sep 13, 2021
by
A. Unique TensorFlower
Browse files
Merge pull request #10251 from PurdueDualityLab:loss_fn_pr
PiperOrigin-RevId: 396512110
parents
d983b7d7
7f90664e
Changes
14
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
14 changed files
with
1790 additions
and
457 deletions
+1790
-457
official/vision/beta/projects/yolo/losses/__init__.py
official/vision/beta/projects/yolo/losses/__init__.py
+14
-0
official/vision/beta/projects/yolo/losses/yolo_loss.py
official/vision/beta/projects/yolo/losses/yolo_loss.py
+714
-0
official/vision/beta/projects/yolo/losses/yolo_loss_test.py
official/vision/beta/projects/yolo/losses/yolo_loss_test.py
+98
-0
official/vision/beta/projects/yolo/modeling/backbones/darknet.py
...l/vision/beta/projects/yolo/modeling/backbones/darknet.py
+12
-10
official/vision/beta/projects/yolo/modeling/decoders/yolo_decoder.py
...sion/beta/projects/yolo/modeling/decoders/yolo_decoder.py
+6
-7
official/vision/beta/projects/yolo/modeling/heads/yolo_head.py
...ial/vision/beta/projects/yolo/modeling/heads/yolo_head.py
+28
-3
official/vision/beta/projects/yolo/modeling/layers/detection_generator.py
...beta/projects/yolo/modeling/layers/detection_generator.py
+114
-68
official/vision/beta/projects/yolo/modeling/layers/detection_generator_test.py
...projects/yolo/modeling/layers/detection_generator_test.py
+4
-1
official/vision/beta/projects/yolo/modeling/layers/nn_blocks.py
...al/vision/beta/projects/yolo/modeling/layers/nn_blocks.py
+37
-149
official/vision/beta/projects/yolo/modeling/layers/nn_blocks_test.py
...sion/beta/projects/yolo/modeling/layers/nn_blocks_test.py
+0
-80
official/vision/beta/projects/yolo/modeling/yolo_model.py
official/vision/beta/projects/yolo/modeling/yolo_model.py
+38
-11
official/vision/beta/projects/yolo/ops/box_ops.py
official/vision/beta/projects/yolo/ops/box_ops.py
+84
-106
official/vision/beta/projects/yolo/ops/loss_utils.py
official/vision/beta/projects/yolo/ops/loss_utils.py
+640
-0
official/vision/beta/projects/yolo/ops/math_ops.py
official/vision/beta/projects/yolo/ops/math_ops.py
+1
-22
No files found.
official/vision/beta/projects/yolo/losses/__init__.py
0 → 100644
View file @
6a4c0889
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
official/vision/beta/projects/yolo/losses/yolo_loss.py
0 → 100755
View file @
6a4c0889
This diff is collapsed.
Click to expand it.
official/vision/beta/projects/yolo/losses/yolo_loss_test.py
0 → 100755
View file @
6a4c0889
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for yolo loss."""
from
absl.testing
import
parameterized
import
tensorflow
as
tf
from
official.vision.beta.projects.yolo.losses
import
yolo_loss
class YoloDecoderTest(parameterized.TestCase, tf.test.TestCase):
  """Smoke test that builds a `YoloLoss` and evaluates it on constant inputs."""

  @parameterized.parameters(True, False)
  def test_loss_init(self, scaled):
    """Constructs the loss (scaled and unscaled) and calls it once.

    Nothing is asserted about the values; the only check is that the call
    succeeds and yields exactly three outputs.

    Args:
      scaled: `bool` forwarded to `YoloLoss` as `use_scaled_loss`.
    """
    tf.keras.backend.set_image_data_format('channels_last')

    def ones_per_key(shapes, dtype=tf.float32):
      # One all-ones tensor per key, shaped by that key's entry.
      return {
          name: tf.ones(dims, dtype=dtype) for name, dims in shapes.items()
      }

    keys = ['3', '4', '5']
    grid_sizes = (52, 26, 13)

    def per_key(value):
      # The same scalar setting repeated for every key.
      return dict.fromkeys(keys, value)

    def grid_shapes(*trailing_dims):
      # [batch, size, size, *trailing_dims] for each key.
      return {
          level: [1, size, size, *trailing_dims]
          for level, size in zip(keys, grid_sizes)
      }

    prediction_shapes = grid_shapes(255)
    num_classes = 80
    masks = {'3': [0, 1, 2], '4': [3, 4, 5], '5': [6, 7, 8]}
    anchors = [
        [12.0, 19.0],
        [31.0, 46.0],
        [96.0, 54.0],
        [46.0, 114.0],
        [133.0, 127.0],
        [79.0, 225.0],
        [301.0, 150.0],
        [172.0, 286.0],
        [348.0, 340.0],
    ]
    path_strides = {level: 2**int(level) for level in keys}

    loss = yolo_loss.YoloLoss(
        keys,
        num_classes,
        anchors,
        masks=masks,
        path_strides=path_strides,
        truth_thresholds=per_key(1.0),
        ignore_thresholds=per_key(0.7),
        loss_types=per_key('ciou'),
        iou_normalizers=per_key(0.05),
        cls_normalizers=per_key(0.5),
        obj_normalizers=per_key(1.0),
        objectness_smooths=per_key(1.0),
        box_types=per_key('scaled'),
        scale_xys=per_key(2.0),
        max_deltas=per_key(30.0),
        label_smoothing=0.0,
        use_scaled_loss=scaled,
        update_on_repeat=True)

    # Ground-truth dictionary filled with constant tensors.
    gt = {
        'true_conf': ones_per_key(grid_shapes(3, 1)),
        'inds': ones_per_key({level: [1, 300, 3] for level in keys}, tf.int32),
        'upds': ones_per_key({level: [1, 300, 8] for level in keys}),
        'bbox': tf.ones([1, 300, 4], dtype=tf.float32),
        'classes': tf.ones([1, 300], dtype=tf.float32),
    }

    # Unpacking requires the loss to return exactly three values.
    _, _, _ = loss(gt, ones_per_key(prediction_shapes))
# Run the test cases through tf.test.main() when executed as a script.
if
__name__
==
'__main__'
:
tf
.
test
.
main
()
official/vision/beta/projects/yolo/modeling/backbones/darknet.py
View file @
6a4c0889
...
...
@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Contains definitions of Darknet Backbone Networks.
The models are inspired by ResNet and CSPNet.
...
...
@@ -390,7 +389,7 @@ class Darknet(tf.keras.Model):
norm_momentum
=
0.99
,
norm_epsilon
=
0.001
,
dilate
=
False
,
kernel_initializer
=
'
glorot_uniform
'
,
kernel_initializer
=
'
VarianceScaling
'
,
kernel_regularizer
=
None
,
bias_regularizer
=
None
,
**
kwargs
):
...
...
@@ -507,10 +506,12 @@ class Darknet(tf.keras.Model):
self
.
_default_dict
[
'name'
]
=
f
'
{
name
}
_csp_down'
if
self
.
_dilate
:
self
.
_default_dict
[
'dilation_rate'
]
=
config
.
dilation_rate
degrid
=
int
(
tf
.
math
.
log
(
float
(
config
.
dilation_rate
))
/
tf
.
math
.
log
(
2.
))
else
:
self
.
_default_dict
[
'dilation_rate'
]
=
1
degrid
=
0
# swap/add dilation
# swap/add dilation
x
,
x_route
=
nn_blocks
.
CSPRoute
(
filters
=
config
.
filters
,
filter_scale
=
csp_filter_scale
,
...
...
@@ -518,7 +519,7 @@ class Darknet(tf.keras.Model):
**
self
.
_default_dict
)(
inputs
)
dilated_reps
=
config
.
repetitions
-
self
.
_default_dict
[
'dilation_rate'
]
//
2
dilated_reps
=
config
.
repetitions
-
degrid
for
i
in
range
(
dilated_reps
):
self
.
_default_dict
[
'name'
]
=
f
'
{
name
}
_
{
i
}
'
x
=
nn_blocks
.
DarkResidual
(
...
...
@@ -528,8 +529,8 @@ class Darknet(tf.keras.Model):
x
)
for
i
in
range
(
dilated_reps
,
config
.
repetitions
):
self
.
_default_dict
[
'dilation_rate'
]
=
self
.
_default_dict
[
'dilation_rate'
]
//
2
self
.
_default_dict
[
'dilation_rate'
]
=
max
(
1
,
self
.
_default_dict
[
'dilation_rate'
]
//
2
)
self
.
_default_dict
[
'name'
]
=
f
"
{
name
}
_
{
i
}
_degridded_
{
self
.
_default_dict
[
'dilation_rate'
]
}
"
x
=
nn_blocks
.
DarkResidual
(
...
...
@@ -592,8 +593,8 @@ class Darknet(tf.keras.Model):
filters
=
config
.
filters
,
downsample
=
True
,
**
self
.
_default_dict
)(
inputs
)
dilated_reps
=
config
.
repetitions
-
(
self
.
_default_dict
[
'dilation_rate'
]
//
2
)
-
1
dilated_reps
=
config
.
repetitions
-
self
.
_default_dict
[
'dilation_rate'
]
//
2
-
1
for
i
in
range
(
dilated_reps
):
self
.
_default_dict
[
'name'
]
=
f
'
{
name
}
_
{
i
}
'
x
=
nn_blocks
.
DarkResidual
(
...
...
@@ -661,12 +662,13 @@ class Darknet(tf.keras.Model):
@
factory
.
register_backbone_builder
(
'darknet'
)
def
build_darknet
(
input_specs
:
tf
.
keras
.
layers
.
InputSpec
,
backbone_c
onfi
g
:
hyperparams
.
Config
,
backbone_c
f
g
:
hyperparams
.
Config
,
norm_activation_config
:
hyperparams
.
Config
,
l2_regularizer
:
tf
.
keras
.
regularizers
.
Regularizer
=
None
)
->
tf
.
keras
.
Model
:
"""Builds darknet."""
backbone_cfg
=
backbone_config
.
get
()
backbone_cfg
=
backbone_cfg
.
get
()
model
=
Darknet
(
model_id
=
backbone_cfg
.
model_id
,
min_level
=
backbone_cfg
.
min_level
,
...
...
official/vision/beta/projects/yolo/modeling/decoders/yolo_decoder.py
View file @
6a4c0889
...
...
@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Feature Pyramid Network and Path Aggregation variants used in YOLO."""
import
tensorflow
as
tf
...
...
@@ -39,7 +38,7 @@ class YoloFPN(tf.keras.layers.Layer):
use_sync_bn
=
False
,
norm_momentum
=
0.99
,
norm_epsilon
=
0.001
,
kernel_initializer
=
'
glorot_uniform
'
,
kernel_initializer
=
'
VarianceScaling
'
,
kernel_regularizer
=
None
,
bias_regularizer
=
None
,
**
kwargs
):
...
...
@@ -184,7 +183,7 @@ class YoloPAN(tf.keras.layers.Layer):
use_sync_bn
=
False
,
norm_momentum
=
0.99
,
norm_epsilon
=
0.001
,
kernel_initializer
=
'
glorot_uniform
'
,
kernel_initializer
=
'
VarianceScaling
'
,
kernel_regularizer
=
None
,
bias_regularizer
=
None
,
fpn_input
=
True
,
...
...
@@ -206,7 +205,7 @@ class YoloPAN(tf.keras.layers.Layer):
by zero.
kernel_initializer: kernel_initializer for convolutional layers.
kernel_regularizer: tf.keras.regularizers.Regularizer object for Conv2D.
bias_regularizer: tf.keras.regularizers.Regularizer object for Conv2
d
.
bias_regularizer: tf.keras.regularizers.Regularizer object for Conv2
D
.
fpn_input: `bool`, for whether the input into this function is an FPN or
a backbone.
fpn_filter_scale: `int`, scaling factor for the FPN filters.
...
...
@@ -374,7 +373,7 @@ class YoloDecoder(tf.keras.Model):
use_sync_bn
=
False
,
norm_momentum
=
0.99
,
norm_epsilon
=
0.001
,
kernel_initializer
=
'
glorot_uniform
'
,
kernel_initializer
=
'
VarianceScaling
'
,
kernel_regularizer
=
None
,
bias_regularizer
=
None
,
**
kwargs
):
...
...
@@ -389,8 +388,8 @@ class YoloDecoder(tf.keras.Model):
use_fpn: `bool`, use the FPN found in the YoloV4 model.
use_spatial_attention: `bool`, use the spatial attention module.
csp_stack: `bool`, CSPize the FPN.
fpn_depth: `int`, number of layers to use in each FPN path
if you choose
to use an FPN.
fpn_depth: `int`, number of layers to use in each FPN path
if you choose
to use an FPN.
fpn_filter_scale: `int`, scaling factor for the FPN filters.
path_process_len: `int`, number of layers to use in each Decoder path.
max_level_process_len: `int`, number of layers to use in the largest
...
...
official/vision/beta/projects/yolo/modeling/heads/yolo_head.py
View file @
6a4c0889
...
...
@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Yolo heads."""
import
tensorflow
as
tf
...
...
@@ -30,10 +29,11 @@ class YoloHead(tf.keras.layers.Layer):
output_extras
=
0
,
norm_momentum
=
0.99
,
norm_epsilon
=
0.001
,
kernel_initializer
=
'
glorot_uniform
'
,
kernel_initializer
=
'
VarianceScaling
'
,
kernel_regularizer
=
None
,
bias_regularizer
=
None
,
activation
=
None
,
smart_bias
=
False
,
**
kwargs
):
"""Yolo Prediction Head initialization function.
...
...
@@ -52,6 +52,7 @@ class YoloHead(tf.keras.layers.Layer):
kernel_regularizer: tf.keras.regularizers.Regularizer object for Conv2D.
bias_regularizer: tf.keras.regularizers.Regularizer object for Conv2d.
activation: `str`, the activation function to use typically leaky or mish.
smart_bias: `bool` whether or not use smart bias.
**kwargs: keyword arguments to be passed.
"""
...
...
@@ -68,6 +69,7 @@ class YoloHead(tf.keras.layers.Layer):
self
.
_output_extras
=
output_extras
self
.
_output_conv
=
(
classes
+
output_extras
+
5
)
*
boxes_per_level
self
.
_smart_bias
=
smart_bias
self
.
_base_config
=
dict
(
activation
=
activation
,
...
...
@@ -85,10 +87,29 @@ class YoloHead(tf.keras.layers.Layer):
use_bn
=
False
,
**
self
.
_base_config
)
def bias_init(self, scale, inshape, isize=640, no_per_conf=8):
  """Returns a bias initializer callable for a prediction head.

  The returned callable starts from an all-zero bias. When `self._smart_bias`
  is falsy that zero bias is returned unchanged. Otherwise the bias is viewed
  as `[self._boxes_per_level, -1]`, split along the last axis into 4 box
  entries, 1 confidence entry and the remaining class entries, constant log
  offsets are added to the confidence and class entries, and the result is
  flattened back to one dimension.

  Args:
    scale: divisor of `isize` inside the confidence offset
      `log(no_per_conf / (isize / scale)**2)`.
    inshape: not used by this method.
    isize: numerator paired with `scale` in the confidence offset.
    no_per_conf: numerator of the confidence offset.

  Returns:
    A function `bias(shape, dtype)` producing the initial bias tensor.
  """

  def bias(shape, dtype):
    zero_bias = tf.keras.initializers.Zeros()(shape, dtype=dtype)
    if not self._smart_bias:
      return zero_bias

    # One row per box so the per-box entries can be split by role.
    per_box = tf.reshape(zero_bias, [self._boxes_per_level, -1])
    box_part, conf_part, class_part = tf.split(per_box, [4, 1, -1], axis=-1)
    conf_part = conf_part + tf.math.log(no_per_conf / ((isize / scale)**2))
    class_part = class_part + tf.math.log(0.6 / (self._classes - 0.99))
    merged = tf.concat([box_part, conf_part, class_part], axis=-1)
    return tf.reshape(merged, [-1])

  return bias
# Creates self._head with one nn_blocks.ConvBN per key in self._key_list.
# NOTE(review): this span comes from an inline diff view. The first ConvBN
# assignment in the loop looks like the removed line that the
# bias_initializer variant after it replaces -- confirm against the committed
# file before relying on both being present.
def
build
(
self
,
input_shape
):
self
.
_head
=
dict
()
for
key
in
self
.
_key_list
:
self
.
_head
[
key
]
=
nn_blocks
.
ConvBN
(
**
self
.
_conv_config
)
# scale is 2 raised to the integer value of the key; it is handed to
# bias_init together with the last dimension of this key's input shape.
scale
=
2
**
int
(
key
)
self
.
_head
[
key
]
=
nn_blocks
.
ConvBN
(
bias_initializer
=
self
.
bias_init
(
scale
,
input_shape
[
key
][
-
1
]),
**
self
.
_conv_config
)
def
call
(
self
,
inputs
):
outputs
=
dict
()
...
...
@@ -107,6 +128,10 @@ class YoloHead(tf.keras.layers.Layer):
'Model has to be built before number of boxes can be determined.'
)
return
(
self
.
_max_level
-
self
.
_min_level
+
1
)
*
self
.
_boxes_per_level
@property
def num_heads(self):
  """Returns `_max_level - _min_level + 1`, the inclusive level count."""
  level_span = self._max_level - self._min_level
  return level_span + 1
def
get_config
(
self
):
config
=
dict
(
min_level
=
self
.
_min_level
,
...
...
official/vision/beta/projects/yolo/modeling/layers/detection_generator.py
View file @
6a4c0889
...
...
@@ -15,7 +15,10 @@
"""Contains common building blocks for yolo layer (detection layer)."""
import
tensorflow
as
tf
from
official.vision.beta.modeling.layers
import
detection_generator
from
official.vision.beta.projects.yolo.losses
import
yolo_loss
from
official.vision.beta.projects.yolo.ops
import
box_ops
from
official.vision.beta.projects.yolo.ops
import
loss_utils
@
tf
.
keras
.
utils
.
register_keras_serializable
(
package
=
'yolo'
)
...
...
@@ -36,11 +39,11 @@ class YoloLayer(tf.keras.Model):
cls_normalizer
=
1.0
,
obj_normalizer
=
1.0
,
use_scaled_loss
=
False
,
darknet
=
Non
e
,
update_on_repeat
=
Fals
e
,
pre_nms_points
=
5000
,
label_smoothing
=
0.0
,
max_boxes
=
200
,
new_cords
=
False
,
box_type
=
'original'
,
path_scale
=
None
,
scale_xy
=
None
,
nms_type
=
'greedy'
,
...
...
@@ -70,14 +73,25 @@ class YoloLayer(tf.keras.Model):
obj_normalizer: `float` for how much to scale loss on the detection map.
use_scaled_loss: `bool` for whether to use the scaled loss
or the traditional loss.
darknet: `bool` for whether to use the DarkNet or PyTorch loss function
implementation.
update_on_repeat: `bool` indicating how you would like to handle repeated
indexes in a given [j, i] index. Setting this to True will give more
consistent MAP, setting it to False will improve recall by 1-2% but will
sacrifice some MAP.
pre_nms_points: `int` number of top candidate detections per class before
NMS.
label_smoothing: `float` for how much to smooth the loss on the classes.
max_boxes: `int` for the maximum number of boxes retained over all
classes.
new_cords: `bool` for using the ScaledYOLOv4 coordinates.
box_type: `str`, there are 3 different box types that will affect training
differently {original, scaled and anchor_free}. The original method
decodes the boxes by applying an exponential to the model width and
height maps, then scaling the maps by the anchor boxes. This method is
used in Yolo-v4, Yolo-v3, and all its counterparts. The Scale method
squares the width and height and scales both by a fixed factor of 4.
This method is used in the Scale Yolo models, as well as Yolov4-CSP.
Finally, anchor_free is like the original method but will not apply an
activation function to the boxes, this is used for some of the newer
anchor free versions of YOLO.
path_scale: `dict` for the size of the input tensors. Defaults to
precalculated values from the `mask`.
scale_xy: dictionary `float` values indicating how far each pixel can see
...
...
@@ -91,18 +105,6 @@ class YoloLayer(tf.keras.Model):
objectness_smooth: `float` for how much to smooth the loss on the
detection map.
**kwargs: Additional keyword arguments.
Return:
loss: `float` for the actual loss.
box_loss: `float` loss on the boxes used for metrics.
conf_loss: `float` loss on the confidence used for metrics.
class_loss: `float` loss on the classes used for metrics.
avg_iou: `float` metric for the average iou between predictions
and ground truth.
avg_obj: `float` metric for the average confidence of the model
for predictions.
recall50: `float` metric for how accurate the model is.
precision50: `float` metric for how precise the model is.
"""
super
().
__init__
(
**
kwargs
)
self
.
_masks
=
masks
...
...
@@ -121,29 +123,18 @@ class YoloLayer(tf.keras.Model):
self
.
_loss_type
=
loss_type
self
.
_use_scaled_loss
=
use_scaled_loss
self
.
_
darknet
=
darkne
t
self
.
_
update_on_repeat
=
update_on_repea
t
self
.
_pre_nms_points
=
pre_nms_points
self
.
_label_smoothing
=
label_smoothing
self
.
_keys
=
list
(
masks
.
keys
())
self
.
_len_keys
=
len
(
self
.
_keys
)
self
.
_
new_cords
=
new_cords
self
.
_
box_type
=
box_type
self
.
_path_scale
=
path_scale
or
{
key
:
2
**
int
(
key
)
for
key
,
_
in
masks
.
items
()
}
self
.
_nms_types
=
{
'greedy'
:
1
,
'iou'
:
2
,
'giou'
:
3
,
'ciou'
:
4
,
'diou'
:
5
,
'class_independent'
:
6
,
'weighted_diou'
:
7
}
self
.
_nms_type
=
self
.
_nms_types
[
nms_type
]
self
.
_nms_type
=
nms_type
self
.
_scale_xy
=
scale_xy
or
{
key
:
1.0
for
key
,
_
in
masks
.
items
()}
self
.
_generator
=
{}
...
...
@@ -156,27 +147,33 @@ class YoloLayer(tf.keras.Model):
return
def
get_generators
(
self
,
anchors
,
path_scale
,
path_key
):
return
None
def rm_nan_inf(self, x, val=0.0):
  """Replaces NaN and infinite entries of `x` with `val`.

  Args:
    x: tensor to clean.
    val: replacement value; it is cast to `x.dtype` before use.

  Returns:
    A tensor like `x` with every NaN and every infinite entry set to `val`.
  """
  fill = tf.cast(val, dtype=x.dtype)
  without_nan = tf.where(tf.math.is_nan(x), fill, x)
  return tf.where(tf.math.is_inf(without_nan), fill, without_nan)
anchor_generator
=
loss_utils
.
GridGenerator
(
anchors
,
scale_anchors
=
path_scale
)
return
anchor_generator
def
parse_prediction_path
(
self
,
key
,
inputs
):
shape_
=
tf
.
shape
(
inputs
)
shape
=
inputs
.
get_shape
().
as_list
()
height
,
width
=
shape
[
1
],
shape
[
2
]
batchsize
,
height
,
width
=
shape_
[
0
],
shape
[
1
],
shape
[
2
]
if
height
is
None
or
width
is
None
:
height
,
width
=
shape_
[
1
],
shape_
[
2
]
generator
=
self
.
_generator
[
key
]
len_mask
=
self
.
_len_mask
[
key
]
scale_xy
=
self
.
_scale_xy
[
key
]
# reshape the yolo output to (batchsize,
# width,
# height,
# number_anchors,
# remaining_points)
data
=
tf
.
reshape
(
inputs
,
[
-
1
,
height
,
width
,
len_mask
,
self
.
_classes
+
5
])
# use the grid generator to get the formatted anchor boxes and grid points
# in shape [1, height, width, 2]
centers
,
anchors
=
generator
(
height
,
width
,
batchsize
,
dtype
=
data
.
dtype
)
# split the yolo detections into boxes, object score map, classes
boxes
,
obns_scores
,
class_scores
=
tf
.
split
(
data
,
[
4
,
1
,
self
.
_classes
],
axis
=-
1
)
...
...
@@ -184,25 +181,32 @@ class YoloLayer(tf.keras.Model):
# determine the number of classes
classes
=
class_scores
.
get_shape
().
as_list
()[
-
1
]
# configurable to use the new coordinates in scaled Yolo v4 or not
_
,
_
,
boxes
=
loss_utils
.
get_predicted_box
(
tf
.
cast
(
height
,
data
.
dtype
),
tf
.
cast
(
width
,
data
.
dtype
),
boxes
,
anchors
,
centers
,
scale_xy
,
stride
=
self
.
_path_scale
[
key
],
darknet
=
False
,
box_type
=
self
.
_box_type
[
key
])
# convert boxes from yolo(x, y, w, h) to tensorflow(ymin, xmin, ymax, xmax)
boxes
=
box_ops
.
xcycwh_to_yxyx
(
boxes
)
# activate and detection map
obns_scores
=
tf
.
math
.
sigmoid
(
obns_scores
)
# threshold the detection map
obns_mask
=
tf
.
cast
(
obns_scores
>
self
.
_thresh
,
obns_scores
.
dtype
)
# convert detection map to class detection probabilities
class_scores
=
tf
.
math
.
sigmoid
(
class_scores
)
*
obns_mask
*
obns_scores
class_scores
*=
tf
.
cast
(
class_scores
>
self
.
_thresh
,
class_scores
.
dtype
)
class_scores
=
tf
.
math
.
sigmoid
(
class_scores
)
*
obns_scores
fill
=
height
*
width
*
len_mask
# flatten predictions to [batchsize, N, -1] for non max suppression
fill
=
height
*
width
*
len_mask
boxes
=
tf
.
reshape
(
boxes
,
[
-
1
,
fill
,
4
])
class_scores
=
tf
.
reshape
(
class_scores
,
[
-
1
,
fill
,
classes
])
obns_scores
=
tf
.
reshape
(
obns_scores
,
[
-
1
,
fill
])
return
obns_scores
,
boxes
,
class_scores
def
call
(
self
,
inputs
):
...
...
@@ -224,26 +228,49 @@ class YoloLayer(tf.keras.Model):
# collate all predictions
boxes
=
tf
.
concat
(
boxes
,
axis
=
1
)
object_scores
=
tf
.
keras
.
backend
.
concatenate
(
object_scores
,
axis
=
1
)
class_scores
=
tf
.
keras
.
backend
.
concatenate
(
class_scores
,
axis
=
1
)
# greedy NMS
boxes
=
tf
.
cast
(
boxes
,
dtype
=
tf
.
float32
)
class_scores
=
tf
.
cast
(
class_scores
,
dtype
=
tf
.
float32
)
nms_items
=
tf
.
image
.
combined_non_max_suppression
(
tf
.
expand_dims
(
boxes
,
axis
=-
2
),
class_scores
,
self
.
_pre_nms_points
,
self
.
_max_boxes
,
iou_threshold
=
self
.
_nms_thresh
,
score_threshold
=
self
.
_thresh
)
# cast the boxes and predictions back to original datatype
boxes
=
tf
.
cast
(
nms_items
.
nmsed_boxes
,
object_scores
.
dtype
)
class_scores
=
tf
.
cast
(
nms_items
.
nmsed_classes
,
object_scores
.
dtype
)
object_scores
=
tf
.
cast
(
nms_items
.
nmsed_scores
,
object_scores
.
dtype
)
# compute the number of valid detections
num_detections
=
tf
.
math
.
reduce_sum
(
tf
.
math
.
ceil
(
object_scores
),
axis
=-
1
)
object_scores
=
tf
.
concat
(
object_scores
,
axis
=
1
)
class_scores
=
tf
.
concat
(
class_scores
,
axis
=
1
)
# get masks to threshold all the predictions
object_mask
=
tf
.
cast
(
object_scores
>
self
.
_thresh
,
object_scores
.
dtype
)
class_mask
=
tf
.
cast
(
class_scores
>
self
.
_thresh
,
class_scores
.
dtype
)
# apply thresholds mask to all the predictions
object_scores
*=
object_mask
class_scores
*=
(
tf
.
expand_dims
(
object_mask
,
axis
=-
1
)
*
class_mask
)
# apply nms
if
self
.
_nms_type
==
'greedy'
:
# greedy NMS
boxes
=
tf
.
cast
(
boxes
,
dtype
=
tf
.
float32
)
class_scores
=
tf
.
cast
(
class_scores
,
dtype
=
tf
.
float32
)
boxes
,
object_scores_
,
class_scores
,
num_detections
=
(
tf
.
image
.
combined_non_max_suppression
(
tf
.
expand_dims
(
boxes
,
axis
=-
2
),
class_scores
,
self
.
_pre_nms_points
,
self
.
_max_boxes
,
iou_threshold
=
self
.
_nms_thresh
,
score_threshold
=
self
.
_thresh
))
# cast the boxes and predictions back to original datatype
boxes
=
tf
.
cast
(
boxes
,
object_scores
.
dtype
)
class_scores
=
tf
.
cast
(
class_scores
,
object_scores
.
dtype
)
object_scores
=
tf
.
cast
(
object_scores_
,
object_scores
.
dtype
)
else
:
# TPU NMS
boxes
=
tf
.
cast
(
boxes
,
dtype
=
tf
.
float32
)
class_scores
=
tf
.
cast
(
class_scores
,
dtype
=
tf
.
float32
)
(
boxes
,
confidence
,
classes
,
num_detections
)
=
detection_generator
.
_generate_detections_v2
(
# pylint:disable=protected-access
tf
.
expand_dims
(
boxes
,
axis
=-
2
),
class_scores
,
pre_nms_top_k
=
self
.
_pre_nms_points
,
max_num_detections
=
self
.
_max_boxes
,
nms_iou_threshold
=
self
.
_nms_thresh
,
pre_nms_score_threshold
=
self
.
_thresh
)
boxes
=
tf
.
cast
(
boxes
,
object_scores
.
dtype
)
class_scores
=
tf
.
cast
(
classes
,
object_scores
.
dtype
)
object_scores
=
tf
.
cast
(
confidence
,
object_scores
.
dtype
)
# format and return
return
{
...
...
@@ -258,9 +285,28 @@ class YoloLayer(tf.keras.Model):
"""Generates a dictionary of losses to apply to each path.
Done in the detection generator because all parameters are the same
across both loss and detection generator
.
across both loss and detection generator
"""
return
None
loss
=
yolo_loss
.
YoloLoss
(
keys
=
self
.
_keys
,
classes
=
self
.
_classes
,
anchors
=
self
.
_anchors
,
masks
=
self
.
_masks
,
path_strides
=
self
.
_path_scale
,
truth_thresholds
=
self
.
_truth_thresh
,
ignore_thresholds
=
self
.
_ignore_thresh
,
loss_types
=
self
.
_loss_type
,
iou_normalizers
=
self
.
_iou_normalizer
,
cls_normalizers
=
self
.
_cls_normalizer
,
obj_normalizers
=
self
.
_obj_normalizer
,
objectness_smooths
=
self
.
_objectness_smooth
,
box_types
=
self
.
_box_type
,
max_deltas
=
self
.
_max_delta
,
scale_xys
=
self
.
_scale_xy
,
use_scaled_loss
=
self
.
_use_scaled_loss
,
update_on_repeat
=
self
.
_update_on_repeat
,
label_smoothing
=
self
.
_label_smoothing
)
return
loss
def
get_config
(
self
):
return
{
...
...
official/vision/beta/projects/yolo/modeling/layers/detection_generator_test.py
View file @
6a4c0889
...
...
@@ -39,7 +39,10 @@ class YoloDecoderTest(parameterized.TestCase, tf.test.TestCase):
anchors
=
[[
12.0
,
19.0
],
[
31.0
,
46.0
],
[
96.0
,
54.0
],
[
46.0
,
114.0
],
[
133.0
,
127.0
],
[
79.0
,
225.0
],
[
301.0
,
150.0
],
[
172.0
,
286.0
],
[
348.0
,
340.0
]]
layer
=
dg
.
YoloLayer
(
masks
,
anchors
,
classes
,
max_boxes
=
10
)
box_type
=
{
key
:
'scaled'
for
key
in
masks
.
keys
()}
layer
=
dg
.
YoloLayer
(
masks
,
anchors
,
classes
,
box_type
=
box_type
,
max_boxes
=
10
)
inputs
=
{}
for
key
in
input_shape
:
...
...
official/vision/beta/projects/yolo/modeling/layers/nn_blocks.py
View file @
6a4c0889
...
...
@@ -12,9 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Contains common building blocks for yolo neural networks."""
from
typing
import
Callable
,
List
import
tensorflow
as
tf
from
official.modeling
import
tf_utils
from
official.vision.beta.ops
import
spatial_transform_ops
...
...
@@ -48,7 +46,7 @@ class ConvBN(tf.keras.layers.Layer):
strides
=
(
1
,
1
),
padding
=
'same'
,
dilation_rate
=
(
1
,
1
),
kernel_initializer
=
'
glorot_uniform
'
,
kernel_initializer
=
'
VarianceScaling
'
,
bias_initializer
=
'zeros'
,
bias_regularizer
=
None
,
kernel_regularizer
=
None
,
...
...
@@ -97,7 +95,14 @@ class ConvBN(tf.keras.layers.Layer):
self
.
_strides
=
strides
self
.
_padding
=
padding
self
.
_dilation_rate
=
dilation_rate
self
.
_kernel_initializer
=
kernel_initializer
if
kernel_initializer
==
'VarianceScaling'
:
# to match pytorch initialization method
self
.
_kernel_initializer
=
tf
.
keras
.
initializers
.
VarianceScaling
(
scale
=
1
/
3
,
mode
=
'fan_in'
,
distribution
=
'uniform'
)
else
:
self
.
_kernel_initializer
=
kernel_initializer
self
.
_bias_initializer
=
bias_initializer
self
.
_kernel_regularizer
=
kernel_regularizer
...
...
@@ -194,7 +199,7 @@ class DarkResidual(tf.keras.layers.Layer):
filters
=
1
,
filter_scale
=
2
,
dilation_rate
=
1
,
kernel_initializer
=
'
glorot_uniform
'
,
kernel_initializer
=
'
VarianceScaling
'
,
bias_initializer
=
'zeros'
,
kernel_regularizer
=
None
,
bias_regularizer
=
None
,
...
...
@@ -366,7 +371,7 @@ class CSPTiny(tf.keras.layers.Layer):
def
__init__
(
self
,
filters
=
1
,
kernel_initializer
=
'
glorot_uniform
'
,
kernel_initializer
=
'
VarianceScaling
'
,
bias_initializer
=
'zeros'
,
bias_regularizer
=
None
,
kernel_regularizer
=
None
,
...
...
@@ -532,7 +537,7 @@ class CSPRoute(tf.keras.layers.Layer):
filters
,
filter_scale
=
2
,
activation
=
'mish'
,
kernel_initializer
=
'
glorot_uniform
'
,
kernel_initializer
=
'
VarianceScaling
'
,
bias_initializer
=
'zeros'
,
bias_regularizer
=
None
,
kernel_regularizer
=
None
,
...
...
@@ -661,7 +666,7 @@ class CSPConnect(tf.keras.layers.Layer):
drop_first
=
False
,
activation
=
'mish'
,
kernel_size
=
(
1
,
1
),
kernel_initializer
=
'
glorot_uniform
'
,
kernel_initializer
=
'
VarianceScaling
'
,
bias_initializer
=
'zeros'
,
bias_regularizer
=
None
,
kernel_regularizer
=
None
,
...
...
@@ -761,122 +766,6 @@ class CSPConnect(tf.keras.layers.Layer):
return
x
class CSPStack(tf.keras.layers.Layer):
  """CSP Stack layer.

  CSP full stack: combines the route and the connect so that an existing
  callable or list of layers can be quickly wrapped to make it a cross stage
  partial. Added for ease of use. You should be able to wrap any layer stack
  with a CSP independent of whether it belongs to the Darknet family. If
  filter_scale = 2, then the blocks in the stack passed into the CSP stack
  should also have filters = filters/filter_scale.

  Cross Stage Partial networks (CSPNets) were proposed in:
  [1] Chien-Yao Wang, Hong-Yuan Mark Liao, I-Hau Yeh, Yueh-Hua Wu,
        Ping-Yang Chen, Jun-Wei Hsieh
      CSPNet: A New Backbone that can Enhance Learning Capability of CNN.
        arXiv:1911.11929
  """

  def __init__(self,
               filters,
               model_to_wrap=None,
               filter_scale=2,
               activation='mish',
               kernel_initializer='glorot_uniform',
               bias_initializer='zeros',
               bias_regularizer=None,
               kernel_regularizer=None,
               downsample=True,
               use_bn=True,
               use_sync_bn=False,
               norm_momentum=0.99,
               norm_epsilon=0.001,
               **kwargs):
    """CSPStack layer initializer.

    Args:
      filters: integer for output depth, or the number of features to learn.
      model_to_wrap: callable Model or a list of callable objects that will
        process the output of CSPRoute, and be input into CSPConnect.
        list will be called sequentially.
      filter_scale: integer dictating (filters//2) or the number of filters in
        the partial feature stack.
      activation: string for activation function to use in layer.
      kernel_initializer: string to indicate which function to use to initialize
        weights.
      bias_initializer: string to indicate which function to use to initialize
        bias.
      bias_regularizer: string to indicate which function to use to regularizer
        bias.
      kernel_regularizer: string to indicate which function to use to
        regularizer weights.
      downsample: down_sample the input.
      use_bn: boolean for whether to use batch normalization.
      use_sync_bn: boolean for whether sync batch normalization statistics
        of all batch norm layers to the models global statistics
        (across all input batches).
      norm_momentum: float for moment to use for batch normalization.
      norm_epsilon: float for batch normalization epsilon.
      **kwargs: Keyword Arguments.

    Raises:
      TypeError: model_to_wrap is not a layer or a list of layers
    """
    super().__init__(**kwargs)

    # layer params
    self._filters = filters
    self._filter_scale = filter_scale
    self._activation = activation
    self._downsample = downsample

    # convolution params
    self._kernel_initializer = kernel_initializer
    self._bias_initializer = bias_initializer
    self._kernel_regularizer = kernel_regularizer
    self._bias_regularizer = bias_regularizer
    self._use_bn = use_bn
    self._use_sync_bn = use_sync_bn
    self._norm_momentum = norm_momentum
    self._norm_epsilon = norm_epsilon

    # Normalize model_to_wrap to a list that `call` iterates over. The
    # callable check comes first, as before, so a callable is always wrapped
    # as a single element.
    if model_to_wrap is None:
      self._model_to_wrap = []
    elif callable(model_to_wrap):
      self._model_to_wrap = [model_to_wrap]
    elif isinstance(model_to_wrap, list):
      self._model_to_wrap = model_to_wrap
    else:
      # The two literals previously joined without a space ("caniterate").
      raise TypeError(
          'the input to the CSPStack must be a list of layers that we can '
          'iterate through, or \n a callable')

  def build(self, input_shape):
    """Creates the CSPRoute and CSPConnect layers around the wrapped stack."""
    dark_conv_args = {
        'filters': self._filters,
        'filter_scale': self._filter_scale,
        'activation': self._activation,
        'kernel_initializer': self._kernel_initializer,
        'bias_initializer': self._bias_initializer,
        'bias_regularizer': self._bias_regularizer,
        'use_bn': self._use_bn,
        'use_sync_bn': self._use_sync_bn,
        'norm_momentum': self._norm_momentum,
        'norm_epsilon': self._norm_epsilon,
        'kernel_regularizer': self._kernel_regularizer,
    }
    # Only the route receives the downsample flag.
    self._route = CSPRoute(downsample=self._downsample, **dark_conv_args)
    self._connect = CSPConnect(**dark_conv_args)

  def call(self, inputs, training=None):
    """Routes the input, runs the wrapped layers in order, then reconnects.

    Args:
      inputs: tensor passed to the CSPRoute layer.
      training: accepted but not used by this method.

    Returns:
      The output of CSPConnect applied to the processed branch and the route
      branch.
    """
    x, x_route = self._route(inputs)
    for layer in self._model_to_wrap:
      x = layer(x)
    x = self._connect([x, x_route])
    return x
@
tf
.
keras
.
utils
.
register_keras_serializable
(
package
=
'yolo'
)
class
PathAggregationBlock
(
tf
.
keras
.
layers
.
Layer
):
"""Path Aggregation block."""
...
...
@@ -884,7 +773,7 @@ class PathAggregationBlock(tf.keras.layers.Layer):
def
__init__
(
self
,
filters
=
1
,
drop_final
=
True
,
kernel_initializer
=
'
glorot_uniform
'
,
kernel_initializer
=
'
VarianceScaling
'
,
bias_initializer
=
'zeros'
,
bias_regularizer
=
None
,
kernel_regularizer
=
None
,
...
...
@@ -1120,7 +1009,7 @@ class SAM(tf.keras.layers.Layer):
strides
=
(
1
,
1
),
padding
=
'same'
,
dilation_rate
=
(
1
,
1
),
kernel_initializer
=
'
glorot_uniform
'
,
kernel_initializer
=
'
VarianceScaling
'
,
bias_initializer
=
'zeros'
,
bias_regularizer
=
None
,
kernel_regularizer
=
None
,
...
...
@@ -1192,7 +1081,7 @@ class CAM(tf.keras.layers.Layer):
def
__init__
(
self
,
reduction_ratio
=
1.0
,
kernel_initializer
=
'
glorot_uniform
'
,
kernel_initializer
=
'
VarianceScaling
'
,
bias_initializer
=
'zeros'
,
bias_regularizer
=
None
,
kernel_regularizer
=
None
,
...
...
@@ -1285,7 +1174,7 @@ class CBAM(tf.keras.layers.Layer):
strides
=
(
1
,
1
),
padding
=
'same'
,
dilation_rate
=
(
1
,
1
),
kernel_initializer
=
'
glorot_uniform
'
,
kernel_initializer
=
'
VarianceScaling
'
,
bias_initializer
=
'zeros'
,
bias_regularizer
=
None
,
kernel_regularizer
=
None
,
...
...
@@ -1354,27 +1243,26 @@ class DarkRouteProcess(tf.keras.layers.Layer):
insert_spp = False)(x)
"""
def
__init__
(
self
,
filters
=
2
,
repetitions
=
2
,
insert_spp
=
False
,
insert_sam
=
False
,
insert_cbam
=
False
,
csp_stack
=
0
,
csp_scale
=
2
,
kernel_initializer
=
'glorot_uniform'
,
bias_initializer
=
'zeros'
,
bias_regularizer
=
None
,
kernel_regularizer
=
None
,
use_sync_bn
=
False
,
norm_momentum
=
0.99
,
norm_epsilon
=
0.001
,
block_invert
=
False
,
activation
=
'leaky'
,
leaky_alpha
=
0.1
,
spp_keys
=
None
,
**
kwargs
):
def
__init__
(
self
,
filters
=
2
,
repetitions
=
2
,
insert_spp
=
False
,
insert_sam
=
False
,
insert_cbam
=
False
,
csp_stack
=
0
,
csp_scale
=
2
,
kernel_initializer
=
'VarianceScaling'
,
bias_initializer
=
'zeros'
,
bias_regularizer
=
None
,
kernel_regularizer
=
None
,
use_sync_bn
=
False
,
norm_momentum
=
0.99
,
norm_epsilon
=
0.001
,
block_invert
=
False
,
activation
=
'leaky'
,
leaky_alpha
=
0.1
,
spp_keys
=
None
,
**
kwargs
):
"""DarkRouteProcess initializer.
Args:
...
...
official/vision/beta/projects/yolo/modeling/layers/nn_blocks_test.py
View file @
6a4c0889
...
...
@@ -106,86 +106,6 @@ class CSPRouteTest(tf.test.TestCase, parameterized.TestCase):
self
.
assertNotIn
(
None
,
grad
)
class CSPStackTest(tf.test.TestCase, parameterized.TestCase):
  """Output-shape and gradient-flow tests for `nn_blocks.CSPStack`."""

  # Each case is (name, width, height, filters, mod, layer_type, stack_type,
  # count, downsample). Both tests share this table so they cannot drift apart.
  _STACK_CASES = (
      ('no_stack', 224, 224, 64, 2, 'residual', None, 0, True),
      ('residual_stack', 224, 224, 64, 2, 'residual', 'list', 2, True),
      ('conv_stack', 224, 224, 64, 2, 'conv', 'list', 3, False),
      ('callable_no_scale', 224, 224, 64, 1, 'residual', 'model', 5, False),
  )

  def build_layer(self, layer_type, filters, filter_scale, count, stack_type,
                  downsample):
    """Builds a `CSPStack` wrapping `count` inner layers.

    Args:
      layer_type: 'residual' wraps `DarkResidual` layers; any other value
        wraps `ConvBN` layers.
      filters: filter count given to the stack and used for the inner layers.
      filter_scale: filter scale given to the stack; residual inner layers
        use `filters // filter_scale` filters.
      count: number of inner layers to create.
      stack_type: None wraps nothing, 'model' wraps the inner layers in a
        `tf.keras.Sequential`, any other value passes them as a plain list.
      downsample: forwarded to the stack.

    Returns:
      The constructed `nn_blocks.CSPStack`.
    """
    if stack_type is None:
      layers = None
    else:
      if layer_type == 'residual':
        layers = [
            nn_blocks.DarkResidual(
                filters=filters // filter_scale, filter_scale=filter_scale)
            for _ in range(count)
        ]
      else:
        layers = [nn_blocks.ConvBN(filters=filters) for _ in range(count)]
      if stack_type == 'model':
        layers = tf.keras.Sequential(layers=layers)

    return nn_blocks.CSPStack(
        filters=filters,
        filter_scale=filter_scale,
        downsample=downsample,
        model_to_wrap=layers)

  @staticmethod
  def _expected_shape(width, height, filters, downsample):
    """Returns the expected per-example output shape as a list."""
    if downsample:
      return [width // 2, height // 2, filters]
    return [width, height, filters]

  @parameterized.named_parameters(*_STACK_CASES)
  def test_pass_through(self, width, height, filters, mod, layer_type,
                        stack_type, count, downsample):
    """Checks the static output shape of the stack."""
    x = tf.keras.Input(shape=(width, height, filters))
    test_layer = self.build_layer(layer_type, filters, mod, count, stack_type,
                                  downsample)
    outx = test_layer(x)
    expected = [None] + self._expected_shape(width, height, filters,
                                             downsample)
    self.assertAllEqual(outx.shape.as_list(), expected)

  @parameterized.named_parameters(*_STACK_CASES)
  def test_gradient_pass_though(self, width, height, filters, mod, layer_type,
                                stack_type, count, downsample):
    """Checks that every trainable variable receives a gradient."""
    loss = tf.keras.losses.MeanSquaredError()
    optimizer = tf.keras.optimizers.SGD()

    init = tf.random_normal_initializer()
    x = tf.Variable(
        initial_value=init(
            shape=(1, width, height, filters), dtype=tf.float32))
    target_shape = self._expected_shape(width, height, filters, downsample)
    y = tf.Variable(
        initial_value=init(shape=(1, *target_shape), dtype=tf.float32))

    test_layer = self.build_layer(layer_type, filters, mod, count, stack_type,
                                  downsample)

    with tf.GradientTape() as tape:
      x_hat = test_layer(x)
      grad_loss = loss(x_hat, y)
    grad = tape.gradient(grad_loss, test_layer.trainable_variables)

    # Validate the gradients before handing them to the optimizer so a missing
    # gradient is reported by the assertion rather than by the update step.
    self.assertNotIn(None, grad)
    optimizer.apply_gradients(zip(grad, test_layer.trainable_variables))
class
ConvBNTest
(
tf
.
test
.
TestCase
,
parameterized
.
TestCase
):
@
parameterized
.
named_parameters
(
...
...
official/vision/beta/projects/yolo/modeling/yolo_model.py
View file @
6a4c0889
...
...
@@ -17,7 +17,7 @@
import
tensorflow
as
tf
#
S
tatic base Yolo Models that do not require configuration
#
s
tatic base Yolo Models that do not require configuration
# similar to a backbone model id.
# this is done to greatly simplify the model config
...
...
@@ -85,26 +85,27 @@ class Yolo(tf.keras.Model):
"""Detection initialization function.
Args:
backbone: `tf.keras.Model`
,
a backbone network.
decoder: `tf.keras.Model`
,
a decoder network.
head: `
Yolo
Head`, the
YOLO
head.
detection_generator:
`tf.keras.Model`,
the detection generator.
backbone: `tf.keras.Model` a backbone network.
decoder: `tf.keras.Model` a decoder network.
head: `
RetinaNet
Head`, the
RetinaNet
head.
detection_generator: the detection generator.
**kwargs: keyword arguments to be passed.
"""
super
().
__init__
(
**
kwargs
)
super
(
Yolo
,
self
).
__init__
(
**
kwargs
)
self
.
_config_dict
=
{
"backbone"
:
backbone
,
"decoder"
:
decoder
,
"head"
:
head
,
"
detection_generato
r"
:
detection_generator
"
filte
r"
:
detection_generator
}
# model components
self
.
_backbone
=
backbone
self
.
_decoder
=
decoder
self
.
_head
=
head
self
.
_detection_generator
=
detection_generator
self
.
_filter
=
detection_generator
return
def
call
(
self
,
inputs
,
training
=
False
):
maps
=
self
.
_backbone
(
inputs
)
...
...
@@ -114,7 +115,7 @@ class Yolo(tf.keras.Model):
return
{
"raw_output"
:
raw_predictions
}
else
:
# Post-processing.
predictions
=
self
.
_
detection_generato
r
(
raw_predictions
)
predictions
=
self
.
_
filte
r
(
raw_predictions
)
predictions
.
update
({
"raw_output"
:
raw_predictions
})
return
predictions
...
...
@@ -131,8 +132,8 @@ class Yolo(tf.keras.Model):
return
self
.
_head
@
property
def
detection_generato
r
(
self
):
return
self
.
_
detection_generato
r
def
filte
r
(
self
):
return
self
.
_
filte
r
def get_config(self):
  """Returns the configuration dictionary stored on this instance."""
  stored_config = self._config_dict
  return stored_config
...
...
@@ -140,3 +141,29 @@ class Yolo(tf.keras.Model):
@classmethod
def from_config(cls, config):
  """Creates an instance by expanding `config` into keyword arguments.

  Args:
    config: mapping of constructor keyword names to values.

  Returns:
    The result of calling `cls` with those keyword arguments.
  """
  instance = cls(**config)
  return instance
def get_weight_groups(self, train_vars):
  """Partitions trainable variables into weight, bias and other groups.

  Grouping is decided by substring matches on each variable's `name`, checked
  in this order: "bias" or "beta" -> bias group; "kernel" or "weight" ->
  weights group; anything else -> other group. Input order is preserved within
  each group.

  Args:
    train_vars: an iterable of objects exposing a `name` string, e.g. a list
      of tf.Variables.

  Returns:
    weights: a list of the variables classified as weights.
    bias: a list of the variables classified as bias.
    other: a list of all remaining variables.
  """
  weights, bias, other = [], [], []
  for variable in train_vars:
    var_name = variable.name
    # Bias-like names take precedence over kernel/weight names.
    if "bias" in var_name or "beta" in var_name:
      bucket = bias
    elif "kernel" in var_name or "weight" in var_name:
      bucket = weights
    else:
      bucket = other
    bucket.append(variable)
  return weights, bias, other
official/vision/beta/projects/yolo/ops/box_ops.py
View file @
6a4c0889
...
...
@@ -38,51 +38,26 @@ def yxyx_to_xcycwh(box: tf.Tensor):
return
box
@tf.custom_gradient
def _xcycwh_to_yxyx(box: tf.Tensor, scale):
  """Converts center-format boxes to corner format with a custom gradient.

  The forward pass maps (x_center, y_center, width, height) on the last axis
  to (y_min, x_min, y_max, x_max). The backward pass sums the corner gradients
  for the center terms and divides the corner-gradient differences by `scale`
  for the width and height terms.

  Args:
    box: a `Tensor` whose last dimension holds x_center, y_center, width and
      height.
    scale: divisor applied to the width/height gradients. It receives a
      gradient of 0.0.

  Returns:
    The converted boxes and the gradient function.
  """
  with tf.name_scope('xcycwh_to_yxyx'):
    centers, sizes = tf.split(box, 2, axis=-1)
    mins = centers - sizes / 2
    maxes = centers + sizes / 2
    left, top = tf.split(mins, 2, axis=-1)
    right, bottom = tf.split(maxes, 2, axis=-1)
    box = tf.concat([top, left, bottom, right], axis=-1)

  def backward(upstream):
    # Incoming order matches the output: top, left, bottom, right.
    g_top, g_left, g_bottom, g_right = tf.split(upstream, 4, axis=-1)
    g_x = g_left + g_right
    g_y = g_top + g_bottom
    g_w = (g_right - g_left) / scale
    g_h = (g_bottom - g_top) / scale
    g_box = tf.concat([g_x, g_y, g_w, g_h], axis=-1)
    # Second entry is the gradient with respect to `scale`.
    return g_box, 0.0

  return box, backward
def
xcycwh_to_yxyx
(
box
:
tf
.
Tensor
,
darknet
=
False
):
def
xcycwh_to_yxyx
(
box
:
tf
.
Tensor
):
"""Converts boxes from x_center, y_center, width, height to yxyx format.
Args:
box: any `Tensor` whose last dimension is 4 representing the coordinates of
boxes in x_center, y_center, width, height.
darknet: `bool`, if True a scale of 1.0 is used.
Returns:
box: a `Tensor` whose shape is the same as `box` in new format.
"""
if
darknet
:
scale
=
1.0
else
:
scale
=
2.0
box
=
_xcycwh_to_yxyx
(
box
,
scale
)
with
tf
.
name_scope
(
'xcycwh_to_yxyx'
):
xy
,
wh
=
tf
.
split
(
box
,
2
,
axis
=-
1
)
xy_min
=
xy
-
wh
/
2
xy_max
=
xy
+
wh
/
2
x_min
,
y_min
=
tf
.
split
(
xy_min
,
2
,
axis
=-
1
)
x_max
,
y_max
=
tf
.
split
(
xy_max
,
2
,
axis
=-
1
)
box
=
tf
.
concat
([
y_min
,
x_min
,
y_max
,
x_max
],
axis
=-
1
)
return
box
# IOU
def
intersect_and_union
(
box1
,
box2
,
yxyx
=
False
):
"""Calculates the intersection and union between box1 and box2.
...
...
@@ -98,8 +73,9 @@ def intersect_and_union(box1, box2, yxyx=False):
intersection: a `Tensor` who represents the intersection.
union: a `Tensor` who represents the union.
"""
if
not
yxyx
:
box1_area
=
tf
.
reduce_prod
(
tf
.
split
(
box1
,
2
,
axis
=-
1
)[
-
1
],
axis
=-
1
)
box2_area
=
tf
.
reduce_prod
(
tf
.
split
(
box2
,
2
,
axis
=-
1
)[
-
1
],
axis
=-
1
)
box1
=
xcycwh_to_yxyx
(
box1
)
box2
=
xcycwh_to_yxyx
(
box2
)
...
...
@@ -110,13 +86,14 @@ def intersect_and_union(box1, box2, yxyx=False):
intersect_wh
=
tf
.
math
.
maximum
(
intersect_maxes
-
intersect_mins
,
0.0
)
intersection
=
tf
.
reduce_prod
(
intersect_wh
,
axis
=-
1
)
box1_area
=
tf
.
reduce_prod
(
b1ma
-
b1mi
,
axis
=-
1
)
box2_area
=
tf
.
reduce_prod
(
b2ma
-
b2mi
,
axis
=-
1
)
if
yxyx
:
box1_area
=
tf
.
reduce_prod
(
b1ma
-
b1mi
,
axis
=-
1
)
box2_area
=
tf
.
reduce_prod
(
b2ma
-
b2mi
,
axis
=-
1
)
union
=
box1_area
+
box2_area
-
intersection
return
intersection
,
union
def
smallest_encompassing_box
(
box1
,
box2
,
yxyx
=
False
):
def
smallest_encompassing_box
(
box1
,
box2
,
yxyx
=
False
,
clip
=
False
):
"""Calculates the smallest box that encompasses box1 and box2.
Args:
...
...
@@ -126,6 +103,7 @@ def smallest_encompassing_box(box1, box2, yxyx=False):
boxes.
yxyx: a `bool` indicating whether the input box is of the format x_center
y_center, width, height or y_min, x_min, y_max, x_max.
clip: a `bool`, whether or not to clip boxes.
Returns:
box_c: a `Tensor` whose last dimension is 4 representing the coordinates of
...
...
@@ -141,15 +119,15 @@ def smallest_encompassing_box(box1, box2, yxyx=False):
bcmi
=
tf
.
math
.
minimum
(
b1mi
,
b2mi
)
bcma
=
tf
.
math
.
maximum
(
b1ma
,
b2ma
)
bca
=
tf
.
reduce_prod
(
bcma
-
bcmi
,
keepdims
=
True
,
axis
=-
1
)
box_c
=
tf
.
concat
([
bcmi
,
bcma
],
axis
=-
1
)
if
not
yxyx
:
box_c
=
yxyx_to_xcycwh
(
box_c
)
box_c
=
tf
.
where
(
bca
==
0.0
,
tf
.
zeros_like
(
box_c
),
box_c
)
return
box_c
if
clip
:
bca
=
tf
.
reduce_prod
(
bcma
-
bcmi
,
keepdims
=
True
,
axis
=-
1
)
box_c
=
tf
.
where
(
bca
<=
0.0
,
tf
.
zeros_like
(
box_c
),
box_c
)
return
bcmi
,
bcma
,
box_c
def
compute_iou
(
box1
,
box2
,
yxyx
=
False
):
...
...
@@ -166,15 +144,13 @@ def compute_iou(box1, box2, yxyx=False):
Returns:
iou: a `Tensor` who represents the intersection over union.
"""
# get box corners
with
tf
.
name_scope
(
'iou'
):
intersection
,
union
=
intersect_and_union
(
box1
,
box2
,
yxyx
=
yxyx
)
iou
=
math_ops
.
divide_no_nan
(
intersection
,
union
)
iou
=
math_ops
.
rm_nan_inf
(
iou
,
val
=
0.0
)
return
iou
def
compute_giou
(
box1
,
box2
,
yxyx
=
False
,
darknet
=
False
):
def
compute_giou
(
box1
,
box2
,
yxyx
=
False
):
"""Calculates the General intersection over union between box1 and box2.
Args:
...
...
@@ -184,38 +160,30 @@ def compute_giou(box1, box2, yxyx=False, darknet=False):
boxes.
yxyx: a `bool` indicating whether the input box is of the format x_center
y_center, width, height or y_min, x_min, y_max, x_max.
darknet: a `bool` indicating whether the calling function is the YOLO
darknet loss.
Returns:
giou: a `Tensor` who represents the General intersection over union.
"""
with
tf
.
name_scope
(
'giou'
):
# get IOU
if
not
yxyx
:
box1
=
xcycwh_to_yxyx
(
box1
,
darknet
=
darknet
)
box2
=
xcycwh_to_yxyx
(
box2
,
darknet
=
darknet
)
yxyx
=
True
yxyx1
=
xcycwh_to_yxyx
(
box1
)
yxyx2
=
xcycwh_to_yxyx
(
box2
)
else
:
yxyx1
,
yxyx2
=
box1
,
box2
intersection
,
union
=
intersect_and_union
(
box1
,
box2
,
yxyx
=
yxyx
)
cmi
,
cma
,
_
=
smallest_encompassing_box
(
yxyx1
,
yxyx2
,
yxyx
=
True
)
intersection
,
union
=
intersect_and_union
(
yxyx1
,
yxyx2
,
yxyx
=
True
)
iou
=
math_ops
.
divide_no_nan
(
intersection
,
union
)
iou
=
math_ops
.
rm_nan_inf
(
iou
,
val
=
0.0
)
# find the smallest box to encompase both box1 and box2
boxc
=
smallest_encompassing_box
(
box1
,
box2
,
yxyx
=
yxyx
)
if
yxyx
:
boxc
=
yxyx_to_xcycwh
(
boxc
)
_
,
cwch
=
tf
.
split
(
boxc
,
2
,
axis
=-
1
)
c
=
tf
.
math
.
reduce_prod
(
cwch
,
axis
=-
1
)
bcwh
=
cma
-
cmi
c
=
tf
.
math
.
reduce_prod
(
bcwh
,
axis
=-
1
)
# compute giou
regularization
=
math_ops
.
divide_no_nan
((
c
-
union
),
c
)
giou
=
iou
-
regularization
giou
=
tf
.
clip_by_value
(
giou
,
clip_value_min
=-
1.0
,
clip_value_max
=
1.0
)
return
iou
,
giou
def
compute_diou
(
box1
,
box2
,
beta
=
1.0
,
yxyx
=
False
,
darknet
=
False
):
def
compute_diou
(
box1
,
box2
,
beta
=
1.0
,
yxyx
=
False
):
"""Calculates the distance intersection over union between box1 and box2.
Args:
...
...
@@ -227,8 +195,6 @@ def compute_diou(box1, box2, beta=1.0, yxyx=False, darknet=False):
regularization term.
yxyx: a `bool` indicating whether the input box is of the format x_center
y_center, width, height or y_min, x_min, y_max, x_max.
darknet: a `bool` indicating whether the calling function is the YOLO
darknet loss.
Returns:
diou: a `Tensor` who represents the distance intersection over union.
...
...
@@ -236,30 +202,27 @@ def compute_diou(box1, box2, beta=1.0, yxyx=False, darknet=False):
with
tf
.
name_scope
(
'diou'
):
# compute center distance
if
not
yxyx
:
box1
=
xcycwh_to_yxyx
(
box1
,
darknet
=
darknet
)
box2
=
xcycwh_to_yxyx
(
box2
,
darknet
=
darknet
)
yxyx
=
True
intersection
,
union
=
intersect_and_union
(
box1
,
box2
,
yxyx
=
yxyx
)
boxc
=
smallest_encompassing_box
(
box1
,
box2
,
yxyx
=
yxyx
)
xycc1
,
xycc2
=
box1
,
box2
yxyx1
=
xcycwh_to_yxyx
(
box1
)
yxyx2
=
xcycwh_to_yxyx
(
box2
)
else
:
yxyx1
,
yxyx2
=
box1
,
box2
xycc1
=
yxyx_to_xcycwh
(
box1
)
xycc2
=
yxyx_to_xcycwh
(
box2
)
cmi
,
cma
,
_
=
smallest_encompassing_box
(
yxyx1
,
yxyx2
,
yxyx
=
True
)
intersection
,
union
=
intersect_and_union
(
yxyx1
,
yxyx2
,
yxyx
=
True
)
iou
=
math_ops
.
divide_no_nan
(
intersection
,
union
)
iou
=
math_ops
.
rm_nan_inf
(
iou
,
val
=
0.0
)
if
yxyx
:
boxc
=
yxyx_to_xcycwh
(
boxc
)
box1
=
yxyx_to_xcycwh
(
box1
)
box2
=
yxyx_to_xcycwh
(
box2
)
b1xy
,
_
=
tf
.
split
(
box
1
,
2
,
axis
=-
1
)
b2xy
,
_
=
tf
.
split
(
box
2
,
2
,
axis
=-
1
)
_
,
bcwh
=
tf
.
split
(
boxc
,
2
,
axis
=-
1
)
b1xy
,
_
=
tf
.
split
(
xycc
1
,
2
,
axis
=-
1
)
b2xy
,
_
=
tf
.
split
(
xycc
2
,
2
,
axis
=-
1
)
bcwh
=
cma
-
cmi
center_dist
=
tf
.
reduce_sum
((
b1xy
-
b2xy
)
**
2
,
axis
=-
1
)
c_diag
=
tf
.
reduce_sum
(
bcwh
**
2
,
axis
=-
1
)
regularization
=
math_ops
.
divide_no_nan
(
center_dist
,
c_diag
)
diou
=
iou
-
regularization
**
beta
diou
=
tf
.
clip_by_value
(
diou
,
clip_value_min
=-
1.0
,
clip_value_max
=
1.0
)
return
iou
,
diou
...
...
@@ -280,33 +243,48 @@ def compute_ciou(box1, box2, yxyx=False, darknet=False):
ciou: a `Tensor` who represents the complete intersection over union.
"""
with
tf
.
name_scope
(
'ciou'
):
# compute DIOU and IOU
iou
,
diou
=
compute_diou
(
box1
,
box2
,
yxyx
=
yxyx
,
darknet
=
darknet
)
if
yxyx
:
box1
=
yxyx_to_xcycwh
(
box1
)
box2
=
yxyx_to_xcycwh
(
box2
)
_
,
_
,
b1w
,
b1h
=
tf
.
split
(
box1
,
4
,
axis
=-
1
)
_
,
_
,
b2w
,
b2h
=
tf
.
split
(
box1
,
4
,
axis
=-
1
)
# computer aspect ratio consistency
terma
=
tf
.
cast
(
math_ops
.
divide_no_nan
(
b1w
,
b1h
),
tf
.
float32
)
termb
=
tf
.
cast
(
math_ops
.
divide_no_nan
(
b2w
,
b2h
),
tf
.
float32
)
arcterm
=
tf
.
square
(
tf
.
math
.
atan
(
terma
)
-
tf
.
math
.
atan
(
termb
))
v
=
tf
.
squeeze
(
4
*
arcterm
/
(
math
.
pi
**
2
),
axis
=-
1
)
v
=
tf
.
cast
(
v
,
b1w
.
dtype
)
a
=
tf
.
stop_gradient
(
math_ops
.
divide_no_nan
(
v
,
((
1
-
iou
)
+
v
)))
ciou
=
diou
-
(
v
*
a
)
ciou
=
tf
.
clip_by_value
(
ciou
,
clip_value_min
=-
1.0
,
clip_value_max
=
1.0
)
if
not
yxyx
:
xycc1
,
xycc2
=
box1
,
box2
yxyx1
=
xcycwh_to_yxyx
(
box1
)
yxyx2
=
xcycwh_to_yxyx
(
box2
)
else
:
yxyx1
,
yxyx2
=
box1
,
box2
xycc1
=
yxyx_to_xcycwh
(
box1
)
xycc2
=
yxyx_to_xcycwh
(
box2
)
# Build the smallest encompassing box.
cmi
,
cma
,
_
=
smallest_encompassing_box
(
yxyx1
,
yxyx2
,
yxyx
=
True
)
intersection
,
union
=
intersect_and_union
(
yxyx1
,
yxyx2
,
yxyx
=
True
)
iou
=
math_ops
.
divide_no_nan
(
intersection
,
union
)
b1xy
,
b1w
,
b1h
=
tf
.
split
(
xycc1
,
[
2
,
1
,
1
],
axis
=-
1
)
b2xy
,
b2w
,
b2h
=
tf
.
split
(
xycc2
,
[
2
,
1
,
1
],
axis
=-
1
)
bchw
=
cma
-
cmi
# Center regularization
center_dist
=
tf
.
reduce_sum
((
b1xy
-
b2xy
)
**
2
,
axis
=-
1
)
c_diag
=
tf
.
reduce_sum
(
bchw
**
2
,
axis
=-
1
)
regularization
=
math_ops
.
divide_no_nan
(
center_dist
,
c_diag
)
# Compute aspect ratio consistency
terma
=
math_ops
.
divide_no_nan
(
b1w
,
b1h
)
# gt
termb
=
math_ops
.
divide_no_nan
(
b2w
,
b2h
)
# pred
arcterm
=
tf
.
squeeze
(
tf
.
math
.
pow
(
tf
.
math
.
atan
(
termb
)
-
tf
.
math
.
atan
(
terma
),
2
),
axis
=-
1
)
v
=
(
4
/
math
.
pi
**
2
)
*
arcterm
# Compute the aspect ratio weight, should be treated as a constant
a
=
tf
.
stop_gradient
(
math_ops
.
divide_no_nan
(
v
,
1
-
iou
+
v
))
if
darknet
:
grad_scale
=
tf
.
stop_gradient
(
tf
.
square
(
b2w
)
+
tf
.
square
(
b2h
))
v
*=
tf
.
squeeze
(
grad_scale
,
axis
=-
1
)
ciou
=
iou
-
regularization
-
(
v
*
a
)
return
iou
,
ciou
def
aggregated_comparitive_iou
(
boxes1
,
boxes2
=
None
,
iou_type
=
0
,
beta
=
0.6
):
def
aggregated_comparitive_iou
(
boxes1
,
boxes2
=
None
,
iou_type
=
0
,
beta
=
0.6
):
"""Calculates the IOU between two set of boxes.
Similar to bbox_overlap but far more versitile.
...
...
@@ -333,11 +311,11 @@ def aggregated_comparitive_iou(boxes1,
else
:
boxes2
=
tf
.
transpose
(
boxes1
,
perm
=
(
0
,
2
,
1
,
3
))
if
iou_type
==
0
:
# diou
if
iou_type
==
0
or
iou_type
==
'diou'
:
# diou
_
,
iou
=
compute_diou
(
boxes1
,
boxes2
,
beta
=
beta
,
yxyx
=
True
)
elif
iou_type
==
1
:
# giou
elif
iou_type
==
1
or
iou_type
==
'giou'
:
# giou
_
,
iou
=
compute_giou
(
boxes1
,
boxes2
,
yxyx
=
True
)
elif
iou_type
==
2
:
# ciou
elif
iou_type
==
2
or
iou_type
==
'ciou'
:
# ciou
_
,
iou
=
compute_ciou
(
boxes1
,
boxes2
,
yxyx
=
True
)
else
:
iou
=
compute_iou
(
boxes1
,
boxes2
,
yxyx
=
True
)
...
...
official/vision/beta/projects/yolo/ops/loss_utils.py
0 → 100755
View file @
6a4c0889
This diff is collapsed.
Click to expand it.
official/vision/beta/projects/yolo/ops/math_ops.py
View file @
6a4c0889
...
...
@@ -58,25 +58,4 @@ def divide_no_nan(a, b):
Returns:
a `Tensor` representing a divided by b, with all nan values removed.
"""
zero
=
tf
.
cast
(
0.0
,
b
.
dtype
)
return
tf
.
where
(
b
==
zero
,
zero
,
a
/
b
)
def mul_no_nan(x, y):
  """Multiplies x by y, forcing the result to zero wherever x is zero.

  Built to allow model compilation in tflite and to let one tensor mask
  another. Because 0 * nan = nan, a plain product would leak nan values from
  y into positions where x is zero; here those positions are replaced with a
  zero of x's dtype instead.

  Args:
    x: any `Tensor` of any type; acts as the mask.
    y: any `Tensor` of any type with the same shape as tensor x.

  Returns:
    a `Tensor` representing x times y, where x is used to safely mask the
    tensor y.
  """
  masked_value = tf.cast(0, x.dtype)
  product = x * y
  return tf.where(x == 0, masked_value, product)
return
a
/
(
b
+
1e-9
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment