ModelZoo / ResNet50_tensorflow / Commits

Commit 9474c108, authored Sep 23, 2021 by Vishnu Banna

    comments addressed

Parent: bcd5283d
Showing 13 changed files with 331 additions and 278 deletions (+331 / −278):
    official/vision/beta/projects/yolo/configs/backbones.py                         +2    −2
    official/vision/beta/projects/yolo/dataloaders/tf_example_decoder.py            +11   −1
    official/vision/beta/projects/yolo/dataloaders/yolo_input.py                    +44   −86
    official/vision/beta/projects/yolo/losses/yolo_loss.py                          +23   −34
    official/vision/beta/projects/yolo/losses/yolo_loss_test.py                     +3    −5
    official/vision/beta/projects/yolo/modeling/layers/detection_generator.py       +8    −21
    official/vision/beta/projects/yolo/modeling/layers/detection_generator_test.py  +8    −8
    official/vision/beta/projects/yolo/modeling/layers/nn_blocks.py                 +7    −1
    official/vision/beta/projects/yolo/modeling/yolo_model.py                       +0    −1
    official/vision/beta/projects/yolo/ops/anchor.py                                +176  −85
    official/vision/beta/projects/yolo/ops/loss_utils.py                            +7    −26
    official/vision/beta/projects/yolo/ops/mosaic.py                                +1    −2
    official/vision/beta/projects/yolo/ops/preprocessing_ops.py                     +41   −6
official/vision/beta/projects/yolo/configs/backbones.py

```diff
@@ -22,8 +22,8 @@ from official.vision.beta.configs import backbones
 class Darknet(hyperparams.Config):
   """DarkNet config."""
   model_id: str = 'cspdarknet53'
-  width_scale: int = 1.0
-  depth_scale: int = 1.0
+  width_scale: float = 1.0
+  depth_scale: float = 1.0
   dilate: bool = False
   min_level: int = 3
   max_level: int = 5
```
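The scale fields default to 1.0 and exist precisely to take fractional values, so the `int` annotations were wrong. A minimal sketch of why these must be floats; the helper functions below are illustrative, not from this commit:

```python
def scale_filters(filters: int, width_scale: float) -> int:
  """Applies a fractional width multiplier, e.g. 0.75 * 64 -> 48 filters."""
  return max(1, int(round(filters * width_scale)))

def scale_depth(num_blocks: int, depth_scale: float) -> int:
  """Applies a fractional depth multiplier to a stage's block count."""
  return max(1, int(round(num_blocks * depth_scale)))
```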
official/vision/beta/projects/yolo/dataloaders/tf_example_decoder.py

```diff
@@ -59,10 +59,20 @@ class TfExampleDecoder(tf_example_decoder.TfExampleDecoder):
   """Tensorflow Example proto decoder."""

   def __init__(self,
-               coco91_to_80,
+               coco91_to_80=None,
                include_mask=False,
                regenerate_source_id=False,
                mask_binarize_threshold=None):
+    """Initialize the example decoder.
+
+    Args:
+      coco91_to_80: `bool` indicating whether to convert coco from its 91
+        class format to the 80 class format.
+      include_mask: `bool` indicating if the decoder should also decode
+        instance masks for instance segmentation.
+      regenerate_source_id: `bool` indicating if the source id needs to be
+        recreated for each image sample.
+    """
+    if coco91_to_80 and include_mask:
+      raise ValueError("If masks are included you cannot \
+          convert coco from the 91 class format \
```
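The class-map conversion drops and remaps category ids, so it cannot be combined with instance masks; the new guard makes that failure explicit at construction time. A minimal usage sketch, assuming the decoder is imported from this module:

```python
# 91-to-80 class remapping with boxes only: allowed.
decoder = TfExampleDecoder(coco91_to_80=True, include_mask=False)

# Masks cannot be remapped, so this combination now fails fast.
try:
  decoder = TfExampleDecoder(coco91_to_80=True, include_mask=True)
except ValueError as e:
  print(e)
```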
official/vision/beta/projects/yolo/dataloaders/yolo_input.py

```diff
-""" Detection Data parser and processing for YOLO.
-Parse image and ground truths in a dataset to training targets and package them
-into (image, labels) tuple for RetinaNet.
-"""
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Detection Data parser and processing for YOLO."""
 import tensorflow as tf
-import numpy as np
 from official.vision.beta.projects.yolo.ops import preprocessing_ops
```
```diff
@@ -19,7 +30,7 @@ class Parser(parser.Parser):
       output_size,
       anchors,
       expanded_strides,
-      level_limit=None,
+      level_limits=None,
       max_num_instances=200,
       area_thresh=0.1,
       aug_rand_hue=1.0,
@@ -48,11 +59,13 @@ class Parser(parser.Parser):
         output_size should be divided by the largest feature stride 2^max_level.
       anchors: `Dict[List[Union[int, float]]]` values for each anchor box.
       expanded_strides: `Dict[int]` for how much the model scales down the
-        images at the largest level.
-      level_limit: `List` the box sizes that will be allowed at each FPN
+        images at the largest level. For example, level 3 down samples the
+        image by a factor of 16, in the expanded strides dictionary, we will
+        pass along {3: 16} indicating that relative to the original image, the
+        shapes must be reduced by a factor of 16 to compute the loss.
+      level_limits: `List` the box sizes that will be allowed at each FPN
         level as is done in the FCOS and YOLOX paper for anchor free box
-        assignment. Anchor free will perform worse than Anchor based, but only
-        slightly.
+        assignment.
       max_num_instances: `int` for the number of boxes to compute loss on.
       area_thresh: `float` for the minimum area of a box to allow to pass
         through for optimization.
```
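Each entry of `expanded_strides` is the total downsampling factor of a level, so every loss grid is the input resolution divided by that stride. A small sketch of the relationship, using the docstring's own `{3: 16}` example (actual configs may differ):

```python
output_size = [512, 512]
expanded_strides = {'3': 16, '4': 32, '5': 64}

# Each level's loss grid is the image shape reduced by that level's stride.
grid_shapes = {
    key: (output_size[0] // stride, output_size[1] // stride)
    for key, stride in expanded_strides.items()
}
# {'3': (32, 32), '4': (16, 16), '5': (8, 8)}
```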
```diff
@@ -108,20 +121,9 @@ class Parser(parser.Parser):
       assert output_size[1] % expanded_strides[str(key)] == 0
       assert output_size[0] % expanded_strides[str(key)] == 0

-    # scale of each FPN level
-    self._strides = expanded_strides
-
     # Set the width and height properly and base init:
     self._image_w = output_size[1]
     self._image_h = output_size[0]

-    # Set the anchor boxes for each scale
-    self._anchors = anchors
-    self._level_limit = level_limit
-
-    # anchor labeling parameters
-    self._use_tie_breaker = use_tie_breaker
-    self._best_match_only = best_match_only
     self._max_num_instances = max_num_instances

     # Image scaling params
```
```diff
@@ -143,33 +145,23 @@ class Parser(parser.Parser):
     self._aug_rand_hue = aug_rand_hue

     # Set the per level values needed for operation
     self._scale_xy = scale_xy
     self._anchor_t = anchor_t
     self._darknet = darknet
     self._area_thresh = area_thresh

-    keys = list(self._anchors.keys())
-    if self._level_limit is not None:
-      maxim = 2000
-      self._scale_up = {key: maxim // self._max_num_instances for key in keys}
-      self._anchor_t = -0.01
-    elif not self._darknet:
-      self._scale_up = {key: 6 - i for i, key in enumerate(keys)}
-    else:
-      self._scale_up = {key: 1 for key in keys}

     self._seed = seed

     # Set the data type based on input string
     self._dtype = dtype

-    self._label_builder = anchor.YoloAnchorLabeler(
-        anchors=self._anchors,
-        match_threshold=self._anchor_t,
-        best_matches_only=self._best_match_only,
-        use_tie_breaker=self._use_tie_breaker)
+    self._label_builder = anchor.YoloAnchorLabeler(
+        anchors=anchors,
+        anchor_free_level_limits=level_limits,
+        level_strides=expanded_strides,
+        center_radius=scale_xy,
+        max_num_instances=max_num_instances,
+        match_threshold=anchor_t,
+        best_matches_only=best_match_only,
+        use_tie_breaker=use_tie_breaker,
+        darknet=darknet,
+        dtype=dtype)

   def _pad_infos_object(self, image):
     """Get a Tensor to pad the info object list."""
```
```diff
@@ -307,57 +299,22 @@ class Parser(parser.Parser):
         is_training=False)
     return image, labels

-  def set_shape(self, values, pad_axis=0, pad_value=0, inds=None, scale=1):
+  def set_shape(self, values, pad_axis=0, pad_value=0, inds=None):
     """Calls set shape for all input objects."""
     if inds is not None:
       values = tf.gather(values, inds)
     vshape = values.get_shape().as_list()

     if pad_value is not None:
       values = preprocessing_ops.pad_max_instances(
           values, self._max_num_instances, pad_axis=pad_axis,
           pad_value=pad_value)

-    vshape[pad_axis] = self._max_num_instances * scale
+    vshape[pad_axis] = self._max_num_instances
     values.set_shape(vshape)
     return values

-  def _build_grid(self, boxes, classes, width, height):
-    """Private function for building the full scale object and class grid."""
-    indexes = {}
-    updates = {}
-    true_grids = {}
-    if self._level_limit is not None:
-      self._level_limit = [0.0] + self._level_limit + [np.inf]
-
-    # for each prediction path generate a properly scaled output prediction map
-    for i, key in enumerate(self._anchors.keys()):
-      if self._level_limit is not None:
-        fpn_limits = self._level_limit[i:i + 2]
-      else:
-        fpn_limits = None
-
-      scale_xy = self._scale_xy[key] if not self._darknet else 1
-      indexes[key], updates[key], true_grids[key] = self._label_builder(
-          key, boxes, classes, self._anchors[key], width, height,
-          self._strides[str(key)], scale_xy,
-          self._max_num_instances * self._scale_up[key],
-          fpn_limits=fpn_limits)
-
-      # set/fix the shapes
-      indexes[key] = self.set_shape(indexes[key], -2, None, None,
-                                    self._scale_up[key])
-      updates[key] = self.set_shape(updates[key], -2, None, None,
-                                    self._scale_up[key])
-
-      # add all the values to the final dictionary
-      updates[key] = tf.cast(updates[key], dtype=self._dtype)
-    return indexes, updates, true_grids

   def _build_label(self,
                    image,
                    gt_boxes,
```
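`set_shape` both pads and pins a static shape so every example in a batch has identical dimensions, a hard requirement for TPU compilation. A minimal sketch of the pattern, assuming `max_num_instances=200` (manual padding stands in for `pad_max_instances`):

```python
import tensorflow as tf

max_num_instances = 200
boxes = tf.random.uniform([13, 4])  # 13 ground-truth boxes for this image
inds = tf.range(13)                 # optional re-ordering / filtering indices

values = tf.gather(boxes, inds)
# Pad axis 0 out to 200 rows of zeros, then pin the static shape.
pad = tf.zeros([max_num_instances - tf.shape(values)[0], 4], values.dtype)
values = tf.concat([values, pad], axis=0)
values.set_shape([max_num_instances, 4])  # static shape: [200, 4]
```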
```diff
@@ -376,16 +333,15 @@ class Parser(parser.Parser):
     image.set_shape(imshape)

     labels = dict()
-    labels['inds'], labels['upds'], labels['true_conf'] = self._build_grid(
-        gt_boxes, gt_classes, width, height)
+    (labels['inds'], labels['upds'],
+     labels['true_conf']) = self._label_builder(gt_boxes, gt_classes, width,
+                                                height)

     # Set/fix the boxes shape.
     boxes = self.set_shape(gt_boxes, pad_axis=0, pad_value=0)
     classes = self.set_shape(gt_classes, pad_axis=0, pad_value=-1)
-    area = self.set_shape(
-        data['groundtruth_area'], pad_axis=0, pad_value=0, inds=inds)
-    is_crowd = self.set_shape(
-        data['groundtruth_is_crowd'], pad_axis=0, pad_value=0, inds=inds)

     # Build the dictionary set.
     labels.update({
@@ -396,6 +352,7 @@ class Parser(parser.Parser):
     # Update the labels dictionary.
     if not is_training:
+      # Sets up groundtruth data for evaluation.
       groundtruths = {
           'source_id': labels['source_id'],
```
```diff
@@ -405,8 +362,9 @@ class Parser(parser.Parser):
           'image_info': info,
           'boxes': gt_boxes,
           'classes': gt_classes,
-          'areas': area,
-          'is_crowds': tf.cast(is_crowd, tf.int32),
+          'areas': tf.gather(data['groundtruth_area'], inds),
+          'is_crowds':
+              tf.cast(tf.gather(data['groundtruth_is_crowd'], inds), tf.int32),
       }
       groundtruths['source_id'] = utils.process_source_id(
           groundtruths['source_id'])
```
official/vision/beta/projects/yolo/losses/yolo_loss.py

```diff
@@ -14,13 +14,12 @@
 """Yolo Loss function."""
 import abc
+import collections
 import functools
-import collections

 import tensorflow as tf

+from official.vision.beta.projects.yolo.ops import box_ops
 from official.vision.beta.projects.yolo.ops import loss_utils
-from official.vision.beta.projects.yolo.ops import box_ops
 from official.vision.beta.projects.yolo.ops import math_ops
```
```diff
@@ -33,7 +32,6 @@ class YoloLossBase(object, metaclass=abc.ABCMeta):
   def __init__(self,
                classes,
-               mask,
                anchors,
                path_stride=1,
                ignore_thresh=0.7,
@@ -52,8 +50,6 @@ class YoloLossBase(object, metaclass=abc.ABCMeta):
     Args:
       classes: `int` for the number of classes
-      mask: `List[int]` for the output level that this specific model output
-        level
       anchors: `List[List[int]]` for the anchor boxes that are used in the model
         at all levels. For anchor free prediction set the anchor list to be the
         same as the image resolution.
@@ -86,10 +82,9 @@ class YoloLossBase(object, metaclass=abc.ABCMeta):
     """
     self._loss_type = loss_type
     self._classes = classes
-    self._num = tf.cast(len(mask), dtype=tf.int32)
+    self._num = tf.cast(len(anchors), dtype=tf.int32)
     self._truth_thresh = truth_thresh
     self._ignore_thresh = ignore_thresh
-    self._masks = mask
     self._anchors = anchors
     self._iou_normalizer = iou_normalizer
```
```diff
@@ -112,7 +107,7 @@ class YoloLossBase(object, metaclass=abc.ABCMeta):
     self._decode_boxes = functools.partial(
         loss_utils.get_predicted_box, **box_kwargs)

-    self._search_pairs = None
+    self._search_pairs = lambda *args: (None, None, None, None)
     self._build_per_path_attributes()

   def box_loss(self, true_box, pred_box, darknet=False):
```
```diff
@@ -136,13 +131,18 @@ class YoloLossBase(object, metaclass=abc.ABCMeta):
                               scale=None):
     """Search of all groundtruths to associate groundtruths to predictions."""
-    if self._search_pairs is None:
-      return true_conf, tf.ones_like(true_conf)
-
     boxes = box_ops.yxyx_to_xcycwh(boxes)
+    if scale is not None:
+      boxes = boxes * tf.cast(tf.stop_gradient(scale), boxes.dtype)

     # Search all predictions against ground truths to find matching boxes for
     # each pixel.
-    _, _, iou_max, _ = self._search_pairs(
-        pred_boxes, pred_classes, boxes, classes, scale=scale, yxyx=True)
+    _, _, iou_max, _ = self._search_pairs(pred_boxes, pred_classes, boxes,
+                                          classes)
+    if iou_max is None:
+      return true_conf, tf.ones_like(true_conf)

     # Find the exact indexes to ignore and keep.
     ignore_mask = tf.cast(iou_max < self._ignore_thresh, pred_boxes.dtype)
```
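The ignore mask implements the classic YOLO rule: unmatched predictions that still overlap some ground truth above `ignore_thresh` are excluded from the objectness penalty rather than pushed toward zero. A minimal sketch of that masking, assuming `iou_max` holds the best ground-truth IoU per predicted cell:

```python
import tensorflow as tf

ignore_thresh = 0.7
iou_max = tf.constant([0.05, 0.45, 0.82])  # best IoU per prediction cell
true_conf = tf.constant([0.0, 0.0, 0.0])   # no assigned object in these cells

# 1.0 where a prediction is safe to penalize, 0.0 where it should be ignored.
ignore_mask = tf.cast(iou_max < ignore_thresh, tf.float32)  # [1., 1., 0.]
conf_weight = ignore_mask  # the 0.82-IoU cell contributes no objectness loss
```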
```diff
@@ -196,7 +196,7 @@ class YoloLossBase(object, metaclass=abc.ABCMeta):
       predictions.
     """
     (loss, box_loss, conf_loss, class_loss, mean_loss, iou, pred_conf,
-     ind_mask, grid_mask) = self._compute_loss(true_counts, inds, y_true,
-                                     boxes, classes, y_pred)
+     ind_mask, grid_mask) = self._compute_loss(true_counts, inds, y_true,
+                                               boxes, classes, y_pred)

     # Metric computing is done here to save time and resources.
```
```diff
@@ -219,7 +219,8 @@ class YoloLossBase(object, metaclass=abc.ABCMeta):
     """The actual logic to apply to the raw model for optimization."""
     ...

-  def post_path_aggregation(self, loss, ground_truths, predictions):  # pylint:disable=unused-argument
+  def post_path_aggregation(self, loss, box_loss, conf_loss, class_loss,
+                            ground_truths, predictions):  # pylint:disable=unused-argument
     """This method allows for post processing of a loss value.

     After the loss has been aggregated across all the FPN levels some post
```
```diff
@@ -277,7 +278,6 @@ class DarknetLoss(YoloLossBase):
       association.
     """
     self._anchor_generator = loss_utils.GridGenerator(
-        masks=self._masks,
         anchors=self._anchors, scale_anchors=self._path_stride)
```
```diff
@@ -428,14 +428,13 @@ class ScaledLoss(YoloLossBase):
       association.
     """
     self._anchor_generator = loss_utils.GridGenerator(
-        masks=self._masks,
         anchors=self._anchors, scale_anchors=self._path_stride)

     if self._ignore_thresh > 0.0:
       self._search_pairs = loss_utils.PairWiseSearch(
           iou_type=self._loss_type, any_match=False, min_conf=0.25)
     self._cls_normalizer = self._cls_normalizer * self._classes / 80
     return
```
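The class-loss normalizer is calibrated for COCO's 80 classes and rescaled linearly for other datasets, so smaller label spaces get a proportionally smaller class-loss weight. Worked out with illustrative numbers:

```python
cls_normalizer = 0.5                            # illustrative base value
classes = 20                                    # e.g. Pascal VOC
cls_normalizer = cls_normalizer * classes / 80  # 0.5 * 20 / 80 = 0.125
```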
```diff
@@ -550,7 +549,8 @@ class ScaledLoss(YoloLossBase):
     return (loss, box_loss, conf_loss, class_loss, mean_loss, iou, pred_conf,
             ind_mask, grid_mask)

-  def post_path_aggregation(self, loss, ground_truths, predictions):
+  def post_path_aggregation(self, loss, box_loss, conf_loss, class_loss,
+                            ground_truths, predictions):
     """This method allows for post processing of a loss value.

     By default the model will have about 3 FPN levels {3, 4, 5}, on
```
```diff
@@ -559,19 +559,12 @@ class ScaledLoss(YoloLossBase):
     magnitude as the model with 3 FPN levels. This helps to prevent gradient
     explosions.
-
-    Args:
-      loss: `tf.float` scalar for the actual loss.
-      ground_truths: `Dict` holding all the ground truth tensors.
-      predictions: `Dict` holding all the predicted values.
-
-    Returns:
-      loss: `tf.float` scalar for the scaled loss.
     """
     scale = tf.stop_gradient(3 / len(list(predictions.keys())))
     return loss * scale

   def cross_replica_aggregation(self, loss, num_replicas_in_sync):
-    """In the scaled loss, take the sum of the loss across replicas."""
+    """this method is not specific to each loss path, but each loss type"""
     return loss
```
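The scale keeps the total loss magnitude constant as FPN levels are added: with the default 3 levels the factor is 1.0, while a 5-level model is scaled by 3/5 so its summed loss matches the 3-level baseline. The arithmetic, assuming `predictions` is the per-level output dict:

```python
predictions = {str(level): None for level in range(3, 8)}  # 5 FPN levels (stub)
num_levels = len(predictions)   # 5
scale = 3 / num_levels          # 3 levels -> 1.0, 5 levels -> 0.6
# loss = loss * scale           # summed loss stays on the 3-level scale
```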
```diff
@@ -582,7 +575,6 @@ class YoloLoss:
       keys,
       classes,
       anchors,
-      masks=None,
       path_strides=None,
       truth_thresholds=None,
       ignore_thresholds=None,
@@ -606,8 +598,6 @@ class YoloLoss:
       anchors: `List[List[int]]` for the anchor boxes that are used in the model
         at all levels. For anchor free prediction set the anchor list to be the
         same as the image resolution.
-      masks: `List[int]` for the output level that this specific model output
-        level
       path_strides: `Dict[int]` for how much to scale this level to get the
         original input shape for each FPN path.
       truth_thresholds: `Dict[float]` for the IOU value over which the loss is
```
```diff
@@ -649,13 +639,12 @@ class YoloLoss:
       loss_type = 'scaled'
     else:
       loss_type = 'darknet'

     self._loss_dict = {}
     for key in keys:
       self._loss_dict[key] = losses[loss_type](
           classes=classes,
-          anchors=anchors,
-          mask=masks[key],
+          anchors=anchors[key],
           truth_thresh=truth_thresholds[key],
           ignore_thresh=ignore_thresholds[key],
           loss_type=loss_types[key],
```
```diff
@@ -691,7 +680,7 @@ class YoloLoss:
       # after computing the loss, scale loss as needed for aggregation
       # across FPN levels
-      loss = self._loss_dict[key].post_path_aggregation(loss, ground_truth,
-                                                        predictions)
+      loss = self._loss_dict[key].post_path_aggregation(
+          loss, loss_box, loss_conf, loss_class, ground_truth, predictions)

       # after completing the scaling of the loss on each replica, handle
       # scaling the loss for merging the loss across replicas
```
official/vision/beta/projects/yolo/losses/yolo_loss_test.py

```diff
@@ -42,10 +42,9 @@ class YoloDecoderTest(parameterized.TestCase, tf.test.TestCase):
         '5': [1, 13, 13, 255]
     }
     classes = 80
-    masks = {'3': [0, 1, 2], '4': [3, 4, 5], '5': [6, 7, 8]}
-    anchors = [[12.0, 19.0], [31.0, 46.0], [96.0, 54.0], [46.0, 114.0],
-               [133.0, 127.0], [79.0, 225.0], [301.0, 150.0], [172.0, 286.0],
-               [348.0, 340.0]]
+    anchors = {
+        '3': [[12.0, 19.0], [31.0, 46.0], [96.0, 54.0]],
+        '4': [[46.0, 114.0], [133.0, 127.0], [79.0, 225.0]],
+        '5': [[301.0, 150.0], [172.0, 286.0], [348.0, 340.0]]
+    }
     keys = ['3', '4', '5']
     path_strides = {key: 2**int(key) for key in keys}
@@ -53,7 +52,6 @@ class YoloDecoderTest(parameterized.TestCase, tf.test.TestCase):
         keys,
         classes,
         anchors,
-        masks=masks,
        path_strides=path_strides,
        truth_thresholds={key: 1.0 for key in keys},
        ignore_thresholds={key: 0.7 for key in keys},
```
official/vision/beta/projects/yolo/modeling/layers/detection_generator.py

```diff
@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 """Contains common building blocks for yolo layer (detection layer)."""
 import tensorflow as tf
@@ -26,7 +25,6 @@ class YoloLayer(tf.keras.Model):
   """Yolo layer (detection generator)."""

   def __init__(self,
-               masks,
                anchors,
                classes,
                iou_thresh=0.0,
@@ -52,8 +50,6 @@ class YoloLayer(tf.keras.Model):
     """Parameters for the loss functions used at each detection head output.

     Args:
-      masks: `List[int]` for the output level that this specific model output
-        level.
       anchors: `List[List[int]]` for the anchor boxes that are used in the
         model.
       classes: `int` for the number of classes.
@@ -107,7 +103,6 @@ class YoloLayer(tf.keras.Model):
       **kwargs: Additional keyword arguments.
     """
     super().__init__(**kwargs)
-    self._masks = masks
     self._anchors = anchors
     self._thresh = iou_thresh
     self._ignore_thresh = ignore_thresh
```
```diff
@@ -127,30 +122,24 @@ class YoloLayer(tf.keras.Model):
     self._pre_nms_points = pre_nms_points
     self._label_smoothing = label_smoothing
-    self._keys = list(masks.keys())
+    self._keys = list(anchors.keys())
     self._len_keys = len(self._keys)
     self._box_type = box_type
-    self._path_scale = path_scale or {
-        key: 2**int(key) for key, _ in masks.items()
-    }
+    self._path_scale = path_scale or {key: 2**int(key) for key in self._keys}
     self._nms_type = nms_type
-    self._scale_xy = scale_xy or {key: 1.0 for key, _ in masks.items()}
+    self._scale_xy = scale_xy or {key: 1.0 for key, _ in anchors.items()}

     self._generator = {}
     self._len_mask = {}
     for key in self._keys:
-      anchors = [self._anchors[mask] for mask in self._masks[key]]
-      self._generator[key] = self.get_generators(anchors,  # pylint: disable=assignment-from-none
-                                                 self._path_scale[key], key)
-      self._len_mask[key] = len(self._masks[key])
+      anchors = self._anchors[key]
+      self._generator[key] = loss_utils.GridGenerator(
+          anchors, scale_anchors=self._path_scale[key])
+      self._len_mask[key] = len(anchors)
     return

-  def get_generators(self, anchors, path_scale, path_key):
-    anchor_generator = loss_utils.GridGenerator(
-        anchors, scale_anchors=path_scale)
-    return anchor_generator
-
   def parse_prediction_path(self, key, inputs):
     shape_ = tf.shape(inputs)
     shape = inputs.get_shape().as_list()
```
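`path_scale` defaults to the FPN convention that level k is downsampled by 2^k, so the level keys alone determine the stride of each detection path:

```python
keys = ['3', '4', '5']
path_scale = {key: 2**int(key) for key in keys}
# {'3': 8, '4': 16, '5': 32}: a 416x416 input yields 52x52, 26x26 and 13x13
# detection grids at levels 3, 4 and 5 respectively.
```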
```diff
@@ -290,7 +279,6 @@ class YoloLayer(tf.keras.Model):
         keys=self._keys,
         classes=self._classes,
         anchors=self._anchors,
-        masks=self._masks,
         path_strides=self._path_scale,
         truth_thresholds=self._truth_thresh,
         ignore_thresholds=self._ignore_thresh,
@@ -309,7 +297,6 @@ class YoloLayer(tf.keras.Model):
   def get_config(self):
     return {
-        'masks': dict(self._masks),
         'anchors': [list(a) for a in self._anchors],
         'thresh': self._thresh,
         'max_boxes': self._max_boxes,
```
official/vision/beta/projects/yolo/modeling/layers/detection_generator_test.py

```diff
@@ -14,6 +14,7 @@
 """Tests for yolo detection generator."""

+from official.vision.beta.projects.yolo.ops import anchor
 from absl.testing import parameterized
 import tensorflow as tf
@@ -35,14 +36,13 @@ class YoloDecoderTest(parameterized.TestCase, tf.test.TestCase):
         '5': [1, 13, 13, 255]
     }
     classes = 80
-    masks = {'3': [0, 1, 2], '4': [3, 4, 5], '5': [6, 7, 8]}
-    anchors = [[12.0, 19.0], [31.0, 46.0], [96.0, 54.0], [46.0, 114.0],
-               [133.0, 127.0], [79.0, 225.0], [301.0, 150.0], [172.0, 286.0],
-               [348.0, 340.0]]
-    box_type = {key: 'scaled' for key in masks.keys()}
-    layer = dg.YoloLayer(masks, anchors, classes, box_type=box_type,
-                         max_boxes=10)
+    anchors = {
+        '3': [[12.0, 19.0], [31.0, 46.0], [96.0, 54.0]],
+        '4': [[46.0, 114.0], [133.0, 127.0], [79.0, 225.0]],
+        '5': [[301.0, 150.0], [172.0, 286.0], [348.0, 340.0]]
+    }
+    box_type = {key: 'scaled' for key in anchors.keys()}
+    layer = dg.YoloLayer(anchors, classes, box_type=box_type, max_boxes=10)

     inputs = {}
     for key in input_shape:
```
official/vision/beta/projects/yolo/modeling/layers/nn_blocks.py

```diff
@@ -1665,7 +1665,13 @@ class DarkRouteProcess(tf.keras.layers.Layer):
 class Reorg(tf.keras.layers.Layer):
-  """Splits a high resolution image into 4 lower resolution images."""
+  """Splits a high resolution image into 4 lower resolution images.
+
+  Used in YOLOR to process very high resolution inputs efficiently.
+  For example an input image of [1280, 1280, 3] will become [640, 640, 12];
+  the images are sampled in such a way that the spatial resolution is
+  retained.
+  """

   def call(self, x, training=None):
     return tf.concat([
         x[..., ::2, ::2, :], x[..., 1::2, ::2, :],
```
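Reorg is a strided slice-and-stack: each of the four pixel phases of a 2x2 neighborhood becomes its own channel group, trading spatial resolution for channels without discarding any values. A quick sketch of the shape behavior, assuming the concat completes over all four phases as the truncated diff suggests; `tf.nn.space_to_depth` gives the same shape, though its channel ordering differs from the slice-based concat:

```python
import tensorflow as tf

x = tf.random.uniform([1, 1280, 1280, 3])

# Slice out the four 2x2 pixel phases and stack them on the channel axis.
reorg = tf.concat([
    x[..., ::2, ::2, :], x[..., 1::2, ::2, :],
    x[..., ::2, 1::2, :], x[..., 1::2, 1::2, :]
], axis=-1)
print(reorg.shape)                       # (1, 640, 640, 12)

# Same shape via the built-in op; channels are interleaved differently.
print(tf.nn.space_to_depth(x, 2).shape)  # (1, 640, 640, 12)
```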
official/vision/beta/projects/yolo/modeling/yolo_model.py

```diff
@@ -16,7 +16,6 @@
 import tensorflow as tf
-
 # static base Yolo Models that do not require configuration
 # similar to a backbone model id.
```
official/vision/beta/projects/yolo/ops/anchor.py

```diff
@@ -13,12 +13,14 @@
 # limitations under the License.
 """Yolo Anchor labeler."""
+import numpy as np
 import tensorflow as tf
-from tensorflow.python.ops.gen_math_ops import maximum, minimum

 from official.vision.beta.projects.yolo.ops import box_ops
 from official.vision.beta.projects.yolo.ops import preprocessing_ops
+from official.vision.beta.projects.yolo.ops import loss_utils

+INF = 10000000

 def get_best_anchor(y_true,
                     anchors,
```
```diff
@@ -28,15 +30,22 @@ def get_best_anchor(y_true,
                     iou_thresh=0.25,
                     best_match_only=False,
                     use_tie_breaker=True):
-  """
-  get the correct anchor that is associated with each box using IOU
+  """Get the correct anchor that is associated with each box using IOU.

   Args:
-    y_true: tf.Tensor[] for the list of bounding boxes in the yolo format
+    y_true: tf.Tensor[] for the list of bounding boxes in the yolo format.
     anchors: list or tensor for the anchor boxes to be used in prediction
-      found via Kmeans
-    width: int for the image width
-    height: int for the image height
+      found via Kmeans.
+    width: int for the image width.
+    height: int for the image height.
+    iou_thresh: `float` the minimum iou threshold to use for selecting boxes
+      for each level.
+    best_match_only: `bool` if the box only has one match and it is less than
+      the iou threshold, when set to True, this match will be dropped as no
+      anchors can be linked to it.
+    use_tie_breaker: `bool` if there are many anchors for a given box, then
+      attempt to use all of them; if False, only the first matching box will
+      be used.
   Return:
     tf.Tensor: y_true with the anchor associated with each ground truth
       box known
```
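Anchor assignment compares only shapes: every ground-truth box and every anchor is treated as if centered at the origin, and the IoU of their width/height rectangles decides the match. A minimal sketch of that shape-IoU as an illustration of the idea, not the exact op used here (the repo routes this through `box_ops.aggregated_comparitive_iou`):

```python
import tensorflow as tf

def shape_iou(true_wh, anchor_wh):
  """IoU of boxes compared by width/height only (shared centers)."""
  intersect = tf.reduce_prod(tf.minimum(true_wh, anchor_wh), axis=-1)
  union = (tf.reduce_prod(true_wh, axis=-1) +
           tf.reduce_prod(anchor_wh, axis=-1) - intersect)
  return intersect / union

true_wh = tf.constant([[30.0, 40.0]])
anchors = tf.constant([[12.0, 19.0], [31.0, 46.0], [96.0, 54.0]])
iou = shape_iou(true_wh[:, None, :], anchors[None, :, :])
best = tf.argmax(iou, axis=-1)  # anchor [31, 46] wins for a 30x40 box
```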
```diff
@@ -46,7 +55,10 @@ def get_best_anchor(y_true,
   height = tf.cast(height, dtype=tf.float32)

+  scaler = tf.convert_to_tensor([width, height])
+
   # scale to the level's output width and height
-  true_wh = tf.cast(y_true[..., 2:4], dtype=tf.float32)
+  true_wh = tf.cast(y_true[..., 2:4], dtype=tf.float32) * scaler
+
+  # scale down from large anchor to small anchor type
+  anchors = tf.cast(anchors, dtype=tf.float32) / stride

   k = tf.shape(anchors)[0]
```
```diff
@@ -71,7 +83,6 @@ def get_best_anchor(y_true,
     values = -values
     ind_mask = tf.cast(values < iou_thresh, dtype=indexes.dtype)
   else:
-    # iou_raw = box_ops.compute_iou(truth_comp, anchors)
     truth_comp = box_ops.xcycwh_to_yxyx(truth_comp)
     anchors = box_ops.xcycwh_to_yxyx(anchors)
     iou_raw = box_ops.aggregated_comparitive_iou(
@@ -80,7 +91,7 @@ def get_best_anchor(y_true,
         iou_type=3,
     )
     values, indexes = tf.math.top_k(
-        iou_raw,  #tf.transpose(iou_raw, perm=[0, 2, 1]),
+        iou_raw,
         k=tf.cast(k, dtype=tf.int32),
         sorted=True)
     ind_mask = tf.cast(values >= iou_thresh, dtype=indexes.dtype)
```
```diff
@@ -102,18 +113,73 @@ def get_best_anchor(y_true,
   return tf.cast(iou_index, dtype=tf.float32), tf.cast(values, dtype=tf.float32)


 class YoloAnchorLabeler:
   """Anchor labeler for the Yolo Models."""

   def __init__(self,
                anchors=None,
+               anchor_free_level_limits=None,
+               level_strides=None,
+               center_radius=None,
+               max_num_instances=200,
                match_threshold=0.25,
                best_matches_only=False,
-               use_tie_breaker=True):
+               use_tie_breaker=True,
+               darknet=False,
+               dtype='float32'):
+    """Initialization for the anchor labeler.
+
+    Args:
+      anchors: `Dict[List[Union[int, float]]]` values for each anchor box.
+      anchor_free_level_limits: `List` the box sizes that will be allowed at
+        each FPN level as is done in the FCOS and YOLOX paper for anchor free
+        box assignment.
+      level_strides: `Dict[int]` for how much the model scales down the
+        images at each level.
+      center_radius: `Dict[float]` for the radius around each box center to
+        search for extra centers in each level.
+      max_num_instances: `int` for the number of boxes to compute loss on.
+      match_threshold: `float` indicating the threshold over which an anchor
+        will be considered for prediction; at zero, all the anchors will be
+        used and at 1.0 only the best will be used. For anchor thresholds
+        larger than 1.0 we stop using the IOU for anchor comparison and
+        resort directly to comparing the width and height; this is used for
+        the scaled models.
+      best_matches_only: `boolean` indicating how boxes are selected for
+        optimization.
+      use_tie_breaker: `boolean` indicating whether to use the anchor
+        threshold value.
+      darknet: `boolean` indicating which data pipeline to use. Setting to
+        True swaps the pipeline to output images relative to Yolov4 and older.
+      dtype: `str` indicating the output datatype of the datapipeline
+        selecting from {"float32", "float16", "bfloat16"}.
+    """
     self.anchors = anchors
     self.masks = self._get_mask()
+    self.anchor_free_level_limits = self._get_level_limits(
+        anchor_free_level_limits)
+
+    if darknet and self.anchor_free_level_limits is None:
+      center_radius = None
+
+    self.keys = self.anchors.keys()
+    if self.anchor_free_level_limits is not None:
+      maxim = 2000
+      match_threshold = -0.01
+      self.num_instances = {key: maxim for key in self.keys}
+    elif not darknet:
+      self.num_instances = {
+          key: (6 - i) * max_num_instances for i, key in enumerate(self.keys)
+      }
+    else:
+      self.num_instances = {key: max_num_instances for key in self.keys}
+
+    self.center_radius = center_radius
+    self.level_strides = level_strides
     self.match_threshold = match_threshold
     self.best_matches_only = best_matches_only
     self.use_tie_breaker = use_tie_breaker
+    self.dtype = dtype

   def _get_mask(self):
     """For each level get indexes of each anchor for box search across levels."""
     masks = {}
     start = 0
```
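`num_instances` sets the per-level padding budget for label tensors. Anchor-free labeling fixes it at 2000 per level, while the scaled (non-darknet) anchor pipeline gives finer levels a larger budget since they produce more matches. With the default `max_num_instances=200` and levels {3, 4, 5}:

```python
max_num_instances = 200
keys = ['3', '4', '5']
num_instances = {
    key: (6 - i) * max_num_instances for i, key in enumerate(keys)
}
# {'3': 1200, '4': 1000, '5': 800}
```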
```diff
@@ -124,8 +190,21 @@ class YoloAnchorLabeler:
       masks[str(i)] = list(range(start, per_scale + start))
       start += per_scale
     return masks

+  def _get_level_limits(self, level_limits):
+    """For each level get the receptive field range for anchor free box placement."""
+    if level_limits is not None:
+      level_limits_dict = {}
+      level_limits = [0.0] + level_limits + [np.inf]
+
+      for i, key in enumerate(self.anchors.keys()):
+        level_limits_dict[key] = level_limits[i:i + 2]
+    else:
+      level_limits_dict = None
+    return level_limits_dict
+
   def _tie_breaking_search(self, anchors, mask, boxes, classes):
     """After search, link each anchor ind to the correct map in ground truth."""
     mask = tf.cast(tf.reshape(mask, [1, 1, 1, -1]), anchors.dtype)
     anchors = tf.expand_dims(anchors, axis=-1)
     viable = tf.where(tf.squeeze(anchors == mask, axis=0))
```
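Bracketing the user-supplied size cutoffs with 0 and infinity turns N cutoffs into N+1 half-open size ranges, one per FPN level, exactly as FCOS assigns boxes by regression range. For example, with cutoffs [64.0, 128.0] and levels {3, 4, 5}:

```python
import numpy as np

level_limits = [64.0, 128.0]
keys = ['3', '4', '5']

padded = [0.0] + level_limits + [np.inf]
level_limits_dict = {key: padded[i:i + 2] for i, key in enumerate(keys)}
# {'3': [0.0, 64.0], '4': [64.0, 128.0], '5': [128.0, inf]}
```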
```diff
@@ -140,10 +219,12 @@ class YoloAnchorLabeler:
     anchor_id = tf.cast(anchor_id, boxes.dtype)
     return boxes, classes, anchor_id

-  def _get_anchor_id(self, key, boxes, classes, anchors, width, height,
-                     stride):
+  def _get_anchor_id(self,
+                     key,
+                     boxes,
+                     classes,
+                     width,
+                     height,
+                     stride,
+                     iou_index=None):
     """Find the object anchor assignments in an anchor based paradigm."""
+    # find the best anchor
+    anchors = self.anchors[key]
     num_anchors = len(anchors)
     if self.best_matches_only:
       # get the best anchor for each box
```
```diff
@@ -153,28 +234,20 @@ class YoloAnchorLabeler:
           iou_thresh=self.match_threshold)
       mask = range(num_anchors)
     else:
-      # stitch and search boxes across fpn levels
-      anchorsvec = []
-      for stitch in self.anchors.keys():
-        anchorsvec.extend(self.anchors[stitch])
-
-      # get the best anchor for each box
-      iou_index, _ = get_best_anchor(
-          boxes,
-          anchorsvec,
-          stride,
-          width=width,
-          height=height,
-          best_match_only=False,
-          use_tie_breaker=self.use_tie_breaker,
-          iou_thresh=self.match_threshold)
-
       # search is done across FPN levels, get the mask of anchor indexes
       # correlated to this level.
       mask = self.masks[key]

     # search for the correct box to use
     (boxes, classes, anchors) = self._tie_breaking_search(iou_index, mask,
                                                           boxes, classes)
     return boxes, classes, anchors, num_anchors

-  def _get_centers(self, boxes, classes, anchors, width, height, offset):
+  def _get_centers(self, boxes, classes, anchors, width, height, scale_xy):
     """Find the object center assignments in an anchor based paradigm."""
-    grid_xy, wh = tf.split(boxes, 2, axis=-1)
+    offset = tf.cast(0.5 * (scale_xy - 1), boxes.dtype)
+    grid_xy, _ = tf.split(boxes, 2, axis=-1)

     wh_scale = tf.cast(tf.convert_to_tensor([width, height]), boxes.dtype)
     grid_xy = grid_xy * wh_scale
```
```diff
@@ -234,16 +307,16 @@ class YoloAnchorLabeler:
     return boxes, classes, centers

   def _get_anchor_free(self, key, boxes, classes, height, width, stride,
-                       fpn_limits, center_radius=2.5):
+                       center_radius):
     """Find the box assignments in an anchor free paradigm."""
-    gen = loss_utils.GridGenerator(
-        masks=None, anchors=[[1, 1]], scale_anchors=stride)
+    level_limits = self.anchor_free_level_limits[key]
+    gen = loss_utils.GridGenerator(anchors=[[1, 1]], scale_anchors=stride)
     grid_points = gen(width, height, 1, boxes.dtype)[0]
     grid_points = tf.squeeze(grid_points, axis=0)
     box_list = boxes
```
```diff
@@ -266,10 +339,10 @@ class YoloAnchorLabeler:
     b_b = tlbr_boxes[..., 2] - y_centers
     b_r = tlbr_boxes[..., 3] - x_centers
     box_delta = tf.stack([b_t, b_l, b_b, b_r], axis=-1)
-    if fpn_limits is not None:
+    if level_limits is not None:
       max_reg_targets_per_im = tf.reduce_max(box_delta, axis=-1)
-      gt_min = max_reg_targets_per_im >= fpn_limits[0]
-      gt_max = max_reg_targets_per_im <= fpn_limits[1]
+      gt_min = max_reg_targets_per_im >= level_limits[0]
+      gt_max = max_reg_targets_per_im <= level_limits[1]
       is_in_boxes = tf.logical_and(gt_min, gt_max)
     else:
       is_in_boxes = tf.reduce_min(box_delta, axis=-1) > 0.0
```
```diff
@@ -290,11 +363,10 @@ class YoloAnchorLabeler:
     is_in_boxes_and_center = tf.logical_and(is_in_index,
                                             is_in_boxes_and_center)

     if self.use_tie_breaker:
-      inf = 10000000
       boxes_all = tf.cast(is_in_boxes_and_center, area.dtype)
-      boxes_all = ((boxes_all * area) + ((1 - boxes_all) * inf))
+      boxes_all = ((boxes_all * area) + ((1 - boxes_all) * INF))
      boxes_min = tf.reduce_min(boxes_all, axis=-1, keepdims=True)
-      boxes_min = tf.where(boxes_min == inf, -1.0, boxes_min)
+      boxes_min = tf.where(boxes_min == INF, -1.0, boxes_min)
       is_in_boxes_and_center = boxes_all == boxes_min

     # construct the index update grid
```
```diff
@@ -314,33 +386,60 @@ class YoloAnchorLabeler:
     indexes = tf.concat([y, x, tf.zeros_like(t)], axis=-1)
     return indexes, samples

-  def __call__(self, key, boxes, classes, anchors, width, height, stride,
-               scale_xy, num_instances, fpn_limits=None):
+  def build_label_per_path(self,
+                           key,
+                           boxes,
+                           classes,
+                           width,
+                           height,
+                           iou_index=None):
+    """Builds the labels for one path."""
+    stride = self.level_strides[key]
+    scale_xy = self.center_radius[key] if self.center_radius is not None else 1
+
+    width = tf.cast(width // stride, boxes.dtype)
+    height = tf.cast(height // stride, boxes.dtype)
+
+    if self.anchor_free_level_limits is None:
+      (boxes, classes, anchors, num_anchors) = self._get_anchor_id(
+          key, boxes, classes, width, height, stride, iou_index=iou_index)
+      boxes, classes, centers = self._get_centers(boxes, classes, anchors,
+                                                  width, height, scale_xy)
+      ind_mask = tf.ones_like(classes)
+      updates = tf.concat([boxes, ind_mask, classes], axis=-1)
+    else:
+      num_anchors = 1
+      (centers, updates) = self._get_anchor_free(key, boxes, classes, height,
+                                                 width, stride, scale_xy)
+      boxes, ind_mask, classes = tf.split(updates, [4, 1, 1], axis=-1)
+
+    width = tf.cast(width, tf.int32)
+    height = tf.cast(height, tf.int32)
+    full = tf.zeros([height, width, num_anchors, 1], dtype=classes.dtype)
+    full = tf.tensor_scatter_nd_add(full, centers, ind_mask)
+
+    num_instances = int(self.num_instances[key])
+    centers = preprocessing_ops.pad_max_instances(
+        centers, num_instances, pad_value=0, pad_axis=0)
+    updates = preprocessing_ops.pad_max_instances(
+        updates, num_instances, pad_value=0, pad_axis=0)
+
+    updates = tf.cast(updates, self.dtype)
+    full = tf.cast(full, self.dtype)
+    return centers, updates, full

+  def __call__(self, boxes, classes, width, height):
     """Builds the labels for a single image, not functional in batch mode.

     Args:
       boxes: `Tensor` of shape [None, 4] indicating the object locations in
         an image.
       classes: `Tensor` of shape [None] indicating the each objects classes.
-      anchors: `List[List[int, float]]` representing the anchor boxes to build
-        the model against.
       width: `int` for the images width.
       height: `int` for the images height.
-      stride: `int` for how much the image gets scaled at this level.
-      scale_xy: `float` for the center shifts to apply when finding center
-        assignments for a box.
-      num_instances: `int` for the maximum number of expanded boxes to allow.
-      fpn_limits: `List[int]` given no anchor boxes this is used to limit the
-        boxes assigned to each fpn level based on the level's receptive field.

     Returns:
       centers: `Tensor` of shape [None, 3] of indexes in the final grid where
```
```diff
@@ -349,35 +448,27 @@ class YoloAnchorLabeler:
       full: `Tensor` of [width/stride, height/stride, num_anchors, 1] holding
         a mask of where boxes are located for confidence losses.
     """
-    boxes = box_ops.yxyx_to_xcycwh(boxes)
-
-    width //= stride
-    height //= stride
-    width = tf.cast(width, boxes.dtype)
-    height = tf.cast(height, boxes.dtype)
-
-    if fpn_limits is None:
-      offset = tf.cast(0.5 * (scale_xy - 1), boxes.dtype)
-      (boxes, classes, anchors, num_anchors) = self._get_anchor_id(
-          key, boxes, classes, anchors, width, height, stride)
-      boxes, classes, centers = self._get_centers(boxes, classes, anchors,
-                                                  width, height, offset)
-      ind_mask = tf.ones_like(classes)
-      updates = tf.concat([boxes, ind_mask, classes], axis=-1)
-    else:
-      (centers, updates) = self._get_anchor_free(boxes, classes, height, width,
-                                                 stride, fpn_limits)
-      boxes, ind_mask, classes = tf.split(updates, [4, 1, 1], axis=-1)
-      num_anchors = 1
-
-    width = tf.cast(width, tf.int32)
-    height = tf.cast(height, tf.int32)
-    full = tf.zeros([height, width, num_anchors, 1], dtype=classes.dtype)
-    full = tf.tensor_scatter_nd_add(full, centers, ind_mask)
-
-    centers = preprocessing_ops.pad_max_instances(
-        centers, int(num_instances), pad_value=0, pad_axis=0)
-    updates = preprocessing_ops.pad_max_instances(
-        updates, int(num_instances), pad_value=0, pad_axis=0)
-    return centers, updates, full
+    indexes = {}
+    updates = {}
+    true_grids = {}
+    iou_index = None
+
+    boxes = box_ops.yxyx_to_xcycwh(boxes)
+    if not self.best_matches_only and self.anchor_free_level_limits is None:
+      # stitch and search boxes across fpn levels
+      anchorsvec = []
+      for stitch in self.anchors:
+        anchorsvec.extend(self.anchors[stitch])
+      stride = tf.cast([width, height], boxes.dtype)
+
+      # get the best anchor for each box
+      iou_index, _ = get_best_anchor(
+          boxes,
+          anchorsvec,
+          stride,
+          width=1.0,
+          height=1.0,
+          best_match_only=False,
+          use_tie_breaker=self.use_tie_breaker,
+          iou_thresh=self.match_threshold)
+
+    for key in self.keys:
+      indexes[key], updates[key], true_grids[key] = self.build_label_per_path(
+          key, boxes, classes, width, height, iou_index=iou_index)
+    return indexes, updates, true_grids
\ No newline at end of file
```
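After this refactor the labeler owns every per-level parameter, so the data pipeline calls it once per image with only boxes, classes and the image size, and gets per-level dictionaries back. A hedged sketch of the call; the anchor, stride and radius values are illustrative:

```python
import tensorflow as tf

boxes = tf.random.uniform([8, 4])   # [N, 4] boxes in yxyx format
classes = tf.zeros([8])             # [N] class ids

labeler = YoloAnchorLabeler(
    anchors={'3': [[12, 19], [31, 46], [96, 54]],
             '4': [[46, 114], [133, 127], [79, 225]],
             '5': [[301, 150], [172, 286], [348, 340]]},
    level_strides={'3': 8, '4': 16, '5': 32},
    center_radius={'3': 1.0, '4': 1.0, '5': 1.0},
    max_num_instances=200)

inds, upds, true_conf = labeler(boxes, classes, 640, 640)
# inds['3'], upds['3'], true_conf['3'] are the level-3 training targets.
```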
official/vision/beta/projects/yolo/ops/loss_utils.py

```diff
@@ -13,6 +13,7 @@
 # limitations under the License.
 """Yolo loss utility functions."""
+import numpy as np
 import tensorflow as tf
```
```diff
@@ -129,6 +130,10 @@ def build_grid(indexes, truths, preds, ind_mask, update=False, grid=None):
   indexes = apply_mask(tf.cast(ind_mask, indexes.dtype), indexes)
+  indexes = (indexes + (ind_mask - 1))
+
+  # mask truths
+  truths = apply_mask(tf.cast(ind_mask, truths.dtype), truths)
+  truths = (truths + (tf.cast(ind_mask, truths.dtype) - 1))

   # reshape the indexes into the correct shape for the loss,
   # just flatten all indexes but the last
   indexes = tf.reshape(indexes, [-1, 4])
```
```diff
@@ -157,26 +162,16 @@ def build_grid(indexes, truths, preds, ind_mask, update=False, grid=None):
 class GridGenerator:
   """Grid generator that generates anchor grids for box decoding."""

-  def __init__(self, anchors, masks=None, scale_anchors=None):
+  def __init__(self, anchors, scale_anchors=None):
     """Initialize Grid Generator.

     Args:
       anchors: A `List[List[int]]` for the anchor boxes that are used in the
         model at all levels.
-      masks: A `List[int]` for the output level that this specific model output
-        Level.
       scale_anchors: An `int` for how much to scale this level to get the
         original input shape.
     """
     self.dtype = tf.keras.backend.floatx()
-    if masks is not None:
-      self._num = len(masks)
-    else:
-      self._num = tf.shape(anchors)[0]
-
-    if masks is not None:
-      anchors = [anchors[mask] for mask in masks]
+    self._num = tf.shape(anchors)[0]

     self._scale_anchors = scale_anchors
     self._anchors = tf.convert_to_tensor(anchors)
     return
```
```diff
@@ -331,18 +326,10 @@ class PairWiseSearch:
   def __call__(self,
                pred_boxes,
                pred_classes,
                boxes,
                classes,
-               scale=None,
-               yxyx=True,
                clip_thresh=0.0):
     num_boxes = tf.shape(boxes)[-2]
     num_tiles = (num_boxes // TILE_SIZE) - 1

-    if yxyx:
-      boxes = box_ops.yxyx_to_xcycwh(boxes)
-
-    if scale is not None:
-      boxes = boxes * tf.stop_gradient(scale)
-
     if self._min_conf > 0.0:
       pred_classes = tf.cast(pred_classes > self._min_conf, pred_classes.dtype)
```
```diff
@@ -540,7 +527,6 @@ def _anchor_free_scale_boxes(encoded_boxes,
                              width,
                              height,
                              stride,
                              grid_points,
-                             scale_xy,
                              darknet=False):
   """Decode models boxes using FPN stride under anchor free conditions."""
   # split the boxes
@@ -549,7 +535,6 @@ def _anchor_free_scale_boxes(encoded_boxes,
   # build a scaling tensor to get the offset of the box relative to the image
   scaler = tf.convert_to_tensor([height, width, height, width])
-  scale_xy = tf.cast(scale_xy, encoded_boxes.dtype)

   scale_down = lambda x, y: x / y
   scale_up = lambda x, y: x * y
@@ -557,10 +542,6 @@ def _anchor_free_scale_boxes(encoded_boxes,
     scale_down = tf.grad_pass_through(scale_down)
     scale_up = tf.grad_pass_through(scale_up)

-  # scale the centers and find the offset of each box relative to
-  # their center pixel
-  pred_xy = pred_xy * scale_xy - 0.5 * (scale_xy - 1)
-
   # scale the offsets and add them to the grid points or a tensor that is
   # the relative location of each pixel
   box_xy = (grid_points + pred_xy)
@@ -624,7 +605,7 @@ def get_predicted_box(width,
   if box_type == 'anchor_free':
     (scaler, scaled_box, pred_box) = _anchor_free_scale_boxes(
-        encoded_boxes, width, height, stride, grid_points, scale_xy,
-        darknet=darknet)
+        encoded_boxes, width, height, stride, grid_points, darknet=darknet)
   elif darknet:
     # pylint:disable=unbalanced-tuple-unpacking
```
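With `scale_xy` gone, anchor-free decoding reduces to adding the predicted center offsets to the per-pixel grid points and rescaling by the level stride. A minimal sketch of the decode for one level, assuming the offsets come from the model head; the `exp` on width/height follows the YOLOX convention and the exact parameterization here may differ:

```python
import tensorflow as tf

stride = 16
ys, xs = tf.meshgrid(tf.range(4, dtype=tf.float32),
                     tf.range(4, dtype=tf.float32), indexing='ij')
grid_points = tf.stack([xs, ys], axis=-1)  # [4, 4, 2], (x, y) per pixel

pred_xy = tf.zeros([4, 4, 2])              # head outputs (stub values)
pred_wh = tf.zeros([4, 4, 2])

box_xy = (grid_points + pred_xy) * stride  # centers in input pixels
box_wh = tf.exp(pred_wh) * stride          # sizes in input pixels
```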
official/vision/beta/projects/yolo/ops/mosaic.py

```diff
@@ -17,7 +17,7 @@ import random
 import tensorflow as tf
 import tensorflow_addons as tfa

-from yolo.ops import preprocessing_ops
+from official.vision.beta.projects.yolo.ops import preprocessing_ops
 from official.vision.beta.ops import box_ops
 from official.vision.beta.ops import preprocess_ops
@@ -396,4 +396,3 @@ class Mosaic:
       return self._apply
     else:
       return self._skip
\ No newline at end of file
```
official/vision/beta/projects/yolo/ops/preprocessing_ops.py

```diff
@@ -4,8 +4,6 @@ import random
 import os
 import tensorflow_addons as tfa

-from official.vision.beta.projects.yolo.ops import box_ops
-from official.vision.beta.projects.yolo.ops import loss_utils
 from official.vision.beta.ops import box_ops as bbox_ops

 PAD_VALUE = 114
```
```diff
@@ -122,6 +120,11 @@ def pad_max_instances(value, instances, pad_value=0, pad_axis=0):
   nshape = tf.concat([shape[:pad_axis], pad, shape[(pad_axis + 1):]], axis=0)
   pad_tensor = tf.fill(nshape, tf.cast(pad_value, dtype=value.dtype))
   value = tf.concat([value, pad_tensor], axis=pad_axis)
+
+  if isinstance(instances, int):
+    vshape = value.get_shape().as_list()
+    vshape[pad_axis] = instances
+    value.set_shape(vshape)
+
   return value
```
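When `instances` is a Python int the padded size is known at trace time, so the function now also pins the static shape; downstream `set_shape` calls and TPU batching then see a fully defined dimension instead of `None`. A quick sketch of the effect:

```python
import tensorflow as tf

boxes = tf.zeros([13, 4])
# pad_max_instances is the function defined in this file.
padded = pad_max_instances(boxes, 200, pad_value=0, pad_axis=0)
print(padded.shape)  # (200, 4) -- static, not (None, 4)
```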
```diff
@@ -317,10 +320,43 @@ def resize_and_jitter_image(image,
                             cut=None,
                             method=tf.image.ResizeMethod.BILINEAR,
                             seed=None):
-  """WIP"""
+  """Resize, pad, and distort a given input image.
+
+  Args:
+    image: a `Tensor` of shape [height, width, 3] representing an image.
+    desired_size: a `Tensor` or `int` list/tuple of two elements representing
+      [height, width] of the desired actual output image size.
+    jitter: an `int` representing the maximum jittering that can be applied
+      to the image.
+    letter_box: a `bool` representing if letterboxing should be applied.
+    random_pad: a `bool` representing if random padding should be applied.
+    crop_only: a `bool` representing if only cropping will be applied.
+    shiftx: a `float` indicating whether the image is shifted to the left or
+      right.
+    shifty: a `float` value indicating whether the image is shifted to the
+      top or bottom.
+    cut: a `float` value indicating the desired center of the final patched
+      image.
+    method: function to resize the input image to the scaled image.
+    seed: seed for random scale jittering.
+
+  Returns:
+    image_: a `Tensor` of shape [height, width, 3] where [height, width]
+      equals to `desired_size`.
+    infos: a 2D `Tensor` that encodes the information of the image and the
+      applied preprocessing. It is in the format of
+      [[original_height, original_width], [desired_height, desired_width],
+      [y_scale, x_scale], [y_offset, x_offset]], where [desired_height,
+      desired_width] is the actual scaled image size, and [y_scale, x_scale]
+      is the scaling factor, which is the ratio of
+      scaled dimension / original dimension.
+    cast([original_width, original_height, width, height, ptop, pleft,
+      pbottom, pright], tf.float32): a `Tensor` containing the information of
+      the image and the applied preprocessing.
+  """

   def intersection(a, b):
-    """Find the intersection of 2 crop boxes."""
+    """Find the intersection between 2 crops."""
     minx = tf.maximum(a[0], b[0])
     miny = tf.maximum(a[1], b[1])
     maxx = tf.minimum(a[2], b[2])
@@ -328,11 +364,10 @@ def resize_and_jitter_image(image,
     return tf.convert_to_tensor([minx, miny, maxx, maxy])

   def cast(values, dtype):
     """Cast a list of items to a given data type to reduce lines of code."""
     return [tf.cast(value, dtype) for value in values]

   if jitter > 0.5 or jitter < 0:
-    raise Exception("maximum change in aspect ratio must be between 0 and 0.5")
+    raise Exception('maximum change in aspect ratio must be between 0 and 0.5')

   with tf.name_scope('resize_and_jitter_image'):
     # Cast all parameters to a usable float data type.
```
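A sketch of calling the op as now documented; the argument values are illustrative and the three return values follow the docstring above (the image, the resize/offset info record, and the cast crop-geometry tensor):

```python
import tensorflow as tf

image = tf.random.uniform([480, 640, 3])
image_, infos, crop_info = resize_and_jitter_image(
    image, [640, 640], jitter=0.3, letter_box=True, random_pad=True)
print(image_.shape)  # (640, 640, 3)
```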