Commit a15e242e
authored Sep 17, 2021 by Vishnu Banna

config

Parent: b768c248
Showing 10 changed files with 2267 additions and 1053 deletions (+2267 −1053):
  official/vision/beta/projects/yolo/dataloaders/classification_tfds_decoder.py (+0 −34)
  official/vision/beta/projects/yolo/dataloaders/classification_vision.py (+118 −0)
  official/vision/beta/projects/yolo/dataloaders/yolo_detection_input.py (+0 −319)
  official/vision/beta/projects/yolo/dataloaders/yolo_detection_input_test.py (+0 −103)
  official/vision/beta/projects/yolo/dataloaders/yolo_input.py (+511 −0)
  official/vision/beta/projects/yolo/ops/loss_utils.py (+20 −6)
  official/vision/beta/projects/yolo/ops/mosaic.py (+389 −0)
  official/vision/beta/projects/yolo/ops/preprocess_ops.py (+0 −523)
  official/vision/beta/projects/yolo/ops/preprocess_ops_test.py (+0 −68)
  official/vision/beta/projects/yolo/ops/preprocessing_ops.py (+1229 −0)
official/vision/beta/projects/yolo/dataloaders/classification_tfds_decoder.py (deleted, 100644 → 0)
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""TFDS Classification decoder."""
import tensorflow as tf

from official.vision.beta.dataloaders import decoder


class Decoder(decoder.Decoder):
  """A tf.Example decoder for classification task."""

  def __init__(self):
    return

  def decode(self, serialized_example):
    sample_dict = {
        'image/encoded':
            tf.io.encode_jpeg(serialized_example['image'], quality=100),
        'image/class/label':
            serialized_example['label'],
    }
    return sample_dict
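For context, this decoder re-encodes a raw TFDS sample into the tf.Example-style keys that the downstream classification parser expects. A minimal sketch of how it might be exercised in eager mode; the fake sample dict below is an illustrative assumption, not part of the commit:

# Hypothetical usage sketch: feed a TFDS-style sample through the decoder
# and inspect the re-keyed output.
import tensorflow as tf

sample = {
    'image': tf.zeros([32, 32, 3], dtype=tf.uint8),  # fake image
    'label': tf.constant(7, dtype=tf.int64),         # fake class id
}
decoded = Decoder().decode(sample)
# decoded['image/encoded'] is a JPEG-encoded string scalar;
# decoded['image/class/label'] is the original label tensor.
print(decoded['image/class/label'].numpy())  # -> 7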
official/vision/beta/projects/yolo/dataloaders/classification_vision.py (new file, mode 100755)
"""Classification parser."""
# Import libraries
import
tensorflow
as
tf
import
tensorflow_datasets
as
tfds
import
tensorflow_addons
as
tfa
from
official.vision.beta.dataloaders
import
parser
from
official.vision.beta.ops
import
preprocess_ops
from
official.vision.beta.ops
import
augment
class
Parser
(
parser
.
Parser
):
"""Parser to parse an image and its annotations into a dictionary of tensors."""
def
__init__
(
self
,
output_size
,
aug_policy
,
scale
=
[
128
,
448
],
dtype
=
'float32'
):
"""Initializes parameters for parsing annotations in the dataset.
Args:
output_size: `Tensor` or `list` for [height, width] of output image. The
output_size should be divided by the largest feature stride 2^max_level.
num_classes: `float`, number of classes.
aug_rand_saturation: `bool`, if True, augment training with random
saturation.
aug_rand_brightness: `bool`, if True, augment training with random
brightness.
aug_rand_zoom: `bool`, if True, augment training with random
zoom.
aug_rand_rotate: `bool`, if True, augment training with random
rotate.
aug_rand_hue: `bool`, if True, augment training with random
hue.
aug_rand_aspect: `bool`, if True, augment training with random
aspect.
scale: 'list', `Tensor` or `list` for [low, high] of the bounds of the
random scale.
seed: an `int` for the seed used by tf.random
"""
self
.
_output_size
=
output_size
if
aug_policy
:
if
aug_policy
==
'autoaug'
:
self
.
_augmenter
=
augment
.
AutoAugment
()
elif
aug_policy
==
'randaug'
:
self
.
_augmenter
=
augment
.
RandAugment
(
num_layers
=
2
,
magnitude
=
20
)
else
:
raise
ValueError
(
'Augmentation policy {} not supported.'
.
format
(
aug_policy
))
else
:
self
.
_augmenter
=
None
self
.
_scale
=
scale
if
dtype
==
'float32'
:
self
.
_dtype
=
tf
.
float32
elif
dtype
==
'float16'
:
self
.
_dtype
=
tf
.
float16
elif
dtype
==
'bfloat16'
:
self
.
_dtype
=
tf
.
bfloat16
else
:
raise
ValueError
(
'dtype {!r} is not supported!'
.
format
(
dtype
))
def
_parse_train_data
(
self
,
decoded_tensors
):
"""Generates images and labels that are usable for model training.
Args:
decoded_tensors: a dict of Tensors produced by the decoder.
Returns:
images: the image tensor.
labels: a dict of Tensors that contains labels.
"""
image
=
tf
.
io
.
decode_image
(
decoded_tensors
[
'image/encoded'
])
image
.
set_shape
((
None
,
None
,
3
))
image
=
tf
.
image
.
resize_with_pad
(
image
,
target_width
=
self
.
_output_size
[
0
],
target_height
=
self
.
_output_size
[
1
])
scale
=
tf
.
random
.
uniform
([],
minval
=
self
.
_scale
[
0
],
maxval
=
self
.
_scale
[
1
],
dtype
=
tf
.
int32
)
if
scale
>
self
.
_output_size
[
0
]:
image
=
tf
.
image
.
resize_with_crop_or_pad
(
image
,
target_height
=
scale
,
target_width
=
scale
)
else
:
image
=
tf
.
image
.
random_crop
(
image
,
(
scale
,
scale
,
3
))
if
self
.
_augmenter
is
not
None
:
image
=
self
.
_augmenter
.
distort
(
image
)
image
=
tf
.
image
.
random_flip_left_right
(
image
)
image
=
tf
.
cast
(
image
,
tf
.
float32
)
/
255
image
=
tf
.
image
.
resize
(
image
,
(
self
.
_output_size
[
0
],
self
.
_output_size
[
1
]))
label
=
decoded_tensors
[
'image/class/label'
]
return
image
,
label
def
_parse_eval_data
(
self
,
decoded_tensors
):
"""Generates images and labels that are usable for model evaluation.
Args:
decoded_tensors: a dict of Tensors produced by the decoder.
Returns:
images: the image tensor.
labels: a dict of Tensors that contains labels.
"""
image
=
tf
.
io
.
decode_image
(
decoded_tensors
[
'image/encoded'
])
image
.
set_shape
((
None
,
None
,
3
))
image
=
tf
.
cast
(
image
,
tf
.
float32
)
image
=
tf
.
image
.
resize_with_pad
(
image
,
target_width
=
self
.
_output_size
[
0
],
target_height
=
self
.
_output_size
[
1
])
# Final Output Shape
image
=
image
/
255.
# Normalize
#label = tf.one_hot(decoded_tensors['image/class/label'], self._num_classes)
label
=
decoded_tensors
[
'image/class/label'
]
return
image
,
label
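A minimal sketch of how this parser chains with the TFDS decoder it replaces above; the wiring shown is illustrative, not prescribed by the commit:

# Hypothetical wiring sketch (not part of the commit).
parser_fn = Parser(output_size=[224, 224], aug_policy='randaug')
# decoded = Decoder().decode(tfds_sample)  # keys: image/encoded, image/class/label
# image, label = parser_fn._parse_eval_data(decoded)
# image.shape -> (224, 224, 3), values normalized to [0, 1].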
official/vision/beta/projects/yolo/dataloaders/yolo_detection_input.py (deleted, 100644 → 0)
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Detection Data parser and processing for YOLO.
Parse image and ground truths in a dataset to training targets and package them
into (image, labels) tuple for YOLO.
"""
import tensorflow as tf

from official.vision.beta.dataloaders import parser
from official.vision.beta.ops import box_ops
from official.vision.beta.ops import preprocess_ops
from official.vision.beta.projects.yolo.ops import box_ops as yolo_box_ops
from official.vision.beta.projects.yolo.ops import preprocess_ops as yolo_preprocess_ops


class Parser(parser.Parser):
  """Parser to parse an image and its annotations into a dictionary of tensors."""

  def __init__(self,
               output_size,
               num_classes,
               fixed_size=True,
               jitter_im=0.1,
               jitter_boxes=0.005,
               use_tie_breaker=True,
               min_level=3,
               max_level=5,
               masks=None,
               max_process_size=608,
               min_process_size=320,
               max_num_instances=200,
               random_flip=True,
               aug_rand_saturation=True,
               aug_rand_brightness=True,
               aug_rand_zoom=True,
               aug_rand_hue=True,
               anchors=None,
               seed=10,
               dtype=tf.float32):
    """Initializes parameters for parsing annotations in the dataset.

    Args:
      output_size: a `Tuple` for (width, height) of input image.
      num_classes: a `Tensor` or `int` for the number of classes.
      fixed_size: a `bool`; if True all output images have the same size.
      jitter_im: a `float` representing a pixel value that is the maximum
        jitter applied to the image for data augmentation during training.
      jitter_boxes: a `float` representing a pixel value that is the maximum
        jitter applied to the bounding box for data augmentation during
        training.
      use_tie_breaker: a `bool` for whether or not to use the tie breaker.
      min_level: an `int` for the minimum level of the output feature pyramid.
      max_level: an `int` for the maximum level of the output feature pyramid.
      masks: a `Tensor`, `List` or `numpy.ndarray` for anchor masks.
      max_process_size: an `int` for maximum image width and height.
      min_process_size: an `int` for minimum image width and height.
      max_num_instances: an `int` for the maximum number of instances in an
        image.
      random_flip: a `bool`; if True, augment training with random horizontal
        flip.
      aug_rand_saturation: a `bool`; if True, augment training with random
        saturation.
      aug_rand_brightness: a `bool`; if True, augment training with random
        brightness.
      aug_rand_zoom: a `bool`; if True, augment training with random zoom.
      aug_rand_hue: a `bool`; if True, augment training with random hue.
      anchors: a `Tensor`, `List` or `numpy.ndarray` for bounding box priors.
      seed: an `int` for the seed used by tf.random.
      dtype: a `tf.dtypes.DType` object that represents the dtype the outputs
        will be cast to. The available types are tf.float32, tf.float16, or
        tf.bfloat16.
    """
    self._net_down_scale = 2**max_level
    self._num_classes = num_classes
    self._image_w = (output_size[0] //
                     self._net_down_scale) * self._net_down_scale
    self._image_h = (output_size[1] //
                     self._net_down_scale) * self._net_down_scale
    self._max_process_size = max_process_size
    self._min_process_size = min_process_size
    self._fixed_size = fixed_size
    self._anchors = anchors
    self._masks = {
        key: tf.convert_to_tensor(value) for key, value in masks.items()
    }
    self._use_tie_breaker = use_tie_breaker
    self._jitter_im = 0.0 if jitter_im is None else jitter_im
    self._jitter_boxes = 0.0 if jitter_boxes is None else jitter_boxes
    self._max_num_instances = max_num_instances
    self._random_flip = random_flip
    self._aug_rand_saturation = aug_rand_saturation
    self._aug_rand_brightness = aug_rand_brightness
    self._aug_rand_zoom = aug_rand_zoom
    self._aug_rand_hue = aug_rand_hue
    self._seed = seed
    self._dtype = dtype

  def _build_grid(self, raw_true, width, batch=False, use_tie_breaker=False):
    mask = self._masks
    for key in self._masks.keys():
      if not batch:
        mask[key] = yolo_preprocess_ops.build_grided_gt(
            raw_true, self._masks[key], width // 2**int(key),
            raw_true['bbox'].dtype, use_tie_breaker)
      else:
        mask[key] = yolo_preprocess_ops.build_batch_grided_gt(
            raw_true, self._masks[key], width // 2**int(key),
            raw_true['bbox'].dtype, use_tie_breaker)
    return mask

  def _parse_train_data(self, data):
    """Generates images and labels that are usable for model training.

    Args:
      data: a dict of Tensors produced by the decoder.

    Returns:
      images: the image tensor.
      labels: a dict of Tensors that contains labels.
    """
    shape = tf.shape(data['image'])
    image = data['image'] / 255
    boxes = data['groundtruth_boxes']
    width = shape[0]
    height = shape[1]

    image, boxes = yolo_preprocess_ops.fit_preserve_aspect_ratio(
        image,
        boxes,
        width=width,
        height=height,
        target_dim=self._max_process_size)

    image_shape = tf.shape(image)[:2]

    if self._random_flip:
      image, boxes, _ = preprocess_ops.random_horizontal_flip(
          image, boxes, seed=self._seed)

    randscale = self._image_w // self._net_down_scale
    if not self._fixed_size:
      do_scale = tf.greater(
          tf.random.uniform([], minval=0, maxval=1, seed=self._seed), 0.5)
      if do_scale:
        # This scales the image to a random multiple of net_down_scale
        # between 320 to 608.
        randscale = tf.random.uniform(
            [],
            minval=self._min_process_size // self._net_down_scale,
            maxval=self._max_process_size // self._net_down_scale,
            seed=self._seed,
            dtype=tf.int32) * self._net_down_scale

    if self._jitter_boxes != 0.0:
      boxes = box_ops.denormalize_boxes(boxes, image_shape)
      boxes = box_ops.jitter_boxes(boxes, 0.025)
      boxes = box_ops.normalize_boxes(boxes, image_shape)

    # YOLO loss function uses x-center, y-center format.
    boxes = yolo_box_ops.yxyx_to_xcycwh(boxes)

    if self._jitter_im != 0.0:
      image, boxes = yolo_preprocess_ops.random_translate(
          image, boxes, self._jitter_im, seed=self._seed)

    if self._aug_rand_zoom:
      image, boxes = yolo_preprocess_ops.resize_crop_filter(
          image,
          boxes,
          default_width=self._image_w,
          default_height=self._image_h,
          target_width=randscale,
          target_height=randscale)

    image = tf.image.resize(image, (416, 416), preserve_aspect_ratio=False)

    if self._aug_rand_brightness:
      image = tf.image.random_brightness(image=image, max_delta=.1)  # Brightness
    if self._aug_rand_saturation:
      image = tf.image.random_saturation(
          image=image, lower=0.75, upper=1.25)  # Saturation
    if self._aug_rand_hue:
      image = tf.image.random_hue(image=image, max_delta=.3)  # Hue
    image = tf.clip_by_value(image, 0.0, 1.0)

    # Find the best anchor for the ground truth labels to maximize the iou.
    best_anchors = yolo_preprocess_ops.get_best_anchor(
        boxes, self._anchors, width=self._image_w, height=self._image_h)

    # Padding.
    boxes = preprocess_ops.clip_or_pad_to_fixed_size(
        boxes, self._max_num_instances, 0)
    classes = preprocess_ops.clip_or_pad_to_fixed_size(
        data['groundtruth_classes'], self._max_num_instances, -1)
    best_anchors = preprocess_ops.clip_or_pad_to_fixed_size(
        best_anchors, self._max_num_instances, 0)
    area = preprocess_ops.clip_or_pad_to_fixed_size(
        data['groundtruth_area'], self._max_num_instances, 0)
    is_crowd = preprocess_ops.clip_or_pad_to_fixed_size(
        tf.cast(data['groundtruth_is_crowd'], tf.int32),
        self._max_num_instances, 0)

    labels = {
        'source_id': data['source_id'],
        'bbox': tf.cast(boxes, self._dtype),
        'classes': tf.cast(classes, self._dtype),
        'area': tf.cast(area, self._dtype),
        'is_crowd': is_crowd,
        'best_anchors': tf.cast(best_anchors, self._dtype),
        'width': width,
        'height': height,
        'num_detections': tf.shape(data['groundtruth_classes'])[0],
    }

    if self._fixed_size:
      grid = self._build_grid(
          labels, self._image_w, use_tie_breaker=self._use_tie_breaker)
      labels.update({'grid_form': grid})
    return image, labels

  def _parse_eval_data(self, data):
    """Generates images and labels that are usable for model evaluation.

    Args:
      data: a dict of Tensors produced by the decoder.

    Returns:
      images: the image tensor.
      labels: a dict of Tensors that contains labels.
    """
    shape = tf.shape(data['image'])
    image = data['image'] / 255
    boxes = data['groundtruth_boxes']
    width = shape[0]
    height = shape[1]

    image, boxes = yolo_preprocess_ops.fit_preserve_aspect_ratio(
        image, boxes, width=width, height=height, target_dim=self._image_w)
    boxes = yolo_box_ops.yxyx_to_xcycwh(boxes)

    # Find the best anchor for the ground truth labels to maximize the iou.
    best_anchors = yolo_preprocess_ops.get_best_anchor(
        boxes, self._anchors, width=self._image_w, height=self._image_h)

    boxes = yolo_preprocess_ops.pad_max_instances(
        boxes, self._max_num_instances, 0)
    classes = yolo_preprocess_ops.pad_max_instances(
        data['groundtruth_classes'], self._max_num_instances, 0)
    best_anchors = yolo_preprocess_ops.pad_max_instances(
        best_anchors, self._max_num_instances, 0)
    area = yolo_preprocess_ops.pad_max_instances(
        data['groundtruth_area'], self._max_num_instances, 0)
    is_crowd = yolo_preprocess_ops.pad_max_instances(
        tf.cast(data['groundtruth_is_crowd'], tf.int32),
        self._max_num_instances, 0)

    labels = {
        'source_id': data['source_id'],
        'bbox': tf.cast(boxes, self._dtype),
        'classes': tf.cast(classes, self._dtype),
        'area': tf.cast(area, self._dtype),
        'is_crowd': is_crowd,
        'best_anchors': tf.cast(best_anchors, self._dtype),
        'width': width,
        'height': height,
        'num_detections': tf.shape(data['groundtruth_classes'])[0],
    }
    grid = self._build_grid(
        labels,
        self._image_w,
        batch=False,
        use_tie_breaker=self._use_tie_breaker)
    labels.update({'grid_form': grid})
    return image, labels

  def _postprocess_fn(self, image, label):
    randscale = self._image_w // self._net_down_scale
    if not self._fixed_size:
      do_scale = tf.greater(
          tf.random.uniform([], minval=0, maxval=1, seed=self._seed), 0.5)
      if do_scale:
        # This scales the image to a random multiple of net_down_scale
        # between 320 to 608.
        randscale = tf.random.uniform(
            [],
            minval=self._min_process_size // self._net_down_scale,
            maxval=self._max_process_size // self._net_down_scale,
            seed=self._seed,
            dtype=tf.int32) * self._net_down_scale
    width = randscale
    image = tf.image.resize(image, (width, width))
    grid = self._build_grid(
        label, width, batch=True, use_tie_breaker=self._use_tie_breaker)
    label.update({'grid_form': grid})
    return image, label

  def postprocess_fn(self, is_training=True):
    return self._postprocess_fn if not self._fixed_size and is_training else None
official/vision/beta/projects/yolo/dataloaders/yolo_detection_input_test.py (deleted, 100644 → 0)
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Test case for YOLO detection dataloader configuration definition."""
from absl.testing import parameterized
import dataclasses
import tensorflow as tf

from official.core import config_definitions as cfg
from official.core import input_reader
from official.modeling import hyperparams
from official.vision.beta.dataloaders import tfds_detection_decoders
from official.vision.beta.projects.yolo.dataloaders import yolo_detection_input


@dataclasses.dataclass
class Parser(hyperparams.Config):
  """Dummy configuration for parser."""
  output_size: int = (416, 416)
  num_classes: int = 80
  fixed_size: bool = True
  jitter_im: float = 0.1
  jitter_boxes: float = 0.005
  min_process_size: int = 320
  max_process_size: int = 608
  max_num_instances: int = 200
  random_flip: bool = True
  seed: int = 10
  shuffle_buffer_size: int = 10000


@dataclasses.dataclass
class DataConfig(cfg.DataConfig):
  """Input config for training."""
  input_path: str = ''
  tfds_name: str = 'coco/2017'
  tfds_split: str = 'train'
  global_batch_size: int = 10
  is_training: bool = True
  dtype: str = 'float16'
  decoder = None
  parser: Parser = Parser()
  shuffle_buffer_size: int = 10


class YoloDetectionInputTest(tf.test.TestCase, parameterized.TestCase):

  @parameterized.named_parameters(('training', True), ('testing', False))
  def test_yolo_input(self, is_training):
    params = DataConfig(is_training=is_training)
    decoder = tfds_detection_decoders.MSCOCODecoder()
    anchors = [[12.0, 19.0], [31.0, 46.0], [96.0, 54.0], [46.0, 114.0],
               [133.0, 127.0], [79.0, 225.0], [301.0, 150.0], [172.0, 286.0],
               [348.0, 340.0]]
    masks = {'3': [0, 1, 2], '4': [3, 4, 5], '5': [6, 7, 8]}

    parser = yolo_detection_input.Parser(
        output_size=params.parser.output_size,
        num_classes=params.parser.num_classes,
        fixed_size=params.parser.fixed_size,
        jitter_im=params.parser.jitter_im,
        jitter_boxes=params.parser.jitter_boxes,
        min_process_size=params.parser.min_process_size,
        max_process_size=params.parser.max_process_size,
        max_num_instances=params.parser.max_num_instances,
        random_flip=params.parser.random_flip,
        seed=params.parser.seed,
        anchors=anchors,
        masks=masks)
    postprocess_fn = parser.postprocess_fn(is_training=is_training)

    reader = input_reader.InputReader(
        params,
        dataset_fn=tf.data.TFRecordDataset,
        decoder_fn=decoder.decode,
        parser_fn=parser.parse_fn(params.is_training))
    dataset = reader.read(input_context=None).batch(10).take(1)

    if postprocess_fn:
      image, _ = postprocess_fn(
          *tf.data.experimental.get_single_element(dataset))
    else:
      image, _ = tf.data.experimental.get_single_element(dataset)
    print(image.shape)
    self.assertAllEqual(image.shape, (10, 10, 416, 416, 3))
    self.assertTrue(
        tf.reduce_all(tf.math.logical_and(image >= 0, image <= 1)))


if __name__ == '__main__':
  tf.test.main()
official/vision/beta/projects/yolo/dataloaders/yolo_input.py (new file, mode 100755)
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" Detection Data parser and processing for YOLO."""
import
tensorflow
as
tf
import
numpy
as
np
from
official.vision.beta.projects.yolo.ops
import
preprocessing_ops
from
official.vision.beta.projects.yolo.ops
import
box_ops
as
box_utils
from
official.vision.beta.ops
import
preprocess_ops
from
official.vision.beta.dataloaders
import
parser
,
utils
def
_coco91_to_80
(
classif
,
box
,
areas
,
iscrowds
):
"""Function used to reduce COCO 91 to COCO 80, or to convert from the 2017
foramt to the 2014 format"""
# Vector where index i coralates to the class at index[i].
x
=
[
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
13
,
14
,
15
,
16
,
17
,
18
,
19
,
20
,
21
,
22
,
23
,
24
,
25
,
27
,
28
,
31
,
32
,
33
,
34
,
35
,
36
,
37
,
38
,
39
,
40
,
41
,
42
,
43
,
44
,
46
,
47
,
48
,
49
,
50
,
51
,
52
,
53
,
54
,
55
,
56
,
57
,
58
,
59
,
60
,
61
,
62
,
63
,
64
,
65
,
67
,
70
,
72
,
73
,
74
,
75
,
76
,
77
,
78
,
79
,
80
,
81
,
82
,
84
,
85
,
86
,
87
,
88
,
89
,
90
]
no
=
tf
.
expand_dims
(
tf
.
convert_to_tensor
(
x
),
axis
=
0
)
# Resahpe the classes to in order to build a class mask.
ce
=
tf
.
expand_dims
(
classif
,
axis
=-
1
)
# One hot the classificiations to match the 80 class format.
ind
=
ce
==
tf
.
cast
(
no
,
ce
.
dtype
)
# Select the max values.
co
=
tf
.
reshape
(
tf
.
math
.
argmax
(
tf
.
cast
(
ind
,
tf
.
float32
),
axis
=-
1
),
[
-
1
])
ind
=
tf
.
where
(
tf
.
reduce_any
(
ind
,
axis
=-
1
))
# Gather the valuable instances.
classif
=
tf
.
gather_nd
(
co
,
ind
)
box
=
tf
.
gather_nd
(
box
,
ind
)
areas
=
tf
.
gather_nd
(
areas
,
ind
)
iscrowds
=
tf
.
gather_nd
(
iscrowds
,
ind
)
# Restate the number of viable detections, ideally it should be the same.
num_detections
=
tf
.
shape
(
classif
)[
0
]
return
classif
,
box
,
areas
,
iscrowds
,
num_detections
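A quick illustration of what this remapping does (toy values, not from the commit): class ids present in the 91-class 2017 labeling map to their dense 80-class index, and instances whose id has no 80-class counterpart are dropped.

# Toy sketch (illustrative only): COCO-91 ids 1, 12 and 13 map as follows.
# id 1 -> index 0 (kept), id 13 -> index 11 (kept),
# id 12 ('street sign', absent from the 80-class set) -> dropped.
import tensorflow as tf

cls = tf.constant([1, 12, 13], dtype=tf.int32)
box = tf.zeros([3, 4])
area = tf.zeros([3])
crowd = tf.zeros([3])
c80, b, a, ic, n = _coco91_to_80(cls, box, area, crowd)
print(c80.numpy(), n.numpy())  # -> [ 0 11] 2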
class Parser(parser.Parser):
  """Parses the dataset into the YOLO model format."""

  def __init__(self,
               output_size,
               masks,
               anchors,
               strides,
               anchor_free_limits=None,
               max_num_instances=200,
               area_thresh=0.1,
               aug_rand_hue=1.0,
               aug_rand_saturation=1.0,
               aug_rand_brightness=1.0,
               letter_box=False,
               random_pad=True,
               random_flip=True,
               jitter=0.0,
               aug_scale_min=1.0,
               aug_scale_max=1.0,
               aug_rand_translate=0.0,
               aug_rand_perspective=0.0,
               aug_rand_angle=0.0,
               anchor_t=4.0,
               scale_xy=None,
               best_match_only=False,
               coco91to80=False,
               darknet=False,
               use_tie_breaker=True,
               dtype='float32',
               seed=None):
    """Initializes parameters for parsing annotations in the dataset.

    Args:
      output_size: `Tensor` or `List` for [height, width] of output image.
        The output_size should be divisible by the largest feature stride
        2^max_level.
      masks: `Dict[List[int]]` of values indicating the indexes in the list
        of anchor boxes to use at each prediction level between min_level and
        max_level. Each level must have a list of indexes.
      anchors: `List[List[Union[int, float]]]` values for each anchor box.
      strides: `Dict[int]` for how much the model scales down the images at
        the largest level.
      anchor_free_limits: `List` of the box sizes that will be allowed at
        each FPN level, as is done in the FCOS and YOLOX papers for
        anchor-free box assignment. Anchor-free will perform worse than
        anchor-based, but only slightly.
      max_num_instances: `int` for the number of boxes to compute loss on.
      area_thresh: `float` for the minimum area of a box to allow to pass
        through for optimization.
      aug_rand_hue: `float` indicating the maximum scaling value for hue.
        Hue will be scaled between 1 - value and 1 + value.
      aug_rand_saturation: `float` indicating the maximum scaling value for
        saturation. Saturation will be scaled between 1/value and value.
      aug_rand_brightness: `float` indicating the maximum scaling value for
        brightness. Brightness will be scaled between 1/value and value.
      letter_box: `boolean` indicating whether, upon start of the data
        pipeline and regardless of the preprocessing ops that are used, the
        aspect ratio of the images should be preserved.
      random_pad: `bool` indicating whether to use padding to apply random
        translation; True for darknet YOLO, False for scaled YOLO.
      random_flip: `boolean` indicating whether or not to randomly flip the
        image horizontally.
      jitter: `float` for the maximum change in aspect ratio expected in each
        preprocessing step.
      aug_scale_min: `float` indicating the minimum scaling value for image
        scale jitter.
      aug_scale_max: `float` indicating the maximum scaling value for image
        scale jitter.
      aug_rand_translate: `float` ranging from 0 to 1 indicating the maximum
        amount to randomly translate an image.
      aug_rand_perspective: `float` ranging from 0.000 to 0.001 indicating
        how much to perspective warp the image.
      aug_rand_angle: `float` indicating the maximum angle value for angle.
        The angle will be changed between 0 and value.
      anchor_t: `float` indicating the threshold over which an anchor will be
        considered for prediction. At zero, all the anchors will be used and
        at 1.0 only the best will be used. For anchor thresholds larger than
        1.0 we stop using the IOU for anchor comparison and resort directly
        to comparing the width and height; this is used for the scaled
        models.
      scale_xy: dictionary of `float` values indicating how far each pixel
        can see outside of its containment of 1.0. A value of 1.2 indicates
        there is a 20% extended radius around each pixel within which this
        specific pixel can predict a box center. The center can range from
        0 - value/2 to 1 + value/2. This value is set in the YOLO filter and
        reused here; there should be one value of scale_xy for each level
        from min_level to max_level.
      best_match_only: `boolean` indicating how boxes are selected for
        optimization.
      coco91to80: `bool` for whether to convert COCO 91 to COCO 80 to
        minimize model parameters.
      darknet: `boolean` indicating which data pipeline to use. Setting to
        True swaps the pipeline to output images relative to YOLOv4 and
        older.
      use_tie_breaker: `boolean` indicating whether to use the anchor
        threshold value.
      dtype: `str` indicating the output datatype of the data pipeline,
        selected from {"float32", "float16", "bfloat16"}.
      seed: `int` the seed for random number generation.
    """
    for key in masks.keys():
      # Assert that the width and height are viable.
      assert output_size[1] % strides[str(key)] == 0
      assert output_size[0] % strides[str(key)] == 0

    # Scale of each FPN level.
    self._strides = strides

    # Set the width and height properly and base init.
    self._coco91to80 = coco91to80
    self._image_w = output_size[1]
    self._image_h = output_size[0]

    # Set the anchor boxes and masks for each scale.
    self._anchors = anchors
    self._anchor_free_limits = anchor_free_limits
    self._masks = {
        key: tf.convert_to_tensor(value) for key, value in masks.items()
    }
    self._use_tie_breaker = use_tie_breaker
    self._best_match_only = best_match_only
    self._max_num_instances = max_num_instances

    # Image scaling params.
    self._jitter = 0.0 if jitter is None else jitter
    self._aug_scale_min = aug_scale_min
    self._aug_scale_max = aug_scale_max
    self._aug_rand_translate = aug_rand_translate
    self._aug_rand_perspective = aug_rand_perspective

    # Image spatial distortion.
    self._random_flip = random_flip
    self._letter_box = letter_box
    self._random_pad = random_pad
    self._aug_rand_angle = aug_rand_angle

    # Color space distortion of the image.
    self._aug_rand_saturation = aug_rand_saturation
    self._aug_rand_brightness = aug_rand_brightness
    self._aug_rand_hue = aug_rand_hue

    # Set the per-level values needed for operation.
    self._scale_xy = scale_xy
    self._anchor_t = anchor_t
    self._darknet = darknet
    self._area_thresh = area_thresh

    keys = list(self._masks.keys())
    if self._anchor_free_limits is not None:
      maxim = 2000
      self._scale_up = {key: maxim // self._max_num_instances for key in keys}
      self._anchor_t = -0.01
    elif not self._darknet:
      self._scale_up = {key: 6 - i for i, key in enumerate(keys)}
    else:
      self._scale_up = {key: 1 for key in keys}

    self._seed = seed

    # Set the data type based on input string.
    self._dtype = dtype

  def _get_identity_info(self, image):
    """Gets an identity image op to pad all info vectors; this is needed for
    graph compilation when there is a variable number of info objects in a
    list."""
    shape_ = tf.shape(image)
    val = tf.stack([
        tf.cast(shape_[:2], tf.float32),
        tf.cast(shape_[:2], tf.float32),
        tf.ones_like(tf.cast(shape_[:2], tf.float32)),
        tf.zeros_like(tf.cast(shape_[:2], tf.float32)),
    ])
    return val

  def _jitter_scale(self, image, shape, letter_box, jitter, random_pad,
                    aug_scale_min, aug_scale_max, translate, angle,
                    perspective):
    if (aug_scale_min != 1.0 or aug_scale_max != 1.0):
      crop_only = True
      # Jitter gives you only one info object and resize-and-crop gives you
      # one; if crop only, then there can be 1 from jitter and 1 from crop.
      reps = 1
    else:
      crop_only = False
      reps = 0
    infos = []
    image, info_a, _ = preprocessing_ops.resize_and_jitter_image(
        image,
        shape,
        letter_box=letter_box,
        jitter=jitter,
        crop_only=crop_only,
        random_pad=random_pad,
        seed=self._seed,
    )
    infos.extend(info_a)
    stale_a = self._get_identity_info(image)
    for _ in range(reps):
      infos.append(stale_a)
    image, _, affine = preprocessing_ops.affine_warp_image(
        image,
        shape,
        scale_min=aug_scale_min,
        scale_max=aug_scale_max,
        translate=translate,
        degrees=angle,
        perspective=perspective,
        random_pad=random_pad,
        seed=self._seed,
    )
    return image, infos, affine

  def reorg91to80(self, data):
    """Reduces COCO 91 to COCO 80, i.e. converts from the 2017 format to the
    2014 format."""
    if self._coco91to80:
      (data['groundtruth_classes'], data['groundtruth_boxes'],
       data['groundtruth_area'], data['groundtruth_is_crowd'],
       _) = _coco91_to_80(data['groundtruth_classes'],
                          data['groundtruth_boxes'],
                          data['groundtruth_area'],
                          data['groundtruth_is_crowd'])
    return data

  def _parse_train_data(self, data):
    """Parses data for training and evaluation."""
    # Down size coco 91 to coco 80 if the option is selected.
    data = self.reorg91to80(data)

    # Initialize the shape constants.
    image = data['image']
    boxes = data['groundtruth_boxes']
    classes = data['groundtruth_classes']

    if self._random_flip:
      # Randomly flip the image horizontally.
      image, boxes, _ = preprocess_ops.random_horizontal_flip(
          image, boxes, seed=self._seed)

    if not data['is_mosaic']:
      image, infos, affine = self._jitter_scale(
          image, [self._image_h, self._image_w], self._letter_box,
          self._jitter, self._random_pad, self._aug_scale_min,
          self._aug_scale_max, self._aug_rand_translate,
          self._aug_rand_angle, self._aug_rand_perspective)

      # Clip and clean boxes.
      boxes, inds = preprocessing_ops.apply_infos(
          boxes,
          infos,
          affine=affine,
          shuffle_boxes=False,
          area_thresh=self._area_thresh,
          augment=True,
          seed=self._seed)
      classes = tf.gather(classes, inds)
      info = infos[-1]
    else:
      image = tf.image.resize(
          image, (self._image_h, self._image_w), method='nearest')
      inds = tf.cast(tf.range(0, tf.shape(boxes)[0]), tf.int64)
      info = self._get_identity_info(image)

    # Apply scaling to the hue, saturation and brightness of the image.
    image = tf.cast(image, dtype=self._dtype)
    image = image / 255
    image = preprocessing_ops.image_rand_hsv(
        image,
        self._aug_rand_hue,
        self._aug_rand_saturation,
        self._aug_rand_brightness,
        seed=self._seed,
        darknet=self._darknet)

    # Build the labels in model output format.
    image, labels = self._build_label(
        image,
        boxes,
        classes,
        self._image_w,
        self._image_h,
        info,
        inds,
        data,
        is_training=True)
    return image, labels

  def _parse_eval_data(self, data):
    # Down size coco 91 to coco 80 if the option is selected.
    data = self.reorg91to80(data)

    # Get the image shape constants and cast the image to the selected
    # datatype.
    image = tf.cast(data['image'], dtype=self._dtype)
    boxes = data['groundtruth_boxes']
    classes = data['groundtruth_classes']

    height, width = self._image_h, self._image_w
    image, infos, _ = preprocessing_ops.resize_and_jitter_image(
        image, [height, width],
        letter_box=self._letter_box,
        random_pad=False,
        shiftx=0.5,
        shifty=0.5,
        jitter=0.0)

    # Clip and clean boxes.
    image = image / 255
    boxes, inds = preprocessing_ops.apply_infos(
        boxes, infos, shuffle_boxes=False, area_thresh=0.0, augment=True)
    classes = tf.gather(classes, inds)
    info = infos[-1]

    image, labels = self._build_label(
        image,
        boxes,
        classes,
        width,
        height,
        info,
        inds,
        data,
        is_training=False)
    return image, labels

  def set_shape(self, values, pad_axis=0, pad_value=0, inds=None, scale=1):
    if inds is not None:
      values = tf.gather(values, inds)
    vshape = values.get_shape().as_list()

    if pad_value is not None:
      values = preprocessing_ops.pad_max_instances(
          values,
          self._max_num_instances,
          pad_axis=pad_axis,
          pad_value=pad_value)

    vshape[pad_axis] = self._max_num_instances * scale
    values.set_shape(vshape)
    return values

  def _build_grid(self, raw_true, width, height, use_tie_breaker=False):
    """Private function for building the full scale object and class grid."""
    indexes = {}
    updates = {}
    true_grids = {}

    if self._anchor_free_limits is not None:
      self._anchor_free_limits = [0.0] + self._anchor_free_limits + [np.inf]

    # For each prediction path generate a properly scaled output prediction
    # map.
    for i, key in enumerate(self._masks.keys()):
      if self._anchor_free_limits is not None:
        fpn_limits = self._anchor_free_limits[i:i + 2]
      else:
        fpn_limits = None

      # Build the actual grid as well as the list of boxes and classes AND
      # their index in the prediction grid.
      scale_xy = self._scale_xy[key] if not self._darknet else 1
      (indexes[key], updates[key],
       true_grids[key]) = preprocessing_ops.build_grided_gt_ind(
           raw_true, self._masks[key], width // self._strides[str(key)],
           height // self._strides[str(key)], raw_true['bbox'].dtype,
           scale_xy, self._scale_up[key], use_tie_breaker,
           self._strides[str(key)], fpn_limits=fpn_limits)

      # Set/fix the shapes.
      indexes[key] = self.set_shape(indexes[key], -2, None, None,
                                    self._scale_up[key])
      updates[key] = self.set_shape(updates[key], -2, None, None,
                                    self._scale_up[key])

      # Add all the values to the final dictionary.
      updates[key] = tf.cast(updates[key], dtype=self._dtype)
    return indexes, updates, true_grids

  def _build_label(self,
                   image,
                   gt_boxes,
                   gt_classes,
                   width,
                   height,
                   info,
                   inds,
                   data,
                   is_training=True):
    """Label construction for both the train and eval data."""
    # Set the image shape.
    imshape = image.get_shape().as_list()
    imshape[-1] = 3
    image.set_shape(imshape)

    # Get the best anchors.
    boxes = box_utils.yxyx_to_xcycwh(gt_boxes)
    best_anchors, ious = preprocessing_ops.get_best_anchor(
        boxes,
        self._anchors,
        width=width,
        height=height,
        iou_thresh=self._anchor_t,
        best_match_only=self._best_match_only)

    # Set/fix the boxes shape.
    boxes = self.set_shape(boxes, pad_axis=0, pad_value=0)
    classes = self.set_shape(gt_classes, pad_axis=0, pad_value=-1)
    best_anchors = self.set_shape(best_anchors, pad_axis=0, pad_value=-1)
    ious = self.set_shape(ious, pad_axis=0, pad_value=0)
    area = self.set_shape(
        data['groundtruth_area'], pad_axis=0, pad_value=0, inds=inds)
    is_crowd = self.set_shape(
        data['groundtruth_is_crowd'], pad_axis=0, pad_value=0, inds=inds)

    # Build the dictionary set.
    labels = {
        'source_id': utils.process_source_id(data['source_id']),
        'bbox': tf.cast(boxes, dtype=self._dtype),
        'classes': tf.cast(classes, dtype=self._dtype),
        'best_anchors': tf.cast(best_anchors, dtype=self._dtype),
        'best_iou_match': ious,
    }

    # Build the grid formatted for loss computation in model output format.
    labels['inds'], labels['upds'], labels['true_conf'] = self._build_grid(
        labels, width, height, use_tie_breaker=self._use_tie_breaker)

    # Update the labels dictionary.
    labels['bbox'] = box_utils.xcycwh_to_yxyx(labels['bbox'])

    if not is_training:
      # Sets up groundtruth data for evaluation.
      groundtruths = {
          'source_id': labels['source_id'],
          'height': height,
          'width': width,
          'num_detections': tf.shape(gt_boxes)[0],
          'image_info': info,
          'boxes': gt_boxes,
          'classes': gt_classes,
          'areas': area,
          'is_crowds': tf.cast(is_crowd, tf.int32),
      }
      groundtruths['source_id'] = utils.process_source_id(
          groundtruths['source_id'])
      groundtruths = utils.pad_groundtruths_to_fixed_size(
          groundtruths, self._max_num_instances)
      labels['groundtruths'] = groundtruths
    return image, labels
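For orientation, here is a minimal sketch of how a parser like this is typically wired into an input pipeline in this codebase, mirroring the pattern in the deleted yolo_detection_input_test.py above. The concrete anchors, masks, strides, and scale_xy values are illustrative assumptions:

# Hypothetical wiring sketch, not part of the commit.
masks = {'3': [0, 1, 2], '4': [3, 4, 5], '5': [6, 7, 8]}
anchors = [[12, 19], [31, 46], [96, 54], [46, 114], [133, 127],
           [79, 225], [301, 150], [172, 286], [348, 340]]
strides = {'3': 8, '4': 16, '5': 32}
scale_xy = {'3': 2.0, '4': 1.75, '5': 1.5}

yolo_parser = Parser(
    output_size=[416, 416], masks=masks, anchors=anchors,
    strides=strides, scale_xy=scale_xy)
# parse_fn = yolo_parser.parse_fn(is_training=True)
# dataset = dataset.map(parse_fn)  # after decoding and (optionally) Mosaic.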
official/vision/beta/projects/yolo/ops/loss_utils.py (modified)
@@ -535,8 +535,13 @@ def _darknet_new_coord_boxes(encoded_boxes, width, height, anchor_grid,
   return (scaler, scaled_box, pred_box), delta
 
 
-def _anchor_free_scale_boxes(encoded_boxes, width, height, stride, grid_points,
-                             scale_xy):
+def _anchor_free_scale_boxes(encoded_boxes,
+                             width,
+                             height,
+                             stride,
+                             grid_points,
+                             scale_xy,
+                             darknet=False):
   """Decode models boxes using FPN stride under anchor free conditions."""
   # split the boxes
   pred_xy = encoded_boxes[..., 0:2]
@@ -546,21 +551,30 @@ def _anchor_free_scale_boxes(encoded_boxes, width, height, stride, grid_points,
   scaler = tf.convert_to_tensor([height, width, height, width])
   scale_xy = tf.cast(scale_xy, encoded_boxes.dtype)
 
+  scale_down = lambda x, y: x / y
+  scale_up = lambda x, y: x * y
+  if darknet:
+    scale_down = tf.grad_pass_through(scale_down)
+    scale_up = tf.grad_pass_through(scale_up)
+
   # scale the centers and find the offset of each box relative to
   # their center pixel
   pred_xy = pred_xy * scale_xy - 0.5 * (scale_xy - 1)
 
   # scale the offsets and add them to the grid points or a tensor that is
   # the relative location of each pixel
-  box_xy = (grid_points + pred_xy) * stride
+  box_xy = (grid_points + pred_xy)
 
   # scale the width and height of the predictions and correlate them
   # to anchor boxes
-  box_wh = tf.math.exp(pred_wh) * stride
+  box_wh = tf.math.exp(pred_wh)
 
   # build the final predicted box
   scaled_box = tf.concat([box_xy, box_wh], axis=-1)
-  pred_box = scaled_box / scaler
+
+  # properly scaling boxes gradients
+  scaled_box = scale_up(scaled_box, stride)
+  pred_box = scale_down(scaled_box, (scaler * stride))
 
   return (scaler, scaled_box, pred_box)
@@ -610,7 +624,7 @@ def get_predicted_box(width,
   if box_type == 'anchor_free':
     (scaler, scaled_box, pred_box) = _anchor_free_scale_boxes(
-        encoded_boxes, width, height, stride, grid_points, scale_xy)
+        encoded_boxes, width, height, stride, grid_points, scale_xy,
+        darknet=darknet)
   elif darknet:
     # pylint:disable=unbalanced-tuple-unpacking
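The key change above routes the box scaling through tf.grad_pass_through when running in darknet mode, so the forward pass still scales by the stride while gradients skip the scaling op. A small standalone sketch of that behavior (illustrative, not from the commit):

# Sketch of tf.grad_pass_through: the forward pass computes x * 32, but the
# gradient is taken as if the function were the identity.
import tensorflow as tf

scale_up = tf.grad_pass_through(lambda x: x * 32.0)
x = tf.Variable(2.0)
with tf.GradientTape() as tape:
  y = scale_up(x)
print(y.numpy())                    # -> 64.0 (scaling applied in forward)
print(tape.gradient(y, x).numpy())  # -> 1.0 (identity gradient, not 32)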
official/vision/beta/projects/yolo/ops/mosaic.py (new file, mode 100755)
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Mosaic data aug for YOLO."""
import random

import tensorflow as tf
import tensorflow_addons as tfa

from official.vision.beta.projects.yolo.ops import preprocessing_ops
from official.vision.beta.ops import box_ops


class Mosaic(object):
  """Stitches together sets of 4 images to generate samples with more boxes."""

  def __init__(self,
               output_size,
               mosaic_frequency=1.0,
               mixup_frequency=0.0,
               letter_box=True,
               jitter=0.0,
               mosaic_crop_mode='scale',
               mosaic_center=0.25,
               aug_scale_min=1.0,
               aug_scale_max=1.0,
               aug_rand_angle=0.0,
               aug_rand_perspective=0.0,
               aug_rand_translate=0.0,
               random_pad=False,
               area_thresh=0.1,
               seed=None):
    """Initializes parameters for mosaic.

    Args:
      output_size: `Tensor` or `List` for [height, width] of output image.
      mosaic_frequency: `float` indicating how often to apply mosaic.
      mixup_frequency: `float` indicating how often to apply mixup.
      letter_box: `boolean` indicating whether, upon start of the data
        pipeline and regardless of the preprocessing ops that are used, the
        aspect ratio of the images should be preserved.
      jitter: `float` for the maximum change in aspect ratio expected in each
        preprocessing step.
      mosaic_crop_mode: `str` the type of mosaic to apply. The options are
        {crop, scale, None}; crop will construct a mosaic by slicing images
        together, scale will create a mosaic by concatenating and shifting
        the image, and None will default to scale and apply no post
        processing to the created mosaic.
      mosaic_center: `float` indicating how much to randomly deviate from the
        center of the image when creating a mosaic.
      aug_scale_min: `float` indicating the minimum scaling value for image
        scale jitter.
      aug_scale_max: `float` indicating the maximum scaling value for image
        scale jitter.
      aug_rand_angle: `float` indicating the maximum angle value for angle.
        The angle will be changed between 0 and value.
      aug_rand_perspective: `float` ranging from 0.000 to 0.001 indicating
        how much to perspective warp the image.
      aug_rand_translate: `float` ranging from 0 to 1 indicating the maximum
        amount to randomly translate an image.
      random_pad: `bool` indicating whether to use padding to apply random
        translation; True for darknet YOLO, False for scaled YOLO.
      area_thresh: `float` for the minimum area of a box to allow to pass
        through for optimization.
      seed: `int` the seed for random number generation.
    """
    self._output_size = output_size
    self._area_thresh = area_thresh

    self._mosaic_frequency = mosaic_frequency
    self._mixup_frequency = mixup_frequency
    self._letter_box = letter_box
    self._random_crop = jitter
    self._mosaic_crop_mode = mosaic_crop_mode
    self._mosaic_center = mosaic_center

    self._aug_scale_min = aug_scale_min
    self._aug_scale_max = aug_scale_max
    self._random_pad = random_pad
    self._aug_rand_translate = aug_rand_translate
    self._aug_rand_angle = aug_rand_angle
    self._aug_rand_perspective = aug_rand_perspective

    self._deterministic = seed is not None
    self._seed = seed if seed is not None else random.randint(0, 2**30)

  def _generate_cut(self):
    """Generates a random center to use for slicing and patching the images."""
    if self._mosaic_crop_mode == 'crop':
      min_offset = self._mosaic_center
      cut_x = preprocessing_ops.rand_uniform_strong(
          self._output_size[1] * min_offset,
          self._output_size[1] * (1 - min_offset),
          seed=self._seed)
      cut_y = preprocessing_ops.rand_uniform_strong(
          self._output_size[0] * min_offset,
          self._output_size[0] * (1 - min_offset),
          seed=self._seed)
      cut = [cut_x, cut_y]
      ishape = tf.convert_to_tensor(
          [self._output_size[1], self._output_size[0], 3])
    else:
      cut = None
      ishape = tf.convert_to_tensor(
          [self._output_size[1] * 2, self._output_size[0] * 2, 3])
    return cut, ishape

  def _augment_image(self,
                     image,
                     boxes,
                     classes,
                     is_crowd,
                     area,
                     xs=0.0,
                     ys=0.0,
                     cut=None):
    """Process a single image prior to the application of patching."""
    letter_box = self._letter_box

    image, infos, crop_points = preprocessing_ops.resize_and_jitter_image(
        image, [self._output_size[0], self._output_size[1]],
        random_pad=False,
        letter_box=letter_box,
        jitter=self._random_crop,
        shiftx=xs,
        shifty=ys,
        cut=cut,
        seed=self._seed)

    # Clip and clean boxes.
    boxes, inds = preprocessing_ops.apply_infos(
        boxes,
        infos,
        area_thresh=self._area_thresh,
        shuffle_boxes=False,
        augment=True,
        seed=self._seed)
    classes = tf.gather(classes, inds)
    is_crowd = tf.gather(is_crowd, inds)
    area = tf.gather(area, inds)
    return image, boxes, classes, is_crowd, area, crop_points

  def _mosaic_crop_image(self, image, boxes, classes, is_crowd, area):
    """Process a patched image in preparation for final output."""
    if self._mosaic_crop_mode != 'crop':
      shape = tf.cast(preprocessing_ops.get_image_shape(image), tf.float32)
      center = shape * self._mosaic_center

      # Shift the center of the image by applying a translation to the whole
      # image.
      ch = tf.math.round(
          preprocessing_ops.rand_uniform_strong(
              -center[0], center[0], seed=self._seed))
      cw = tf.math.round(
          preprocessing_ops.rand_uniform_strong(
              -center[1], center[1], seed=self._seed))

      # Clip the boxes to those within the image.
      image = tfa.image.translate(
          image, [cw, ch], fill_value=preprocessing_ops.get_pad_value())
      boxes = box_ops.denormalize_boxes(boxes, shape[:2])
      boxes = boxes + tf.cast([ch, cw, ch, cw], boxes.dtype)
      boxes = box_ops.clip_boxes(boxes, shape[:2])
      boxes = box_ops.normalize_boxes(boxes, shape[:2])

    # Warp and scale the fully stitched sample.
    image, _, affine = preprocessing_ops.affine_warp_image(
        image, [self._output_size[0], self._output_size[1]],
        scale_min=self._aug_scale_min,
        scale_max=self._aug_scale_max,
        translate=self._aug_rand_translate,
        degrees=self._aug_rand_angle,
        perspective=self._aug_rand_perspective,
        random_pad=self._random_pad,
        seed=self._seed)
    height, width = self._output_size[0], self._output_size[1]
    image = tf.image.resize(image, (height, width))

    # Clip and clean boxes.
    boxes, inds = preprocessing_ops.apply_infos(
        boxes,
        None,
        affine=affine,
        area_thresh=self._area_thresh,
        augment=True,
        seed=self._seed)
    classes = tf.gather(classes, inds)
    is_crowd = tf.gather(is_crowd, inds)
    area = tf.gather(area, inds)
    return image, boxes, classes, is_crowd, area, area

  def scale_boxes(self, patch, ishape, boxes, classes, xs, ys):
    """Scale and translate the boxes for each image prior to patching."""
    xs = tf.cast(xs, boxes.dtype)
    ys = tf.cast(ys, boxes.dtype)
    pshape = tf.cast(tf.shape(patch), boxes.dtype)
    ishape = tf.cast(ishape, boxes.dtype)
    translate = tf.cast((ishape - pshape), boxes.dtype)

    boxes = box_ops.denormalize_boxes(boxes, pshape[:2])
    boxes = boxes + tf.cast([
        translate[0] * ys, translate[1] * xs, translate[0] * ys,
        translate[1] * xs
    ], boxes.dtype)
    boxes = box_ops.normalize_boxes(boxes, ishape[:2])
    return boxes, classes

  # mosaic full frequency doubles model speed
  def _process_image(self, sample, shiftx, shifty, cut, ishape):
    """Process and augment each image."""
    (image, boxes, classes, is_crowd, area,
     crop_points) = self._augment_image(
         sample['image'], sample['groundtruth_boxes'],
         sample['groundtruth_classes'], sample['groundtruth_is_crowd'],
         sample['groundtruth_area'], shiftx, shifty, cut)

    if cut is None and ishape is None:
      cut, ishape = self._generate_cut()

    (boxes, classes) = self.scale_boxes(image, ishape, boxes, classes,
                                        1 - shiftx, 1 - shifty)

    sample['image'] = image
    sample['groundtruth_boxes'] = boxes
    sample['groundtruth_classes'] = classes
    sample['groundtruth_is_crowd'] = is_crowd
    sample['groundtruth_area'] = area
    sample['cut'] = cut
    sample['shiftx'] = shiftx
    sample['shifty'] = shifty
    sample['crop_points'] = crop_points
    return sample

  def _patch2(self, one, two):
    """Stitch together 2 images in totality."""
    sample = one
    sample['image'] = tf.concat([one['image'], two['image']], axis=-2)
    sample['groundtruth_boxes'] = tf.concat(
        [one['groundtruth_boxes'], two['groundtruth_boxes']], axis=0)
    sample['groundtruth_classes'] = tf.concat(
        [one['groundtruth_classes'], two['groundtruth_classes']], axis=0)
    sample['groundtruth_is_crowd'] = tf.concat(
        [one['groundtruth_is_crowd'], two['groundtruth_is_crowd']], axis=0)
    sample['groundtruth_area'] = tf.concat(
        [one['groundtruth_area'], two['groundtruth_area']], axis=0)
    return sample

  def _patch(self, one, two):
    """Build the full 4 patch of images from sets of 2 images."""
    image = tf.concat([one['image'], two['image']], axis=-3)
    boxes = tf.concat([one['groundtruth_boxes'], two['groundtruth_boxes']],
                      axis=0)
    classes = tf.concat(
        [one['groundtruth_classes'], two['groundtruth_classes']], axis=0)
    is_crowd = tf.concat(
        [one['groundtruth_is_crowd'], two['groundtruth_is_crowd']], axis=0)
    area = tf.concat([one['groundtruth_area'], two['groundtruth_area']],
                     axis=0)

    if self._mosaic_crop_mode is not None:
      image, boxes, classes, is_crowd, area, _ = self._mosaic_crop_image(
          image, boxes, classes, is_crowd, area)

    sample = one
    height, width = preprocessing_ops.get_image_shape(image)

    sample['image'] = tf.cast(image, tf.uint8)
    sample['groundtruth_boxes'] = boxes
    sample['groundtruth_area'] = area
    sample['groundtruth_classes'] = tf.cast(
        classes, sample['groundtruth_classes'].dtype)
    sample['groundtruth_is_crowd'] = tf.cast(is_crowd, tf.bool)
    sample['width'] = tf.cast(width, sample['width'].dtype)
    sample['height'] = tf.cast(height, sample['height'].dtype)
    sample['num_detections'] = tf.shape(sample['groundtruth_boxes'])[1]
    sample['is_mosaic'] = tf.cast(1.0, tf.bool)

    del sample['shiftx'], sample['shifty'], sample['crop_points'], sample['cut']
    return sample

  def _mosaic(self, one, two, three, four):
    """Stitch together 4 images to build a mosaic."""
    if self._mosaic_frequency >= 1.0:
      domo = 1.0
    else:
      domo = preprocessing_ops.rand_uniform_strong(
          0.0, 1.0, dtype=tf.float32, seed=self._seed)

    noop = one.copy()
    if domo >= (1 - self._mosaic_frequency):
      cut, ishape = self._generate_cut()
      one = self._process_image(one, 1.0, 1.0, cut, ishape)
      two = self._process_image(two, 0.0, 1.0, cut, ishape)
      three = self._process_image(three, 1.0, 0.0, cut, ishape)
      four = self._process_image(four, 0.0, 0.0, cut, ishape)
      patch1 = self._patch2(one, two)
      patch2 = self._patch2(three, four)
      stitched = self._patch(patch1, patch2)
      return stitched
    else:
      return self._add_param(noop)

  def _mixup(self, one, two):
    """Blend together 2 images for the mixup data augmentation."""
    if self._mixup_frequency >= 1.0:
      domo = 1.0
    else:
      domo = preprocessing_ops.rand_uniform_strong(
          0.0, 1.0, dtype=tf.float32, seed=self._seed)

    noop = one.copy()
    if domo >= (1 - self._mixup_frequency):
      sample = one
      otype = one['image'].dtype
      r = preprocessing_ops.rand_uniform_strong(
          0.4, 0.6, tf.float32, seed=self._seed)
      sample['image'] = (
          r * tf.cast(one['image'], tf.float32) +
          (1 - r) * tf.cast(two['image'], tf.float32))
      sample['image'] = tf.cast(sample['image'], otype)
      sample['groundtruth_boxes'] = tf.concat(
          [one['groundtruth_boxes'], two['groundtruth_boxes']], axis=0)
      sample['groundtruth_classes'] = tf.concat(
          [one['groundtruth_classes'], two['groundtruth_classes']], axis=0)
      sample['groundtruth_is_crowd'] = tf.concat(
          [one['groundtruth_is_crowd'], two['groundtruth_is_crowd']], axis=0)
      sample['groundtruth_area'] = tf.concat(
          [one['groundtruth_area'], two['groundtruth_area']], axis=0)
      return sample
    else:
      return self._add_param(noop)

  def _add_param(self, sample):
    """Add parameters to handle skipped images."""
    sample['is_mosaic'] = tf.cast(0.0, tf.bool)
    sample['num_detections'] = tf.shape(sample['groundtruth_boxes'])[0]
    return sample

  def _apply(self, dataset):
    """Apply mosaic to an input dataset."""
    determ = self._deterministic
    one = dataset.shuffle(100, seed=self._seed, reshuffle_each_iteration=True)
    two = dataset.shuffle(
        100, seed=self._seed + 1, reshuffle_each_iteration=True)
    three = dataset.shuffle(
        100, seed=self._seed + 2, reshuffle_each_iteration=True)
    four = dataset.shuffle(
        100, seed=self._seed + 3, reshuffle_each_iteration=True)

    dataset = tf.data.Dataset.zip((one, two, three, four))
    dataset = dataset.map(
        self._mosaic,
        num_parallel_calls=tf.data.AUTOTUNE,
        deterministic=determ)

    if self._mixup_frequency > 0:
      one = dataset.shuffle(
          100, seed=self._seed + 4, reshuffle_each_iteration=True)
      two = dataset.shuffle(
          100, seed=self._seed + 5, reshuffle_each_iteration=True)
      dataset = tf.data.Dataset.zip((one, two))
      dataset = dataset.map(
          self._mixup,
          num_parallel_calls=tf.data.AUTOTUNE,
          deterministic=determ)
    return dataset

  def _skip(self, dataset):
    """Skip samples in a dataset."""
    determ = self._deterministic
    return dataset.map(
        self._add_param,
        num_parallel_calls=tf.data.AUTOTUNE,
        deterministic=determ)

  def mosaic_fn(self, is_training=True):
    """Determine which function to apply based on whether model is training."""
    if is_training and self._mosaic_frequency > 0.0:
      return self._apply
    else:
      return self._skip
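A minimal sketch of how Mosaic plugs into a tf.data pipeline; the decoded-sample dataset is assumed here, and in practice it comes from the detection decoder upstream of the YOLO parser:

# Hypothetical usage sketch, not part of the commit.
mosaic = Mosaic(output_size=[640, 640], mosaic_frequency=0.75,
                mixup_frequency=0.2, mosaic_crop_mode='scale')
mosaic_fn = mosaic.mosaic_fn(is_training=True)
# dataset = mosaic_fn(decoded_dataset)  # decoded_dataset yields dicts with
# 'image', 'groundtruth_boxes', 'groundtruth_classes', etc.; the output adds
# an 'is_mosaic' flag consumed by Parser._parse_train_data above.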
official/vision/beta/projects/yolo/ops/preprocess_ops.py (deleted, 100644 → 0)

(Diff collapsed; contents not shown.)
official/vision/beta/projects/yolo/ops/preprocess_ops_test.py (deleted, 100644 → 0)
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""preprocess_ops tests."""
from absl.testing import parameterized
import numpy as np
import tensorflow as tf

from official.vision.beta.projects.yolo.ops import preprocess_ops


class PreprocessOpsTest(parameterized.TestCase, tf.test.TestCase):

  @parameterized.parameters((416, 416, 5, 300, 300), (100, 200, 6, 50, 50))
  def test_resize_crop_filter(self, default_width, default_height, num_boxes,
                              target_width, target_height):
    image = tf.convert_to_tensor(
        np.random.rand(default_width, default_height, 3))
    boxes = tf.convert_to_tensor(np.random.rand(num_boxes, 4))
    resized_image, resized_boxes = preprocess_ops.resize_crop_filter(
        image, boxes, default_width, default_height, target_width,
        target_height)
    resized_image_shape = tf.shape(resized_image)
    resized_boxes_shape = tf.shape(resized_boxes)
    self.assertAllEqual([default_height, default_width, 3],
                        resized_image_shape.numpy())
    self.assertAllEqual([num_boxes, 4], resized_boxes_shape.numpy())

  @parameterized.parameters((7, 7., 5.), (25, 35., 45.))
  def test_translate_boxes(self, num_boxes, translate_x, translate_y):
    boxes = tf.convert_to_tensor(np.random.rand(num_boxes, 4))
    translated_boxes = preprocess_ops.translate_boxes(
        boxes, translate_x, translate_y)
    translated_boxes_shape = tf.shape(translated_boxes)
    self.assertAllEqual([num_boxes, 4], translated_boxes_shape.numpy())

  @parameterized.parameters((100, 200, 75., 25.), (400, 600, 25., 75.))
  def test_translate_image(self, image_height, image_width, translate_x,
                           translate_y):
    image = tf.convert_to_tensor(np.random.rand(image_height, image_width, 4))
    translated_image = preprocess_ops.translate_image(
        image, translate_x, translate_y)
    translated_image_shape = tf.shape(translated_image)
    self.assertAllEqual([image_height, image_width, 4],
                        translated_image_shape.numpy())

  @parameterized.parameters(([1, 2], 20, 0), ([13, 2, 4], 15, 0))
  def test_pad_max_instances(self, input_shape, instances, pad_axis):
    expected_output_shape = input_shape
    expected_output_shape[pad_axis] = instances
    output = preprocess_ops.pad_max_instances(
        np.ones(input_shape), instances, pad_axis=pad_axis)
    self.assertAllEqual(expected_output_shape, tf.shape(output).numpy())


if __name__ == '__main__':
  tf.test.main()
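For intuition, pad_max_instances (exercised by the last test above) pads or truncates a tensor along one axis to a fixed instance count so batches have static shapes. A hedged sketch of the idea follows; this is an illustrative re-implementation under stated assumptions, not the library's implementation, which may differ in details such as the default pad_value:

# Illustrative re-implementation of the pad/truncate idea (assumption: the
# real op in preprocess_ops/preprocessing_ops may differ in details).
import tensorflow as tf

def pad_max_instances_sketch(value, instances, pad_value=0, pad_axis=0):
  """Pads (or truncates) `value` along `pad_axis` to exactly `instances`."""
  value = tf.convert_to_tensor(value)
  rank = len(value.get_shape().as_list())
  num = tf.shape(value)[pad_axis]
  # Truncate if there are too many instances.
  size = tf.tensor_scatter_nd_update(
      tf.shape(value), [[pad_axis]], [tf.minimum(num, instances)])
  value = tf.slice(value, [0] * rank, size)
  # Pad the remainder with pad_value.
  pad_len = tf.maximum(instances - tf.shape(value)[pad_axis], 0)
  paddings = [[0, 0]] * rank
  paddings[pad_axis] = [0, pad_len]
  return tf.pad(value, paddings, constant_values=pad_value)

# e.g. boxes of shape [3, 4] -> [20, 4], zero padded:
# pad_max_instances_sketch(tf.ones([3, 4]), 20).shape -> (20, 4)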
official/vision/beta/projects/yolo/ops/preprocessing_ops.py (new file, mode 100755)

(Diff collapsed; contents not shown.)