dcuai / dlexamples · Commits

Commit 0016b0a7, authored Jan 11, 2023 by sunxx1

    Merge branch 'dtk22.04' into 'main'

    Dtk22.04

    See merge request dcutoolkit/deeplearing/dlexamples_new!49

Parents: 17bc28d5, 7a382d5d

Changes: 335. Showing 20 changed files with 2512 additions and 0 deletions (+2512, -0).
Changed files:

Keras/keras-cv/keras_cv/datasets/pascal_voc/__init__.py  +14 -0
Keras/keras-cv/keras_cv/datasets/pascal_voc/load.py  +104 -0
Keras/keras-cv/keras_cv/datasets/pascal_voc/segmentation.py  +353 -0
Keras/keras-cv/keras_cv/datasets/pascal_voc/segmentation_test.py  +287 -0
Keras/keras-cv/keras_cv/datasets/pascal_voc/test_data/VOC_mini.tar  +0 -0
Keras/keras-cv/keras_cv/keypoint/__init__.py  +18 -0
Keras/keras-cv/keras_cv/keypoint/converters.py  +195 -0
Keras/keras-cv/keras_cv/keypoint/converters_test.py  +149 -0
Keras/keras-cv/keras_cv/keypoint/formats.py  +63 -0
Keras/keras-cv/keras_cv/keypoint/utils.py  +46 -0
Keras/keras-cv/keras_cv/keypoint/utils_test.py  +51 -0
Keras/keras-cv/keras_cv/layers/__init__.py  +75 -0
Keras/keras-cv/keras_cv/layers/feature_pyramid.py  +205 -0
Keras/keras-cv/keras_cv/layers/feature_pyramid_test.py  +113 -0
Keras/keras-cv/keras_cv/layers/object_detection/__init__.py  +13 -0
Keras/keras-cv/keras_cv/layers/object_detection/anchor_generator.py  +279 -0
Keras/keras-cv/keras_cv/layers/object_detection/anchor_generator_test.py  +172 -0
Keras/keras-cv/keras_cv/layers/object_detection/nms_prediction_decoder.py  +135 -0
Keras/keras-cv/keras_cv/layers/object_detection/nms_prediction_decoder_test.py  +50 -0
Keras/keras-cv/keras_cv/layers/object_detection/non_max_suppression.py  +190 -0

Keras/keras-cv/keras_cv/datasets/pascal_voc/__init__.py (new file, mode 100644)

# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from keras_cv.datasets.pascal_voc.load import load

Keras/keras-cv/keras_cv/datasets/pascal_voc/load.py (new file, mode 100644)

# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow import keras

from keras_cv import bounding_box
def curry_map_function(bounding_box_format, img_size):
    """Mapping function to create batched image and bbox coordinates"""

    if img_size is not None:
        resizing = keras.layers.Resizing(
            height=img_size[0], width=img_size[1], crop_to_aspect_ratio=False
        )

    # TODO(lukewood): update `keras.layers.Resizing` to support bounding boxes.
    def apply(inputs):
        # Support image size none.
        if img_size is not None:
            inputs["image"] = resizing(inputs["image"])
        inputs["objects"]["bbox"] = bounding_box.convert_format(
            inputs["objects"]["bbox"],
            images=inputs["image"],
            source="rel_yxyx",
            target=bounding_box_format,
        )
        bounding_boxes = inputs["objects"]["bbox"]
        labels = tf.cast(inputs["objects"]["label"], tf.float32)
        labels = tf.expand_dims(labels, axis=-1)
        bounding_boxes = tf.concat([bounding_boxes, labels], axis=-1)
        return {"images": inputs["image"], "bounding_boxes": bounding_boxes}

    return apply
def load(
    split,
    bounding_box_format,
    batch_size=None,
    shuffle_buffer=None,
    shuffle_files=True,
    img_size=None,
):
    """Loads the PascalVOC 2007 dataset.

    Usage:
    ```python
    dataset, ds_info = keras_cv.datasets.pascal_voc.load(
        split="train", bounding_box_format="xywh", batch_size=9
    )
    ```

    Args:
        split: the split string passed to the `tensorflow_datasets.load()` call. Should
            be one of "train", "test", or "validation".
        bounding_box_format: the keras_cv bounding box format to load the boxes into.
            Refer to [the keras.io docs](https://keras.io/api/keras_cv/bounding_box/formats/)
            for the list of supported bounding box formats.
        batch_size: (Optional) how many instances to include in batches after loading. If
            not provided, no batching will occur.
        shuffle_buffer: (Optional) the size of the buffer to use in shuffling.
        shuffle_files: (Optional) whether or not to shuffle files, defaults to True.
        img_size: (Optional) size to resize the images to. By default, images are not
            resized, and `tf.RaggedTensor` batches are produced if batching occurs.

    Returns:
        tf.data.Dataset containing PascalVOC. Each entry is a dictionary containing
        keys {"images": images, "bounding_boxes": bounding_boxes} where images is a
        Tensor of shape [batch, H, W, 3] and bounding_boxes is a `tf.RaggedTensor` of
        shape [batch, None, 5].
    """
    dataset, dataset_info = tfds.load(
        "voc/2007", split=split, shuffle_files=shuffle_files, with_info=True
    )
    dataset = dataset.map(
        curry_map_function(bounding_box_format=bounding_box_format, img_size=img_size),
        num_parallel_calls=tf.data.AUTOTUNE,
    )
    if shuffle_buffer:
        dataset = dataset.shuffle(shuffle_buffer, reshuffle_each_iteration=True)
    if batch_size is not None:
        dataset = dataset.apply(
            tf.data.experimental.dense_to_ragged_batch(batch_size=batch_size)
        )
    return dataset, dataset_info
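
For orientation, here is a minimal sketch of how this loader might be driven end to end. It assumes `keras_cv` is installed and that TFDS can fetch `voc/2007`; the shapes in the comments are illustrative:

```python
import keras_cv

# Each box row is [x, y, w, h, label] in the requested "xywh" format.
dataset, ds_info = keras_cv.datasets.pascal_voc.load(
    split="train", bounding_box_format="xywh", batch_size=9
)
for example in dataset.take(1):
    images = example["images"]          # ragged batch: images keep native sizes
    boxes = example["bounding_boxes"]   # tf.RaggedTensor of shape [9, None, 5]
    print(images.shape, boxes.shape)
```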

Keras/keras-cv/keras_cv/datasets/pascal_voc/segmentation.py (new file, mode 100644)

# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Data loader for Pascal VOC 2012 segmentation dataset.
The image classification and object detection (bounding box) data is covered by existing
TF datasets in https://www.tensorflow.org/datasets/catalog/voc. The segmentation data (
both class segmentation and instance segmentation) are included in the VOC 2012, but not
offered by TF-DS yet. This module is trying to fill this gap while TFDS team can
address this feature (b/252870855, https://github.com/tensorflow/datasets/issues/27 and
https://github.com/tensorflow/datasets/pull/1198).
The schema design is similar to the existing design of TFDS, but trimmed to fit the need
of Keras CV models.
This module contains following functionalities:
1. Download and unpack original data from Pascal VOC.
2. Reprocess and build up dataset that include image, class label, object bounding boxes,
class and instance segmentation masks.
3. Produce tfrecords from the dataset.
4. Load existing tfrecords from result in 3.
"""
import logging
import multiprocessing
import os.path
import tarfile
import xml.etree.ElementTree  # imported explicitly so xml.etree.ElementTree resolves

import tensorflow as tf

DATA_URL = "http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar"
# Note that this list doesn't contain the background class. In the classification use
# case, the label is 0-based (aeroplane -> 0), whereas in the segmentation use case, 0 is
# reserved for the background, so aeroplane maps to 1.
CLASSES = [
    "aeroplane",
    "bicycle",
    "bird",
    "boat",
    "bottle",
    "bus",
    "car",
    "cat",
    "chair",
    "cow",
    "diningtable",
    "dog",
    "horse",
    "motorbike",
    "person",
    "pottedplant",
    "sheep",
    "sofa",
    "train",
    "tvmonitor",
]
# This is used to map from the string class name to the class index.
CLASS_TO_INDEX = {name: index for index, name in enumerate(CLASSES)}
# For the mask data in the PNG file, the encoded raw pixel value needs to be converted
# to the proper class index. In the following map, [0, 0, 0] will be converted to 0,
# [128, 0, 0] will be converted to 1, and so on. Also note that the mask class is
# 1-based since class 0 is reserved for the background. The [128, 0, 0] (class 1) is
# mapped to `aeroplane`.
VOC_PNG_COLOR_VALUE = [
    [0, 0, 0],
    [128, 0, 0],
    [0, 128, 0],
    [128, 128, 0],
    [0, 0, 128],
    [128, 0, 128],
    [0, 128, 128],
    [128, 128, 128],
    [64, 0, 0],
    [192, 0, 0],
    [64, 128, 0],
    [192, 128, 0],
    [64, 0, 128],
    [192, 0, 128],
    [64, 128, 128],
    [192, 128, 128],
    [0, 64, 0],
    [128, 64, 0],
    [0, 192, 0],
    [128, 192, 0],
    [0, 64, 128],
]
# Will be populated by _maybe_populate_voc_color_mapping() below.
VOC_PNG_COLOR_MAPPING = None
def _maybe_populate_voc_color_mapping():
    # Lazy creation of VOC_PNG_COLOR_MAPPING, which could take 64M memory.
    global VOC_PNG_COLOR_MAPPING
    if VOC_PNG_COLOR_MAPPING is None:
        VOC_PNG_COLOR_MAPPING = [0] * (256**3)
        for i, colormap in enumerate(VOC_PNG_COLOR_VALUE):
            VOC_PNG_COLOR_MAPPING[
                (colormap[0] * 256 + colormap[1]) * 256 + colormap[2]
            ] = i
        # There is a special mapping with [224, 224, 192] -> 255
        VOC_PNG_COLOR_MAPPING[224 * 256 * 256 + 224 * 256 + 192] = 255
        VOC_PNG_COLOR_MAPPING = tf.constant(VOC_PNG_COLOR_MAPPING)
    return VOC_PNG_COLOR_MAPPING
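
To make the packing explicit: each RGB triple is flattened into a single integer key, `(r * 256 + g) * 256 + b`, and the lookup table maps that key back to a class index. A small sanity check of the scheme (a hypothetical snippet, run after importing this module as `segmentation`):

```python
from keras_cv.datasets.pascal_voc import segmentation

mapping = segmentation._maybe_populate_voc_color_mapping()

# [128, 0, 0] is the palette color for class 1 ("aeroplane"; classes are
# 1-based here because 0 is reserved for the background).
assert int(mapping[(128 * 256 + 0) * 256 + 0]) == 1

# The special boundary color [224, 224, 192] maps to the ignore value 255.
assert int(mapping[(224 * 256 + 224) * 256 + 192]) == 255
```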
def _download_pascal_voc_2012(data_url, local_dir_path=None, override_extract=False):
    """Fetch the original Pascal VOC 2012 from remote URL.

    Args:
        data_url: string, the URL for the Pascal VOC data, should be in a tar package.
        local_dir_path: string, the local directory path to save the data.

    Returns:
        the path to the folder of extracted Pascal VOC data.
    """
    if not local_dir_path:
        fname = "pascal_voc_2012/data.tar"
    else:
        # Make sure the directory exists
        if not os.path.exists(local_dir_path):
            os.makedirs(local_dir_path, exist_ok=True)
        fname = os.path.join(local_dir_path, "data.tar")
    data_file_path = tf.keras.utils.get_file(fname=fname, origin=data_url)
    logging.info("Received data file from %s", data_file_path)
    # Extract the data into the same directory as the tar file.
    data_directory = os.path.dirname(data_file_path)
    # Note that the extracted data will be located in a folder `VOCdevkit` (from tar).
    # If the folder is already there and `override_extract` is False, then we will skip
    # extracting the folder again.
    if override_extract or not os.path.exists(
        os.path.join(data_directory, "VOCdevkit")
    ):
        logging.info("Extract data into %s", data_directory)
        with tarfile.open(data_file_path) as f:
            f.extractall(data_directory)
    return os.path.join(data_directory, "VOCdevkit", "VOC2012")
def _parse_annotation_data(annotation_file_path):
    """Parse the annotation XML file for the image.

    The annotation contains the metadata, as well as the object bounding box information.
    """
    with tf.io.gfile.GFile(annotation_file_path, "r") as f:
        root = xml.etree.ElementTree.parse(f).getroot()

        size = root.find("size")
        width = int(size.find("width").text)
        height = int(size.find("height").text)

        objects = []
        for obj in root.findall("object"):
            # Get the object's label name.
            label = CLASS_TO_INDEX[obj.find("name").text.lower()]
            # Get the object's pose name.
            pose = obj.find("pose").text.lower()
            is_truncated = obj.find("truncated").text == "1"
            is_difficult = obj.find("difficult").text == "1"
            bndbox = obj.find("bndbox")
            xmax = int(bndbox.find("xmax").text)
            xmin = int(bndbox.find("xmin").text)
            ymax = int(bndbox.find("ymax").text)
            ymin = int(bndbox.find("ymin").text)
            objects.append(
                {
                    "label": label,
                    "pose": pose,
                    "bbox": [ymin, xmin, ymax, xmax],
                    "is_truncated": is_truncated,
                    "is_difficult": is_difficult,
                }
            )
        return {"width": width, "height": height, "objects": objects}
def _get_image_ids(data_dir, split):
    data_file_mapping = {"train": "train.txt", "eval": "val.txt", None: "trainval.txt"}
    with tf.io.gfile.GFile(
        os.path.join(data_dir, "ImageSets", "Segmentation", data_file_mapping[split]),
        "r",
    ) as f:
        image_ids = f.read().splitlines()
        logging.info(f"Received {len(image_ids)} images for {split} dataset.")
        return image_ids
def _parse_single_image(image_file_path):
    data_dir, image_file_name = os.path.split(image_file_path)
    data_dir = os.path.normpath(os.path.join(data_dir, os.path.pardir))
    image_id, _ = os.path.splitext(image_file_name)
    class_segmentation_file_path = os.path.join(
        data_dir, "SegmentationClass", image_id + ".png"
    )
    object_segmentation_file_path = os.path.join(
        data_dir, "SegmentationObject", image_id + ".png"
    )
    annotation_file_path = os.path.join(data_dir, "Annotations", image_id + ".xml")
    image_annotations = _parse_annotation_data(annotation_file_path)

    result = {
        "image/filename": image_id + ".jpg",
        "image/file_path": image_file_path,
        "segmentation/class/file_path": class_segmentation_file_path,
        "segmentation/object/file_path": object_segmentation_file_path,
    }
    result.update(image_annotations)
    # The labels field should be the same as 'object.label'.
    labels = list(set([o["label"] for o in result["objects"]]))
    result["labels"] = sorted(labels)
    return result
def _build_metadata(data_dir, image_ids):
    # Parallel process all the images.
    image_file_paths = [
        os.path.join(data_dir, "JPEGImages", i + ".jpg") for i in image_ids
    ]
    pool_size = 10 if len(image_ids) > 10 else len(image_ids)
    with multiprocessing.Pool(pool_size) as p:
        metadata = p.map(_parse_single_image, image_file_paths)

    # Transpose the metadata, converting the list of dicts into a dict of lists.
    keys = [
        "image/filename",
        "image/file_path",
        "segmentation/class/file_path",
        "segmentation/object/file_path",
        "labels",
        "width",
        "height",
    ]
    result = {}
    for key in keys:
        values = [value[key] for value in metadata]
        result[key] = values

    # The ragged objects need some special handling.
    for key in ["label", "pose", "bbox", "is_truncated", "is_difficult"]:
        values = []
        objects = [value["objects"] for value in metadata]
        for object in objects:
            values.append([o[key] for o in object])
        result["objects/" + key] = values
    return result
# With jit_compile=True, there will be 0.4 sec compilation overhead, but it saves about
# 0.2 sec per 1000 images. See
# https://github.com/keras-team/keras-cv/pull/943#discussion_r1001092882 for more details.
@tf.function(jit_compile=True)
def _decode_png_mask(mask):
    """Decode the raw PNG image and convert it to a 2D tensor of per-pixel class indices."""
    # Cast the mask to int32 since the original uint8 would overflow when multiplied by 256.
    mask = tf.cast(mask, tf.int32)
    mask = mask[:, :, 0] * 256 * 256 + mask[:, :, 1] * 256 + mask[:, :, 2]
    mask = tf.expand_dims(tf.gather(VOC_PNG_COLOR_MAPPING, mask), -1)
    mask = tf.cast(mask, tf.uint8)
    return mask
def _load_images(example):
    image_file_path = example.pop("image/file_path")
    segmentation_class_file_path = example.pop("segmentation/class/file_path")
    segmentation_object_file_path = example.pop("segmentation/object/file_path")
    image = tf.io.read_file(image_file_path)
    image = tf.image.decode_jpeg(image)

    segmentation_class_mask = tf.io.read_file(segmentation_class_file_path)
    segmentation_class_mask = tf.image.decode_png(segmentation_class_mask)
    segmentation_class_mask = _decode_png_mask(segmentation_class_mask)

    segmentation_object_mask = tf.io.read_file(segmentation_object_file_path)
    segmentation_object_mask = tf.image.decode_png(segmentation_object_mask)
    segmentation_object_mask = _decode_png_mask(segmentation_object_mask)

    example.update(
        {
            "image": image,
            "class_segmentation": segmentation_class_mask,
            "object_segmentation": segmentation_object_mask,
        }
    )
    return example
def _build_dataset_from_metadata(metadata):
    # The objects need some manual conversion to ragged tensors.
    metadata["labels"] = tf.ragged.constant(metadata["labels"])
    metadata["objects/label"] = tf.ragged.constant(metadata["objects/label"])
    metadata["objects/pose"] = tf.ragged.constant(metadata["objects/pose"])
    metadata["objects/is_truncated"] = tf.ragged.constant(
        metadata["objects/is_truncated"]
    )
    metadata["objects/is_difficult"] = tf.ragged.constant(
        metadata["objects/is_difficult"]
    )
    metadata["objects/bbox"] = tf.ragged.constant(
        metadata["objects/bbox"], ragged_rank=1
    )

    dataset = tf.data.Dataset.from_tensor_slices(metadata)
    dataset = dataset.map(_load_images, num_parallel_calls=tf.data.AUTOTUNE)
    return dataset
def load(
    split="train",
    data_dir=None,
):
    """Load the Pascal VOC 2012 dataset.

    This function will download the data tar file from remote if needed, untar it to
    the local `data_dir`, and build a dataset from it.

    Args:
        split: string, can be 'train', 'eval', or None. When None, both train and eval
            data will be loaded. Defaults to 'train'.
        data_dir: string, local directory path for the loaded data. This will be used
            to download the data file and unzip it, and it serves as a cache directory.
            Defaults to None, in which case `~/.keras/pascal_voc_2012` will be used.
    """
    supported_split_value = ["train", "eval", None]
    if split not in supported_split_value:
        raise ValueError(
            f"The supported values for `split` are {supported_split_value}. "
            f"Got: {split}"
        )

    if data_dir is not None:
        data_dir = os.path.expanduser(data_dir)
    data_dir = _download_pascal_voc_2012(DATA_URL, local_dir_path=data_dir)
    image_ids = _get_image_ids(data_dir, split)
    metadata = _build_metadata(data_dir, image_ids)
    _maybe_populate_voc_color_mapping()
    dataset = _build_dataset_from_metadata(metadata)
    return dataset
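
As a usage sketch (note that the first call downloads the roughly 2 GB VOC 2012 tar into `~/.keras/pascal_voc_2012` unless `data_dir` points elsewhere):

```python
from keras_cv.datasets.pascal_voc import segmentation

train_ds = segmentation.load(split="train")
for example in train_ds.take(1):
    # "class_segmentation" is an [H, W, 1] uint8 mask: 0 is background,
    # 1-20 follow CLASSES shifted by one, and 255 marks object boundaries.
    print(example["image"].shape, example["class_segmentation"].shape)
```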

Keras/keras-cv/keras_cv/datasets/pascal_voc/segmentation_test.py (new file, mode 100644)

# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import pathlib
import sys

import tensorflow as tf
from absl import flags

from keras_cv.datasets.pascal_voc import segmentation


class PascalVocSegmentationDataTest(tf.test.TestCase):
    def setUp(self):
        super().setUp()
        self.tempdir = self.get_tempdir()
        # Note that this will not work with bazel; it needs to be rewritten to rely
        # on FLAGS.test_srcdir.
        self.test_data_tar_path = os.path.abspath(
            os.path.join(
                os.path.abspath(__file__),
                os.path.pardir,
                "test_data",
                "VOC_mini.tar",
            )
        )
    def get_tempdir(self):
        try:
            flags.FLAGS.test_tmpdir
        except flags.UnparsedFlagAccessError:
            # Need to initialize flags when running `pytest`.
            flags.FLAGS(sys.argv, known_only=True)
        return self.create_tempdir().full_path
    def test_download_data(self):
        # Since the original data package is too large, we use a small package as a
        # replacement.
        local_data_dir = os.path.join(self.tempdir, "pascal_voc_2012/")
        test_data_dir = segmentation._download_pascal_voc_2012(
            data_url=pathlib.Path(self.test_data_tar_path).as_uri(),
            local_dir_path=local_data_dir,
        )
        self.assertTrue(os.path.exists(test_data_dir))

        # Make sure the data is unzipped correctly and populated with correct content.
        expected_subdirs = [
            "Annotations",
            "ImageSets",
            "JPEGImages",
            "SegmentationClass",
            "SegmentationObject",
        ]
        for sub_dir in expected_subdirs:
            self.assertTrue(os.path.exists(os.path.join(test_data_dir, sub_dir)))
    def test_skip_download_and_override(self):
        local_data_dir = os.path.join(self.tempdir, "pascal_voc_2012/")
        test_data_dir = segmentation._download_pascal_voc_2012(
            data_url=pathlib.Path(self.test_data_tar_path).as_uri(),
            local_dir_path=local_data_dir,
        )
        # Touch a directory under test_data_dir and make sure it still exists (is not
        # overridden) when _download_pascal_voc_2012 is invoked again.
        os.makedirs(os.path.join(test_data_dir, "Annotations", "dummy_dir"))
        segmentation._download_pascal_voc_2012(
            data_url=pathlib.Path(self.test_data_tar_path).as_uri(),
            local_dir_path=local_data_dir,
            override_extract=False,
        )
        self.assertTrue(
            os.path.exists(os.path.join(test_data_dir, "Annotations", "dummy_dir"))
        )
    def test_get_image_ids(self):
        local_data_dir = os.path.join(self.tempdir, "pascal_voc_2012/")
        data_dir = segmentation._download_pascal_voc_2012(
            data_url=pathlib.Path(self.test_data_tar_path).as_uri(),
            local_dir_path=local_data_dir,
        )
        train_ids = ["2007_000032", "2007_000039", "2007_000063"]
        eval_ids = ["2007_000033"]
        train_eval_ids = train_ids + eval_ids
        self.assertEquals(segmentation._get_image_ids(data_dir, "train"), train_ids)
        self.assertEquals(segmentation._get_image_ids(data_dir, "eval"), eval_ids)
        self.assertEquals(segmentation._get_image_ids(data_dir, None), train_eval_ids)
    def test_parse_annotation_file(self):
        local_data_dir = os.path.join(self.tempdir, "pascal_voc_2012/")
        data_dir = segmentation._download_pascal_voc_2012(
            data_url=pathlib.Path(self.test_data_tar_path).as_uri(),
            local_dir_path=local_data_dir,
        )
        # One of the train files.
        annotation_file = os.path.join(data_dir, "Annotations", "2007_000032.xml")
        metadata = segmentation._parse_annotation_data(annotation_file)
        expected_result = {
            "height": 281,
            "width": 500,
            "objects": [
                {
                    "label": 0,
                    "pose": "frontal",
                    "bbox": [78, 104, 183, 375],
                    "is_truncated": False,
                    "is_difficult": False,
                },
                {
                    "label": 0,
                    "pose": "left",
                    "bbox": [88, 133, 123, 197],
                    "is_truncated": False,
                    "is_difficult": False,
                },
                {
                    "label": 14,
                    "pose": "rear",
                    "bbox": [180, 195, 229, 213],
                    "is_truncated": False,
                    "is_difficult": False,
                },
                {
                    "label": 14,
                    "pose": "rear",
                    "bbox": [189, 26, 238, 44],
                    "is_truncated": False,
                    "is_difficult": False,
                },
            ],
        }
        self.assertEquals(metadata, expected_result)
    def test_decode_png_mask(self):
        local_data_dir = os.path.join(self.tempdir, "pascal_voc_2012/")
        data_dir = segmentation._download_pascal_voc_2012(
            data_url=pathlib.Path(self.test_data_tar_path).as_uri(),
            local_dir_path=local_data_dir,
        )
        mask_file = os.path.join(data_dir, "SegmentationClass", "2007_000032.png")
        mask = tf.io.decode_png(tf.io.read_file(mask_file))
        segmentation._maybe_populate_voc_color_mapping()
        mask = segmentation._decode_png_mask(mask)

        self.assertEquals(mask.shape, (281, 500, 1))
        self.assertEquals(tf.reduce_max(mask), 255)  # The 255 value is for the boundary
        self.assertEquals(tf.reduce_min(mask), 0)  # The 0 value is for the background
        # The mask contains two classes, 1 and 15; see the label section in the
        # previous test case.
        self.assertEquals(tf.reduce_sum(tf.cast(tf.equal(mask, 1), tf.int32)), 4734)
        self.assertEquals(tf.reduce_sum(tf.cast(tf.equal(mask, 15), tf.int32)), 866)
    def test_parse_single_image(self):
        local_data_dir = os.path.join(self.tempdir, "pascal_voc_2012/")
        data_dir = segmentation._download_pascal_voc_2012(
            data_url=pathlib.Path(self.test_data_tar_path).as_uri(),
            local_dir_path=local_data_dir,
        )
        image_file = os.path.join(data_dir, "JPEGImages", "2007_000032.jpg")
        result_dict = segmentation._parse_single_image(image_file)
        expected_result = {
            "image/filename": "2007_000032.jpg",
            "image/file_path": image_file,
            "height": 281,
            "width": 500,
            "objects": [
                {
                    "label": 0,
                    "pose": "frontal",
                    "bbox": [78, 104, 183, 375],
                    "is_truncated": False,
                    "is_difficult": False,
                },
                {
                    "label": 0,
                    "pose": "left",
                    "bbox": [88, 133, 123, 197],
                    "is_truncated": False,
                    "is_difficult": False,
                },
                {
                    "label": 14,
                    "pose": "rear",
                    "bbox": [180, 195, 229, 213],
                    "is_truncated": False,
                    "is_difficult": False,
                },
                {
                    "label": 14,
                    "pose": "rear",
                    "bbox": [189, 26, 238, 44],
                    "is_truncated": False,
                    "is_difficult": False,
                },
            ],
            "labels": [0, 14],
            "segmentation/class/file_path": os.path.join(
                data_dir, "SegmentationClass", "2007_000032.png"
            ),
            "segmentation/object/file_path": os.path.join(
                data_dir, "SegmentationObject", "2007_000032.png"
            ),
        }
        self.assertEquals(result_dict, expected_result)
    def test_build_metadata(self):
        local_data_dir = os.path.join(self.tempdir, "pascal_voc_2012/")
        data_dir = segmentation._download_pascal_voc_2012(
            data_url=pathlib.Path(self.test_data_tar_path).as_uri(),
            local_dir_path=local_data_dir,
        )
        image_ids = segmentation._get_image_ids(data_dir, None)
        metadata = segmentation._build_metadata(data_dir, image_ids)

        self.assertEquals(
            metadata["image/filename"],
            [
                "2007_000032.jpg",
                "2007_000039.jpg",
                "2007_000063.jpg",
                "2007_000033.jpg",
            ],
        )
        expected_keys = [
            "image/filename",
            "image/file_path",
            "segmentation/class/file_path",
            "segmentation/object/file_path",
            "labels",
            "width",
            "height",
            "objects/label",
            "objects/pose",
            "objects/bbox",
            "objects/is_truncated",
            "objects/is_difficult",
        ]
        for key in expected_keys:
            self.assertLen(metadata[key], 4)
    def test_build_dataset(self):
        local_data_dir = os.path.join(self.tempdir, "pascal_voc_2012/")
        data_dir = segmentation._download_pascal_voc_2012(
            data_url=pathlib.Path(self.test_data_tar_path).as_uri(),
            local_dir_path=local_data_dir,
        )
        image_ids = segmentation._get_image_ids(data_dir, None)
        metadata = segmentation._build_metadata(data_dir, image_ids)
        segmentation._maybe_populate_voc_color_mapping()
        dataset = segmentation._build_dataset_from_metadata(metadata)

        entry = next(dataset.take(1).as_numpy_iterator())
        self.assertEquals(entry["image/filename"], b"2007_000032.jpg")
        expected_keys = [
            "image",
            "image/filename",
            "labels",
            "width",
            "height",
            "objects/label",
            "objects/pose",
            "objects/bbox",
            "objects/is_truncated",
            "objects/is_difficult",
            "class_segmentation",
            "object_segmentation",
        ]
        for key in expected_keys:
            self.assertIn(key, entry)

        # Check the mask PNG content.
        png = entry["class_segmentation"]
        self.assertEquals(png.shape, (281, 500, 1))
        self.assertEquals(tf.reduce_max(png), 255)  # The 255 value is for the boundary
        self.assertEquals(tf.reduce_min(png), 0)  # The 0 value is for the background
        # The mask contains two classes, 1 and 15; see the label section in the
        # previous test case.
        self.assertEquals(tf.reduce_sum(tf.cast(tf.equal(png, 1), tf.int32)), 4734)
        self.assertEquals(tf.reduce_sum(tf.cast(tf.equal(png, 15), tf.int32)), 866)

Keras/keras-cv/keras_cv/datasets/pascal_voc/test_data/VOC_mini.tar (new file, mode 100644)

Binary file added.

Keras/keras-cv/keras_cv/keypoint/__init__.py (new file, mode 100644)

# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from keras_cv.keypoint.converters import convert_format
from keras_cv.keypoint.formats import REL_XY
from keras_cv.keypoint.formats import XY
from keras_cv.keypoint.utils import filter_out_of_image

Keras/keras-cv/keras_cv/keypoint/converters.py (new file, mode 100644)

# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Converter functions for working with keypoints formats."""
import
tensorflow
as
tf
# Internal exception
class
_RequiresImagesException
(
Exception
):
pass
def
_rel_xy_to_xy
(
keypoints
,
images
=
None
):
if
images
is
None
:
raise
_RequiresImagesException
()
shape
=
tf
.
cast
(
tf
.
shape
(
images
),
keypoints
.
dtype
)
h
,
w
=
shape
[
1
],
shape
[
2
]
x
,
y
,
rest
=
tf
.
split
(
keypoints
,
[
1
,
1
,
keypoints
.
shape
[
-
1
]
-
2
],
axis
=-
1
)
return
tf
.
concat
([
x
*
w
,
y
*
h
,
rest
],
axis
=-
1
)
def
_xy_to_rel_xy
(
keypoints
,
images
=
None
):
if
images
is
None
:
raise
_RequiresImagesException
()
shape
=
tf
.
cast
(
tf
.
shape
(
images
),
keypoints
.
dtype
)
h
,
w
=
shape
[
1
],
shape
[
2
]
x
,
y
,
rest
=
tf
.
split
(
keypoints
,
[
1
,
1
,
keypoints
.
shape
[
-
1
]
-
2
],
axis
=-
1
)
return
tf
.
concat
([
x
/
w
,
y
/
h
,
rest
],
axis
=-
1
)
def
_xy_noop
(
keypoints
,
images
=
None
):
return
keypoints
TO_XY_CONVERTERS
=
{
"xy"
:
_xy_noop
,
"rel_xy"
:
_rel_xy_to_xy
,
}
FROM_XY_CONVERTERS
=
{
"xy"
:
_xy_noop
,
"rel_xy"
:
_xy_to_rel_xy
,
}
def convert_format(keypoints, source, target, images=None, dtype=None):
    """Converts keypoints from one format to another.

    Supported formats are:
    - `"xy"`, absolute pixel positions.
    - `"rel_xy"`, relative pixel positions.

    Formats are case insensitive.

    Relative formats, abbreviated `rel`, make use of the shapes of the
    `images` passed. In these formats, the coordinates, widths, and
    heights are all specified as percentages of the host image.

    `images` may be a ragged Tensor. Note that using a ragged Tensor
    for images may cause a substantial performance loss, as each image
    will need to be processed separately due to the mismatching image
    shapes.

    Usage:
    ```python
    images, keypoints = load_my_dataset()
    keypoints_in_rel = keras_cv.keypoint.convert_format(
        keypoints,
        source='xy',
        target='rel_xy',
        images=images,
    )
    ```

    Args:
        keypoints: tf.Tensor or tf.RaggedTensor representing keypoints
            in the format specified in the `source` parameter.
            `keypoints` can optionally have extra dimensions stacked
            on the final axis to store metadata. keypoints should
            have a rank between 2 and 4, with the shape
            `[num_keypoints, *]`, `[batch_size, num_keypoints, *]` or
            `[batch_size, num_groups, num_keypoints, *]`.
        source: one of the supported format names ("xy" or "rel_xy").
            Used to specify the original format of the `keypoints`
            parameter.
        target: one of the supported format names ("xy" or "rel_xy").
            Used to specify the destination format of the `keypoints`
            parameter.
        images: (Optional) a batch of images aligned with `keypoints` on
            the first axis. Should be rank 3 (`HWC` format) or 4
            (`BHWC` format). Used in some converters to compute
            relative pixel values of the keypoint dimensions.
            Required when transforming from a rel format to a non-rel
            format.
        dtype: the data type to use when transforming the keypoints.
            Defaults to None, i.e. the `keypoints` dtype.
    """
    source = source.lower()
    target = target.lower()
    if source not in TO_XY_CONVERTERS:
        raise ValueError(
            "convert_format() received an unsupported format for the argument "
            f"`source`. `source` should be one of {TO_XY_CONVERTERS.keys()}. "
            f"Got source={source}"
        )
    if target not in FROM_XY_CONVERTERS:
        raise ValueError(
            "convert_format() received an unsupported format for the argument "
            f"`target`. `target` should be one of {FROM_XY_CONVERTERS.keys()}. "
            f"Got target={target}"
        )

    if dtype:
        keypoints = tf.cast(keypoints, dtype)

    if source == target:
        return keypoints

    keypoints, images, squeeze_axis = _format_inputs(keypoints, images)
    try:
        in_xy = TO_XY_CONVERTERS[source](keypoints, images=images)
        result = FROM_XY_CONVERTERS[target](in_xy, images=images)
    except _RequiresImagesException:
        raise ValueError(
            "convert_format() must receive `images` when transforming "
            "between relative and absolute formats. "
            f"convert_format() received source=`{source}`, target=`{target}`, "
            f"but images={images}"
        )
    return _format_outputs(result, squeeze_axis)
def _format_inputs(keypoints, images):
    keypoints_rank = len(keypoints.shape)
    if keypoints_rank > 4:
        raise ValueError(
            "Expected keypoints rank to be in [2, 4], got "
            f"len(keypoints.shape)={keypoints_rank}."
        )
    keypoints_includes_batch = keypoints_rank > 2
    keypoints_are_grouped = keypoints_rank == 4

    if images is not None:
        images_rank = len(images.shape)
        if images_rank > 4 or images_rank < 3:
            raise ValueError(
                "Expected images rank to be 3 or 4, got "
                f"len(images.shape)={images_rank}."
            )
        images_include_batch = images_rank == 4
        if keypoints_includes_batch != images_include_batch:
            raise ValueError(
                "convert_format() expects both `keypoints` and `images` to be batched "
                f"or both unbatched. Received len(keypoints.shape)={keypoints_rank}, "
                f"len(images.shape)={images_rank}. Expected either "
                "len(keypoints.shape)=2 and len(images.shape)=3, or "
                "len(keypoints.shape)>=3 and len(images.shape)=4."
            )
        if not images_include_batch:
            images = tf.expand_dims(images, axis=0)

    squeeze_axis = []
    if not keypoints_includes_batch:
        keypoints = tf.expand_dims(keypoints, axis=0)
        squeeze_axis.append(0)
    if not keypoints_are_grouped:
        keypoints = tf.expand_dims(keypoints, axis=1)
        squeeze_axis.append(1)

    return keypoints, images, squeeze_axis


def _format_outputs(result, squeeze_axis):
    if len(squeeze_axis) == 0:
        return result
    return tf.squeeze(result, axis=squeeze_axis)
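
For illustration, a round trip between the two formats on a single unbatched image; the numbers assume a 100x200 (height x width) image:

```python
import tensorflow as tf

from keras_cv import keypoint

image = tf.zeros([100, 200, 3])  # height 100, width 200
xy = tf.constant([[20.0, 50.0], [100.0, 25.0]])

# x is divided by the image width and y by the height:
# [[0.1, 0.5], [0.5, 0.25]]
rel = keypoint.convert_format(xy, source="xy", target="rel_xy", images=image)

# Converting back recovers the original absolute coordinates.
restored = keypoint.convert_format(rel, source="rel_xy", target="xy", images=image)
```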

Keras/keras-cv/keras_cv/keypoint/converters_test.py (new file, mode 100644)

# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import itertools

import tensorflow as tf
from absl.testing import parameterized

from keras_cv import keypoint

xy_keypoints = tf.constant(
    [[[10, 20], [110, 120], [210, 220]], [[20, 30], [120, 130], [220, 230]]],
    dtype=tf.float32,
)
rel_xy_keypoints = tf.constant(
    [
        [[0.01, 0.04], [0.11, 0.24], [0.21, 0.44]],
        [[0.02, 0.06], [0.12, 0.26], [0.22, 0.46]],
    ],
    dtype=tf.float32,
)

images = tf.ones([2, 500, 1000, 3])

keypoints = {
    "xy": xy_keypoints,
    "rel_xy": rel_xy_keypoints,
}

test_cases = [
    (f"{source}_{target}", source, target)
    for (source, target) in itertools.permutations(keypoints.keys(), 2)
] + [("xy_xy", "xy", "xy")]
class ConvertersTestCase(tf.test.TestCase, parameterized.TestCase):
    @parameterized.named_parameters(*test_cases)
    def test_converters(self, source, target):
        source_keypoints = keypoints[source]
        target_keypoints = keypoints[target]
        self.assertAllClose(
            keypoint.convert_format(
                source_keypoints, source=source, target=target, images=images
            ),
            target_keypoints,
        )

    @parameterized.named_parameters(*test_cases)
    def test_converters_unbatched(self, source, target):
        source_keypoints = keypoints[source][0]
        target_keypoints = keypoints[target][0]
        self.assertAllClose(
            keypoint.convert_format(
                source_keypoints, source=source, target=target, images=images[0]
            ),
            target_keypoints,
        )

    @parameterized.named_parameters(*test_cases)
    def test_converters_ragged_groups(self, source, target):
        source_keypoints = keypoints[source]
        target_keypoints = keypoints[target]

        def create_ragged_group(ins):
            res = []
            for b, groups in zip(ins, [[1, 2], [0, 3]]):
                res.append(tf.RaggedTensor.from_row_lengths(b, groups))
            return tf.stack(res, axis=0)

        source_keypoints = create_ragged_group(source_keypoints)
        target_keypoints = create_ragged_group(target_keypoints)
        self.assertAllClose(
            keypoint.convert_format(
                source_keypoints, source=source, target=target, images=images
            ),
            target_keypoints,
        )

    @parameterized.named_parameters(*test_cases)
    def test_converters_with_metadata(self, source, target):
        source_keypoints = keypoints[source]
        target_keypoints = keypoints[target]

        def add_metadata(ins):
            return tf.concat([ins, tf.ones([2, 3, 5])], axis=-1)

        source_keypoints = add_metadata(source_keypoints)
        target_keypoints = add_metadata(target_keypoints)
        self.assertAllClose(
            keypoint.convert_format(
                source_keypoints, source=source, target=target, images=images
            ),
            target_keypoints,
        )

    def test_raise_errors_when_missing_shape(self):
        with self.assertRaises(ValueError) as e:
            keypoint.convert_format(keypoints["xy"], source="xy", target="rel_xy")
        self.assertEqual(
            str(e.exception),
            "convert_format() must receive `images` when transforming "
            "between relative and absolute formats. convert_format() "
            "received source=`xy`, target=`rel_xy`, but images=None",
        )

    @parameterized.named_parameters(
        (
            "keypoint_rank",
            tf.ones([2, 3, 4, 2, 1]),
            None,
            "Expected keypoints rank to be in [2, 4], got len(keypoints.shape)=5.",
        ),
        (
            "images_rank",
            tf.ones([4, 2]),
            tf.ones([35, 35]),
            "Expected images rank to be 3 or 4, got len(images.shape)=2.",
        ),
        (
            "batch_mismatch",
            tf.ones([2, 4, 2]),
            tf.ones([35, 35, 3]),
            "convert_format() expects both `keypoints` and `images` to be batched or "
            "both unbatched. Received len(keypoints.shape)=3, len(images.shape)=3. "
            "Expected either len(keypoints.shape)=2 and len(images.shape)=3, or "
            "len(keypoints.shape)>=3 and len(images.shape)=4.",
        ),
    )
    def test_input_format_exception(self, keypoints, images, expected):
        with self.assertRaises(ValueError) as e:
            keypoint.convert_format(
                keypoints, source="xy", target="rel_xy", images=images
            )
        self.assertEqual(str(e.exception), expected)

Keras/keras-cv/keras_cv/keypoint/formats.py (new file, mode 100644)

# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
formats.py contains axis information for each supported format.
"""
class
XY
:
"""XY contains axis indices for the XY format.
All values in the XY format should be absolute pixel values.
The XY format consists of the following required indices:
- X: the width position
- Y: the height position
and the following optional indices, used in some KerasCV components:
- CLASS: class of the keypoints
- CONFIDENCE: confidence of the keypoints
"""
X
=
0
Y
=
1
CLASS
=
2
CONFIDENCE
=
3
class
REL_XY
:
"""REL_XY contains axis indices for the REL_XY format.
REL_XY is like XY, but each value is relative to the width and height of the
origin image. Values are percentages of the origin images' width and height
respectively.
The REL_XY format consists of the following required indices:
- X: the width position
- Y: the height position
and the following optional indices, used in some KerasCV components:
- CLASS: class of the keypoints
- CONFIDENCE: confidence of the keypoints
"""
X
=
0
Y
=
1
CLASS
=
2
CONFIDENCE
=
3
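
These classes are plain namespaces of axis indices; a tensor in one of these formats can be sliced with them instead of bare integers, for example:

```python
import tensorflow as tf

from keras_cv.keypoint.formats import XY

# One keypoint row in the XY format with the optional CLASS and
# CONFIDENCE entries appended: [x, y, class, confidence].
keypoints = tf.constant([[12.0, 34.0, 1.0, 0.9]])
xs = keypoints[..., XY.X]                    # [12.0]
confidences = keypoints[..., XY.CONFIDENCE]  # [0.9]
```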

Keras/keras-cv/keras_cv/keypoint/utils.py (new file, mode 100644)

# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Utility functions for keypoint transformation."""
import
tensorflow
as
tf
H_AXIS
=
-
3
W_AXIS
=
-
2
def
filter_out_of_image
(
keypoints
,
image
):
"""Discards keypoints if falling outside of the image.
Args:
keypoints: a, possibly ragged, 2D (ungrouped), 3D (grouped)
keypoint data in the 'xy' format.
image: a 3D tensor in the HWC format.
Returns:
tf.RaggedTensor: a 2D or 3D ragged tensor with at least one
ragged rank containing only keypoint in the image.
"""
image_shape
=
tf
.
cast
(
tf
.
shape
(
image
),
keypoints
.
dtype
)
mask
=
tf
.
math
.
logical_and
(
tf
.
math
.
logical_and
(
keypoints
[...,
0
]
>=
0
,
keypoints
[...,
0
]
<
image_shape
[
W_AXIS
]
),
tf
.
math
.
logical_and
(
keypoints
[...,
1
]
>=
0
,
keypoints
[...,
1
]
<
image_shape
[
H_AXIS
]
),
)
masked
=
tf
.
ragged
.
boolean_mask
(
keypoints
,
mask
)
if
isinstance
(
masked
,
tf
.
RaggedTensor
):
return
masked
return
tf
.
RaggedTensor
.
from_tensor
(
masked
)
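
A quick sketch of the behavior: the third keypoint below falls outside the 50x50 image, so it is dropped and the result comes back as a ragged tensor:

```python
import tensorflow as tf

from keras_cv.keypoint.utils import filter_out_of_image

image = tf.zeros([50, 50, 3])
keypoints = tf.constant([[10.0, 20.0], [30.0, 40.0], [60.0, 10.0]])

kept = filter_out_of_image(keypoints, image)
# tf.RaggedTensor containing [[10.0, 20.0], [30.0, 40.0]]
```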

Keras/keras-cv/keras_cv/keypoint/utils_test.py (new file, mode 100644)

# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from absl.testing import parameterized

from keras_cv.keypoint.utils import filter_out_of_image


class UtilsTestCase(tf.test.TestCase, parameterized.TestCase):
    @parameterized.named_parameters(
        (
            "all inside",
            tf.constant([[10.0, 20.0], [30.0, 40.0], [50.0, 50.0]]),
            tf.zeros([100, 100, 3]),
            tf.ragged.constant([[10.0, 20.0], [30.0, 40.0], [50.0, 50.0]]),
        ),
        (
            "some inside",
            tf.constant([[10.0, 20.0], [30.0, 40.0], [50.0, 50.0]]),
            tf.zeros([50, 50, 3]),
            tf.ragged.constant([[10.0, 20.0], [30.0, 40.0]]),
        ),
        (
            "ragged input",
            tf.RaggedTensor.from_row_lengths(
                [[10.0, 20.0], [30.0, 40.0], [50.0, 50.0]], [2, 1]
            ),
            tf.zeros([50, 50, 3]),
            tf.RaggedTensor.from_row_lengths([[10.0, 20.0], [30.0, 40.0]], [2, 0]),
        ),
        (
            "height - width confusion",
            tf.constant([[[10.0, 20.0]], [[40.0, 30.0]], [[30.0, 40.0]]]),
            tf.zeros((50, 40, 3)),
            tf.ragged.constant([[[10.0, 20.0]], [], [[30.0, 40.0]]], ragged_rank=1),
        ),
    )
    def test_result(self, keypoints, image, expected):
        self.assertAllClose(filter_out_of_image(keypoints, image), expected)

Keras/keras-cv/keras_cv/layers/__init__.py (new file, mode 100644)

# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from tensorflow.keras.layers import CenterCrop
from tensorflow.keras.layers import RandomBrightness
from tensorflow.keras.layers import RandomContrast
from tensorflow.keras.layers import RandomCrop
from tensorflow.keras.layers import RandomHeight
from tensorflow.keras.layers import RandomRotation
from tensorflow.keras.layers import RandomTranslation
from tensorflow.keras.layers import RandomWidth
from tensorflow.keras.layers import RandomZoom
from tensorflow.keras.layers import Rescaling
from tensorflow.keras.layers import Resizing

from keras_cv.layers.feature_pyramid import FeaturePyramid
from keras_cv.layers.object_detection.anchor_generator import AnchorGenerator
from keras_cv.layers.object_detection.nms_prediction_decoder import NmsPredictionDecoder
from keras_cv.layers.object_detection.non_max_suppression import NonMaxSuppression
from keras_cv.layers.object_detection.retina_net_label_encoder import (
    RetinaNetLabelEncoder,
)
from keras_cv.layers.preprocessing.aug_mix import AugMix
from keras_cv.layers.preprocessing.augmenter import Augmenter
from keras_cv.layers.preprocessing.auto_contrast import AutoContrast
from keras_cv.layers.preprocessing.base_image_augmentation_layer import (
    BaseImageAugmentationLayer,
)
from keras_cv.layers.preprocessing.channel_shuffle import ChannelShuffle
from keras_cv.layers.preprocessing.cut_mix import CutMix
from keras_cv.layers.preprocessing.equalization import Equalization
from keras_cv.layers.preprocessing.fourier_mix import FourierMix
from keras_cv.layers.preprocessing.grayscale import Grayscale
from keras_cv.layers.preprocessing.grid_mask import GridMask
from keras_cv.layers.preprocessing.maybe_apply import MaybeApply
from keras_cv.layers.preprocessing.mix_up import MixUp
from keras_cv.layers.preprocessing.mosaic import Mosaic
from keras_cv.layers.preprocessing.posterization import Posterization
from keras_cv.layers.preprocessing.rand_augment import RandAugment
from keras_cv.layers.preprocessing.random_augmentation_pipeline import (
    RandomAugmentationPipeline,
)
from keras_cv.layers.preprocessing.random_channel_shift import RandomChannelShift
from keras_cv.layers.preprocessing.random_choice import RandomChoice
from keras_cv.layers.preprocessing.random_color_degeneration import (
    RandomColorDegeneration,
)
from keras_cv.layers.preprocessing.random_color_jitter import RandomColorJitter
from keras_cv.layers.preprocessing.random_crop_and_resize import RandomCropAndResize
from keras_cv.layers.preprocessing.random_cutout import RandomCutout
from keras_cv.layers.preprocessing.random_flip import RandomFlip
from keras_cv.layers.preprocessing.random_gaussian_blur import RandomGaussianBlur
from keras_cv.layers.preprocessing.random_hue import RandomHue
from keras_cv.layers.preprocessing.random_jpeg_quality import RandomJpegQuality
from keras_cv.layers.preprocessing.random_saturation import RandomSaturation
from keras_cv.layers.preprocessing.random_sharpness import RandomSharpness
from keras_cv.layers.preprocessing.random_shear import RandomShear
from keras_cv.layers.preprocessing.randomly_zoomed_crop import RandomlyZoomedCrop
from keras_cv.layers.preprocessing.solarization import Solarization
from keras_cv.layers.regularization.drop_path import DropPath
from keras_cv.layers.regularization.dropblock_2d import DropBlock2D
from keras_cv.layers.regularization.squeeze_excite import SqueezeAndExcite2D
from keras_cv.layers.regularization.stochastic_depth import StochasticDepth
from keras_cv.layers.spatial_pyramid import SpatialPyramidPooling

Keras/keras-cv/keras_cv/layers/feature_pyramid.py (new file, mode 100644)

# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf


# TODO(scottzhu): Register it later due to the conflict in the retina_net
# @tf.keras.utils.register_keras_serializable(package="keras_cv")
class FeaturePyramid(tf.keras.layers.Layer):
    """Implements a Feature Pyramid Network.

    This implements the paper:
        Tsung-Yi Lin, Piotr Dollar, Ross Girshick, Kaiming He, Bharath Hariharan,
        and Serge Belongie. Feature Pyramid Networks for Object Detection.
        (https://arxiv.org/pdf/1612.03144)

    Feature Pyramid Networks (FPNs) are basic components that are added to an
    existing feature extractor (CNN) to combine features at different scales. For the
    basic FPN, the inputs are features `Ci` from different levels of a CNN, usually
    the last block of each level, where the feature is scaled from the image
    by a factor of `1/2^i`.

    There is an output associated with each level in the basic FPN. The output Pi
    at level `i` (corresponding to Ci) is given by performing a merge operation on
    the outputs of:

    1) a lateral operation on Ci (usually a conv2D layer with kernel = 1 and
       strides = 1)
    2) a top-down upsampling operation from Pi+1 (except for the topmost level)

    The final output of each level will also have a conv2D operation
    (usually with kernel = 3 and strides = 1).

    The inputs to the layer should be a dict whose int keys match the
    `pyramid_levels`, e.g. for `pyramid_levels` = [2, 3, 4, 5], the expected input
    dict should be `{2: c2, 3: c3, 4: c4, 5: c5}`.

    The output of the layer will have the same structure as the inputs: a dict with
    int keys and one value for each level.

    Args:
        min_level: a python int for the lowest level of the pyramid for
            feature extraction.
        max_level: a python int for the highest level of the pyramid for
            feature extraction.
        num_channels: an integer representing the number of channels for the FPN
            operations. Defaults to 256.
        lateral_layers: a python dict with int keys that match each of the pyramid
            levels. The values of the dict should be `keras.Layer`s, which will be
            called with the feature activation outputs from the backbone at each
            level. Defaults to None, in which case a `keras.Conv2D` layer with a
            1x1 kernel will be created for each pyramid level.
        output_layers: a python dict with int keys that match each of the pyramid
            levels. The values of the dict should be `keras.Layer`s, which will be
            called with the feature inputs and the merged result from upstream
            levels. Defaults to None, in which case a `keras.Conv2D` layer with a
            3x3 kernel will be created for each pyramid level.

    Sample Usage:
    ```python
    inp = tf.keras.layers.Input((384, 384, 3))
    backbone = tf.keras.applications.EfficientNetB0(input_tensor=inp, include_top=False)
    layer_names = ['block2b_add', 'block3b_add', 'block5c_add', 'top_activation']
    backbone_outputs = {}
    for i, layer_name in enumerate(layer_names):
        backbone_outputs[i + 2] = backbone.get_layer(layer_name).output
    # output_dict is a dict with 2, 3, 4, 5 as keys
    output_dict = keras_cv.layers.FeaturePyramid(min_level=2, max_level=5)(backbone_outputs)
    ```
    """
    def __init__(
        self,
        min_level,
        max_level,
        num_channels=256,
        lateral_layers=None,
        output_layers=None,
        **kwargs,
    ):
        super().__init__(**kwargs)
        self.min_level = min_level
        self.max_level = max_level
        self.pyramid_levels = list(range(min_level, max_level + 1))
        self.num_channels = num_channels
        # required for successful serialization
        self.lateral_layers_passed = lateral_layers
        self.output_layers_passed = output_layers

        if not lateral_layers:
            # populate self.lateral_layers with default FPN Conv2D 1x1 layers
            self.lateral_layers = {}
            for i in self.pyramid_levels:
                self.lateral_layers[i] = tf.keras.layers.Conv2D(
                    self.num_channels,
                    kernel_size=1,
                    strides=1,
                    padding="same",
                    name=f"lateral_P{i}",
                )
        else:
            self._validate_user_layers(lateral_layers, "lateral_layers")
            self.lateral_layers = lateral_layers

        # Output conv2d layers.
        if not output_layers:
            self.output_layers = {}
            for i in self.pyramid_levels:
                self.output_layers[i] = tf.keras.layers.Conv2D(
                    self.num_channels,
                    kernel_size=3,
                    strides=1,
                    padding="same",
                    name=f"output_P{i}",
                )
        else:
            self._validate_user_layers(output_layers, "output_layers")
            self.output_layers = output_layers

        # the same upsampling layer is used for all levels
        self.top_down_op = tf.keras.layers.UpSampling2D(size=2)
        # the same merge layer is used for all levels
        self.merge_op = tf.keras.layers.Add()
    def _validate_user_layers(self, user_input, param_name):
        if (
            not isinstance(user_input, dict)
            or sorted(user_input.keys()) != self.pyramid_levels
        ):
            raise ValueError(
                f"Expect {param_name} to be a dict with keys as "
                f"{self.pyramid_levels}, got {user_input}"
            )
    def call(self, features):
        # Note that this assertion might not be true for all the subclasses. It is
        # possible to have an FPN with higher levels than the height of the backbone
        # outputs.
        if (
            not isinstance(features, dict)
            or sorted(features.keys()) != self.pyramid_levels
        ):
            raise ValueError(
                "FeaturePyramid expects input features to be a dict with int keys "
                "that match the values provided in pyramid_levels. "
                f"Expect feature keys: {self.pyramid_levels}, got: {features}"
            )
        return self.build_feature_pyramid(features)
    def build_feature_pyramid(self, input_features):
        # To illustrate the connection/topology, the basic flow for a FPN with
        # levels 3, 4, 5 is like below:
        #
        # input_l5 -> conv2d_1x1_l5 ----V---> conv2d_3x3_l5 -> output_l5
        #                               V
        #                          upsample2d
        #                               V
        # input_l4 -> conv2d_1x1_l4 -> Add -> conv2d_3x3_l4 -> output_l4
        #                               V
        #                          upsample2d
        #                               V
        # input_l3 -> conv2d_1x1_l3 -> Add -> conv2d_3x3_l3 -> output_l3
        output_features = {}
        reversed_levels = list(sorted(input_features.keys(), reverse=True))
        top_level = reversed_levels[0]
        for level in reversed_levels:
            output = self.lateral_layers[level](input_features[level])
            if level < top_level:
                # the topmost output doesn't need to merge with any upstream outputs
                upstream_output = self.top_down_op(output_features[level + 1])
                output = self.merge_op([output, upstream_output])
            output_features[level] = output

        # Post-apply the output layers so that we don't leak them to the downstream
        # levels.
        for level in reversed_levels:
            output_features[level] = self.output_layers[level](output_features[level])

        return output_features
def
get_config
(
self
):
config
=
{
"min_level"
:
self
.
min_level
,
"max_level"
:
self
.
max_level
,
"num_channels"
:
self
.
num_channels
,
"lateral_layers"
:
self
.
lateral_layers_passed
,
"output_layers"
:
self
.
output_layers_passed
,
}
base_config
=
super
().
get_config
()
return
dict
(
list
(
base_config
.
items
())
+
list
(
config
.
items
()))
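Since `lateral_layers` and `output_layers` are validated against `pyramid_levels`, a custom configuration must supply exactly one layer per level, keyed by the level's integer. A minimal sketch of that path, assuming the same `FeaturePyramid` import used in the tests below; the `SeparableConv2D` output heads are an illustrative choice, not something the layer prescribes:

```python
import tensorflow as tf

from keras_cv.layers import FeaturePyramid

# Hypothetical configuration: one user-provided layer per pyramid level.
# Keys must be exactly list(range(min_level, max_level + 1)), otherwise
# _validate_user_layers raises a ValueError.
levels = range(2, 6)
lateral_layers = {
    i: tf.keras.layers.Conv2D(256, kernel_size=1, padding="same") for i in levels
}
output_layers = {
    i: tf.keras.layers.SeparableConv2D(256, kernel_size=3, padding="same")
    for i in levels
}
fpn = FeaturePyramid(
    min_level=2,
    max_level=5,
    lateral_layers=lateral_layers,
    output_layers=output_layers,
)

# Feature maps halve in resolution at each level: 64, 32, 16, 8.
features = {i: tf.ones([2, 2 ** (8 - i), 2 ** (8 - i), 3]) for i in levels}
outputs = fpn(features)  # dict with keys 2..5, each of shape [2, H_i, W_i, 256]
```

Note that because the merge op is an `Add`, all custom lateral layers must produce the same channel count at every level.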
Keras/keras-cv/keras_cv/layers/feature_pyramid_test.py
0 → 100644
View file @
0016b0a7
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf

from keras_cv.layers import FeaturePyramid


class FeaturePyramidTest(tf.test.TestCase):
    def test_return_type_dict(self):
        layer = FeaturePyramid(min_level=2, max_level=5)
        c2 = tf.ones([2, 64, 64, 3])
        c3 = tf.ones([2, 32, 32, 3])
        c4 = tf.ones([2, 16, 16, 3])
        c5 = tf.ones([2, 8, 8, 3])
        inputs = {2: c2, 3: c3, 4: c4, 5: c5}
        output = layer(inputs)
        self.assertTrue(isinstance(output, dict))
        self.assertEquals(sorted(output.keys()), [2, 3, 4, 5])

    def test_result_shapes(self):
        layer = FeaturePyramid(min_level=2, max_level=5)
        c2 = tf.ones([2, 64, 64, 3])
        c3 = tf.ones([2, 32, 32, 3])
        c4 = tf.ones([2, 16, 16, 3])
        c5 = tf.ones([2, 8, 8, 3])
        inputs = {2: c2, 3: c3, 4: c4, 5: c5}
        output = layer(inputs)
        for level in inputs.keys():
            self.assertEquals(output[level].shape[1], inputs[level].shape[1])
            self.assertEquals(output[level].shape[2], inputs[level].shape[2])
            self.assertEquals(output[level].shape[3], layer.num_channels)

        # Test with different resolution and channel size
        c2 = tf.ones([2, 64, 128, 4])
        c3 = tf.ones([2, 32, 64, 8])
        c4 = tf.ones([2, 16, 32, 16])
        c5 = tf.ones([2, 8, 16, 32])
        inputs = {2: c2, 3: c3, 4: c4, 5: c5}
        layer = FeaturePyramid(min_level=2, max_level=5)
        output = layer(inputs)
        for level in inputs.keys():
            self.assertEquals(output[level].shape[1], inputs[level].shape[1])
            self.assertEquals(output[level].shape[2], inputs[level].shape[2])
            self.assertEquals(output[level].shape[3], layer.num_channels)

    def test_with_keras_input_tensor(self):
        # This mimics model building with a backbone network
        layer = FeaturePyramid(min_level=2, max_level=5)
        c2 = tf.keras.layers.Input([64, 64, 3])
        c3 = tf.keras.layers.Input([32, 32, 3])
        c4 = tf.keras.layers.Input([16, 16, 3])
        c5 = tf.keras.layers.Input([8, 8, 3])
        inputs = {2: c2, 3: c3, 4: c4, 5: c5}
        output = layer(inputs)
        for level in inputs.keys():
            self.assertEquals(output[level].shape[1], inputs[level].shape[1])
            self.assertEquals(output[level].shape[2], inputs[level].shape[2])
            self.assertEquals(output[level].shape[3], layer.num_channels)

    def test_invalid_lateral_layers(self):
        lateral_layers = [tf.keras.layers.Conv2D(256, 1)] * 3
        with self.assertRaisesRegexp(ValueError, "Expect lateral_layers to be a dict"):
            _ = FeaturePyramid(min_level=2, max_level=5, lateral_layers=lateral_layers)
        lateral_layers = {
            2: tf.keras.layers.Conv2D(256, 1),
            3: tf.keras.layers.Conv2D(256, 1),
            4: tf.keras.layers.Conv2D(256, 1),
        }
        with self.assertRaisesRegexp(ValueError, "with keys as .* [2, 3, 4, 5]"):
            _ = FeaturePyramid(min_level=2, max_level=5, lateral_layers=lateral_layers)

    def test_invalid_output_layers(self):
        output_layers = [tf.keras.layers.Conv2D(256, 3)] * 3
        with self.assertRaisesRegexp(ValueError, "Expect output_layers to be a dict"):
            _ = FeaturePyramid(min_level=2, max_level=5, output_layers=output_layers)
        output_layers = {
            2: tf.keras.layers.Conv2D(256, 3),
            3: tf.keras.layers.Conv2D(256, 3),
            4: tf.keras.layers.Conv2D(256, 3),
        }
        with self.assertRaisesRegexp(ValueError, "with keys as .* [2, 3, 4, 5]"):
            _ = FeaturePyramid(min_level=2, max_level=5, output_layers=output_layers)

    def test_invalid_input_features(self):
        layer = FeaturePyramid(min_level=2, max_level=5)
        c2 = tf.ones([2, 64, 64, 3])
        c3 = tf.ones([2, 32, 32, 3])
        c4 = tf.ones([2, 16, 16, 3])
        c5 = tf.ones([2, 8, 8, 3])
        list_input = [c2, c3, c4, c5]
        with self.assertRaisesRegexp(ValueError, "expects input features to be a dict"):
            layer(list_input)

        dict_input_with_missing_feature = {2: c2, 3: c3, 4: c4}
        with self.assertRaisesRegexp(ValueError, "Expect feature keys.*[2, 3, 4, 5]"):
            layer(dict_input_with_missing_feature)
Keras/keras-cv/keras_cv/layers/object_detection/__init__.py
0 → 100644
View file @
0016b0a7
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
Keras/keras-cv/keras_cv/layers/object_detection/anchor_generator.py
0 → 100644
View file @
0016b0a7
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from tensorflow import keras

from keras_cv import bounding_box


class AnchorGenerator(keras.layers.Layer):
    """AnchorGenerator generates anchors for multiple feature maps.

    AnchorGenerator takes multiple scales and generates anchor boxes based on the
    anchor sizes, scales, aspect ratios, and strides provided. To invoke
    AnchorGenerator, call it on the image that needs anchor boxes.

    `sizes` and `strides` must match structurally - they are pairs. Scales and
    aspect ratios can either be a list, that is then used for all of the sizes
    (aka levels), or a dictionary from `{'level_{number}': [parameters at scale...]}`.

    Args:
      bounding_box_format: The format of bounding boxes to generate. Refer
        [to the keras.io docs](https://keras.io/api/keras_cv/bounding_box/formats/)
        for more details on supported bounding box formats.
      sizes: A list of integers that represent the anchor sizes for each level,
        or a dictionary of integer lists with each key representing a level.
        For each anchor size, anchor height will be `anchor_size / sqrt(aspect_ratio)`,
        and anchor width will be `anchor_size * sqrt(aspect_ratio)`. This is repeated
        for each scale and aspect ratio.
      scales: A list of floats corresponding to multipliers that will be
        multiplied by each `anchor_size` to generate a level.
      aspect_ratios: A list of floats representing the ratio of anchor width to height.
      strides: iterable of ints that represent the anchor stride size between
        centers of anchors at each scale.
      clip_boxes: Whether or not to clip generated anchor boxes to the image size.
        Defaults to `False`.

    Usage:
    ```python
    strides = [8, 16, 32]
    scales = [1, 1.2599210498948732, 1.5874010519681994]
    sizes = [32.0, 64.0, 128.0]
    aspect_ratios = [0.5, 1.0, 2.0]
    image = tf.random.uniform((512, 512, 3))
    anchor_generator = cv_layers.AnchorGenerator(
        bounding_box_format="rel_yxyx",
        sizes=sizes,
        aspect_ratios=aspect_ratios,
        scales=scales,
        strides=strides,
        clip_boxes=True,
    )
    anchors = anchor_generator(image)
    print(anchors)
    # > {0: ..., 1: ..., 2: ...}
    ```

    Input shape: an image with shape `[H, W, C]`
    Output: a dictionary with integer keys corresponding to each level of the feature
      pyramid. The size of the anchors at each level will be
      `(H/strides[i] * W/strides[i] * len(scales) * len(aspect_ratios), 4)`.
    """

    def __init__(
        self,
        bounding_box_format,
        sizes,
        scales,
        aspect_ratios,
        strides,
        clip_boxes=False,
        **kwargs,
    ):
        super().__init__(**kwargs)
        self.bounding_box_format = bounding_box_format
        # aspect_ratio is a single list that is the same across all levels.
        sizes, strides = self._format_sizes_and_strides(sizes, strides)
        aspect_ratios = self._match_param_structure_to_sizes(aspect_ratios, sizes)
        scales = self._match_param_structure_to_sizes(scales, sizes)

        self.anchor_generators = {}
        for k in sizes.keys():
            self.anchor_generators[k] = _SingleAnchorGenerator(
                bounding_box_format,
                sizes[k],
                scales[k],
                aspect_ratios[k],
                strides[k],
                clip_boxes,
                dtype=self.compute_dtype,
            )
        self.built = True

    @staticmethod
    def _format_sizes_and_strides(sizes, strides):
        result_sizes = AnchorGenerator._ensure_param_is_levels_dict(sizes, "sizes")
        result_strides = AnchorGenerator._ensure_param_is_levels_dict(
            strides, "strides"
        )

        if sorted(result_strides.keys()) != sorted(result_sizes.keys()):
            raise ValueError(
                "Expected sizes and strides to be either lists of "
                "the same length, or dictionaries with the same keys. Received "
                f"sizes={sizes}, strides={strides}"
            )
        return result_sizes, result_strides

    @staticmethod
    def _ensure_param_is_levels_dict(param, param_name):
        """Takes a param and its name, converts lists to dictionaries of levels.

        For example, the list [1, 2] is converted to {0: 1, 1: 2}.

        Raises:
            ValueError: when param is not a dict, list or tuple.
        """
        if isinstance(param, dict):
            return param
        if not isinstance(param, (list, tuple)):
            raise ValueError(
                f"Expected {param_name} to be a dict, list or tuple, received "
                f"{param_name}={param}"
            )

        result = {}
        for i in range(len(param)):
            result[i] = param[i]
        return result

    @staticmethod
    def _match_param_structure_to_sizes(params, sizes):
        """Broadcast the params to match sizes."""
        # if isinstance(sizes, (tuple, list)):
        #     return [params] * len(sizes)
        if not isinstance(sizes, dict):
            raise ValueError(
                "the structure of `sizes` must be a dict, "
                f"received sizes={sizes}"
            )
        return tf.nest.map_structure(lambda _: params, sizes)

    def __call__(self, image=None, image_shape=None):
        if image is None and image_shape is None:
            raise ValueError("AnchorGenerator() requires `image` or `image_shape`.")

        if image is not None:
            if image.shape.rank != 3:
                raise ValueError(
                    "Expected `image` to be a Tensor of rank 3. Got "
                    f"image.shape.rank={image.shape.rank}"
                )
            image_shape = tf.shape(image)

        anchor_generators = tf.nest.flatten(self.anchor_generators)
        results = [anchor_gen(image_shape) for anchor_gen in anchor_generators]
        results = tf.nest.pack_sequence_as(self.anchor_generators, results)
        for key in results:
            results[key] = bounding_box.convert_format(
                results[key],
                source="yxyx",
                target=self.bounding_box_format,
                image_shape=image_shape,
            )
        return results


# TODO(tanzheny): consider having customized anchor offset.
class _SingleAnchorGenerator:
    """Internal utility to generate anchors for a single feature map in `yxyx` format.

    Example:
    ```python
    anchor_gen = _SingleAnchorGenerator(32, [.5, 1., 2.], stride=16)
    anchors = anchor_gen([512, 512, 3])
    ```

    Input shape: the size of the image, `[H, W, C]`
    Output shape: the size of anchors, `[(H / stride) * (W / stride), 4]`

    Args:
      sizes: A single int representing the base anchor size. The anchor
        height will be `anchor_size / sqrt(aspect_ratio)`, anchor width will be
        `anchor_size * sqrt(aspect_ratio)`.
      scales: A list/tuple, or a list/tuple of a list/tuple, of positive floats
        representing the multipliers applied to the base `anchor_size`.
      aspect_ratios: a list/tuple of positive floats representing the ratio of
        anchor width to anchor height.
      stride: A single int representing the anchor stride size between centers
        of each anchor.
      clip_boxes: Boolean to represent whether the anchor coordinates should be
        clipped to the image size. Defaults to `False`.
      dtype: (Optional) The data type to use for the output anchors. Defaults to
        'float32'.
    """

    def __init__(
        self,
        bounding_box_format,
        sizes,
        scales,
        aspect_ratios,
        stride,
        clip_boxes=False,
        dtype="float32",
    ):
        self.sizes = sizes
        self.scales = scales
        self.aspect_ratios = aspect_ratios
        self.stride = stride
        self.clip_boxes = clip_boxes
        self.dtype = dtype

    def __call__(self, image_size):
        image_height = tf.cast(image_size[0], tf.float32)
        image_width = tf.cast(image_size[1], tf.float32)

        aspect_ratios = tf.cast(self.aspect_ratios, tf.float32)
        aspect_ratios_sqrt = tf.cast(tf.sqrt(aspect_ratios), dtype=tf.float32)
        anchor_size = tf.cast(self.sizes, tf.float32)

        # [K]
        anchor_heights = []
        anchor_widths = []
        for scale in self.scales:
            anchor_size_t = anchor_size * scale
            anchor_height = anchor_size_t / aspect_ratios_sqrt
            anchor_width = anchor_size_t * aspect_ratios_sqrt
            anchor_heights.append(anchor_height)
            anchor_widths.append(anchor_width)
        anchor_heights = tf.concat(anchor_heights, axis=0)
        anchor_widths = tf.concat(anchor_widths, axis=0)
        half_anchor_heights = tf.reshape(0.5 * anchor_heights, [1, 1, -1])
        half_anchor_widths = tf.reshape(0.5 * anchor_widths, [1, 1, -1])

        stride = tf.cast(self.stride, tf.float32)
        # [W]
        cx = tf.range(0.5 * stride, image_width + 1, stride)
        # [H]
        cy = tf.range(0.5 * stride, image_height + 1, stride)
        # [H, W]
        cx_grid, cy_grid = tf.meshgrid(cx, cy)
        # [H, W, 1]
        cx_grid = tf.expand_dims(cx_grid, axis=-1)
        cy_grid = tf.expand_dims(cy_grid, axis=-1)

        y_min = tf.reshape(cy_grid - half_anchor_heights, (-1,))
        y_max = tf.reshape(cy_grid + half_anchor_heights, (-1,))
        x_min = tf.reshape(cx_grid - half_anchor_widths, (-1,))
        x_max = tf.reshape(cx_grid + half_anchor_widths, (-1,))

        # [H * W * K, 1]
        y_min = tf.expand_dims(y_min, axis=-1)
        y_max = tf.expand_dims(y_max, axis=-1)
        x_min = tf.expand_dims(x_min, axis=-1)
        x_max = tf.expand_dims(x_max, axis=-1)

        if self.clip_boxes:
            y_min = tf.maximum(tf.minimum(y_min, image_height), 0.0)
            y_max = tf.maximum(tf.minimum(y_max, image_height), 0.0)
            x_min = tf.maximum(tf.minimum(x_min, image_width), 0.0)
            x_max = tf.maximum(tf.minimum(x_max, image_width), 0.0)

        # [H * W * K, 4]
        return tf.cast(tf.concat([y_min, x_min, y_max, x_max], axis=-1), self.dtype)
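As a quick check on the output-shape formula from the `AnchorGenerator` docstring, the 49104 anchor count asserted in the tests below can be reproduced with plain Python by summing `(H/stride) * (W/stride) * len(scales) * len(aspect_ratios)` over the five levels of a 512x512 image (a back-of-the-envelope sketch, independent of the layer itself):

```python
# Reproduce the 49104 anchor count asserted in anchor_generator_test.py.
# Each level has an (H / stride) x (W / stride) grid of centers, and every
# center carries len(scales) * len(aspect_ratios) = 3 * 3 = 9 anchors.
H = W = 512
strides = [2 ** i for i in range(3, 8)]  # [8, 16, 32, 64, 128]
anchors_per_center = 3 * 3

total = sum((H // s) * (W // s) * anchors_per_center for s in strides)
print(total)  # 49104 = (64*64 + 32*32 + 16*16 + 8*8 + 4*4) * 9
```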
Keras/keras-cv/keras_cv/layers/object_detection/anchor_generator_test.py
0 → 100644
View file @
0016b0a7
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from absl.testing import parameterized

from keras_cv import layers as cv_layers


class AnchorGeneratorTest(tf.test.TestCase, parameterized.TestCase):
    @parameterized.named_parameters(
        ("unequal_lists", [0, 1, 2], [1]),
        ("unequal_levels_dicts", {"level_1": [0, 1, 2]}, {"1": [0, 1, 2]}),
    )
    def test_raises_when_strides_not_equal_to_sizes(self, sizes, strides):
        with self.assertRaises(ValueError):
            cv_layers.AnchorGenerator(
                bounding_box_format="xyxy",
                sizes=sizes,
                strides=strides,
                aspect_ratios=[3 / 4, 1, 4 / 3],
                scales=[0.5, 1.0, 1.5],
            )

    def test_raises_batched_images(self):
        strides = [4]
        scales = [1.0]
        sizes = [4]
        aspect_ratios = [1.0]
        anchor_generator = cv_layers.AnchorGenerator(
            bounding_box_format="xyxy",
            sizes=sizes,
            aspect_ratios=aspect_ratios,
            scales=scales,
            strides=strides,
        )

        image = tf.random.uniform((4, 8, 8, 3))
        with self.assertRaisesRegex(ValueError, "rank"):
            _ = anchor_generator(image=image)

    def test_output_shapes_image(self):
        strides = [2 ** i for i in range(3, 8)]
        scales = [2 ** x for x in [0, 1 / 3, 2 / 3]]
        sizes = [x ** 2 for x in [32.0, 64.0, 128.0, 256.0, 512.0]]
        aspect_ratios = [0.5, 1.0, 2.0]

        image_shape = (512, 512, 3)
        image = tf.random.uniform(image_shape)
        anchor_generator = cv_layers.AnchorGenerator(
            bounding_box_format="yxyx",
            sizes=sizes,
            aspect_ratios=aspect_ratios,
            scales=scales,
            strides=strides,
        )
        boxes = anchor_generator(image=image)
        boxes = tf.concat(list(boxes.values()), axis=0)

        # 49104 is a number found by using the previous internal anchor generator from
        # PR https://github.com/keras-team/keras-cv/pull/609
        # This unit test was written to ensure compatibility with the existing model.
        self.assertEqual(boxes.shape, [49104, 4])

    def test_output_shapes_image_shape(self):
        strides = [2 ** i for i in range(3, 8)]
        scales = [2 ** x for x in [0, 1 / 3, 2 / 3]]
        sizes = [x ** 2 for x in [32.0, 64.0, 128.0, 256.0, 512.0]]
        aspect_ratios = [0.5, 1.0, 2.0]

        image_shape = (512, 512, 3)
        anchor_generator = cv_layers.AnchorGenerator(
            bounding_box_format="yxyx",
            sizes=sizes,
            aspect_ratios=aspect_ratios,
            scales=scales,
            strides=strides,
        )
        boxes = anchor_generator(image_shape=image_shape)
        boxes = tf.concat(list(boxes.values()), axis=0)

        # 49104 is a number found by using the previous internal anchor generator from
        # PR https://github.com/keras-team/keras-cv/pull/609
        # This unit test was written to ensure compatibility with the existing model.
        self.assertEqual(boxes.shape, [49104, 4])

    def test_hand_crafted_aspect_ratios(self):
        strides = [4]
        scales = [1.0]
        sizes = [4]
        aspect_ratios = [3 / 4, 1.0, 4 / 3]
        anchor_generator = cv_layers.AnchorGenerator(
            bounding_box_format="xyxy",
            sizes=sizes,
            aspect_ratios=aspect_ratios,
            scales=scales,
            strides=strides,
        )

        image = tf.random.uniform((8, 8, 3))
        boxes = anchor_generator(image=image)
        level_0 = boxes[0]
        # width/4 * height/4 * len(aspect_ratios) = 2 * 2 * 3 = 12
        self.assertAllEqual(level_0.shape, [12, 4])

        image = tf.random.uniform((4, 4, 3))
        boxes = anchor_generator(image=image)
        level_0 = boxes[0]
        expected_boxes = [
            [0.267949224, -0.309401035, 3.7320509, 4.30940104],
            [0, 0, 4, 4],
            [-0.309401035, 0.267949104, 4.30940104, 3.7320509],
        ]
        self.assertAllClose(level_0, expected_boxes)

    def test_hand_crafted_strides(self):
        strides = [4]
        scales = [1.0]
        sizes = [4]
        aspect_ratios = [1.0]
        anchor_generator = cv_layers.AnchorGenerator(
            bounding_box_format="xyxy",
            sizes=sizes,
            aspect_ratios=aspect_ratios,
            scales=scales,
            strides=strides,
        )

        image = tf.random.uniform((8, 8, 3))
        boxes = anchor_generator(image=image)
        level_0 = boxes[0]
        expected_boxes = [
            [0, 0, 4, 4],
            [4, 0, 8, 4],
            [0, 4, 4, 8],
            [4, 4, 8, 8],
        ]
        self.assertAllClose(level_0, expected_boxes)

    def test_relative_generation(self):
        strides = [8, 16, 32]
        # 2 ** [0, 1 / 3, 2 / 3]
        scales = [2 ** x for x in [0, 1 / 3, 2 / 3]]
        sizes = [32.0, 64.0, 128.0]
        aspect_ratios = [0.5, 1.0, 2.0]
        image = tf.random.uniform((512, 512, 3))
        anchor_generator = cv_layers.AnchorGenerator(
            bounding_box_format="rel_yxyx",
            sizes=sizes,
            aspect_ratios=aspect_ratios,
            scales=scales,
            strides=strides,
            clip_boxes=False,
        )
        boxes = anchor_generator(image=image)
        boxes = tf.concat(list(boxes.values()), axis=0)
        self.assertAllLessEqual(boxes, 1.5)
        self.assertAllGreaterEqual(boxes, -0.50)
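The hand-crafted expectations above follow directly from the height/width rule in `_SingleAnchorGenerator`: for a 4x4 image with `stride=4` there is a single anchor center at (2, 2), and for `aspect_ratio=3/4` the corners work out as below (a sketch of the arithmetic only, not library code):

```python
import math

# Single anchor center at (2, 2) for a 4x4 image with stride 4.
# Per the docstring: width = size * sqrt(ar), height = size / sqrt(ar).
size, ar = 4.0, 3.0 / 4.0
w = size * math.sqrt(ar)  # ~3.4641
h = size / math.sqrt(ar)  # ~4.6188
cx = cy = 2.0

# "xyxy" corners: [x_min, y_min, x_max, y_max]
print([cx - w / 2, cy - h / 2, cx + w / 2, cy + h / 2])
# ~[0.2679, -0.3094, 3.7321, 4.3094] -> matches expected_boxes[0]
```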
Keras/keras-cv/keras_cv/layers/object_detection/nms_prediction_decoder.py
0 → 100644
View file @
0016b0a7
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf

from keras_cv import bounding_box
from keras_cv import layers as cv_layers


class NmsPredictionDecoder(tf.keras.layers.Layer):
    """A Keras layer that decodes predictions of an object detection model.

    By default, NmsPredictionDecoder uses a
    `keras_cv.layers.NonMaxSuppression` layer to perform box pruning. The layer may
    optionally take a `suppression_layer`, which can perform an alternative suppression
    operation, such as SoftNonMaxSuppression.

    Arguments:
      classes: Number of classes in the dataset.
      bounding_box_format: The format of bounding boxes of the input dataset. Refer
        [to the keras.io docs](https://keras.io/api/keras_cv/bounding_box/formats/)
        for more details on supported bounding box formats.
      anchor_generator: a `keras_cv.layers.AnchorGenerator`.
      suppression_layer: (Optional) a `keras.layers.Layer` that follows the same API
        signature as the `keras_cv.layers.NonMaxSuppression` layer. This layer should
        perform a suppression operation such as NonMaxSuppression, or
        SoftNonMaxSuppression.
      box_variance: (Optional) The scaling factors used to scale the bounding box
        targets. Defaults to `(0.1, 0.1, 0.2, 0.2)`. **Important Note:**
        `box_variance` is applied to the boxes in `xywh` format.
    """

    def __init__(
        self,
        bounding_box_format,
        anchor_generator,
        classes=None,
        suppression_layer=None,
        box_variance=(0.1, 0.1, 0.2, 0.2),
        **kwargs,
    ):
        super().__init__(**kwargs)
        if not suppression_layer and not classes:
            raise ValueError(
                "NmsPredictionDecoder() requires either `suppression_layer` "
                f"or `classes`. Received `suppression_layer={suppression_layer}` "
                f"and `classes={classes}`"
            )
        self.bounding_box_format = bounding_box_format
        self.suppression_layer = suppression_layer or cv_layers.NonMaxSuppression(
            classes=classes,
            bounding_box_format=bounding_box_format,
            confidence_threshold=0.5,
            iou_threshold=0.5,
            max_detections=100,
            max_detections_per_class=100,
        )
        if self.suppression_layer.bounding_box_format != self.bounding_box_format:
            raise ValueError(
                "`suppression_layer` must have the same `bounding_box_format` "
                "as the `NmsPredictionDecoder()` layer. "
                "Received `NmsPredictionDecoder.bounding_box_format="
                f"{self.bounding_box_format}`, `suppression_layer={suppression_layer}`."
            )
        self.anchor_generator = anchor_generator
        self.box_variance = tf.convert_to_tensor(box_variance, dtype=tf.float32)
        self.built = True

    # TODO(lukewood): provide this as general utility on top of bounding_box_format.
    def _decode_box_predictions(self, anchor_boxes, box_predictions):
        boxes = box_predictions * self.box_variance
        boxes = tf.concat(
            [
                boxes[:, :, :2] * anchor_boxes[:, :, 2:] + anchor_boxes[:, :, :2],
                tf.math.exp(boxes[:, :, 2:]) * anchor_boxes[:, :, 2:],
            ],
            axis=-1,
        )
        return boxes

    def call(self, images, predictions):
        """Accepts images and raw predictions, and returns bounding box predictions.

        Args:
            images: Tensor of shape [batch, height, width, channels].
            predictions: Dense Tensor of shape [batch, anchor_boxes, 6] in the
                `bounding_box_format` specified in the constructor.
        """
        if isinstance(images, tf.RaggedTensor):
            raise ValueError(
                "NmsPredictionDecoder() does not support tf.RaggedTensor inputs. "
                f"Received images={images}."
            )
        anchor_boxes = self.anchor_generator(images[0])
        anchor_boxes = tf.concat(list(anchor_boxes.values()), axis=0)
        anchor_boxes = bounding_box.convert_format(
            anchor_boxes,
            source=self.anchor_generator.bounding_box_format,
            target="xywh",
            images=images[0],
        )

        predictions = bounding_box.convert_format(
            predictions, source=self.bounding_box_format, target="xywh", images=images
        )
        box_predictions = predictions[:, :, :4]
        cls_predictions = tf.nn.sigmoid(predictions[:, :, 4:])

        classes = tf.math.argmax(cls_predictions, axis=-1)
        classes = tf.cast(classes, box_predictions.dtype)
        confidence = tf.math.reduce_max(cls_predictions, axis=-1)

        classes = tf.expand_dims(classes, axis=-1)
        confidence = tf.expand_dims(confidence, axis=-1)

        boxes = self._decode_box_predictions(anchor_boxes[None, ...], box_predictions)
        boxes = tf.concat([boxes, classes, confidence], axis=-1)
        boxes = bounding_box.convert_format(
            boxes,
            source="xywh",
            target=self.suppression_layer.bounding_box_format,
            images=images,
        )
        return self.suppression_layer(boxes, images=images)
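For reference, `_decode_box_predictions` is the standard inverse box transform in `xywh` space: variance-scaled offsets shift the anchor center, and exponentiated offsets scale the anchor size. A standalone numeric sketch of the same arithmetic (NumPy is used purely for illustration, and the input values are made up):

```python
import numpy as np

# Decode one prediction against one anchor, both in xywh
# (center-x, center-y, width, height), mirroring _decode_box_predictions.
variance = np.array([0.1, 0.1, 0.2, 0.2])
anchor = np.array([16.0, 16.0, 32.0, 32.0])  # cx, cy, w, h
raw = np.array([0.5, -0.25, 0.1, 0.0])       # hypothetical network output

t = raw * variance
center = t[:2] * anchor[2:] + anchor[:2]  # -> [17.6, 15.2]
size = np.exp(t[2:]) * anchor[2:]         # -> [32.646..., 32.0]
print(np.concatenate([center, size]))
```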
Keras/keras-cv/keras_cv/layers/object_detection/nms_prediction_decoder_test.py
0 → 100644
View file @
0016b0a7
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf

from keras_cv import layers as cv_layers


class NmsPredictionDecoderTest(tf.test.TestCase):
    def test_decode_predictions_output_shapes(self):
        classes = 10
        images_shape = (8, 512, 1024, 3)
        predictions_shape = (8, 98208, 4 + classes)

        images = tf.random.uniform(shape=images_shape)
        predictions = tf.random.uniform(
            shape=predictions_shape, minval=0.0, maxval=1.0, dtype=tf.float32
        )
        strides = [2 ** i for i in range(3, 8)]
        scales = [2 ** x for x in [0, 1 / 3, 2 / 3]]
        sizes = [x ** 2 for x in [32.0, 64.0, 128.0, 256.0, 512.0]]
        aspect_ratios = [0.5, 1.0, 2.0]
        anchor_generator = cv_layers.AnchorGenerator(
            bounding_box_format="yxyx",
            sizes=sizes,
            aspect_ratios=aspect_ratios,
            scales=scales,
            strides=strides,
        )

        layer = cv_layers.NmsPredictionDecoder(
            anchor_generator=anchor_generator,
            classes=classes,
            bounding_box_format="rel_xyxy",
        )

        result = layer(images=images, predictions=predictions)

        self.assertEqual(result.shape, [8, None, 6])
Keras/keras-cv/keras_cv/layers/object_detection/non_max_suppression.py
0 → 100644
View file @
0016b0a7
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf

from keras_cv import bounding_box


@tf.keras.utils.register_keras_serializable(package="keras_cv")
class NonMaxSuppression(tf.keras.layers.Layer):
    """
    Implements the non-max suppression layer.

    Non-maximal suppression is used to suppress potentially repeated boxes by:
    1) picking the highest ranked boxes
    2) pruning away all boxes that have a high IoU with the chosen boxes.

    References:
        - [Yolo paper](https://arxiv.org/pdf/1506.02640)

    Args:
      classes: an integer representing the number of classes that a bounding
        box can belong to.
      bounding_box_format: a case-insensitive string which is one of `"xyxy"`,
        `"rel_xyxy"`, `"xyWH"`, `"center_xyWH"`, `"yxyx"`, `"rel_yxyx"`. The
        position and shape of the bounding box will be followed by the class and
        confidence values (in that order). This is required for proper ranking of
        the bounding boxes. Therefore, each bounding box is defined by 6 values.
        For detailed information on the supported formats, see the
        [KerasCV bounding box documentation](https://keras.io/api/keras_cv/bounding_box/formats/).
      confidence_threshold: a float value in the range [0, 1]. All boxes with
        confidence below this value will be discarded. Defaults to 0.05.
      iou_threshold: a float value in the range [0, 1] representing the minimum
        IoU threshold for two boxes to be considered the same for suppression.
        Defaults to 0.5.
      max_detections: the maximum detections to consider after nms is applied. A large
        number may trigger significant memory overhead. Defaults to 100.
      max_detections_per_class: the maximum detections to consider per class after
        nms is applied. Defaults to 100.

    Usage:
    ```python
    images = np.zeros((2, 480, 480, 3), dtype=np.float32)
    ex_boxes = np.array(
        [
            [
                [0, 0, 1, 1, 4, 0.9],
                [0, 0, 2, 3, 4, 0.76],
                [4, 5, 3, 6, 3, 0.89],
                [2, 2, 3, 3, 6, 0.04],
            ],
            [
                [0, 0, 5, 6, 4, 0.9],
                [0, 0, 7, 3, 1, 0.76],
                [4, 5, 5, 6, 4, 0.04],
                [2, 1, 3, 3, 7, 0.48],
            ],
        ],
        dtype=np.float32,
    )

    nms = NonMaxSuppression(
        classes=8,
        bounding_box_format="center_xyWH",
        iou_threshold=0.1,
    )

    boxes = nms(ex_boxes, images)
    ```
    """

    def __init__(
        self,
        classes,
        bounding_box_format,
        confidence_threshold=0.05,
        iou_threshold=0.5,
        max_detections=100,
        max_detections_per_class=100,
        **kwargs,
    ):
        super().__init__(**kwargs)
        self.classes = classes
        self.bounding_box_format = bounding_box_format
        self.confidence_threshold = confidence_threshold
        self.iou_threshold = iou_threshold
        self.max_detections = max_detections
        self.max_detections_per_class = max_detections_per_class
        self.built = True

    def call(self, predictions, images=None):
        if predictions.shape[-1] != 6:
            raise ValueError(
                "keras_cv.layers.NonMaxSuppression() expects `call()` "
                "argument `predictions` to be of shape (None, None, 6). Received "
                f"predictions.shape={tuple(predictions.shape)}."
            )
        # convert to yxyx for the TF NMS operation
        predictions = bounding_box.convert_format(
            predictions,
            source=self.bounding_box_format,
            target="yxyx",
            images=images,
        )

        # preparing the predictions for the TF NMS op
        boxes = tf.expand_dims(predictions[..., :4], axis=2)
        class_predictions = tf.cast(predictions[..., 4], tf.int32)
        scores = predictions[..., 5]

        class_predictions = tf.one_hot(class_predictions, self.classes)
        scores = tf.expand_dims(scores, axis=-1) * class_predictions

        # applying the NMS operation
        nmsed_boxes = tf.image.combined_non_max_suppression(
            boxes,
            scores,
            self.max_detections_per_class,
            self.max_detections,
            self.iou_threshold,
            self.confidence_threshold,
            clip_boxes=False,
        )

        # output will be a ragged tensor because num_boxes will change across the batch
        boxes = self._decode_nms_boxes_to_tensor(nmsed_boxes)
        boxes = self._encode_to_ragged(boxes, nmsed_boxes.valid_detections)

        # converting all boxes back to the original format
        return bounding_box.convert_format(
            boxes,
            source="yxyx",
            target=self.bounding_box_format,
            images=images,
        )

    def _decode_nms_boxes_to_tensor(self, nmsed_boxes):
        boxes = tf.TensorArray(
            tf.float32,
            size=0,
            infer_shape=False,
            element_shape=(6,),
            dynamic_size=True,
        )

        for i in tf.range(tf.shape(nmsed_boxes.nmsed_boxes)[0]):
            num_detections = nmsed_boxes.valid_detections[i]
            # recombining with classes and scores
            boxes_recombined = tf.concat(
                [
                    nmsed_boxes.nmsed_boxes[i][:num_detections],
                    tf.expand_dims(
                        nmsed_boxes.nmsed_classes[i][:num_detections], axis=-1
                    ),
                    tf.expand_dims(
                        nmsed_boxes.nmsed_scores[i][:num_detections], axis=-1
                    ),
                ],
                axis=-1,
            )
            # iterate through the boxes and append them to the TensorArray
            for j in range(nmsed_boxes.valid_detections[i]):
                boxes = boxes.write(boxes.size(), boxes_recombined[j])

        # stacking to create a tensor
        return boxes.stack()

    def _encode_to_ragged(self, boxes, valid_detections):
        # using a cumulative sum to calculate row_limits for the ragged tensor
        row_limits = tf.cumsum(valid_detections)
        # creating the output RaggedTensor by splitting boxes at row_limits
        result = tf.RaggedTensor.from_row_limits(values=boxes, row_limits=row_limits)
        return result

    def get_config(self):
        config = {
            "classes": self.classes,
            "bounding_box_format": self.bounding_box_format,
            "confidence_threshold": self.confidence_threshold,
            "iou_threshold": self.iou_threshold,
            "max_detections": self.max_detections,
            "max_detections_per_class": self.max_detections_per_class,
        }
        base_config = super().get_config()
        return dict(list(base_config.items()) + list(config.items()))
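`_encode_to_ragged` works because the cumulative sum of `valid_detections` gives exactly the row boundaries of the flattened box tensor produced by `_decode_nms_boxes_to_tensor`. A minimal sketch with dummy data in place of the real NMS output:

```python
import tensorflow as tf

# Dummy stand-in for the NMS output: 2 images with 2 and 1 valid detections,
# boxes already flattened in image order (as _decode_nms_boxes_to_tensor does).
boxes = tf.constant(
    [
        [0.0, 0.0, 1.0, 1.0, 4.0, 0.9],
        [0.0, 0.0, 2.0, 3.0, 4.0, 0.7],
        [4.0, 5.0, 5.0, 6.0, 1.0, 0.8],
    ]
)
valid_detections = tf.constant([2, 1])

row_limits = tf.cumsum(valid_detections)  # [2, 3]
ragged = tf.RaggedTensor.from_row_limits(values=boxes, row_limits=row_limits)
print(ragged.shape)  # (2, None, 6)
```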