Commit 0016b0a7 authored by sunxx1's avatar sunxx1

Merge branch 'dtk22.04' into 'main'

Dtk22.04

See merge request dcutoolkit/deeplearing/dlexamples_new!49
parents 17bc28d5 7a382d5d
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from keras_cv.layers import preprocessing
class RandomGaussianBlurTest(tf.test.TestCase):
def test_return_shapes(self):
layer = preprocessing.RandomGaussianBlur(kernel_size=(3, 7), factor=(0, 2))
# RGB
xs = tf.ones((2, 512, 512, 3))
xs = layer(xs)
self.assertEqual(xs.shape, [2, 512, 512, 3])
# greyscale
xs = tf.ones((2, 512, 512, 1))
xs = layer(xs)
self.assertEqual(xs.shape, [2, 512, 512, 1])
def test_in_single_image(self):
layer = preprocessing.RandomGaussianBlur(kernel_size=(3, 7), factor=(0, 2))
# RGB
xs = tf.cast(
tf.ones((512, 512, 3)),
dtype=tf.float32,
)
xs = layer(xs)
self.assertEqual(xs.shape, [512, 512, 3])
# greyscale
xs = tf.cast(
tf.ones((512, 512, 1)),
dtype=tf.float32,
)
xs = layer(xs)
self.assertEqual(xs.shape, [512, 512, 1])
def test_non_square_images(self):
layer = preprocessing.RandomGaussianBlur(kernel_size=(3, 7), factor=(0, 2))
# RGB
xs = tf.ones((2, 256, 512, 3))
xs = layer(xs)
self.assertEqual(xs.shape, [2, 256, 512, 3])
# greyscale
xs = tf.ones((2, 256, 512, 1))
xs = layer(xs)
self.assertEqual(xs.shape, [2, 256, 512, 1])
def test_single_input_args(self):
layer = preprocessing.RandomGaussianBlur(kernel_size=7, factor=2)
# RGB
xs = tf.ones((2, 512, 512, 3))
xs = layer(xs)
self.assertEqual(xs.shape, [2, 512, 512, 3])
# greyscale
xs = tf.ones((2, 512, 512, 1))
xs = layer(xs)
self.assertEqual(xs.shape, [2, 512, 512, 1])
def test_numerical(self):
layer = preprocessing.RandomGaussianBlur(kernel_size=3, factor=(1.0, 1.0))
xs = tf.expand_dims(
tf.constant([[0, 0, 0], [0, 1, 0], [0, 0, 0]]),
axis=-1,
)
xs = tf.expand_dims(xs, axis=0)
# Result expected to be identical to gaussian blur kernel of
# size 3x3 and factor=1.0
result = tf.expand_dims(
tf.constant(
[
[0.07511361, 0.1238414, 0.07511361],
[0.1238414, 0.20417996, 0.1238414],
[0.07511361, 0.1238414, 0.07511361],
]
),
axis=-1,
)
result = tf.expand_dims(result, axis=0)
xs = layer(xs)
self.assertAllClose(xs, result)
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from keras_cv.layers.preprocessing.base_image_augmentation_layer import (
BaseImageAugmentationLayer,
)
from keras_cv.utils import preprocessing
@tf.keras.utils.register_keras_serializable(package="keras_cv")
class RandomHue(BaseImageAugmentationLayer):
"""Randomly adjusts the hue on given images.
This layer will randomly increase/reduce the hue for the input RGB
images. At inference time, the output will be identical to the input.
Call the layer with `training=True` to adjust the hue of the input.
The image hue is adjusted by converting the image(s) to HSV and rotating the
hue channel (H) by delta. The image is then converted back to RGB.
Args:
factor: A tuple of two floats, a single float or `keras_cv.FactorSampler`.
`factor` controls the extent to which the image hue is impacted.
`factor=0.0` makes this layer perform a no-op operation, while a value of
1.0 performs the most aggressive hue adjustment available. If a tuple
is used, a `factor` is sampled between the two values for every image
augmented. If a single float is used, a value between `0.0` and the passed
float is sampled. In order to ensure the value is always the same, please
pass a tuple with two identical floats: `(0.5, 0.5)`.
value_range: the range of values the incoming images will have.
Represented as a two number tuple written [low, high].
This is typically either `[0, 1]` or `[0, 255]` depending
on how your preprocessing pipeline is set up.
seed: Integer. Used to create a random seed.
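Usage:
```python
# A minimal sketch: cifar10 images are in the [0, 255] range, matching
# the `value_range` passed below; the factor is an illustrative choice.
(images, labels), _ = tf.keras.datasets.cifar10.load_data()
random_hue = keras_cv.layers.RandomHue(factor=0.5, value_range=(0, 255))
augmented_images = random_hue(images)
```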
"""
def __init__(self, factor, value_range, seed=None, **kwargs):
super().__init__(seed=seed, **kwargs)
self.factor = preprocessing.parse_factor(
factor,
)
self.value_range = value_range
self.seed = seed
def get_random_transformation(self, **kwargs):
invert = preprocessing.random_inversion(self._random_generator)
# We must scale self.factor() to the range [-0.5, 0.5]. This is because
# tf.image.adjust_hue rotates the hue channel in HSV space, and the delta
# can be thought of as an angle in the range [-180, 180] degrees.
return invert * self.factor() * 0.5
def augment_image(self, image, transformation=None, **kwargs):
image = preprocessing.transform_value_range(image, self.value_range, (0, 1))
# tf.image.adjust_hue expects floats to be in range [0, 1]
image = tf.image.adjust_hue(image, delta=transformation)
# RandomHue is one of the rare KPLs that needs to clip
image = tf.clip_by_value(image, 0, 1)
image = preprocessing.transform_value_range(image, (0, 1), self.value_range)
return image
def augment_bounding_boxes(self, bounding_boxes, **kwargs):
return bounding_boxes
def augment_label(self, label, transformation=None, **kwargs):
return label
def augment_segmentation_mask(self, segmentation_mask, transformation, **kwargs):
return segmentation_mask
def get_config(self):
config = {
"factor": self.factor,
"value_range": self.value_range,
"seed": self.seed,
}
base_config = super().get_config()
return dict(list(base_config.items()) + list(config.items()))
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from absl.testing import parameterized
from keras_cv import core
from keras_cv.layers import preprocessing
class RandomHueTest(tf.test.TestCase, parameterized.TestCase):
def test_preserves_output_shape(self):
image_shape = (4, 8, 8, 3)
image = tf.random.uniform(shape=image_shape) * 255.0
layer = preprocessing.RandomHue(factor=(0.3, 0.8), value_range=(0, 255))
output = layer(image)
self.assertEqual(image.shape, output.shape)
self.assertNotAllClose(image, output)
def test_adjust_no_op(self):
image_shape = (4, 8, 8, 3)
image = tf.random.uniform(shape=image_shape) * 255.0
layer = preprocessing.RandomHue(factor=(0.0, 0.0), value_range=(0, 255))
output = layer(image)
self.assertAllClose(image, output, atol=1e-5, rtol=1e-5)
def test_adjust_full_opposite_hue(self):
image_shape = (4, 8, 8, 3)
image = tf.random.uniform(shape=image_shape) * 255.0
layer = preprocessing.RandomHue(factor=(1.0, 1.0), value_range=(0, 255))
output = layer(image)
channel_max = tf.math.reduce_max(output, axis=-1)
channel_min = tf.math.reduce_min(output, axis=-1)
# Make sure the max and min channel values are the same between input and
# output. In the meantime, the channels swap values with each other.
self.assertAllClose(channel_max, tf.math.reduce_max(image, axis=-1))
self.assertAllClose(channel_min, tf.math.reduce_min(image, axis=-1))
@parameterized.named_parameters(
("025", 0.25), ("05", 0.5), ("075", 0.75), ("100", 1.0)
)
def test_adjusts_all_values_for_factor(self, factor):
image_shape = (4, 8, 8, 3)
# Value range (0, 100)
image = tf.random.uniform(shape=image_shape) * 100.0
layer = preprocessing.RandomHue(factor=(factor, factor), value_range=(0, 255))
output = layer(image)
self.assertNotAllClose(image, output, atol=1e-5, rtol=1e-5)
def test_adjustment_for_non_rgb_value_range(self):
image_shape = (4, 8, 8, 3)
# Value range (0, 100)
image = tf.random.uniform(shape=image_shape) * 100.0
layer = preprocessing.RandomHue(factor=(0.0, 0.0), value_range=(0, 255))
output = layer(image)
self.assertAllClose(image, output, atol=1e-5, rtol=1e-5)
layer = preprocessing.RandomHue(factor=(0.3, 0.8), value_range=(0, 255))
output = layer(image)
self.assertNotAllClose(image, output)
def test_with_uint8(self):
image_shape = (4, 8, 8, 3)
image = tf.cast(tf.random.uniform(shape=image_shape) * 255.0, dtype=tf.uint8)
layer = preprocessing.RandomHue(factor=(0.0, 0.0), value_range=(0, 255))
output = layer(image)
self.assertAllClose(image, output, atol=1e-5, rtol=1e-5)
layer = preprocessing.RandomHue(factor=(0.3, 0.8), value_range=(0, 255))
output = layer(image)
self.assertNotAllClose(image, output)
def test_config(self):
layer = preprocessing.RandomHue(factor=(0.3, 0.8), value_range=(0, 255))
config = layer.get_config()
self.assertTrue(isinstance(config["factor"], core.UniformFactorSampler))
self.assertEqual(config["factor"].get_config()["lower"], 0.3)
self.assertEqual(config["factor"].get_config()["upper"], 0.8)
self.assertEqual(config["value_range"], (0, 255))
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from keras_cv.layers.preprocessing.base_image_augmentation_layer import (
BaseImageAugmentationLayer,
)
from keras_cv.utils import preprocessing
@tf.keras.utils.register_keras_serializable(package="keras_cv")
class RandomJpegQuality(BaseImageAugmentationLayer):
"""Applies Random Jpeg compression artifacts to an image.
Performs the jpeg compression algorithm on the image. This layer can used in order
to ensure your model is robust to artifacts introduced by JPEG compresion.
Args:
factor: 2 element tuple or 2 element list. During augmentation, a random number
is drawn from the factor distribution. This value is passed to
`tf.image.adjust_jpeg_quality()`.
seed: Integer. Used to create a random seed.
Usage:
```python
layer = keras_cv.RandomJpegQuality(factor=(75, 100))
(images, labels), _ = tf.keras.datasets.cifar10.load_data()
augmented_images = layer(images)
```
"""
def __init__(self, factor, seed=None, **kwargs):
super().__init__(**kwargs)
if isinstance(factor, (float, int)):
raise ValueError(
"RandomJpegQuality() expects factor to be a 2 element "
"tuple, list or a `keras_cv.FactorSampler`. "
"RandomJpegQuality() received `factor={factor}`."
)
self.seed = seed
self.factor = preprocessing.parse_factor(
factor, min_value=0, max_value=100, param_name="factor", seed=self.seed
)
def get_random_transformation(self, **kwargs):
return self.factor(dtype=tf.int32)
def augment_image(self, image, transformation=None, **kwargs):
jpeg_quality = transformation
return tf.image.adjust_jpeg_quality(image, jpeg_quality)
def augment_bounding_boxes(self, bounding_boxes, **kwargs):
return bounding_boxes
def augment_label(self, label, transformation=None, **kwargs):
return label
def augment_segmentation_mask(self, segmentation_mask, transformation, **kwargs):
return segmentation_mask
def get_config(self):
config = super().get_config()
config.update({"factor": self.factor, "seed": self.seed})
return config
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from keras_cv.layers import preprocessing
class RandomJpegQualityTest(tf.test.TestCase):
def test_return_shapes(self):
layer = preprocessing.RandomJpegQuality(factor=[0, 100])
# RGB
xs = tf.ones((2, 512, 512, 3))
xs = layer(xs)
self.assertEqual(xs.shape, [2, 512, 512, 3])
# greyscale
xs = tf.ones((2, 512, 512, 1))
xs = layer(xs)
self.assertEqual(xs.shape, [2, 512, 512, 1])
def test_in_single_image(self):
layer = preprocessing.RandomJpegQuality(factor=[0, 100])
# RGB
xs = tf.cast(
tf.ones((512, 512, 3)),
dtype=tf.float32,
)
xs = layer(xs)
self.assertEqual(xs.shape, [512, 512, 3])
# greyscale
xs = tf.cast(
tf.ones((512, 512, 1)),
dtype=tf.float32,
)
xs = layer(xs)
self.assertEqual(xs.shape, [512, 512, 1])
def test_non_square_images(self):
layer = preprocessing.RandomJpegQuality(factor=[0, 100])
# RGB
xs = tf.ones((2, 256, 512, 3))
xs = layer(xs)
self.assertEqual(xs.shape, [2, 256, 512, 3])
# greyscale
xs = tf.ones((2, 256, 512, 1))
xs = layer(xs)
self.assertEqual(xs.shape, [2, 256, 512, 1])
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import tensorflow as tf
from keras_cv import bounding_box
from keras_cv.layers.preprocessing.base_image_augmentation_layer import (
BaseImageAugmentationLayer,
)
from keras_cv.utils import preprocessing
# In order to support both unbatched and batched inputs, the horizontal
# and vertical axes are indexed from the end (negative indices)
H_AXIS = -3
W_AXIS = -2
@tf.keras.utils.register_keras_serializable(package="keras_cv")
class RandomRotation(BaseImageAugmentationLayer):
"""A preprocessing layer which randomly rotates images during training.
This layer will apply random rotations to each image, filling empty space
according to `fill_mode`.
By default, random rotations are only applied during training.
At inference time, the layer does nothing. If you need to apply random
rotations at inference time, set `training` to True when calling the layer.
Input pixel values can be of any range (e.g. `[0., 1.)` or `[0, 255]`) and
of integer or floating point dtype. By default, the layer will output
floats.
Input shape:
3D (unbatched) or 4D (batched) tensor with shape:
`(..., height, width, channels)`, in `"channels_last"` format
Output shape:
3D (unbatched) or 4D (batched) tensor with shape:
`(..., height, width, channels)`, in `"channels_last"` format
Arguments:
factor: a float represented as fraction of 2 Pi, or a tuple of size 2
representing lower and upper bound for rotating clockwise and
counter-clockwise. A positive values means rotating counter clock-wise,
while a negative value means clock-wise. When represented as a single
float, this value is used for both the upper and lower bound. For
instance, `factor=(-0.2, 0.3)` results in an output rotation by a random
amount in the range `[-20% * 2pi, 30% * 2pi]`. `factor=0.2` results in
an output rotating by a random amount in the range
`[-20% * 2pi, 20% * 2pi]`.
fill_mode: Points outside the boundaries of the input are filled according
to the given mode (one of `{"constant", "reflect", "wrap", "nearest"}`).
- *reflect*: `(d c b a | a b c d | d c b a)` The input is extended by
reflecting about the edge of the last pixel.
- *constant*: `(k k k k | a b c d | k k k k)` The input is extended by
filling all values beyond the edge with the same constant value k = 0.
- *wrap*: `(a b c d | a b c d | a b c d)` The input is extended by
wrapping around to the opposite edge.
- *nearest*: `(a a a a | a b c d | d d d d)` The input is extended by
the nearest pixel.
interpolation: Interpolation mode. Supported values: `"nearest"`,
`"bilinear"`.
seed: Integer. Used to create a random seed.
fill_value: a float represents the value to be filled outside the
boundaries when `fill_mode="constant"`.
bounding_box_format: The format of bounding boxes of input dataset. Refer
https://github.com/keras-team/keras-cv/blob/master/keras_cv/bounding_box/converters.py
for more details on supported bounding box formats.
segmentation_classes: an optional integer with the number of classes in
the input segmentation mask. Required iff augmenting data with sparse
(non one-hot) segmentation masks. Include the background class in this
count (e.g. for segmenting dog vs background, this should be set to 2).
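Usage:
```python
# A minimal sketch: rotate each image by a random angle of up to
# +/-20% of 2*pi (i.e. +/-72 degrees); the factor is an illustrative choice.
(images, labels), _ = tf.keras.datasets.cifar10.load_data()
random_rotation = keras_cv.layers.RandomRotation(factor=0.2)
augmented_images = random_rotation(images)
```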
"""
def __init__(
self,
factor,
fill_mode="reflect",
interpolation="bilinear",
seed=None,
fill_value=0.0,
bounding_box_format=None,
segmentation_classes=None,
**kwargs,
):
super().__init__(seed=seed, force_generator=True, **kwargs)
self.factor = factor
if isinstance(factor, (tuple, list)):
self.lower = factor[0]
self.upper = factor[1]
else:
self.lower = -factor
self.upper = factor
if self.upper < self.lower:
raise ValueError(
"Factor cannot have negative values, " "got {}".format(factor)
)
preprocessing.check_fill_mode_and_interpolation(fill_mode, interpolation)
self.fill_mode = fill_mode
self.fill_value = fill_value
self.interpolation = interpolation
self.seed = seed
self.bounding_box_format = bounding_box_format
self.segmentation_classes = segmentation_classes
def get_random_transformation(self, **kwargs):
min_angle = self.lower * 2.0 * np.pi
max_angle = self.upper * 2.0 * np.pi
angle = self._random_generator.random_uniform(
shape=[1], minval=min_angle, maxval=max_angle
)
return {"angle": angle}
def augment_image(self, image, transformation, **kwargs):
return self._rotate_image(image, transformation)
def _rotate_image(self, image, transformation):
image = preprocessing.ensure_tensor(image, self.compute_dtype)
original_shape = image.shape
image = tf.expand_dims(image, 0)
image_shape = tf.shape(image)
img_hd = tf.cast(image_shape[H_AXIS], tf.float32)
img_wd = tf.cast(image_shape[W_AXIS], tf.float32)
angle = transformation["angle"]
output = preprocessing.transform(
image,
preprocessing.get_rotation_matrix(angle, img_hd, img_wd),
fill_mode=self.fill_mode,
fill_value=self.fill_value,
interpolation=self.interpolation,
)
output = tf.squeeze(output, 0)
output.set_shape(original_shape)
return output
def augment_bounding_boxes(
self, bounding_boxes, transformation, image=None, **kwargs
):
if self.bounding_box_format is None:
raise ValueError(
"`RandomRotation()` was called with bounding boxes,"
"but no `bounding_box_format` was specified in the constructor."
"Please specify a bounding box format in the constructor. i.e."
"`RandomRotation(bounding_box_format='xyxy')`"
)
else:
bounding_boxes = bounding_box.convert_format(
bounding_boxes,
source=self.bounding_box_format,
target="xyxy",
images=image,
)
image_shape = tf.shape(image)
h = image_shape[H_AXIS]
w = image_shape[W_AXIS]
_, _, _, _, rest = tf.split(
bounding_boxes, [1, 1, 1, 1, bounding_boxes.shape[-1] - 4], axis=-1
)
# origin coordinates, all the points on the image are rotated around
# this point
origin_x, origin_y = tf.cast(w / 2, dtype=self.compute_dtype), tf.cast(
h / 2, dtype=self.compute_dtype
)
angle = transformation["angle"]
angle = -angle
# calculate coordinates of all four corners of the bounding box
point = tf.stack(
[
tf.stack([bounding_boxes[:, 0], bounding_boxes[:, 1]], axis=1),
tf.stack([bounding_boxes[:, 2], bounding_boxes[:, 1]], axis=1),
tf.stack([bounding_boxes[:, 2], bounding_boxes[:, 3]], axis=1),
tf.stack([bounding_boxes[:, 0], bounding_boxes[:, 3]], axis=1),
],
axis=1,
)
# point_x : x coordinates of all corners of the bounding box
point_x = tf.gather(point, [0], axis=2)
# point_y : y coordinates of all corners of the bounding box
point_y = tf.gather(point, [1], axis=2)
# rotated bounding box coordinates
# new_x : new position of x coordinates of corners of bounding box
new_x = (
origin_x
+ tf.multiply(
tf.cos(angle), tf.cast((point_x - origin_x), dtype=tf.float32)
)
- tf.multiply(
tf.sin(angle), tf.cast((point_y - origin_y), dtype=tf.float32)
)
)
# new_y : new position of y coordinates of corners of bounding box
new_y = (
origin_y
+ tf.multiply(
tf.sin(angle), tf.cast((point_x - origin_x), dtype=tf.float32)
)
+ tf.multiply(
tf.cos(angle), tf.cast((point_y - origin_y), dtype=tf.float32)
)
)
# rotated bounding box coordinates
out = tf.concat([new_x, new_y], axis=2)
# find readjusted coordinates of bounding box to represent it in corners
# format
min_coordinates = tf.math.reduce_min(out, axis=1)
max_coordinates = tf.math.reduce_max(out, axis=1)
bounding_boxes_out = tf.concat([min_coordinates, max_coordinates, rest], axis=1)
bounding_boxes_out = bounding_box.clip_to_image(
bounding_boxes_out,
bounding_box_format="xyxy",
images=image,
)
# convert the boxes back to the input format, in the layer's compute dtype
bounding_boxes_out = bounding_box.convert_format(
bounding_boxes_out,
source="xyxy",
target=self.bounding_box_format,
dtype=self.compute_dtype,
images=image,
)
return bounding_boxes_out
def augment_label(self, label, transformation, **kwargs):
return label
def augment_segmentation_mask(self, segmentation_mask, transformation, **kwargs):
# If segmentation_classes is specified, we have a dense segmentation mask.
# We therefore one-hot encode before rotation to avoid bad interpolation
# during the rotation transformation. We then make the mask sparse
# again using tf.argmax.
if self.segmentation_classes:
one_hot_mask = tf.one_hot(
tf.squeeze(segmentation_mask, axis=-1), self.segmentation_classes
)
rotated_one_hot_mask = self._rotate_image(one_hot_mask, transformation)
rotated_mask = tf.argmax(rotated_one_hot_mask, axis=-1)
return tf.expand_dims(rotated_mask, axis=-1)
else:
if segmentation_mask.shape[-1] == 1:
raise ValueError(
"Segmentation masks must be one-hot encoded, or "
"RandomRotate must be initialized with "
"`segmentation_classes`. `segmentation_classes` was not "
f"specified, and mask has shape {segmentation_mask.shape}"
)
rotated_mask = self._rotate_image(segmentation_mask, transformation)
# Round because we are in one-hot encoding, and we may have
# pixels with ambiguous values due to floating point math for rotation.
return tf.round(rotated_mask)
def compute_output_shape(self, input_shape):
return input_shape
def get_config(self):
config = {
"factor": self.factor,
"fill_mode": self.fill_mode,
"fill_value": self.fill_value,
"interpolation": self.interpolation,
"bounding_box_format": self.bounding_box_format,
"segmentation_classes": self.segmentation_classes,
"seed": self.seed,
}
base_config = super().get_config()
return dict(list(base_config.items()) + list(config.items()))
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import tensorflow as tf
from keras_cv import bounding_box
from keras_cv.layers.preprocessing.random_rotation import RandomRotation
class RandomRotationTest(tf.test.TestCase):
def test_random_rotation_output_shapes(self):
input_images = np.random.random((2, 5, 8, 3)).astype(np.float32)
expected_output = input_images
layer = RandomRotation(0.5)
actual_output = layer(input_images, training=True)
self.assertEqual(expected_output.shape, actual_output.shape)
def test_random_rotation_inference(self):
input_images = np.random.random((2, 5, 8, 3)).astype(np.float32)
expected_output = input_images
layer = RandomRotation(0.5)
actual_output = layer(input_images, training=False)
self.assertAllClose(expected_output, actual_output)
def test_config_with_custom_name(self):
layer = RandomRotation(0.5, name="image_preproc")
config = layer.get_config()
layer_reconstructed = RandomRotation.from_config(config)
self.assertEqual(layer_reconstructed.name, layer.name)
def test_unbatched_image(self):
input_image = np.reshape(np.arange(0, 25), (5, 5, 1)).astype(np.float32)
# 180 degree rotation.
layer = RandomRotation(factor=(0.5, 0.5))
output_image = layer(input_image)
expected_output = np.asarray(
[
[24, 23, 22, 21, 20],
[19, 18, 17, 16, 15],
[14, 13, 12, 11, 10],
[9, 8, 7, 6, 5],
[4, 3, 2, 1, 0],
]
).astype(np.float32)
expected_output = np.reshape(expected_output, (5, 5, 1))
self.assertAllClose(expected_output, output_image)
def test_augment_bbox_dict_input(self):
input_image = np.random.random((512, 512, 3)).astype(np.float32)
bboxes = tf.convert_to_tensor(
[[200, 200, 400, 400, 1], [100, 100, 300, 300, 2]]
)
input = {"images": input_image, "bounding_boxes": bboxes}
# 180 degree rotation.
layer = RandomRotation(factor=(0.5, 0.5), bounding_box_format="xyxy")
output_bbox = layer(input)
expected_output = np.asarray(
[[112.0, 112.0, 312.0, 312.0, 1], [212.0, 212.0, 412.0, 412.0, 2]],
)
expected_output = np.reshape(expected_output, (2, 5))
self.assertAllClose(expected_output, output_bbox["bounding_boxes"])
def test_output_dtypes(self):
inputs = np.array([[[1], [2]], [[3], [4]]], dtype="float64")
layer = RandomRotation(0.5)
self.assertAllEqual(layer(inputs).dtype, "float32")
layer = RandomRotation(0.5, dtype="uint8")
self.assertAllEqual(layer(inputs).dtype, "uint8")
def test_ragged_bounding_boxes(self):
input_image = np.random.random((2, 512, 512, 3)).astype(np.float32)
bboxes = tf.ragged.constant(
[
[[200, 200, 400, 400], [100, 100, 300, 300]],
[[200, 200, 400, 400]],
],
dtype=tf.float32,
)
bboxes = bounding_box.add_class_id(bboxes)
input = {"images": input_image, "bounding_boxes": bboxes}
layer = RandomRotation(factor=(0.5, 0.5), bounding_box_format="xyxy")
output = layer(input)
expected_output = tf.ragged.constant(
[
[[112.0, 112.0, 312.0, 312.0, 0], [212.0, 212.0, 412.0, 412.0, 0]],
[[112.0, 112.0, 312.0, 312.0, 0]],
],
ragged_rank=1,
)
self.assertAllClose(expected_output, output["bounding_boxes"])
def test_augment_sparse_segmentation_mask(self):
classes = 8
input_images = np.random.random((2, 20, 20, 3)).astype(np.float32)
# Masks are all 0s or 7s, to verify that when we rotate we don't
# interpolate the mask to values in between 0 and 7
masks = np.random.randint(2, size=(2, 20, 20, 1)) * (classes - 1)
inputs = {"images": input_images, "segmentation_masks": masks}
# Attempting to rotate a sparse mask without specifying classes fails.
bad_layer = RandomRotation(factor=(0.25, 0.25))
with self.assertRaisesRegex(ValueError, "masks must be one-hot"):
outputs = bad_layer(inputs)
# 90 degree rotation.
layer = RandomRotation(factor=(0.25, 0.25), segmentation_classes=classes)
outputs = layer(inputs)
expected_masks = np.rot90(masks, axes=(1, 2))
self.assertAllClose(expected_masks, outputs["segmentation_masks"])
# 45 degree rotation. Only verifies that no interpolation takes place.
layer = RandomRotation(factor=(0.125, 0.125), segmentation_classes=classes)
outputs = layer(inputs)
self.assertAllInSet(outputs["segmentation_masks"], [0, 7])
def test_augment_one_hot_segmentation_mask(self):
classes = 8
input_images = np.random.random((2, 20, 20, 3)).astype(np.float32)
masks = tf.one_hot(np.random.randint(classes, size=(2, 20, 20)), classes)
inputs = {"images": input_images, "segmentation_masks": masks}
# 90 degree rotation.
layer = RandomRotation(factor=(0.25, 0.25))
outputs = layer(inputs)
expected_masks = np.rot90(masks, axes=(1, 2))
self.assertAllClose(expected_masks, outputs["segmentation_masks"])
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from keras_cv.layers.preprocessing.base_image_augmentation_layer import (
BaseImageAugmentationLayer,
)
from keras_cv.utils import preprocessing
@tf.keras.utils.register_keras_serializable(package="keras_cv")
class RandomSaturation(BaseImageAugmentationLayer):
"""Randomly adjusts the saturation on given images.
This layer will randomly increase/reduce the saturation for the input RGB
images. At inference time, the output will be identical to the input.
Call the layer with `training=True` to adjust the saturation of the input.
Args:
factor: A tuple of two floats, a single float or `keras_cv.FactorSampler`.
`factor` controls the extent to which the image saturation is impacted.
`factor=0.5` makes this layer perform a no-op operation. `factor=0.0` makes
the image fully grayscale. `factor=1.0` makes the image fully
saturated.
Values should be between `0.0` and `1.0`. If a tuple is used, a `factor`
is sampled between the two values for every image augmented. If a single
float is used, a value between `0.0` and the passed float is sampled.
In order to ensure the value is always the same, please pass a tuple with
two identical floats: `(0.5, 0.5)`.
seed: Integer. Used to create a random seed.
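Usage:
```python
# A minimal sketch: a factor sampled in (0.3, 0.8) desaturates or
# saturates around the 0.5 no-op point; the range is an illustrative choice.
(images, labels), _ = tf.keras.datasets.cifar10.load_data()
random_saturation = keras_cv.layers.RandomSaturation(factor=(0.3, 0.8))
augmented_images = random_saturation(images)
```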
"""
def __init__(self, factor, seed=None, **kwargs):
super().__init__(seed=seed, **kwargs)
self.factor = preprocessing.parse_factor(
factor,
min_value=0.0,
max_value=1.0,
)
self.seed = seed
def get_random_transformation(self, **kwargs):
return self.factor()
def augment_image(self, image, transformation=None, **kwargs):
# Convert the factor range from [0, 1] to [0, +inf]. Note that
# tf.image.adjust_saturation applies the formula
# `output_saturation = input_saturation * factor`, so we map the
# factor with `y = x / (1 - x)`.
# This ensures:
# y = +inf when x = 1 (full saturation)
# y = 1 when x = 0.5 (no augmentation)
# y = 0 when x = 0 (full gray scale)
# Convert the transformation to a tensor in case it is a float. A Python
# float of 1.0 would raise a divide-by-zero error, but the division is
# handled correctly (yielding +inf) when it is a tensor.
transformation = tf.convert_to_tensor(transformation)
adjust_factor = transformation / (1 - transformation)
return tf.image.adjust_saturation(image, saturation_factor=adjust_factor)
def augment_bounding_boxes(self, bounding_boxes, transformation=None, **kwargs):
return bounding_boxes
def augment_label(self, label, transformation=None, **kwargs):
return label
def augment_segmentation_mask(self, segmentation_mask, transformation, **kwargs):
return segmentation_mask
def get_config(self):
config = {
"factor": self.factor,
"seed": self.seed,
}
base_config = super().get_config()
return dict(list(base_config.items()) + list(config.items()))
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from keras_cv import core
from keras_cv.layers import preprocessing
class RandomSaturationTest(tf.test.TestCase):
def test_preserves_output_shape(self):
image_shape = (4, 8, 8, 3)
image = tf.random.uniform(shape=image_shape) * 255.0
layer = preprocessing.RandomSaturation(factor=(0.3, 0.8))
output = layer(image)
self.assertEqual(image.shape, output.shape)
self.assertNotAllClose(image, output)
def test_no_adjustment_for_factor_point_five(self):
image_shape = (4, 8, 8, 3)
image = tf.random.uniform(shape=image_shape) * 255.0
layer = preprocessing.RandomSaturation(factor=(0.5, 0.5))
output = layer(image)
self.assertAllClose(image, output, atol=1e-5, rtol=1e-5)
def test_adjust_to_grayscale(self):
image_shape = (4, 8, 8, 3)
image = tf.random.uniform(shape=image_shape) * 255.0
layer = preprocessing.RandomSaturation(factor=(0.0, 0.0))
output = layer(image)
channel_mean = tf.math.reduce_mean(output, axis=-1)
channel_values = tf.unstack(output, axis=-1)
# Make sure all the pixel has the same value among the channel dim, which is
# a fully gray RGB.
for channel_value in channel_values:
self.assertAllClose(channel_mean, channel_value, atol=1e-5, rtol=1e-5)
def test_adjust_to_full_saturation(self):
image_shape = (4, 8, 8, 3)
image = tf.random.uniform(shape=image_shape) * 255.0
layer = preprocessing.RandomSaturation(factor=(1.0, 1.0))
output = layer(image)
channel_min = tf.math.reduce_min(output, axis=-1)
# Make sure at least one of the channels is 0.0 (fully saturated image)
self.assertAllClose(channel_min, tf.zeros((4, 8, 8)))
def test_adjustment_for_non_rgb_value_range(self):
image_shape = (4, 8, 8, 3)
# Value range (0, 100)
image = tf.random.uniform(shape=image_shape) * 100.0
layer = preprocessing.RandomSaturation(factor=(0.5, 0.5))
output = layer(image)
self.assertAllClose(image, output, atol=1e-5, rtol=1e-5)
layer = preprocessing.RandomSaturation(factor=(0.3, 0.8))
output = layer(image)
self.assertNotAllClose(image, output)
def test_with_uint8(self):
image_shape = (4, 8, 8, 3)
image = tf.cast(tf.random.uniform(shape=image_shape) * 255.0, dtype=tf.uint8)
layer = preprocessing.RandomSaturation(factor=(0.5, 0.5))
output = layer(image)
self.assertAllClose(image, output, atol=1e-5, rtol=1e-5)
layer = preprocessing.RandomSaturation(factor=(0.3, 0.8))
output = layer(image)
self.assertNotAllClose(image, output)
def test_config(self):
layer = preprocessing.RandomSaturation(factor=(0.3, 0.8))
config = layer.get_config()
self.assertTrue(isinstance(config["factor"], core.UniformFactorSampler))
self.assertEqual(config["factor"].get_config()["lower"], 0.3)
self.assertEqual(config["factor"].get_config()["upper"], 0.8)
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from keras_cv.layers.preprocessing.base_image_augmentation_layer import (
BaseImageAugmentationLayer,
)
from keras_cv.utils import preprocessing
@tf.keras.utils.register_keras_serializable(package="keras_cv")
class RandomSharpness(BaseImageAugmentationLayer):
"""Randomly performs the sharpness operation on given images.
The sharpness operation first performs a blur operation, then blends between the
original image and the blurred image. This operation makes the edges of an image
less sharp than they were in the original image.
References:
- [PIL](https://pillow.readthedocs.io/en/stable/reference/ImageEnhance.html)
Args:
factor: A tuple of two floats, a single float or `keras_cv.FactorSampler`.
`factor` controls the extent to which the image sharpness is impacted.
`factor=0.0` makes this layer perform a no-op operation, while a value of
1.0 uses the blurred result entirely. Values between 0 and 1 result in
linear interpolation between the original image and the blurred image.
Values should be between `0.0` and `1.0`. If a tuple is used, a `factor` is
sampled between the two values for every image augmented. If a single float
is used, a value between `0.0` and the passed float is sampled. In order to
ensure the value is always the same, please pass a tuple with two identical
floats: `(0.5, 0.5)`.
value_range: the range of values the incoming images will have.
Represented as a two number tuple written [low, high].
This is typically either `[0, 1]` or `[0, 255]` depending
on how your preprocessing pipeline is set up.
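Usage:
```python
# A minimal sketch, assuming images in the [0, 255] range; the factor
# is an illustrative choice.
(images, labels), _ = tf.keras.datasets.cifar10.load_data()
random_sharpness = keras_cv.layers.RandomSharpness(
    factor=0.5, value_range=(0, 255)
)
augmented_images = random_sharpness(images)
```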
"""
def __init__(
self,
factor,
value_range,
seed=None,
**kwargs,
):
super().__init__(seed=seed, **kwargs)
self.value_range = value_range
self.factor = preprocessing.parse_factor(factor)
self.seed = seed
def get_random_transformation(self, **kwargs):
return self.factor()
def augment_image(self, image, transformation=None, **kwargs):
image = preprocessing.transform_value_range(
image, original_range=self.value_range, target_range=(0, 255)
)
original_image = image
# Make image 4D for conv operation.
image = tf.expand_dims(image, axis=0)
# [1 1 1]
# [1 5 1]
# [1 1 1]
# all divided by 13 is the default 3x3 smoothing kernel (the same kernel
# PIL uses for its smooth filter). Correlating or convolving with this
# filter is equivalent to performing a mild blur.
kernel = (
tf.constant(
[[1, 1, 1], [1, 5, 1], [1, 1, 1]], dtype=tf.float32, shape=[3, 3, 1, 1]
)
/ 13.0
)
# Tile across channel dimension.
channels = tf.shape(image)[-1]
kernel = tf.tile(kernel, [1, 1, channels, 1])
strides = [1, 1, 1, 1]
smoothed_image = tf.nn.depthwise_conv2d(
image, kernel, strides, padding="VALID", dilations=[1, 1]
)
smoothed_image = tf.clip_by_value(smoothed_image, 0.0, 255.0)
smoothed_image = tf.squeeze(smoothed_image, axis=0)
# For the borders of the resulting image, fill in the values of the
# original image.
mask = tf.ones_like(smoothed_image)
padded_mask = tf.pad(mask, [[1, 1], [1, 1], [0, 0]])
padded_smoothed_image = tf.pad(smoothed_image, [[1, 1], [1, 1], [0, 0]])
result = tf.where(
tf.equal(padded_mask, 1), padded_smoothed_image, original_image
)
# Blend the final result.
result = preprocessing.blend(original_image, result, transformation)
result = preprocessing.transform_value_range(
result, original_range=(0, 255), target_range=self.value_range
)
return result
def augment_bounding_boxes(self, bounding_boxes, transformation, **kwargs):
return bounding_boxes
def augment_label(self, label, transformation=None, **kwargs):
return label
def augment_segmentation_mask(self, segmentation_mask, transformation, **kwargs):
return segmentation_mask
def get_config(self):
config = super().get_config()
config.update(
{"factor": self.factor, "value_range": self.value_range, "seed": self.seed}
)
return config
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from keras_cv.layers import preprocessing
class RandomSharpnessTest(tf.test.TestCase):
def test_random_sharpness_preserves_output_shape(self):
img_shape = (50, 50, 3)
xs = tf.stack(
[2 * tf.ones(img_shape), tf.ones(img_shape)],
axis=0,
)
layer = preprocessing.RandomSharpness(0.0, value_range=(0, 255))
ys = layer(xs)
self.assertEqual(xs.shape, ys.shape)
self.assertAllClose(xs, ys)
def test_random_sharpness_blur_effect_single_channel(self):
xs = tf.expand_dims(
tf.constant(
[
[0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 1, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0],
]
),
axis=-1,
)
xs = tf.expand_dims(xs, axis=0)
layer = preprocessing.RandomSharpness((1.0, 1.0), value_range=(0, 255))
ys = layer(xs)
self.assertEqual(xs.shape, ys.shape)
result = tf.expand_dims(
tf.constant(
[
[0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0],
[0, 0, 1 / 13, 1 / 13, 1 / 13, 0, 0],
[0, 0, 1 / 13, 5 / 13, 1 / 13, 0, 0],
[0, 0, 1 / 13, 1 / 13, 1 / 13, 0, 0],
[0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0],
]
),
axis=-1,
)
result = tf.expand_dims(result, axis=0)
self.assertAllClose(ys, result)
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import warnings
import tensorflow as tf
import keras_cv
from keras_cv import bounding_box
from keras_cv.layers.preprocessing.base_image_augmentation_layer import (
BaseImageAugmentationLayer,
)
from keras_cv.utils import preprocessing
@tf.keras.utils.register_keras_serializable(package="keras_cv")
class RandomShear(BaseImageAugmentationLayer):
"""A preprocessing layer which randomly shears images during training.
This layer will apply random shear transformations to each image, filling empty space
according to `fill_mode`.
By default, random shears are only applied during training.
At inference time, the layer does nothing. If you need to apply random
shear at inference time, set `training` to True when calling the layer.
Input pixel values can be of any range and any data type.
Input shape:
3D (unbatched) or 4D (batched) tensor with shape:
`(..., height, width, channels)`, in `"channels_last"` format
Output shape:
3D (unbatched) or 4D (batched) tensor with shape:
`(..., height, width, channels)`, in `"channels_last"` format
Args:
x_factor: A tuple of two floats, a single float or a
`keras_cv.FactorSampler`. For each augmented image a value is sampled
from the provided range. If a float is passed, the range is interpreted as
`(0, x_factor)`. Values represent a percentage of the image to shear over.
For example, 0.3 shears pixels up to 30% of the way across the image.
All provided values should be positive. If `None` is passed, no shear
occurs on the X axis.
Defaults to `None`.
y_factor: A tuple of two floats, a single float or a
`keras_cv.FactorSampler`. For each augmented image a value is sampled
from the provided range. If a float is passed, the range is interpreted as
`(0, y_factor)`. Values represent a percentage of the image to shear over.
For example, 0.3 shears pixels up to 30% of the way across the image.
All provided values should be positive. If `None` is passed, no shear
occurs on the Y axis.
Defaults to `None`.
interpolation: interpolation method used in the `ImageProjectiveTransformV3` op.
Supported values are `"nearest"` and `"bilinear"`.
Defaults to `"bilinear"`.
fill_mode: fill_mode in the `ImageProjectiveTransformV3` op.
Supported values are `"reflect"`, `"wrap"`, `"constant"`, and `"nearest"`.
Defaults to `"reflect"`.
fill_value: fill_value in the `ImageProjectiveTransformV3` op.
A `Tensor` of type `float32`. The value to be filled when fill_mode is
constant". Defaults to `0.0`.
bounding_box_format: The format of bounding boxes of input dataset. Refer to
https://github.com/keras-team/keras-cv/blob/master/keras_cv/bounding_box/converters.py
for more details on supported bounding box formats.
seed: Integer. Used to create a random seed.
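Usage:
```python
# A minimal sketch: shear horizontally by up to 30% and vertically by up
# to 20% of the image size; the factors are illustrative choices.
(images, labels), _ = tf.keras.datasets.cifar10.load_data()
random_shear = keras_cv.layers.RandomShear(x_factor=0.3, y_factor=0.2)
augmented_images = random_shear(images)
```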
"""
def __init__(
self,
x_factor=None,
y_factor=None,
interpolation="bilinear",
fill_mode="reflect",
fill_value=0.0,
bounding_box_format=None,
seed=None,
**kwargs,
):
super().__init__(seed=seed, **kwargs)
if x_factor is not None:
self.x_factor = preprocessing.parse_factor(
x_factor, max_value=None, param_name="x_factor", seed=seed
)
else:
self.x_factor = x_factor
if y_factor is not None:
self.y_factor = preprocessing.parse_factor(
y_factor, max_value=None, param_name="y_factor", seed=seed
)
else:
self.y_factor = y_factor
if x_factor is None and y_factor is None:
warnings.warn(
"RandomShear received both `x_factor=None` and `y_factor=None`. As a "
"result, the layer will perform no augmentation."
)
self.interpolation = interpolation
self.fill_mode = fill_mode
self.fill_value = fill_value
self.seed = seed
self.bounding_box_format = bounding_box_format
def get_random_transformation(self, **kwargs):
x = self._get_shear_amount(self.x_factor)
y = self._get_shear_amount(self.y_factor)
return (x, y)
def _get_shear_amount(self, constraint):
if constraint is None:
return None
invert = preprocessing.random_inversion(self._random_generator)
return invert * constraint()
def augment_image(self, image, transformation=None, **kwargs):
image = tf.expand_dims(image, axis=0)
x, y = transformation
if x is not None:
transform_x = RandomShear._format_transform(
[1.0, x, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0]
)
image = preprocessing.transform(
images=image,
transforms=transform_x,
interpolation=self.interpolation,
fill_mode=self.fill_mode,
fill_value=self.fill_value,
)
if y is not None:
transform_y = RandomShear._format_transform(
[1.0, 0.0, 0.0, y, 1.0, 0.0, 0.0, 0.0]
)
image = preprocessing.transform(
images=image,
transforms=transform_y,
interpolation=self.interpolation,
fill_mode=self.fill_mode,
fill_value=self.fill_value,
)
return tf.squeeze(image, axis=0)
def augment_label(self, label, transformation=None, **kwargs):
return label
def augment_bounding_boxes(
self, bounding_boxes, transformation, image=None, **kwargs
):
if self.bounding_box_format is None:
raise ValueError(
"`RandomShear()` was called with bounding boxes,"
"but no `bounding_box_format` was specified in the constructor."
"Please specify a bounding box format in the constructor. i.e."
"`RandomShear(bounding_box_format='xyxy')`"
)
bounding_boxes = keras_cv.bounding_box.convert_format(
bounding_boxes,
source=self.bounding_box_format,
target="rel_xyxy",
images=image,
dtype=self.compute_dtype,
)
x, y = transformation
extended_bboxes, rest_axes = self._convert_to_extended_corners_format(
bounding_boxes
)
if x is not None:
extended_bboxes = self._apply_horizontal_transformation_to_bounding_box(
extended_bboxes, x
)
# apply vertical shear
if y is not None:
extended_bboxes = self._apply_vertical_transformation_to_bounding_box(
extended_bboxes, y
)
bounding_boxes = self._convert_to_four_coordinate(extended_bboxes, x, y)
# join rest of the axes with bbox axes
bounding_boxes = tf.concat(
[bounding_boxes, rest_axes],
axis=-1,
)
bounding_boxes = bounding_box.clip_to_image(
bounding_boxes, images=image, bounding_box_format="rel_xyxy"
)
# convert to universal output format
bounding_boxes = keras_cv.bounding_box.convert_format(
bounding_boxes,
source="rel_xyxy",
target=self.bounding_box_format,
images=image,
dtype=self.compute_dtype,
)
return bounding_boxes
def get_config(self):
config = super().get_config()
config.update(
{
"x_factor": self.x_factor,
"y_factor": self.y_factor,
"interpolation": self.interpolation,
"fill_mode": self.fill_mode,
"fill_value": self.fill_value,
"bounding_box_format": self.bounding_box_format,
"seed": self.seed,
}
)
return config
@staticmethod
def _format_transform(transform):
transform = tf.convert_to_tensor(transform, dtype=tf.float32)
return transform[tf.newaxis]
@staticmethod
def _convert_to_four_coordinate(extended_bboxes, x, y):
"""convert from extended coordinates to 4 coordinates system"""
(
top_left_x,
top_left_y,
bottom_right_x,
bottom_right_y,
top_right_x,
top_right_y,
bottom_left_x,
bottom_left_y,
) = tf.split(extended_bboxes, 8, axis=1)
# choose x1,x2 when x>0
def positive_case_x():
final_x1 = bottom_left_x
final_x2 = top_right_x
return final_x1, final_x2
# choose x1,x2 when x<0
def negative_case_x():
final_x1 = top_left_x
final_x2 = bottom_right_x
return final_x1, final_x2
if x is not None:
final_x1, final_x2 = tf.cond(
tf.less(x, 0), negative_case_x, positive_case_x
)
else:
final_x1, final_x2 = top_left_x, bottom_right_x
# choose y1,y2 when y > 0
def positive_case_y():
final_y1 = top_right_y
final_y2 = bottom_left_y
return final_y1, final_y2
# choose y1,y2 when y < 0
def negative_case_y():
final_y1 = top_left_y
final_y2 = bottom_right_y
return final_y1, final_y2
if y is not None:
final_y1, final_y2 = tf.cond(
tf.less(y, 0), negative_case_y, positive_case_y
)
else:
final_y1, final_y2 = top_left_y, bottom_right_y
return tf.concat(
[final_x1, final_y1, final_x2, final_y2],
axis=1,
)
@staticmethod
def _apply_horizontal_transformation_to_bounding_box(extended_bounding_boxes, x):
# create transformation matrix [1,4]
matrix = tf.stack([1.0, -x, 0, 1.0], axis=0)
# reshape it to [2,2]
matrix = tf.reshape(matrix, (2, 2))
# reshape unnormalized bboxes from [N,8] -> [N*4,2]
new_bboxes = tf.reshape(extended_bounding_boxes, (-1, 2))
# [[1, -x], [0, 1]] @ [x, y] -> [new_x, new_y]
transformed_bboxes = tf.reshape(
tf.einsum("ij,kj->ki", matrix, new_bboxes), (-1, 8)
)
return transformed_bboxes
@staticmethod
def _apply_vertical_transformation_to_bounding_box(extended_bounding_boxes, y):
# create transformation matrix [1,4]
matrix = tf.stack([1.0, 0, -y, 1.0], axis=0)
# reshape it to [2,2]
matrix = tf.reshape(matrix, (2, 2))
# reshape unnormalized bboxes from [N,8] -> [N*4,2]
new_bboxes = tf.reshape(extended_bounding_boxes, (-1, 2))
# [[1, 0], [-y, 1]] @ [x, y] -> [new_x, new_y]
transformed_bboxes = tf.reshape(
tf.einsum("ij,kj->ki", matrix, new_bboxes), (-1, 8)
)
return transformed_bboxes
@staticmethod
def _convert_to_extended_corners_format(bounding_boxes):
"""splits corner bboxes top left,bottom right to 4 corners top left,
bottom right,top right and bottom left"""
x1, y1, x2, y2, rest = tf.split(
bounding_boxes, [1, 1, 1, 1, bounding_boxes.shape[-1] - 4], axis=-1
)
new_bboxes = tf.concat(
[x1, y1, x2, y2, x2, y1, x1, y2],
axis=-1,
)
return new_bboxes, rest
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from keras_cv import bounding_box
from keras_cv.layers import preprocessing
classes = 10
class RandomShearTest(tf.test.TestCase):
def test_aggressive_shear_fills_at_least_some_pixels(self):
img_shape = (50, 50, 3)
xs = tf.stack(
[2 * tf.ones(img_shape), tf.ones(img_shape)],
axis=0,
)
xs = tf.cast(xs, tf.float32)
fill_value = 0.0
layer = preprocessing.RandomShear(
x_factor=(3, 3), seed=0, fill_mode="constant", fill_value=fill_value
)
xs = layer(xs)
# Some pixels should be replaced with fill value
self.assertTrue(tf.math.reduce_any(xs[0] == fill_value))
self.assertTrue(tf.math.reduce_any(xs[0] == 2.0))
self.assertTrue(tf.math.reduce_any(xs[1] == fill_value))
self.assertTrue(tf.math.reduce_any(xs[1] == 1.0))
def test_return_shapes(self):
"""test return dict keys and value pairs"""
xs = tf.ones((2, 512, 512, 3))
# randomly sample labels
ys_labels = tf.random.categorical(tf.math.log([[0.5, 0.5]]), 2)
ys_labels = tf.squeeze(ys_labels)
ys_labels = tf.one_hot(ys_labels, classes)
# randomly sample bounding boxes
ys_bounding_boxes = tf.random.uniform((2, 3, 7), 0, 1)
layer = preprocessing.RandomShear(
x_factor=(0.1, 0.3),
y_factor=(0.1, 0.3),
seed=0,
fill_mode="constant",
bounding_box_format="rel_xyxy",
)
# apply the shear layer to images, labels, and bounding boxes together
outputs = layer(
{"images": xs, "labels": ys_labels, "bounding_boxes": ys_bounding_boxes}
)
xs, ys_labels, ys_bounding_boxes = (
outputs["images"],
outputs["labels"],
outputs["bounding_boxes"],
)
self.assertEqual(xs.shape, [2, 512, 512, 3])
self.assertEqual(ys_labels.shape, [2, 10])
self.assertEqual(ys_bounding_boxes.shape, [2, 3, 7])
def test_single_image_input(self):
"""test for single image input"""
xs = tf.ones((512, 512, 3))
ys = tf.ones(shape=(5, 5))
inputs = {"images": xs, "bounding_boxes": ys}
layer = preprocessing.RandomShear(
x_factor=(3, 3),
seed=0,
fill_mode="constant",
bounding_box_format="rel_xyxy",
)
outputs = layer(inputs)
xs, ys_bounding_boxes = (
outputs["images"],
outputs["bounding_boxes"],
)
self.assertEqual(xs.shape, [512, 512, 3])
self.assertEqual(ys_bounding_boxes.shape, [5, 5])
def test_area(self):
"""test for shear bbox transformation since new bbox will be
greater than old bbox"""
xs = tf.ones((512, 512, 3))
ys = tf.constant([[0.3, 0.4, 0.5, 0.6, 2], [0.9, 0.8, 1.0, 1.0, 3]])
inputs = {"images": xs, "bounding_boxes": ys}
layer = preprocessing.RandomShear(
x_factor=(0.3, 0.7),
y_factor=(0.4, 0.7),
seed=0,
fill_mode="constant",
bounding_box_format="rel_xyxy",
)
outputs = layer(inputs)
xs, ys_bounding_boxes = (
outputs["images"],
outputs["bounding_boxes"],
)
new_area = tf.math.multiply(
tf.abs(tf.subtract(ys_bounding_boxes[..., 2], ys_bounding_boxes[..., 0])),
tf.abs(tf.subtract(ys_bounding_boxes[..., 3], ys_bounding_boxes[..., 1])),
)
old_area = tf.math.multiply(
tf.abs(tf.subtract(ys[..., 2], ys[..., 0])),
tf.abs(tf.subtract(ys[..., 3], ys[..., 1])),
)
tf.debugging.assert_greater_equal(new_area, old_area)
def test_in_tf_function(self):
"""test for class works with tf function"""
xs = tf.cast(
tf.stack(
[2 * tf.ones((4, 4, 3)), tf.ones((4, 4, 3))],
axis=0,
),
tf.float32,
)
ys = tf.cast(
tf.stack(
[
tf.constant([[0.3, 0.4, 0.5, 0.6], [0.9, 0.8, 1.0, 1.0]]),
tf.constant([[0.3, 0.4, 0.5, 0.6], [0.9, 0.8, 1.0, 1.0]]),
],
axis=0,
),
tf.float32,
)
ys = bounding_box.add_class_id(ys)
layer = preprocessing.RandomShear(
x_factor=0.2, y_factor=0.2, bounding_box_format="rel_xyxy"
)
@tf.function
def augment(x, y):
return layer({"images": x, "bounding_boxes": y})
outputs = augment(xs, ys)
xs, ys = outputs["images"], outputs["bounding_boxes"]
# The images should no longer be uniformly 1 or 2
self.assertNotAllClose(xs, 1.0)
self.assertNotAllClose(xs, 2.0)
# The bounding boxes should no longer be uniformly 0 or 1
self.assertNotAllClose(ys, 1.0)
self.assertNotAllClose(ys, 0.0)
def test_no_augmentation(self):
"""test for no image and bbox augmenation when x_factor,y_factor is 0,0"""
xs = tf.cast(
tf.stack(
[2 * tf.ones((4, 4, 3)), tf.ones((4, 4, 3))],
axis=0,
),
tf.float32,
)
ys = tf.cast(
tf.stack(
[
tf.constant([[0.3, 0.4, 0.5, 0.6], [0.9, 0.8, 1.0, 1.0]]),
tf.constant([[0.3, 0.4, 0.5, 0.6], [0.9, 0.8, 1.0, 1.0]]),
],
axis=0,
),
tf.float32,
)
ys = bounding_box.add_class_id(ys)
layer = preprocessing.RandomShear(
x_factor=0, y_factor=0, bounding_box_format="rel_xyxy"
)
outputs = layer({"images": xs, "bounding_boxes": ys})
output_xs, output_ys = outputs["images"], outputs["bounding_boxes"]
self.assertAllEqual(xs, output_xs)
self.assertAllEqual(ys, output_ys)
def test_bounding_box_x_augmentation(self):
xs = tf.cast(
tf.stack(
[2 * tf.ones((4, 4, 3)), tf.ones((4, 4, 3))],
axis=0,
),
tf.float32,
)
ys = tf.cast(
tf.stack(
[
tf.constant([[0.3, 0.4, 0.5, 0.6], [0.4, 0.8, 1.0, 1.0]]),
tf.constant([[0.3, 0.4, 0.5, 0.6], [0.4, 0.8, 1.0, 1.0]]),
],
axis=0,
),
tf.float32,
)
ys = bounding_box.add_class_id(ys)
layer = preprocessing.RandomShear(
x_factor=0.5, y_factor=0, bounding_box_format="rel_xyxy"
)
outputs = layer({"images": xs, "bounding_boxes": ys})
_, output_ys = outputs["images"], outputs["bounding_boxes"]
# assert ys are unchanged
self.assertAllEqual(ys[..., 1], output_ys[..., 1])
self.assertAllEqual(ys[..., 3], output_ys[..., 3])
# assert xs are changed
self.assertNotAllClose(ys[..., 0], output_ys[..., 0])
self.assertNotAllClose(ys[..., 2], output_ys[..., 2])
def test_bounding_box_y_augmentation(self):
xs = tf.cast(
tf.stack(
[2 * tf.ones((4, 4, 3)), tf.ones((4, 4, 3))],
axis=0,
),
tf.float32,
)
ys = tf.cast(
tf.stack(
[
tf.constant([[0.3, 0.4, 0.5, 0.6], [0.9, 0.2, 1.0, 1.0]]),
tf.constant([[0.3, 0.4, 0.5, 0.6], [0.9, 0.2, 1.0, 1.0]]),
],
axis=0,
),
tf.float32,
)
ys = bounding_box.add_class_id(ys)
layer = preprocessing.RandomShear(
x_factor=0, y_factor=0.5, bounding_box_format="rel_xyxy"
)
outputs = layer({"images": xs, "bounding_boxes": ys})
_, output_ys = outputs["images"], outputs["bounding_boxes"]
self.assertAllEqual(ys[..., 0], output_ys[..., 0])
self.assertNotAllClose(ys[..., 1], output_ys[..., 1])
self.assertAllEqual(ys[..., 2], output_ys[..., 2])
self.assertNotAllClose(ys[..., 3], output_ys[..., 3])
def test_rel_xyxy(self):
"""test for shear bbox augmentation for relative xyxy bbox input"""
xs = tf.cast(
tf.stack(
[2 * tf.ones((4, 4, 3)), tf.ones((4, 4, 3))],
axis=0,
),
tf.float32,
)
ys = tf.cast(
tf.stack(
[
tf.constant([[0.3, 0.4, 0.5, 0.6], [0.9, 0.8, 1.0, 1.0]]),
tf.constant([[0.3, 0.4, 0.5, 0.6], [0.9, 0.8, 1.0, 1.0]]),
],
axis=0,
),
tf.float32,
)
ys = bounding_box.add_class_id(ys)
layer = preprocessing.RandomShear(
x_factor=0, y_factor=0, bounding_box_format="rel_xyxy"
)
outputs = layer({"images": xs, "bounding_boxes": ys})
_, output_ys = outputs["images"], outputs["bounding_boxes"]
self.assertAllEqual(ys, output_ys)
def test_xyxy(self):
"""test for shear bbox augmentation for xyxy format"""
xs = tf.cast(
tf.stack(
[2 * tf.ones((100, 100, 3)), tf.ones((100, 100, 3))],
axis=0,
),
tf.float32,
)
ys = tf.cast(
tf.stack(
[
tf.constant([[10.0, 20.0, 40.0, 50.0], [12.0, 22.0, 42.0, 54.0]]),
tf.constant([[10.0, 20.0, 40.0, 50.0], [12.0, 22.0, 42.0, 54.0]]),
],
axis=0,
),
tf.float32,
)
ys = bounding_box.add_class_id(ys)
layer = preprocessing.RandomShear(
x_factor=0, y_factor=0, bounding_box_format="xyxy"
)
outputs = layer({"images": xs, "bounding_boxes": ys})
_, output_ys = outputs["images"], outputs["bounding_boxes"]
self.assertAllClose(ys, output_ys)
def test_clip_bounding_box(self):
"""test for bbox clipping to image width and height"""
xs = tf.cast(
tf.stack(
[2 * tf.ones((4, 4, 3)), tf.ones((4, 4, 3))],
axis=0,
),
tf.float32,
)
ys = tf.cast(
tf.stack(
[
tf.constant([[0.0, 0.0, 40.0, 50.0], [0.0, 0.0, 42.0, 54.0]]),
tf.constant([[0.0, 0.0, 40.0, 50.0], [0.0, 0.0, 42.0, 54.0]]),
],
axis=0,
),
tf.float32,
)
ys = bounding_box.add_class_id(ys)
ground_truth = tf.cast(
tf.stack(
[
tf.constant([[0, 0, 4, 4, 0], [0, 0, 4, 4, 0]]),
tf.constant([[0, 0, 4, 4, 0], [0, 0, 4, 4, 0]]),
],
axis=0,
),
tf.float32,
)
layer = preprocessing.RandomShear(
x_factor=0, y_factor=0, bounding_box_format="xyxy"
)
outputs = layer({"images": xs, "bounding_boxes": ys})
_, output_ys = outputs["images"], outputs["bounding_boxes"]
self.assertAllEqual(ground_truth, output_ys)
def test_dtype(self):
"""test for output dtype is returned as standardize dtype"""
xs = tf.cast(
tf.stack(
[2 * tf.ones((4, 4, 3)), tf.ones((4, 4, 3))],
axis=0,
),
tf.float32,
)
ys = tf.cast(
tf.stack(
[
tf.constant([[10.0, 20.0, 40.0, 50.0], [12.0, 22.0, 42.0, 54.0]]),
tf.constant([[10.0, 20.0, 40.0, 50.0], [12.0, 22.0, 42.0, 54.0]]),
],
axis=0,
),
tf.float32,
)
ys = bounding_box.add_class_id(ys)
layer = preprocessing.RandomShear(
x_factor=0, y_factor=0, bounding_box_format="xyxy"
)
outputs = layer({"images": xs, "bounding_boxes": ys})
_, output_ys = outputs["images"], outputs["bounding_boxes"]
self.assertEqual(layer.compute_dtype, output_ys.dtype)
def test_output_values(self):
"""test to verify augmented bounding box output coordinate"""
xs = tf.cast(
tf.stack(
[2 * tf.ones((100, 100, 3)), tf.zeros((100, 100, 3))],
axis=0,
),
tf.float32,
)
ys = tf.cast(
tf.stack(
[
tf.constant([[10.0, 20.0, 40.0, 50.0], [12.0, 22.0, 42.0, 54.0]]),
tf.constant([[10.0, 20.0, 40.0, 50.0], [12.0, 22.0, 42.0, 54.0]]),
],
axis=0,
),
tf.float32,
)
ys = bounding_box.add_class_id(ys)
true_ys = tf.cast(
tf.stack(
[
tf.constant(
[
[7.60, 20.43, 39.04, 51.79, 0.0],
[9.41, 22.52, 40.94, 55.88, 0.0],
]
),
tf.constant(
[
[13.68, 22.51, 49.20, 59.05, 0],
[16.04, 24.95, 51.940, 63.56, 0],
]
),
],
axis=0,
),
tf.float32,
)
layer = preprocessing.RandomShear(
x_factor=0.2, y_factor=0.2, bounding_box_format="xyxy", seed=1
)
outputs = layer({"images": xs, "bounding_boxes": ys})
_, output_ys = outputs["images"], outputs["bounding_boxes"]
self.assertAllClose(true_ys, output_ys, rtol=1e-02, atol=1e-03)
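# --- Illustrative usage sketch (an assumption-laden addition, not part of
# the original test module): a minimal RandomShear call mirroring the dict
# convention used in the tests above, reusing this module's imports and
# assuming bounding_box.add_class_id appends a class column along the last
# axis, as it does for the batched boxes in the tests.
def _random_shear_usage_sketch():
    image = tf.ones((100, 100, 3))
    boxes = bounding_box.add_class_id(
        tf.constant([[0.2, 0.2, 0.6, 0.6], [0.4, 0.4, 0.9, 0.9]])
    )
    layer = preprocessing.RandomShear(
        x_factor=0.3, y_factor=0.3, bounding_box_format="rel_xyxy", seed=0
    )
    outputs = layer({"images": image, "bounding_boxes": boxes})
    return outputs["images"], outputs["bounding_boxes"]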
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from keras_cv import core
from keras_cv.layers.preprocessing.base_image_augmentation_layer import (
BaseImageAugmentationLayer,
)
from keras_cv.utils import preprocessing
@tf.keras.utils.register_keras_serializable(package="keras_cv")
class RandomlyZoomedCrop(BaseImageAugmentationLayer):
"""Randomly crops a part of an image and zooms it by a provided amount size.
This implementation takes a distortion-oriented approach, which means the
amount of distortion in the image is proportional to the `zoom_factor`
argument. To do this, we first sample a random value for `zoom_factor` and
`aspect_ratio_factor`. Further we deduce a `crop_size` which abides by the
calculated aspect ratio. Finally we do the actual cropping operation and
resize the image to `(height, width)`.
Args:
height: The height of the output shape.
width: The width of the output shape.
zoom_factor: A tuple of two floats, ConstantFactorSampler or
UniformFactorSampler. Represents the area relative to the original image
of the cropped image before resizing it to `(height, width)`.
aspect_ratio_factor: A tuple of two floats, ConstantFactorSampler or
UniformFactorSampler. Aspect ratio means the ratio of width to
height of the cropped image. In the context of this layer, the aspect ratio
sampled represents a value to distort the aspect ratio by.
Represents the lower and upper bound for the aspect ratio of the
cropped image before resizing it to `(height, width)`. For most tasks, this
should be `(3/4, 4/3)`. To perform a no-op provide the value `(1.0, 1.0)`.
interpolation: (Optional) A string specifying the sampling method for
resizing. Defaults to "bilinear".
seed: (Optional) Used to create a random seed. Defaults to None.
"""
def __init__(
self,
height,
width,
zoom_factor,
aspect_ratio_factor,
interpolation="bilinear",
seed=None,
**kwargs,
):
super().__init__(seed=seed, **kwargs)
self.height = height
self.width = width
self.aspect_ratio_factor = preprocessing.parse_factor(
aspect_ratio_factor,
min_value=0.0,
max_value=None,
param_name="aspect_ratio_factor",
seed=seed,
)
self.zoom_factor = preprocessing.parse_factor(
zoom_factor,
min_value=0.0,
max_value=None,
param_name="zoom_factor",
seed=seed,
)
self._check_class_arguments(height, width, zoom_factor, aspect_ratio_factor)
self.interpolation = interpolation
self.seed = seed
def get_random_transformation(
self, image=None, label=None, bounding_box=None, **kwargs
):
zoom_factor = self.zoom_factor()
aspect_ratio = self.aspect_ratio_factor()
original_height = tf.cast(tf.shape(image)[-3], tf.float32)
original_width = tf.cast(tf.shape(image)[-2], tf.float32)
crop_size = (
tf.round(self.height / zoom_factor),
tf.round(self.width / zoom_factor),
)
new_height = crop_size[0] / tf.sqrt(aspect_ratio)
new_width = crop_size[1] * tf.sqrt(aspect_ratio)
height_offset = self._random_generator.random_uniform(
(),
minval=tf.minimum(0.0, original_height - new_height),
maxval=tf.maximum(0.0, original_height - new_height),
dtype=tf.float32,
)
width_offset = self._random_generator.random_uniform(
(),
minval=tf.minimum(0.0, original_width - new_width),
maxval=tf.maximum(0.0, original_width - new_width),
dtype=tf.float32,
)
new_height = new_height / original_height
new_width = new_width / original_width
height_offset = height_offset / original_height
width_offset = width_offset / original_width
return (new_height, new_width, height_offset, width_offset)
def call(self, inputs, training=True):
if training:
return super().call(inputs, training)
else:
inputs = self._ensure_inputs_are_compute_dtype(inputs)
inputs, meta_data = self._format_inputs(inputs)
output = inputs
# self._resize() returns valid results for both batched and
# unbatched inputs
output["images"] = self._resize(inputs["images"])
return self._format_output(output, meta_data)
def augment_image(self, image, transformation, **kwargs):
image_shape = tf.shape(image)
height = tf.cast(image_shape[-3], tf.float32)
width = tf.cast(image_shape[-2], tf.float32)
image = tf.expand_dims(image, axis=0)
new_height, new_width, height_offset, width_offset = transformation
transform = RandomlyZoomedCrop._format_transform(
[
new_width,
0.0,
width_offset * width,
0.0,
new_height,
height_offset * height,
0.0,
0.0,
]
)
image = preprocessing.transform(
images=image,
transforms=transform,
output_shape=(self.height, self.width),
interpolation=self.interpolation,
fill_mode="reflect",
)
return tf.squeeze(image, axis=0)
@staticmethod
def _format_transform(transform):
transform = tf.convert_to_tensor(transform, dtype=tf.float32)
return transform[tf.newaxis]
def _resize(self, image):
outputs = tf.keras.preprocessing.image.smart_resize(
image, (self.height, self.width)
)
# smart_resize will always output float32, so we need to re-cast.
return tf.cast(outputs, self.compute_dtype)
def _check_class_arguments(self, height, width, zoom_factor, aspect_ratio_factor):
if not isinstance(height, int):
raise ValueError("`height` must be an integer. Received height={height}")
if not isinstance(width, int):
raise ValueError("`width` must be an integer. Received width={width}")
if (
not isinstance(zoom_factor, (tuple, list, core.FactorSampler))
or isinstance(zoom_factor, float)
or isinstance(zoom_factor, int)
):
raise ValueError(
"`zoom_factor` must be tuple of two positive floats"
" or keras_cv.core.FactorSampler instance. Received "
f"zoom_factor={zoom_factor}"
)
if (
not isinstance(aspect_ratio_factor, (tuple, list, core.FactorSampler))
or isinstance(aspect_ratio_factor, float)
or isinstance(aspect_ratio_factor, int)
):
raise ValueError(
"`aspect_ratio_factor` must be tuple of two positive floats or "
"keras_cv.core.FactorSampler instance. Received "
f"aspect_ratio_factor={aspect_ratio_factor}"
)
def augment_target(self, augment_target, **kwargs):
return augment_target
def get_config(self):
config = super().get_config()
config.update(
{
"height": self.height,
"width": self.width,
"zoom_factor": self.zoom_factor,
"aspect_ratio_factor": self.aspect_ratio_factor,
"interpolation": self.interpolation,
"seed": self.seed,
}
)
return config
def _crop_and_resize(self, image, transformation, method=None):
image = tf.expand_dims(image, axis=0)
boxes = transformation
# See bit.ly/tf_crop_resize for more details
augmented_image = tf.image.crop_and_resize(
image, # image shape: [B, H, W, C]
boxes, # boxes: (1, 4) in this case; represents area
# to be cropped from the original image
[0], # box_indices: maps boxes to images along batch axis
# [0] since there is only one image
(self.height, self.width), # output size
method=method or self.interpolation,
)
return tf.squeeze(augmented_image, axis=0)
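# --- Illustrative usage sketch (an assumption-laden addition, not part of
# the original module): exercises the layer defined above using only the
# imports already present in this file.
def _randomly_zoomed_crop_usage_sketch():
    images = tf.random.uniform((2, 300, 300, 3))
    layer = RandomlyZoomedCrop(
        height=224,
        width=224,
        zoom_factor=(0.8, 1.0),
        aspect_ratio_factor=(3 / 4, 4 / 3),
        seed=42,
    )
    # training=True applies the random crop-and-zoom; training=False only
    # resizes to (height, width).
    return layer(images, training=True)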
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from absl.testing import parameterized
from keras_cv.layers import preprocessing
class RandomlyZoomedCropTest(tf.test.TestCase, parameterized.TestCase):
height, width = 300, 300
batch_size = 4
target_size = (224, 224)
seed = 42
def test_train_augments_image(self):
# Checks if original and augmented images are different
input_image_shape = (self.batch_size, self.height, self.width, 3)
image = tf.random.uniform(shape=input_image_shape, seed=self.seed)
layer = preprocessing.RandomlyZoomedCrop(
height=self.target_size[0],
width=self.target_size[1],
aspect_ratio_factor=(3 / 4, 4 / 3),
zoom_factor=(0.8, 1.0),
seed=self.seed,
)
output = layer(image, training=True)
input_image_resized = tf.image.resize(image, self.target_size)
self.assertNotAllClose(output, input_image_resized)
def test_grayscale(self):
input_image_shape = (self.batch_size, self.height, self.width, 1)
image = tf.random.uniform(shape=input_image_shape)
layer = preprocessing.RandomlyZoomedCrop(
height=self.target_size[0],
width=self.target_size[1],
aspect_ratio_factor=(3 / 4, 4 / 3),
zoom_factor=(0.8, 1.0),
)
output = layer(image, training=True)
input_image_resized = tf.image.resize(image, self.target_size)
self.assertAllEqual(output.shape, (4, 224, 224, 1))
self.assertNotAllClose(output, input_image_resized)
def test_preserves_image(self):
image_shape = (self.batch_size, self.height, self.width, 3)
image = tf.random.uniform(shape=image_shape)
layer = preprocessing.RandomlyZoomedCrop(
height=self.target_size[0],
width=self.target_size[1],
aspect_ratio_factor=(3 / 4, 4 / 3),
zoom_factor=(0.8, 1.0),
)
input_resized = tf.image.resize(image, self.target_size)
output = layer(image, training=False)
self.assertAllClose(output, input_resized)
@parameterized.named_parameters(
("Not tuple or list", dict()),
("Length not equal to 2", [1, 2, 3]),
("Members not int", (2.3, 4.5)),
("Single float", 1.5),
)
def test_height_errors(self, height):
with self.assertRaisesRegex(
ValueError,
"`height` must be an integer. Received height=(.*)",
):
_ = preprocessing.RandomlyZoomedCrop(
height=height,
width=100,
aspect_ratio_factor=(3 / 4, 4 / 3),
zoom_factor=(0.8, 1.0),
)
@parameterized.named_parameters(
("Not tuple or list", dict()),
("Length not equal to 2", [1, 2, 3]),
("Members not int", (2.3, 4.5)),
("Single float", 1.5),
)
def test_width_errors(self, width):
with self.assertRaisesRegex(
ValueError,
"`width` must be an integer. Received width=(.*)",
):
_ = preprocessing.RandomlyZoomedCrop(
height=100,
width=width,
aspect_ratio_factor=(3 / 4, 4 / 3),
zoom_factor=(0.8, 1.0),
)
@parameterized.named_parameters(
("Single integer", 5),
("Single float", 5.0),
)
def test_aspect_ratio_factor_errors(self, aspect_ratio_factor):
with self.assertRaisesRegex(
ValueError,
"`aspect_ratio_factor` must be tuple of two positive floats or "
"keras_cv.core.FactorSampler instance. Received aspect_ratio_factor=(.*)",
):
_ = preprocessing.RandomlyZoomedCrop(
height=self.target_size[0],
width=self.target_size[1],
aspect_ratio_factor=aspect_ratio_factor,
zoom_factor=(0.8, 1.0),
)
@parameterized.named_parameters(
("Single integer", 5),
("Single float", 5.0),
)
def test_zoom_factor_errors(self, zoom_factor):
with self.assertRaisesRegex(
ValueError,
"`zoom_factor` must be tuple of two positive floats"
" or keras_cv.core.FactorSampler instance. Received "
"zoom_factor=(.*)",
):
_ = preprocessing.RandomlyZoomedCrop(
height=self.target_size[0],
width=self.target_size[1],
aspect_ratio_factor=(3 / 4, 4 / 3),
zoom_factor=zoom_factor,
)
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from keras_cv.layers.preprocessing.base_image_augmentation_layer import (
BaseImageAugmentationLayer,
)
from keras_cv.utils import preprocessing
@tf.keras.utils.register_keras_serializable(package="keras_cv")
class Solarization(BaseImageAugmentationLayer):
"""Applies (max_value - pixel + min_value) for each pixel in the image.
When created without `threshold` parameter, the layer performs solarization to
all values. When created with specified `threshold` the layer only augments
pixels that are above the `threshold` value
Reference:
- [AutoAugment: Learning Augmentation Policies from Data](
https://arxiv.org/abs/1805.09501
)
- [RandAugment](https://arxiv.org/pdf/1909.13719.pdf)
Args:
value_range: a tuple or a list of two elements. The first value represents
the lower bound for values in passed images, the second represents the
upper bound. Images passed to the layer should have values within
`value_range`.
addition_factor: (Optional) A tuple of two floats, a single float or a
`keras_cv.FactorSampler`. For each augmented image a value is sampled
from the provided range. If a float is passed, the range is interpreted as
`(0, addition_factor)`. If specified, this value is added to each pixel
before solarization and thresholding. The addition value should be scaled
according to the value range (0, 255). Defaults to 0.0.
threshold_factor: (Optional) A tuple of two floats, a single float or a
`keras_cv.FactorSampler`. For each augmented image a value is sampled
from the provided range. If a float is passed, the range is interpreted as
`(0, threshold_factor)`. If specified, only pixel values above this
threshold will be solarized.
seed: Integer. Used to create a random seed.
Usage:
```python
(images, labels), _ = tf.keras.datasets.cifar10.load_data()
print(images[0, 0, 0])
# [59 62 63]
# Note that `images` is a Tensor with values in the range [0, 255]
solarization = Solarization()
images = solarization(images)
print(images[0, 0, 0])
# [196, 193, 192]
```
Call arguments:
images: Tensor of type int or float, with pixels in
range [0, 255] and shape [batch, height, width, channels]
or [height, width, channels].
"""
def __init__(
self,
value_range,
addition_factor=0.0,
threshold_factor=0.0,
seed=None,
**kwargs
):
super().__init__(seed=seed, **kwargs)
self.seed = seed
self.addition_factor = preprocessing.parse_factor(
addition_factor, max_value=255, seed=seed, param_name="addition_factor"
)
self.threshold_factor = preprocessing.parse_factor(
threshold_factor, max_value=255, seed=seed, param_name="threshold_factor"
)
self.value_range = value_range
def get_random_transformation(self, **kwargs):
return (self.addition_factor(), self.threshold_factor())
def augment_image(self, image, transformation=None, **kwargs):
(addition, threshold) = transformation
image = preprocessing.transform_value_range(
image, original_range=self.value_range, target_range=(0, 255)
)
result = image + addition
result = tf.clip_by_value(result, 0, 255)
result = tf.where(result < threshold, result, 255 - result)
result = preprocessing.transform_value_range(
result, original_range=(0, 255), target_range=self.value_range
)
return result
def augment_bounding_boxes(self, bounding_boxes, **kwargs):
return bounding_boxes
def augment_label(self, label, transformation=None, **kwargs):
return label
def augment_segmentation_mask(self, segmentation_mask, transformation, **kwargs):
return segmentation_mask
def get_config(self):
config = {
"threshold_factor": self.threshold_factor,
"addition_factor": self.addition_factor,
"value_range": self.value_range,
"seed": self.seed,
}
base_config = super().get_config()
return dict(list(base_config.items()) + list(config.items()))
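# --- Illustrative numeric sketch (an assumption-laden addition, not part of
# the original module): with threshold_factor=(128, 128) only pixels whose
# values are >= 128 are inverted, so a pixel of 191 maps to 255 - 191 = 64
# while a pixel of 64 is left unchanged.
def _solarization_usage_sketch():
    layer = Solarization(threshold_factor=(128, 128), value_range=(0, 255))
    images = tf.fill((1, 2, 2, 3), 191.0)
    return layer(images)  # values expected to be close to 64.0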
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from absl.testing import parameterized
from keras_cv.layers.preprocessing.solarization import Solarization
class SolarizationTest(tf.test.TestCase, parameterized.TestCase):
@parameterized.named_parameters(
("0_255", 0, 255),
("64_191", 64, 191),
("127_128", 127, 128),
("191_64", 191, 64),
("255_0", 255, 0),
)
def test_output_values(self, input_value, expected_value):
solarization = Solarization(value_range=(0, 255))
self._test_input_output(
layer=solarization,
input_value=input_value,
expected_value=expected_value,
dtype=tf.uint8,
)
@parameterized.named_parameters(
("0_245", 0, 245),
("255_0", 255, 0),
)
def test_solarization_with_addition(self, input_value, output_value):
solarization = Solarization(addition_factor=(10.0, 10.0), value_range=(0, 255))
self._test_input_output(
layer=solarization,
input_value=input_value,
expected_value=output_value,
dtype=tf.float32,
)
@parameterized.named_parameters(
("0_0", 0, 0),
("64_64", 64, 64),
("127_127", 127, 127),
("191_64", 191, 64),
("255_0", 255, 0),
)
def test_only_values_above_threshold_are_solarized(self, input_value, output_value):
solarization = Solarization(threshold_factor=(128, 128), value_range=(0, 255))
self._test_input_output(
layer=solarization,
input_value=input_value,
expected_value=output_value,
dtype=tf.uint8,
)
def _test_input_output(self, layer, input_value, expected_value, dtype):
input = tf.ones(shape=(2, 224, 224, 3), dtype=dtype) * input_value
expected_output = tf.clip_by_value(
(
tf.ones(shape=(2, 224, 224, 3), dtype=layer.compute_dtype)
* expected_value
),
0,
255,
)
output = layer(input)
self.assertAllClose(output, expected_output)
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from absl.testing import parameterized
from keras_cv.layers import preprocessing
TEST_CONFIGURATIONS = [
("AutoContrast", preprocessing.AutoContrast, {"value_range": (0, 255)}),
("ChannelShuffle", preprocessing.ChannelShuffle, {}),
("Equalization", preprocessing.Equalization, {"value_range": (0, 255)}),
(
"RandomCropAndResize",
preprocessing.RandomCropAndResize,
{
"target_size": (224, 224),
"crop_area_factor": (0.8, 1.0),
"aspect_ratio_factor": (3 / 4, 4 / 3),
},
),
(
"RandomlyZoomedCrop",
preprocessing.RandomlyZoomedCrop,
{
"height": 224,
"width": 224,
"zoom_factor": (0.8, 1.0),
"aspect_ratio_factor": (3 / 4, 4 / 3),
},
),
("Grayscale", preprocessing.Grayscale, {}),
("GridMask", preprocessing.GridMask, {}),
(
"Posterization",
preprocessing.Posterization,
{"bits": 3, "value_range": (0, 255)},
),
(
"RandomColorDegeneration",
preprocessing.RandomColorDegeneration,
{"factor": 0.5},
),
(
"RandomCutout",
preprocessing.RandomCutout,
{"height_factor": 0.2, "width_factor": 0.2},
),
(
"RandomHue",
preprocessing.RandomHue,
{"factor": 0.5, "value_range": (0, 255)},
),
(
"RandomChannelShift",
preprocessing.RandomChannelShift,
{"value_range": (0, 255), "factor": 0.5},
),
(
"RandomColorJitter",
preprocessing.RandomColorJitter,
{
"value_range": (0, 255),
"brightness_factor": (-0.2, 0.5),
"contrast_factor": (0.5, 0.9),
"saturation_factor": (0.5, 0.9),
"hue_factor": (0.5, 0.9),
"seed": 1,
},
),
(
"RandomGaussianBlur",
preprocessing.RandomGaussianBlur,
{"kernel_size": 3, "factor": (0.0, 3.0)},
),
("RandomJpegQuality", preprocessing.RandomJpegQuality, {"factor": (75, 100)}),
("RandomSaturation", preprocessing.RandomSaturation, {"factor": 0.5}),
(
"RandomSharpness",
preprocessing.RandomSharpness,
{"factor": 0.5, "value_range": (0, 255)},
),
("RandomShear", preprocessing.RandomShear, {"x_factor": 0.3, "x_factor": 0.3}),
("Solarization", preprocessing.Solarization, {"value_range": (0, 255)}),
]
class WithLabelsTest(tf.test.TestCase, parameterized.TestCase):
@parameterized.named_parameters(
*TEST_CONFIGURATIONS,
("CutMix", preprocessing.CutMix, {}),
("Mosaic", preprocessing.Mosaic, {}),
)
def test_can_run_with_labels(self, layer_cls, init_args):
layer = layer_cls(**init_args)
img = tf.random.uniform(
shape=(3, 512, 512, 3), minval=0, maxval=1, dtype=tf.float32
)
labels = tf.ones((3,), dtype=tf.float32)
inputs = {"images": img, "labels": labels}
outputs = layer(inputs)
self.assertIn("labels", outputs)
# This has to be a separate test case to exclude CutMix, MixUp, Mosaic, etc.
@parameterized.named_parameters(*TEST_CONFIGURATIONS)
def test_can_run_with_labels_single_image(self, layer_cls, init_args):
layer = layer_cls(**init_args)
img = tf.random.uniform(
shape=(512, 512, 3), minval=0, maxval=1, dtype=tf.float32
)
labels = tf.ones((), dtype=tf.float32)
inputs = {"images": img, "labels": labels}
outputs = layer(inputs)
self.assertIn("labels", outputs)
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from absl.testing import parameterized
from keras_cv.layers import preprocessing
TEST_CONFIGURATIONS = [
("AutoContrast", preprocessing.AutoContrast, {"value_range": (0, 255)}),
("ChannelShuffle", preprocessing.ChannelShuffle, {}),
("Equalization", preprocessing.Equalization, {"value_range": (0, 255)}),
("Grayscale", preprocessing.Grayscale, {}),
("GridMask", preprocessing.GridMask, {}),
(
"Posterization",
preprocessing.Posterization,
{"bits": 3, "value_range": (0, 255)},
),
(
"RandomColorDegeneration",
preprocessing.RandomColorDegeneration,
{"factor": 0.5},
),
(
"RandomHue",
preprocessing.RandomHue,
{"factor": 0.5, "value_range": (0, 255)},
),
(
"RandomChannelShift",
preprocessing.RandomChannelShift,
{"value_range": (0, 255), "factor": 0.5},
),
(
"RandomColorJitter",
preprocessing.RandomColorJitter,
{
"value_range": (0, 255),
"brightness_factor": (-0.2, 0.5),
"contrast_factor": (0.5, 0.9),
"saturation_factor": (0.5, 0.9),
"hue_factor": (0.5, 0.9),
"seed": 1,
},
),
(
"RandomGaussianBlur",
preprocessing.RandomGaussianBlur,
{"kernel_size": 3, "factor": (0.0, 3.0)},
),
("RandomJpegQuality", preprocessing.RandomJpegQuality, {"factor": (75, 100)}),
("RandomSaturation", preprocessing.RandomSaturation, {"factor": 0.5}),
(
"RandomSharpness",
preprocessing.RandomSharpness,
{"factor": 0.5, "value_range": (0, 255)},
),
("Solarization", preprocessing.Solarization, {"value_range": (0, 255)}),
]
class WithSegmentationMasksTest(tf.test.TestCase, parameterized.TestCase):
@parameterized.named_parameters(*TEST_CONFIGURATIONS)
def test_can_run_with_segmentation_masks(self, layer_cls, init_args):
classes = 10
layer = layer_cls(**init_args)
img = tf.random.uniform(
shape=(3, 512, 512, 3), minval=0, maxval=1, dtype=tf.float32
)
segmentation_masks = tf.random.uniform(
shape=(3, 512, 512, 1), minval=0, maxval=classes, dtype=tf.int32
)
inputs = {"images": img, "segmentation_masks": segmentation_masks}
outputs = layer(inputs)
self.assertIn("segmentation_masks", outputs)
# This currently asserts that all layers are no-ops.
# When preprocessing layers are updated to mutate segmentation masks,
# this condition should only be asserted for no-op layers.
self.assertAllClose(inputs["segmentation_masks"], outputs["segmentation_masks"])
# This has to be a separate test case to exclude CutMix and MixUp
# (which are not yet supported for segmentation mask augmentation)
@parameterized.named_parameters(*TEST_CONFIGURATIONS)
def test_can_run_with_segmentation_mask_single_image(self, layer_cls, init_args):
classes = 10
layer = layer_cls(**init_args)
img = tf.random.uniform(
shape=(512, 512, 3), minval=0, maxval=1, dtype=tf.float32
)
segmentation_mask = tf.random.uniform(
shape=(512, 512, 1), minval=0, maxval=classes, dtype=tf.int32
)
inputs = {"images": img, "segmentation_masks": segmentation_mask}
outputs = layer(inputs)
self.assertIn("segmentation_masks", outputs)
# This currently asserts that all layers are no-ops.
# When preprocessing layers are updated to mutate segmentation masks,
# this condition should only be asserted for no-op layers.
self.assertAllClose(inputs["segmentation_masks"], outputs["segmentation_masks"])
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from keras_cv.layers.regularization.drop_path import DropPath
from keras_cv.layers.regularization.dropblock_2d import DropBlock2D
from keras_cv.layers.regularization.squeeze_excite import SqueezeAndExcite2D
from keras_cv.layers.regularization.stochastic_depth import StochasticDepth