"sgl-router/py_test/vscode:/vscode.git/clone" did not exist on "5dccf697137620c74f36964191dfe2311b3ada6c"
Commit 0016b0a7 authored by sunxx1's avatar sunxx1
Browse files

Merge branch 'dtk22.04' into 'main'

Dtk22.04

See merge request dcutoolkit/deeplearing/dlexamples_new!49
parents 17bc28d5 7a382d5d
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import tensorflow as tf
from keras_cv.layers.preprocessing.base_image_augmentation_layer import (
BaseImageAugmentationLayer,
)
class RandomAddLayer(BaseImageAugmentationLayer):
def __init__(self, value_range=(0.0, 1.0), fixed_value=None, **kwargs):
super().__init__(**kwargs)
self.value_range = value_range
self.fixed_value = fixed_value
def get_random_transformation(self, **kwargs):
        if self.fixed_value is not None:
return self.fixed_value
return self._random_generator.random_uniform(
[], minval=self.value_range[0], maxval=self.value_range[1]
)
def augment_image(self, image, transformation, **kwargs):
return image + transformation
def augment_label(self, label, transformation, **kwargs):
return label + transformation
def augment_bounding_boxes(self, bounding_boxes, transformation, **kwargs):
return bounding_boxes + transformation
def augment_keypoints(self, keypoints, transformation, **kwargs):
return keypoints + transformation
def augment_segmentation_mask(self, segmentation_mask, transformation, **kwargs):
return segmentation_mask + transformation
class VectorizeDisabledLayer(BaseImageAugmentationLayer):
def __init__(self, **kwargs):
self.auto_vectorize = False
super().__init__(**kwargs)
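# --- Illustrative sketch, not part of the test suite ---
# RandomAddLayer above exercises the minimal surface of the
# BaseImageAugmentationLayer API: get_random_transformation() samples one
# transformation per example, and the augment_* hooks apply it to each input
# component. A hedged demo of invoking such a layer directly (the dict form
# follows the "images"-key convention used in the tests below):
if __name__ == "__main__":
    demo_layer = RandomAddLayer(fixed_value=2.0)
    demo_image = np.zeros((8, 8, 3), dtype="float32")
    print(demo_layer(demo_image)[0, 0])  # every pixel becomes 2.0
    print(demo_layer({"images": demo_image})["images"][0, 0])  # same via dict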
class BaseImageAugmentationLayerTest(tf.test.TestCase):
def test_augment_single_image(self):
add_layer = RandomAddLayer(fixed_value=2.0)
image = np.random.random(size=(8, 8, 3)).astype("float32")
output = add_layer(image)
self.assertAllClose(image + 2.0, output)
def test_augment_dict_return_type(self):
add_layer = RandomAddLayer(fixed_value=2.0)
image = np.random.random(size=(8, 8, 3)).astype("float32")
output = add_layer({"images": image})
self.assertIsInstance(output, dict)
def test_auto_vectorize_disabled(self):
vectorize_disabled_layer = VectorizeDisabledLayer()
self.assertFalse(vectorize_disabled_layer.auto_vectorize)
self.assertEqual(vectorize_disabled_layer._map_fn, tf.map_fn)
def test_augment_casts_dtypes(self):
add_layer = RandomAddLayer(fixed_value=2.0)
images = tf.ones((2, 8, 8, 3), dtype="uint8")
output = add_layer(images)
self.assertAllClose(tf.ones((2, 8, 8, 3), dtype="float32") * 3.0, output)
def test_augment_batch_images(self):
add_layer = RandomAddLayer()
images = np.random.random(size=(2, 8, 8, 3)).astype("float32")
output = add_layer(images)
diff = output - images
# Make sure the first image and second image get different augmentation
self.assertNotAllClose(diff[0], diff[1])
def test_augment_image_and_label(self):
add_layer = RandomAddLayer(fixed_value=2.0)
image = np.random.random(size=(8, 8, 3)).astype("float32")
label = np.random.random(size=(1,)).astype("float32")
output = add_layer({"images": image, "labels": label})
expected_output = {"images": image + 2.0, "labels": label + 2.0}
self.assertAllClose(output, expected_output)
def test_augment_image_and_target(self):
add_layer = RandomAddLayer(fixed_value=2.0)
image = np.random.random(size=(8, 8, 3)).astype("float32")
label = np.random.random(size=(1,)).astype("float32")
output = add_layer({"images": image, "targets": label})
expected_output = {"images": image + 2.0, "targets": label + 2.0}
self.assertAllClose(output, expected_output)
def test_augment_batch_images_and_labels(self):
add_layer = RandomAddLayer()
images = np.random.random(size=(2, 8, 8, 3)).astype("float32")
labels = np.random.random(size=(2, 1)).astype("float32")
output = add_layer({"images": images, "labels": labels})
image_diff = output["images"] - images
label_diff = output["labels"] - labels
# Make sure the first image and second image get different augmentation
self.assertNotAllClose(image_diff[0], image_diff[1])
self.assertNotAllClose(label_diff[0], label_diff[1])
def test_augment_leaves_extra_dict_entries_unmodified(self):
add_layer = RandomAddLayer(fixed_value=0.5)
images = np.random.random(size=(8, 8, 3)).astype("float32")
filenames = tf.constant("/path/to/first.jpg")
inputs = {"images": images, "filenames": filenames}
outputs = add_layer(inputs)
self.assertListEqual(list(inputs.keys()), list(outputs.keys()))
self.assertAllEqual(inputs["filenames"], outputs["filenames"])
self.assertNotAllClose(inputs["images"], outputs["images"])
self.assertAllEqual(inputs["images"], images) # Assert original unchanged
def test_augment_leaves_batched_extra_dict_entries_unmodified(self):
add_layer = RandomAddLayer(fixed_value=0.5)
images = np.random.random(size=(2, 8, 8, 3)).astype("float32")
filenames = tf.constant(["/path/to/first.jpg", "/path/to/second.jpg"])
inputs = {"images": images, "filenames": filenames}
outputs = add_layer(inputs)
self.assertListEqual(list(inputs.keys()), list(outputs.keys()))
self.assertAllEqual(inputs["filenames"], outputs["filenames"])
self.assertNotAllClose(inputs["images"], outputs["images"])
self.assertAllEqual(inputs["images"], images) # Assert original unchanged
def test_augment_image_and_localization_data(self):
add_layer = RandomAddLayer(fixed_value=2.0)
images = np.random.random(size=(8, 8, 3)).astype("float32")
bounding_boxes = np.random.random(size=(3, 5)).astype("float32")
keypoints = np.random.random(size=(3, 5, 2)).astype("float32")
segmentation_mask = np.random.random(size=(8, 8, 1)).astype("float32")
output = add_layer(
{
"images": images,
"bounding_boxes": bounding_boxes,
"keypoints": keypoints,
"segmentation_masks": segmentation_mask,
}
)
expected_output = {
"images": images + 2.0,
"bounding_boxes": bounding_boxes + 2.0,
"keypoints": keypoints + 2.0,
"segmentation_masks": segmentation_mask + 2.0,
}
self.assertAllClose(output, expected_output)
def test_augment_batch_image_and_localization_data(self):
add_layer = RandomAddLayer()
images = np.random.random(size=(2, 8, 8, 3)).astype("float32")
bounding_boxes = np.random.random(size=(2, 3, 5)).astype("float32")
keypoints = np.random.random(size=(2, 3, 5, 2)).astype("float32")
segmentation_masks = np.random.random(size=(2, 8, 8, 1)).astype("float32")
output = add_layer(
{
"images": images,
"bounding_boxes": bounding_boxes,
"keypoints": keypoints,
"segmentation_masks": segmentation_masks,
}
)
bounding_boxes_diff = output["bounding_boxes"] - bounding_boxes
keypoints_diff = output["keypoints"] - keypoints
segmentation_mask_diff = output["segmentation_masks"] - segmentation_masks
self.assertNotAllClose(bounding_boxes_diff[0], bounding_boxes_diff[1])
self.assertNotAllClose(keypoints_diff[0], keypoints_diff[1])
self.assertNotAllClose(segmentation_mask_diff[0], segmentation_mask_diff[1])
@tf.function
def in_tf_function(inputs):
return add_layer(inputs)
output = in_tf_function(
{
"images": images,
"bounding_boxes": bounding_boxes,
"keypoints": keypoints,
"segmentation_masks": segmentation_masks,
}
)
bounding_boxes_diff = output["bounding_boxes"] - bounding_boxes
keypoints_diff = output["keypoints"] - keypoints
segmentation_mask_diff = output["segmentation_masks"] - segmentation_masks
self.assertNotAllClose(bounding_boxes_diff[0], bounding_boxes_diff[1])
self.assertNotAllClose(keypoints_diff[0], keypoints_diff[1])
self.assertNotAllClose(segmentation_mask_diff[0], segmentation_mask_diff[1])
def test_augment_all_data_in_tf_function(self):
add_layer = RandomAddLayer()
images = np.random.random(size=(2, 8, 8, 3)).astype("float32")
bounding_boxes = np.random.random(size=(2, 3, 5)).astype("float32")
keypoints = np.random.random(size=(2, 3, 5, 2)).astype("float32")
segmentation_masks = np.random.random(size=(2, 8, 8, 1)).astype("float32")
@tf.function
def in_tf_function(inputs):
return add_layer(inputs)
output = in_tf_function(
{
"images": images,
"bounding_boxes": bounding_boxes,
"keypoints": keypoints,
"segmentation_masks": segmentation_masks,
}
)
bounding_boxes_diff = output["bounding_boxes"] - bounding_boxes
keypoints_diff = output["keypoints"] - keypoints
segmentation_mask_diff = output["segmentation_masks"] - segmentation_masks
self.assertNotAllClose(bounding_boxes_diff[0], bounding_boxes_diff[1])
self.assertNotAllClose(keypoints_diff[0], keypoints_diff[1])
self.assertNotAllClose(segmentation_mask_diff[0], segmentation_mask_diff[1])
def test_raise_error_missing_class_id(self):
add_layer = RandomAddLayer()
images = np.random.random(size=(2, 8, 8, 3)).astype("float32")
bounding_boxes = np.random.random(size=(2, 3, 4)).astype("float32")
keypoints = np.random.random(size=(2, 3, 5, 2)).astype("float32")
segmentation_masks = np.random.random(size=(2, 8, 8, 1)).astype("float32")
with self.assertRaisesRegex(
ValueError,
"Bounding boxes are missing class_id. If you would like to pad the "
"bounding boxes with class_id, use `keras_cv.bounding_box.add_"
"class_id`",
):
add_layer(
{
"images": images,
"bounding_boxes": bounding_boxes,
"keypoints": keypoints,
"segmentation_masks": segmentation_masks,
}
)
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from keras_cv.layers.preprocessing.base_image_augmentation_layer import (
BaseImageAugmentationLayer,
)
@tf.keras.utils.register_keras_serializable(package="keras_cv")
class ChannelShuffle(BaseImageAugmentationLayer):
"""Shuffle channels of an input image.
Input shape:
        Images are expected to have pixel values in the range [0, 255].
3D (unbatched) or 4D (batched) tensor with shape:
`(..., height, width, channels)`, in `"channels_last"` format
Output shape:
3D (unbatched) or 4D (batched) tensor with shape:
`(..., height, width, channels)`, in `"channels_last"` format
Args:
        groups: Number of groups to divide the input channels. Defaults to 3.
seed: Integer. Used to create a random seed.
Call arguments:
        inputs: Tensor representing images of shape
            `(batch_size, height, width, channels)` or `(height, width, channels)`,
            with dtype `tf.float32` / `tf.uint8`.
training: A boolean argument that determines whether the call should be run
in inference mode or training mode. Default: True.
Usage:
```python
(images, labels), _ = tf.keras.datasets.cifar10.load_data()
channel_shuffle = keras_cv.layers.ChannelShuffle()
augmented_images = channel_shuffle(images)
```
"""
def __init__(self, groups=3, seed=None, **kwargs):
super().__init__(seed=seed, **kwargs)
self.groups = groups
self.seed = seed
def augment_image(self, image, transformation=None, **kwargs):
shape = tf.shape(image)
height, width = shape[0], shape[1]
num_channels = image.shape[2]
        if num_channels % self.groups != 0:
            raise ValueError(
                "The number of input channels should be "
                "divisible by the number of groups. "
                f"Received: channels={num_channels}, groups={self.groups}"
            )
channels_per_group = num_channels // self.groups
image = tf.reshape(image, [height, width, self.groups, channels_per_group])
image = tf.transpose(image, perm=[2, 0, 1, 3])
image = tf.random.shuffle(image, seed=self.seed)
image = tf.transpose(image, perm=[1, 2, 3, 0])
image = tf.reshape(image, [height, width, num_channels])
return image
def augment_bounding_boxes(self, bounding_boxes, **kwargs):
return bounding_boxes
def augment_label(self, label, transformation=None, **kwargs):
return label
def augment_segmentation_mask(self, segmentation_mask, transformation, **kwargs):
return segmentation_mask
def get_config(self):
config = super().get_config()
config.update({"groups": self.groups, "seed": self.seed})
return config
def compute_output_shape(self, input_shape):
return input_shape
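# --- Illustrative sketch, not part of the layer ---
# augment_image() above shuffles channels group-wise: channels are split into
# `groups` contiguous blocks, the blocks are permuted as whole units, and the
# final transpose interleaves them back into a single channel axis. A hedged
# demo with 4 channels and 2 groups, using a fixed permutation in place of
# tf.random.shuffle so the result is deterministic:
if __name__ == "__main__":
    demo = tf.reshape(tf.range(4.0), [1, 1, 4])  # channels [0, 1, 2, 3]
    blocks = tf.transpose(tf.reshape(demo, [1, 1, 2, 2]), [2, 0, 1, 3])
    blocks = tf.gather(blocks, [1, 0])  # swap the two channel groups
    out = tf.reshape(tf.transpose(blocks, [1, 2, 3, 0]), [1, 1, 4])
    print(out.numpy())  # -> [[[2. 0. 3. 1.]]] (groups swapped, then interleaved)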
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from keras_cv.layers.preprocessing.channel_shuffle import ChannelShuffle
class ChannelShuffleTest(tf.test.TestCase):
def test_return_shapes(self):
xs = tf.ones((2, 512, 512, 3))
layer = ChannelShuffle(groups=3)
xs = layer(xs, training=True)
self.assertEqual(xs.shape, [2, 512, 512, 3])
def test_channel_shuffle_call_results_one_channel(self):
xs = tf.cast(
tf.stack(
[3 * tf.ones((40, 40, 1)), 2 * tf.ones((40, 40, 1))],
axis=0,
),
dtype=tf.float32,
)
layer = ChannelShuffle(groups=1)
xs = layer(xs, training=True)
self.assertTrue(tf.math.reduce_any(xs[0] == 3.0))
self.assertTrue(tf.math.reduce_any(xs[1] == 2.0))
def test_channel_shuffle_call_results_multi_channel(self):
xs = tf.cast(
tf.stack(
[3 * tf.ones((40, 40, 20)), 2 * tf.ones((40, 40, 20))],
axis=0,
),
dtype=tf.float32,
)
layer = ChannelShuffle(groups=5)
xs = layer(xs, training=True)
self.assertTrue(tf.math.reduce_any(xs[0] == 3.0))
self.assertTrue(tf.math.reduce_any(xs[1] == 2.0))
def test_non_square_image(self):
xs = tf.cast(
tf.stack(
[2 * tf.ones((1024, 512, 1)), tf.ones((1024, 512, 1))],
axis=0,
),
dtype=tf.float32,
)
layer = ChannelShuffle(groups=1)
xs = layer(xs, training=True)
self.assertTrue(tf.math.reduce_any(xs[0] == 2.0))
self.assertTrue(tf.math.reduce_any(xs[1] == 1.0))
def test_in_tf_function(self):
xs = tf.cast(
tf.stack([2 * tf.ones((100, 100, 1)), tf.ones((100, 100, 1))], axis=0),
dtype=tf.float32,
)
layer = ChannelShuffle(groups=1)
@tf.function
def augment(x):
return layer(x, training=True)
xs = augment(xs)
self.assertTrue(tf.math.reduce_any(xs[0] == 2.0))
self.assertTrue(tf.math.reduce_any(xs[1] == 1.0))
def test_in_single_image(self):
xs = tf.cast(
tf.ones((512, 512, 1)),
dtype=tf.float32,
)
layer = ChannelShuffle(groups=1)
xs = layer(xs, training=True)
self.assertTrue(tf.math.reduce_any(xs == 1.0))
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from keras_cv.layers.preprocessing.base_image_augmentation_layer import (
BaseImageAugmentationLayer,
)
from keras_cv.utils import fill_utils
@tf.keras.utils.register_keras_serializable(package="keras_cv")
class CutMix(BaseImageAugmentationLayer):
"""CutMix implements the CutMix data augmentation technique.
Args:
alpha: Float between 0 and 1. Inverse scale parameter for the gamma
distribution. This controls the shape of the distribution from which the
        smoothing values are sampled. Defaults to 1.0, which is a recommended value
when training an imagenet1k classification model.
seed: Integer. Used to create a random seed.
References:
        - [CutMix paper](https://arxiv.org/abs/1905.04899).
Sample usage:
```python
(images, labels), _ = tf.keras.datasets.cifar10.load_data()
labels = tf.one_hot(labels.squeeze(), 10)
    cutmix = keras_cv.layers.preprocessing.cut_mix.CutMix()
output = cutmix({"images": images[:32], "labels": labels[:32]})
# output == {'images': updated_images, 'labels': updated_labels}
```
"""
def __init__(self, alpha=1.0, seed=None, **kwargs):
super().__init__(seed=seed, **kwargs)
self.alpha = alpha
self.seed = seed
def _sample_from_beta(self, alpha, beta, shape):
sample_alpha = tf.random.gamma(
shape, 1.0, beta=alpha, seed=self._random_generator.make_legacy_seed()
)
sample_beta = tf.random.gamma(
shape, 1.0, beta=beta, seed=self._random_generator.make_legacy_seed()
)
return sample_alpha / (sample_alpha + sample_beta)
def _batch_augment(self, inputs):
self._validate_inputs(inputs)
images = inputs.get("images", None)
labels = inputs.get("labels", None)
if images is None or labels is None:
raise ValueError(
"CutMix expects inputs in a dictionary with format "
'{"images": images, "labels": labels}.'
f"Got: inputs = {inputs}"
)
images, labels = self._update_labels(*self._cutmix(images, labels))
inputs["images"] = images
inputs["labels"] = labels
return inputs
def _augment(self, inputs):
raise ValueError(
"CutMix received a single image to `call`. The layer relies on "
"combining multiple examples, and as such will not behave as "
"expected. Please call the layer with 2 or more samples."
)
def _cutmix(self, images, labels):
"""Apply cutmix."""
input_shape = tf.shape(images)
batch_size, image_height, image_width = (
input_shape[0],
input_shape[1],
input_shape[2],
)
permutation_order = tf.random.shuffle(tf.range(0, batch_size), seed=self.seed)
lambda_sample = self._sample_from_beta(self.alpha, self.alpha, (batch_size,))
ratio = tf.math.sqrt(1 - lambda_sample)
cut_height = tf.cast(
ratio * tf.cast(image_height, dtype=tf.float32), dtype=tf.int32
)
cut_width = tf.cast(
            ratio * tf.cast(image_width, dtype=tf.float32), dtype=tf.int32
)
random_center_height = tf.random.uniform(
shape=[batch_size], minval=0, maxval=image_height, dtype=tf.int32
)
random_center_width = tf.random.uniform(
shape=[batch_size], minval=0, maxval=image_width, dtype=tf.int32
)
bounding_box_area = cut_height * cut_width
lambda_sample = 1.0 - bounding_box_area / (image_height * image_width)
lambda_sample = tf.cast(lambda_sample, dtype=tf.float32)
images = fill_utils.fill_rectangle(
images,
random_center_width,
random_center_height,
cut_width,
cut_height,
tf.gather(images, permutation_order),
)
return images, labels, lambda_sample, permutation_order
def _update_labels(self, images, labels, lambda_sample, permutation_order):
cutout_labels = tf.gather(labels, permutation_order)
lambda_sample = tf.reshape(lambda_sample, [-1, 1])
labels = lambda_sample * labels + (1.0 - lambda_sample) * cutout_labels
return images, labels
def _validate_inputs(self, inputs):
labels = inputs.get("labels", None)
if labels is None:
raise ValueError(
"CutMix expects 'labels' to be present in its inputs. "
"CutMix relies on both images an labels. "
"Please pass a dictionary with keys 'images' "
"containing the image Tensor, and 'labels' containing "
"the classification labels. "
"For example, `cut_mix({'images': images, 'labels': labels})`."
)
if not labels.dtype.is_floating:
raise ValueError(
f"CutMix received labels with type {labels.dtype}. "
"Labels must be of type float."
)
def get_config(self):
config = {
"alpha": self.alpha,
"seed": self.seed,
}
base_config = super().get_config()
return dict(list(base_config.items()) + list(config.items()))
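# --- Illustrative sketch, not part of the layer ---
# _update_labels() above is plain convex mixing: each label is blended with
# the label of the image whose patch was pasted in, weighted by the surviving
# area fraction lambda. A hedged toy example with two one-hot labels and an
# assumed permutation [1, 0] (lambda = 0.75 means 25% of each image was
# replaced):
if __name__ == "__main__":
    labels = tf.constant([[1.0, 0.0], [0.0, 1.0]])
    lam = tf.constant([[0.75], [0.75]])
    mixed = lam * labels + (1.0 - lam) * tf.gather(labels, [1, 0])
    print(mixed.numpy())  # [[0.75 0.25] [0.25 0.75]]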
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from keras_cv.layers.preprocessing.cut_mix import CutMix
classes = 10
class CutMixTest(tf.test.TestCase):
def test_return_shapes(self):
xs = tf.ones((2, 512, 512, 3))
# randomly sample labels
ys = tf.random.categorical(tf.math.log([[0.5, 0.5]]), 2)
ys = tf.squeeze(ys)
ys = tf.one_hot(ys, classes)
layer = CutMix(seed=1)
outputs = layer({"images": xs, "labels": ys})
xs, ys = outputs["images"], outputs["labels"]
self.assertEqual(xs.shape, [2, 512, 512, 3])
self.assertEqual(ys.shape, [2, 10])
def test_cut_mix_call_results(self):
xs = tf.cast(
tf.stack(
[2 * tf.ones((4, 4, 3)), tf.ones((4, 4, 3))],
axis=0,
),
tf.float32,
)
ys = tf.one_hot(tf.constant([0, 1]), 2)
layer = CutMix(seed=1)
outputs = layer({"images": xs, "labels": ys})
xs, ys = outputs["images"], outputs["labels"]
# At least some pixels should be replaced in the CutMix operation
self.assertTrue(tf.math.reduce_any(xs[0] == 1.0))
self.assertTrue(tf.math.reduce_any(xs[0] == 2.0))
self.assertTrue(tf.math.reduce_any(xs[1] == 1.0))
self.assertTrue(tf.math.reduce_any(xs[1] == 2.0))
        # None of the labels should still be exactly 0 or 1 after mixing
self.assertNotAllClose(ys, 1.0)
self.assertNotAllClose(ys, 0.0)
def test_cut_mix_call_results_one_channel(self):
xs = tf.cast(
tf.stack(
[2 * tf.ones((4, 4, 1)), tf.ones((4, 4, 1))],
axis=0,
),
tf.float32,
)
ys = tf.one_hot(tf.constant([0, 1]), 2)
layer = CutMix(seed=1)
outputs = layer({"images": xs, "labels": ys})
xs, ys = outputs["images"], outputs["labels"]
# At least some pixels should be replaced in the CutMix operation
self.assertTrue(tf.math.reduce_any(xs[0] == 1.0))
self.assertTrue(tf.math.reduce_any(xs[0] == 2.0))
self.assertTrue(tf.math.reduce_any(xs[1] == 1.0))
self.assertTrue(tf.math.reduce_any(xs[1] == 2.0))
        # None of the labels should still be exactly 0 or 1 after mixing
self.assertNotAllClose(ys, 1.0)
self.assertNotAllClose(ys, 0.0)
def test_in_tf_function(self):
xs = tf.cast(
tf.stack([2 * tf.ones((100, 100, 1)), tf.ones((100, 100, 1))], axis=0),
tf.float32,
)
ys = tf.one_hot(tf.constant([0, 1]), 2)
layer = CutMix(seed=1)
@tf.function
def augment(x, y):
return layer({"images": x, "labels": y})
outputs = augment(xs, ys)
xs, ys = outputs["images"], outputs["labels"]
# At least some pixels should be replaced in the CutMix operation
self.assertTrue(tf.math.reduce_any(xs[0] == 1.0))
self.assertTrue(tf.math.reduce_any(xs[0] == 2.0))
self.assertTrue(tf.math.reduce_any(xs[1] == 1.0))
self.assertTrue(tf.math.reduce_any(xs[1] == 2.0))
        # None of the labels should still be exactly 0 or 1 after mixing
self.assertNotAllClose(ys, 1.0)
self.assertNotAllClose(ys, 0.0)
def test_single_image_input(self):
xs = tf.ones((512, 512, 3))
ys = tf.one_hot(tf.constant([1]), 2)
inputs = {"images": xs, "labels": ys}
layer = CutMix()
        with self.assertRaisesRegex(
ValueError, "CutMix received a single image to `call`"
):
_ = layer(inputs)
def test_missing_labels(self):
xs = tf.ones((2, 512, 512, 3))
inputs = {"images": xs}
layer = CutMix()
with self.assertRaisesRegexp(ValueError, "CutMix expects 'labels'"):
_ = layer(inputs)
def test_int_labels(self):
xs = tf.ones((2, 512, 512, 3))
ys = tf.one_hot(tf.constant([1, 0]), 2, dtype=tf.int32)
inputs = {"images": xs, "labels": ys}
layer = CutMix()
with self.assertRaisesRegexp(ValueError, "CutMix received labels with type"):
_ = layer(inputs)
def test_image_input(self):
xs = tf.ones((2, 512, 512, 3))
layer = CutMix()
        with self.assertRaisesRegex(
ValueError, "CutMix expects 'labels' to be present in its inputs"
):
_ = layer(xs)
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from keras_cv.layers.preprocessing.base_image_augmentation_layer import (
BaseImageAugmentationLayer,
)
from keras_cv.utils import preprocessing
@tf.keras.utils.register_keras_serializable(package="keras_cv")
class Equalization(BaseImageAugmentationLayer):
"""Equalization performs histogram equalization on a channel-wise basis.
Args:
value_range: a tuple or a list of two elements. The first value represents
the lower bound for values in passed images, the second represents the
upper bound. Images passed to the layer should have values within
`value_range`.
bins: Integer indicating the number of bins to use in histogram equalization.
Should be in the range [0, 256].
Usage:
```python
equalize = Equalization()
(images, labels), _ = tf.keras.datasets.cifar10.load_data()
    # Note that images are a uint8 Tensor with values in the range [0, 255]
images = equalize(images)
```
Call arguments:
images: Tensor of pixels in range [0, 255], in RGB format. Can be
of type float or int. Should be in NHWC format.
"""
def __init__(self, value_range, bins=256, **kwargs):
super().__init__(**kwargs)
self.bins = bins
self.value_range = value_range
def equalize_channel(self, image, channel_index):
"""equalize_channel performs histogram equalization on a single channel.
Args:
image: int Tensor with pixels in range [0, 255], RGB format,
with channels last
channel_index: channel to equalize
"""
image = image[..., channel_index]
# Compute the histogram of the image channel.
histogram = tf.histogram_fixed_width(image, [0, 255], nbins=self.bins)
        # For the purposes of computing the step, filter out the zeros.
        # Zeros are replaced by a big number while calculating the min, to keep
        # the shape constant across input sizes for compatibility with vectorized_map
big_number = 1410065408
histogram_without_zeroes = tf.where(
tf.equal(histogram, 0),
big_number,
histogram,
)
step = (tf.reduce_sum(histogram) - tf.reduce_min(histogram_without_zeroes)) // (
self.bins - 1
)
def build_mapping(histogram, step):
# Compute the cumulative sum, shifting by step // 2
# and then normalization by step.
lookup_table = (tf.cumsum(histogram) + (step // 2)) // step
# Shift lookup_table, prepending with 0.
lookup_table = tf.concat([[0], lookup_table[:-1]], 0)
# Clip the counts to be in range. This is done
# in the C code for image.point.
return tf.clip_by_value(lookup_table, 0, 255)
# If step is zero, return the original image. Otherwise, build
# lookup table from the full histogram and step and then index from it.
result = tf.cond(
tf.equal(step, 0),
lambda: image,
lambda: tf.gather(build_mapping(histogram, step), image),
)
return result
def augment_image(self, image, **kwargs):
image = preprocessing.transform_value_range(
image, self.value_range, (0, 255), dtype=image.dtype
)
image = tf.cast(image, tf.int32)
image = tf.map_fn(
lambda channel: self.equalize_channel(image, channel),
tf.range(tf.shape(image)[-1]),
)
image = tf.transpose(image, [1, 2, 0])
image = tf.cast(image, tf.float32)
image = preprocessing.transform_value_range(image, (0, 255), self.value_range)
return image
def augment_bounding_boxes(self, bounding_boxes, **kwargs):
return bounding_boxes
def augment_label(self, label, transformation=None, **kwargs):
return label
def augment_segmentation_mask(self, segmentation_mask, transformation, **kwargs):
return segmentation_mask
def get_config(self):
config = super().get_config()
config.update({"bins": self.bins, "value_range": self.value_range})
return config
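# --- Illustrative sketch, not part of the layer ---
# The lookup table built in equalize_channel() follows the classic PIL-style
# recipe: divide the cumulative histogram by a step so the output levels are
# used evenly, then shift by one bin. A hedged walk-through on a toy 4-bin
# histogram (the counts are assumptions for illustration only):
if __name__ == "__main__":
    hist = tf.constant([4, 0, 8, 4])             # toy channel histogram
    step = (tf.reduce_sum(hist) - 4) // (4 - 1)  # min nonzero count is 4 -> step 4
    lut = (tf.cumsum(hist) + step // 2) // step  # -> [1, 1, 3, 4]
    lut = tf.concat([[0], lut[:-1]], 0)          # shift, prepend 0 -> [0, 1, 1, 3]
    print(tf.clip_by_value(lut, 0, 255).numpy())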
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from absl.testing import parameterized
from keras_cv.layers.preprocessing.equalization import Equalization
class EqualizationTest(tf.test.TestCase, parameterized.TestCase):
def test_return_shapes(self):
xs = 255 * tf.ones((2, 512, 512, 3), dtype=tf.int32)
layer = Equalization(value_range=(0, 255))
xs = layer(xs)
self.assertEqual(xs.shape, [2, 512, 512, 3])
self.assertAllEqual(xs, 255 * tf.ones((2, 512, 512, 3)))
def test_return_shapes_inside_model(self):
layer = Equalization(value_range=(0, 255))
inp = tf.keras.layers.Input(shape=[512, 512, 5])
out = layer(inp)
model = tf.keras.models.Model(inp, out)
self.assertEqual(model.layers[-1].output_shape, (None, 512, 512, 5))
def test_equalizes_to_all_bins(self):
xs = tf.random.uniform((2, 512, 512, 3), 0, 255, dtype=tf.float32)
layer = Equalization(value_range=(0, 255))
xs = layer(xs)
for i in range(0, 256):
self.assertTrue(tf.math.reduce_any(xs == i))
@parameterized.named_parameters(
("float32", tf.float32), ("int32", tf.int32), ("int64", tf.int64)
)
def test_input_dtypes(self, dtype):
xs = tf.random.uniform((2, 512, 512, 3), 0, 255, dtype=dtype)
layer = Equalization(value_range=(0, 255))
xs = layer(xs)
for i in range(0, 256):
self.assertTrue(tf.math.reduce_any(xs == i))
self.assertAllInRange(xs, 0, 255)
@parameterized.named_parameters(("0_255", 0, 255), ("0_1", 0, 1))
def test_output_range(self, lower, upper):
xs = tf.random.uniform((2, 512, 512, 3), lower, upper, dtype=tf.float32)
layer = Equalization(value_range=(lower, upper))
xs = layer(xs)
self.assertAllInRange(xs, lower, upper)
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from keras_cv.layers.preprocessing.base_image_augmentation_layer import (
BaseImageAugmentationLayer,
)
@tf.keras.utils.register_keras_serializable(package="keras_cv")
class FourierMix(BaseImageAugmentationLayer):
"""FourierMix implements the FMix data augmentation technique.
Args:
alpha: Float value for beta distribution. Inverse scale parameter for the gamma
distribution. This controls the shape of the distribution from which the
smoothing values are sampled. Defaults to 0.5, which is a recommended value
in the paper.
decay_power: A float value representing the decay power. Defaults to 3, as
recommended in the paper.
seed: Integer. Used to create a random seed.
References:
- [FMix paper](https://arxiv.org/abs/2002.12047).
Sample usage:
```python
(images, labels), _ = tf.keras.datasets.cifar10.load_data()
fourier_mix = keras_cv.layers.preprocessing.FourierMix(0.5)
    output = fourier_mix({'images': images, 'labels': labels})
# output == {'images': updated_images, 'labels': updated_labels}
```
"""
def __init__(self, alpha=0.5, decay_power=3, seed=None, **kwargs):
super().__init__(seed=seed, **kwargs)
self.alpha = alpha
self.decay_power = decay_power
self.seed = seed
def _sample_from_beta(self, alpha, beta, shape):
sample_alpha = tf.random.gamma(
shape, 1.0, beta=alpha, seed=self._random_generator.make_legacy_seed()
)
sample_beta = tf.random.gamma(
shape, 1.0, beta=beta, seed=self._random_generator.make_legacy_seed()
)
return sample_alpha / (sample_alpha + sample_beta)
@staticmethod
def _fftfreq(signal_size, sample_spacing=1):
"""This function returns the sample frequencies of a discrete fourier transform.
The result array contains the frequency bin centers starting at 0 using the
sample spacing.
"""
results = tf.concat(
[
tf.range((signal_size - 1) / 2 + 1, dtype=tf.int32),
tf.range(-(signal_size // 2), 0, dtype=tf.int32),
],
0,
)
return results / (signal_size * sample_spacing)
def _apply_fftfreq(self, h, w):
        # Compute the Fourier transform sample frequencies across 2 dimensions (height and width).
fx = FourierMix._fftfreq(w)[: w // 2 + 1 + w % 2]
fy = FourierMix._fftfreq(h)
fy = tf.expand_dims(fy, -1)
return tf.math.sqrt(fx * fx + fy * fy)
def _get_spectrum(self, freqs, decay_power, channel, h, w):
# Function to apply a low pass filter by decaying its high frequency components.
scale = tf.ones(1) / tf.cast(
tf.math.maximum(freqs, tf.convert_to_tensor([1 / tf.reduce_max([w, h])]))
** decay_power,
tf.float32,
)
param_size = tf.concat(
[tf.constant([channel]), tf.shape(freqs), tf.constant([2])], 0
)
param = self._random_generator.random_normal(param_size)
scale = tf.expand_dims(scale, -1)[None, :]
return scale * param
def _sample_mask_from_transform(self, decay, shape, ch=1):
        # Sample a low-frequency map in the Fourier domain.
freqs = self._apply_fftfreq(shape[0], shape[1])
spectrum = self._get_spectrum(freqs, decay, ch, shape[0], shape[1])
spectrum = tf.complex(spectrum[:, 0], spectrum[:, 1])
mask = tf.math.real(tf.signal.irfft2d(spectrum, shape))
mask = mask[:1, : shape[0], : shape[1]]
mask = mask - tf.reduce_min(mask)
mask = mask / tf.reduce_max(mask)
return mask
def _binarise_mask(self, mask, lam, in_shape):
# Create the final mask from the sampled values.
idx = tf.argsort(tf.reshape(mask, [-1]), direction="DESCENDING")
mask = tf.reshape(mask, [-1])
num = tf.cast(tf.math.round(lam * tf.cast(tf.size(mask), tf.float32)), tf.int32)
updates = tf.concat(
[
tf.ones((num,), tf.float32),
tf.zeros((tf.size(mask) - num,), tf.float32),
],
0,
)
mask = tf.scatter_nd(
tf.expand_dims(idx, -1), updates, tf.expand_dims(tf.size(mask), -1)
)
mask = tf.reshape(mask, in_shape)
return mask
def _batch_augment(self, inputs):
images = inputs.get("images", None)
labels = inputs.get("labels", None)
if images is None or labels is None:
raise ValueError(
"FourierMix expects inputs in a dictionary with format "
'{"images": images, "labels": labels}.'
f"Got: inputs = {inputs}"
)
        images, lambda_sample, permutation_order = self._fourier_mix(images)
        labels = self._update_labels(labels, lambda_sample, permutation_order)
        inputs["labels"] = labels
        inputs["images"] = images
return inputs
def _augment(self, inputs):
raise ValueError(
"FourierMix received a single image to `call`. The layer relies on "
"combining multiple examples, and as such will not behave as "
"expected. Please call the layer with 2 or more samples."
)
def _fourier_mix(self, images):
shape = tf.shape(images)
permutation_order = tf.random.shuffle(tf.range(0, shape[0]), seed=self.seed)
lambda_sample = self._sample_from_beta(self.alpha, self.alpha, (shape[0],))
# generate masks utilizing mapped calls
masks = tf.map_fn(
lambda x: self._sample_mask_from_transform(self.decay_power, shape[1:-1]),
tf.range(shape[0], dtype=tf.float32),
)
# binarise masks utilizing mapped calls
masks = tf.map_fn(
lambda i: self._binarise_mask(masks[i], lambda_sample[i], shape[1:-1]),
tf.range(shape[0], dtype=tf.int32),
fn_output_signature=tf.float32,
)
masks = tf.expand_dims(masks, -1)
fmix_images = tf.gather(images, permutation_order)
images = masks * images + (1.0 - masks) * fmix_images
return images, lambda_sample, permutation_order
def _update_labels(self, labels, lambda_sample, permutation_order):
labels_for_fmix = tf.gather(labels, permutation_order)
# for broadcasting
batch_size = tf.expand_dims(tf.shape(labels)[0], -1)
labels_rank = tf.rank(labels)
broadcast_shape = tf.concat([batch_size, tf.ones(labels_rank - 1, tf.int32)], 0)
lambda_sample = tf.reshape(lambda_sample, broadcast_shape)
labels = lambda_sample * labels + (1.0 - lambda_sample) * labels_for_fmix
return labels
def get_config(self):
config = {
"alpha": self.alpha,
"decay_power": self.decay_power,
"seed": self.seed,
}
base_config = super().get_config()
return dict(list(base_config.items()) + list(config.items()))
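# --- Illustrative sketch, not part of the layer ---
# _fftfreq() above reproduces numpy's np.fft.fftfreq bin ordering:
# non-negative frequencies first, then the negative ones. A hedged check for
# a signal of size 4:
if __name__ == "__main__":
    import numpy as np

    print(FourierMix._fftfreq(4).numpy())  # [ 0.    0.25 -0.5  -0.25]
    print(np.fft.fftfreq(4))               # same ordering and values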
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from keras_cv.layers.preprocessing.fourier_mix import FourierMix
classes = 10
class FourierMixTest(tf.test.TestCase):
def test_return_shapes(self):
xs = tf.ones((2, 512, 512, 3))
# randomly sample labels
ys = tf.random.categorical(tf.math.log([[0.5, 0.5]]), 2)
ys = tf.squeeze(ys)
ys = tf.one_hot(ys, classes)
layer = FourierMix()
outputs = layer({"images": xs, "labels": ys})
xs, ys = (
outputs["images"],
outputs["labels"],
)
self.assertEqual(xs.shape, [2, 512, 512, 3])
self.assertEqual(ys.shape, [2, 10])
def test_fourier_mix_call_results(self):
xs = tf.cast(
tf.stack(
[2 * tf.ones((4, 4, 3)), tf.ones((4, 4, 3))],
axis=0,
),
tf.float32,
)
ys = tf.one_hot(tf.constant([0, 1]), 2)
layer = FourierMix()
outputs = layer({"images": xs, "labels": ys})
xs, ys = outputs["images"], outputs["labels"]
        # None of the individual values should still be close to 1 or 2
self.assertNotAllClose(xs, 1.0)
self.assertNotAllClose(xs, 2.0)
        # None of the labels should still be exactly 0 or 1 after mixing
self.assertNotAllClose(ys, 1.0)
self.assertNotAllClose(ys, 0.0)
def test_in_tf_function(self):
xs = tf.cast(
tf.stack(
[2 * tf.ones((4, 4, 3)), tf.ones((4, 4, 3))],
axis=0,
),
tf.float32,
)
ys = tf.one_hot(tf.constant([0, 1]), 2)
layer = FourierMix()
@tf.function
def augment(x, y):
return layer({"images": x, "labels": y})
outputs = augment(xs, ys)
xs, ys = outputs["images"], outputs["labels"]
        # None of the individual values should still be close to 1 or 2
self.assertNotAllClose(xs, 1.0)
self.assertNotAllClose(xs, 2.0)
        # None of the labels should still be exactly 0 or 1 after mixing
self.assertNotAllClose(ys, 1.0)
self.assertNotAllClose(ys, 0.0)
def test_image_input_only(self):
xs = tf.cast(
tf.stack([2 * tf.ones((100, 100, 1)), tf.ones((100, 100, 1))], axis=0),
tf.float32,
)
layer = FourierMix()
with self.assertRaisesRegexp(ValueError, "expects inputs in a dictionary"):
_ = layer(xs)
def test_single_image_input(self):
xs = tf.ones((512, 512, 3))
ys = tf.one_hot(tf.constant([1]), 2)
inputs = {"images": xs, "labels": ys}
layer = FourierMix()
        with self.assertRaisesRegex(
ValueError, "FourierMix received a single image to `call`"
):
_ = layer(inputs)
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from keras_cv.layers.preprocessing.base_image_augmentation_layer import (
BaseImageAugmentationLayer,
)
@tf.keras.utils.register_keras_serializable(package="keras_cv")
class Grayscale(BaseImageAugmentationLayer):
"""Grayscale is a preprocessing layer that transforms RGB images to Grayscale images.
Input images should have values in the range of [0, 255].
Input shape:
3D (unbatched) or 4D (batched) tensor with shape:
`(..., height, width, channels)`, in `"channels_last"` format
Output shape:
3D (unbatched) or 4D (batched) tensor with shape:
`(..., height, width, channels)`, in `"channels_last"` format
Args:
        output_channels: Number of color channels present in the output image.
            The output_channels can be 1 or 3. An RGB image with shape
            (..., height, width, 3) will have the following shapes
            after the `Grayscale` operation:
                a. (..., height, width, 1) if output_channels = 1
                b. (..., height, width, 3) if output_channels = 3.
Usage:
```python
(images, labels), _ = tf.keras.datasets.cifar10.load_data()
to_grayscale = keras_cv.layers.preprocessing.Grayscale()
augmented_images = to_grayscale(images)
```
"""
def __init__(self, output_channels=1, **kwargs):
super().__init__(**kwargs)
        self.output_channels = output_channels
        self._check_input_params(output_channels)
        # This layer may raise an error when running on GPU using auto_vectorize
        self.auto_vectorize = False
def _check_input_params(self, output_channels):
if output_channels not in [1, 3]:
raise ValueError(
"Received invalid argument output_channels. "
f"output_channels must be in 1 or 3. Got {output_channels}"
)
self.output_channels = output_channels
def augment_image(self, image, transformation=None, **kwargs):
grayscale = tf.image.rgb_to_grayscale(image)
if self.output_channels == 1:
return grayscale
elif self.output_channels == 3:
return tf.image.grayscale_to_rgb(grayscale)
else:
raise ValueError("Unsupported value for `output_channels`.")
def augment_bounding_boxes(self, bounding_boxes, **kwargs):
return bounding_boxes
def augment_label(self, label, transformation=None, **kwargs):
return label
def augment_segmentation_mask(self, segmentation_mask, transformation, **kwargs):
return segmentation_mask
def get_config(self):
config = {
"output_channels": self.output_channels,
}
base_config = super().get_config()
return dict(list(base_config.items()) + list(config.items()))
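# --- Illustrative sketch, not part of the layer ---
# augment_image() above delegates to tf.image.rgb_to_grayscale, which reduces
# the channel axis with ITU-R 601 luma weights (about 0.299 R + 0.587 G +
# 0.114 B); output_channels=3 then tiles that single channel back to three.
# A hedged one-pixel demo:
if __name__ == "__main__":
    pixel = tf.constant([[[100.0, 150.0, 200.0]]])  # shape (1, 1, 3), RGB
    gray = tf.image.rgb_to_grayscale(pixel)         # shape (1, 1, 1)
    print(gray.numpy())                             # ~[[[140.7]]]
    print(tf.image.grayscale_to_rgb(gray).shape)    # (1, 1, 3)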
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from keras_cv.layers import preprocessing
class GrayscaleTest(tf.test.TestCase):
def test_return_shapes(self):
xs = tf.ones((2, 512, 512, 3))
layer = preprocessing.Grayscale(
output_channels=1,
)
xs1 = layer(xs, training=True)
layer = preprocessing.Grayscale(
output_channels=3,
)
xs2 = layer(xs, training=True)
self.assertEqual(xs1.shape, [2, 512, 512, 1])
self.assertEqual(xs2.shape, [2, 512, 512, 3])
def test_in_tf_function(self):
xs = tf.cast(
tf.stack([2 * tf.ones((100, 100, 3)), tf.ones((100, 100, 3))], axis=0),
tf.float32,
)
# test 1
layer = preprocessing.Grayscale(
output_channels=1,
)
@tf.function
def augment(x):
return layer(x, training=True)
xs1 = augment(xs)
# test 2
layer = preprocessing.Grayscale(
output_channels=3,
)
@tf.function
def augment(x):
return layer(x, training=True)
xs2 = augment(xs)
self.assertEqual(xs1.shape, [2, 100, 100, 1])
self.assertEqual(xs2.shape, [2, 100, 100, 3])
def test_non_square_image(self):
xs = tf.cast(
tf.stack([2 * tf.ones((512, 1024, 3)), tf.ones((512, 1024, 3))], axis=0),
tf.float32,
)
layer = preprocessing.Grayscale(
output_channels=1,
)
xs1 = layer(xs, training=True)
layer = preprocessing.Grayscale(
output_channels=3,
)
xs2 = layer(xs, training=True)
self.assertEqual(xs1.shape, [2, 512, 1024, 1])
self.assertEqual(xs2.shape, [2, 512, 1024, 3])
def test_in_single_image(self):
xs = tf.cast(
tf.ones((512, 512, 3)),
dtype=tf.float32,
)
layer = preprocessing.Grayscale(
output_channels=1,
)
xs1 = layer(xs, training=True)
layer = preprocessing.Grayscale(
output_channels=3,
)
xs2 = layer(xs, training=True)
self.assertEqual(xs1.shape, [512, 512, 1])
self.assertEqual(xs2.shape, [512, 512, 3])
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from tensorflow.keras import layers
from keras_cv import core
from keras_cv.layers.preprocessing.base_image_augmentation_layer import (
BaseImageAugmentationLayer,
)
from keras_cv.utils import fill_utils
from keras_cv.utils import preprocessing
def _center_crop(mask, width, height):
masks_shape = tf.shape(mask)
h_diff = masks_shape[0] - height
w_diff = masks_shape[1] - width
h_start = tf.cast(h_diff / 2, tf.int32)
w_start = tf.cast(w_diff / 2, tf.int32)
return tf.image.crop_to_bounding_box(mask, h_start, w_start, height, width)
@tf.keras.utils.register_keras_serializable(package="keras_cv")
class GridMask(BaseImageAugmentationLayer):
"""GridMask class for grid-mask augmentation.
Input shape:
Int or float tensor with values in the range [0, 255].
3D (unbatched) or 4D (batched) tensor with shape:
`(..., height, width, channels)`, in `"channels_last"` format
Output shape:
3D (unbatched) or 4D (batched) tensor with shape:
`(..., height, width, channels)`, in `"channels_last"` format
Args:
ratio_factor: A float, tuple of two floats, or `keras_cv.FactorSampler`.
            The ratio determines how much of each grid unit is masked.
            Lower values make the masked squares smaller, and higher values
            make them larger.
Floats should be in the range [0, 1]. 0.5 indicates that grid and
spacing will be of equal size. To always use the same value, pass a
`keras_cv.ConstantFactorSampler()`.
Defaults to `(0, 0.5)`.
rotation_factor:
            The rotation_factor will be used to randomly rotate the grid_mask during
            training. Defaults to 0.15, which results in an output rotated by a
            random amount in the range [-15% * 2pi, 15% * 2pi].
A float represented as fraction of 2 Pi, or a tuple of size 2
representing lower and upper bound for rotating clockwise and
            counter-clockwise. A positive value means rotating counter-clockwise,
            while a negative value means clockwise. When represented as a single
float, this value is used for both the upper and lower bound. For
instance, factor=(-0.2, 0.3) results in an output rotation by a random
amount in the range [-20% * 2pi, 30% * 2pi]. factor=0.2 results in an
output rotating by a random amount in the range [-20% * 2pi, 20% * 2pi].
fill_mode: Pixels inside the gridblock are filled according to the given
mode (one of `{"constant", "gaussian_noise"}`). Default: "constant".
- *constant*: Pixels are filled with the same constant value.
- *gaussian_noise*: Pixels are filled with random gaussian noise.
        fill_value: an integer representing the value to be filled inside the
            gridblock when `fill_mode="constant"`. Valid integer range: [0, 255].
seed: Integer. Used to create a random seed.
Usage:
```python
(images, labels), _ = tf.keras.datasets.cifar10.load_data()
random_gridmask = keras_cv.layers.preprocessing.GridMask()
augmented_images = random_gridmask(images)
```
References:
- [GridMask paper](https://arxiv.org/abs/2001.04086)
"""
def __init__(
self,
ratio_factor=(0, 0.5),
rotation_factor=0.15,
fill_mode="constant",
fill_value=0.0,
seed=None,
**kwargs,
):
super().__init__(seed=seed, **kwargs)
self.ratio_factor = preprocessing.parse_factor(
ratio_factor, param_name="ratio_factor"
)
if isinstance(rotation_factor, core.FactorSampler):
raise ValueError(
"Currently `GridMask.rotation_factor` does not support the "
"`FactorSampler` API. This will be supported in the next Keras "
"release. For now, please pass a float for the "
"`rotation_factor` argument."
)
self.fill_mode = fill_mode
self.fill_value = fill_value
self.rotation_factor = rotation_factor
self.random_rotate = layers.RandomRotation(
factor=rotation_factor,
fill_mode="constant",
fill_value=0.0,
seed=seed,
)
self.auto_vectorize = False
self._check_parameter_values()
self.seed = seed
def _check_parameter_values(self):
fill_mode, fill_value = self.fill_mode, self.fill_value
if fill_value not in range(0, 256):
raise ValueError(
f"fill_value should be in the range [0, 255]. Got {fill_value}"
)
if fill_mode not in ["constant", "gaussian_noise", "random"]:
raise ValueError(
'`fill_mode` should be "constant", '
f'"gaussian_noise", or "random". Got `fill_mode`={fill_mode}'
)
def get_random_transformation(
self, image=None, label=None, bounding_boxes=None, **kwargs
):
ratio = self.ratio_factor()
# compute grid mask
input_shape = tf.shape(image)
mask = self._compute_grid_mask(input_shape, ratio=ratio)
# convert mask to single-channel image
mask = tf.cast(mask, tf.float32)
mask = tf.expand_dims(mask, axis=-1)
# randomly rotate mask
mask = self.random_rotate(mask)
# compute fill
if self.fill_mode == "constant":
fill_value = tf.fill(input_shape, self.fill_value)
else:
# gaussian noise
fill_value = self._random_generator.random_normal(
shape=input_shape, dtype=image.dtype
)
return mask, fill_value
def _compute_grid_mask(self, input_shape, ratio):
height = tf.cast(input_shape[0], tf.float32)
width = tf.cast(input_shape[1], tf.float32)
# mask side length
input_diagonal_len = tf.sqrt(tf.square(width) + tf.square(height))
mask_side_len = tf.math.ceil(input_diagonal_len)
# grid unit size
unit_size = self._random_generator.random_uniform(
shape=(),
minval=tf.math.minimum(height * 0.5, width * 0.3),
maxval=tf.math.maximum(height * 0.5, width * 0.3) + 1,
dtype=tf.float32,
)
rectangle_side_len = tf.cast((ratio) * unit_size, tf.float32)
# sample x and y offset for grid units randomly between 0 and unit_size
delta_x = self._random_generator.random_uniform(
shape=(), minval=0.0, maxval=unit_size, dtype=tf.float32
)
delta_y = self._random_generator.random_uniform(
shape=(), minval=0.0, maxval=unit_size, dtype=tf.float32
)
# grid size (number of diagonal units in grid)
grid_size = mask_side_len // unit_size + 1
grid_size_range = tf.range(1, grid_size + 1)
# diagonal corner coordinates
unit_size_range = grid_size_range * unit_size
x1 = unit_size_range - delta_x
x0 = x1 - rectangle_side_len
y1 = unit_size_range - delta_y
y0 = y1 - rectangle_side_len
# compute grid coordinates
x0, y0 = tf.meshgrid(x0, y0)
x1, y1 = tf.meshgrid(x1, y1)
# flatten mesh grid
x0 = tf.reshape(x0, [-1])
y0 = tf.reshape(y0, [-1])
x1 = tf.reshape(x1, [-1])
y1 = tf.reshape(y1, [-1])
# convert coordinates to mask
corners = tf.stack([x0, y0, x1, y1], axis=-1)
mask_side_len = tf.cast(mask_side_len, tf.int32)
rectangle_masks = fill_utils.corners_to_mask(
corners, mask_shape=(mask_side_len, mask_side_len)
)
grid_mask = tf.reduce_any(rectangle_masks, axis=0)
return grid_mask
def augment_image(self, image, transformation=None, **kwargs):
mask, fill_value = transformation
input_shape = tf.shape(image)
# center crop mask
input_height = input_shape[0]
input_width = input_shape[1]
mask = _center_crop(mask, input_width, input_height)
# convert back to boolean mask
mask = tf.cast(mask, tf.bool)
return tf.where(mask, fill_value, image)
def augment_bounding_boxes(self, bounding_boxes, **kwargs):
return bounding_boxes
def augment_label(self, label, transformation=None, **kwargs):
return label
def augment_segmentation_mask(self, segmentation_mask, transformation, **kwargs):
return segmentation_mask
def get_config(self):
config = {
"ratio_factor": self.ratio_factor,
"rotation_factor": self.rotation_factor,
"fill_mode": self.fill_mode,
"fill_value": self.fill_value,
"seed": self.seed,
}
base_config = super().get_config()
return dict(list(base_config.items()) + list(config.items()))
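# --- Illustrative sketch, not part of the layer ---
# The corner arithmetic in _compute_grid_mask() lays rectangles on a regular
# grid: for unit k, the right edge sits at k * unit_size - delta and the left
# edge is rectangle_side_len to its left. A hedged toy computation along one
# axis (unit_size = 10, delta = 3, ratio = 0.5 are assumptions for
# illustration):
if __name__ == "__main__":
    unit_size, delta = 10.0, 3.0
    rect = 0.5 * unit_size                       # rectangle_side_len
    x1 = tf.range(1.0, 4.0) * unit_size - delta  # right edges: [7, 17, 27]
    x0 = x1 - rect                               # left edges:  [2, 12, 22]
    print(tf.stack([x0, x1], axis=-1).numpy())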
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
import keras_cv
from keras_cv.layers.preprocessing.grid_mask import GridMask
class GridMaskTest(tf.test.TestCase):
def test_return_shapes(self):
xs = tf.ones((2, 512, 512, 3))
layer = GridMask(ratio_factor=0.1, rotation_factor=(-0.2, 0.3))
xs = layer(xs, training=True)
self.assertEqual(xs.shape, [2, 512, 512, 3])
def test_gridmask_call_results_one_channel(self):
xs = tf.cast(
tf.stack(
[3 * tf.ones((40, 40, 1)), 2 * tf.ones((40, 40, 1))],
axis=0,
),
dtype=tf.float32,
)
fill_value = 0.0
layer = GridMask(
ratio_factor=0.3,
rotation_factor=(0.2, 0.3),
fill_mode="constant",
fill_value=fill_value,
)
xs = layer(xs, training=True)
# Some pixels should be replaced with fill_value
self.assertTrue(tf.math.reduce_any(xs[0] == float(fill_value)))
self.assertTrue(tf.math.reduce_any(xs[0] == 3.0))
self.assertTrue(tf.math.reduce_any(xs[1] == float(fill_value)))
self.assertTrue(tf.math.reduce_any(xs[1] == 2.0))
def test_non_square_image(self):
xs = tf.cast(
tf.stack(
[2 * tf.ones((1024, 512, 1)), tf.ones((1024, 512, 1))],
axis=0,
),
dtype=tf.float32,
)
fill_value = 100.0
layer = GridMask(
ratio_factor=0.6,
rotation_factor=0.3,
fill_mode="constant",
fill_value=fill_value,
)
xs = layer(xs, training=True)
# Some pixels should be replaced with fill_value
self.assertTrue(tf.math.reduce_any(xs[0] == float(fill_value)))
self.assertTrue(tf.math.reduce_any(xs[0] == 2.0))
self.assertTrue(tf.math.reduce_any(xs[1] == float(fill_value)))
self.assertTrue(tf.math.reduce_any(xs[1] == 1.0))
def test_in_tf_function(self):
xs = tf.cast(
tf.stack([2 * tf.ones((100, 100, 1)), tf.ones((100, 100, 1))], axis=0),
dtype=tf.float32,
)
fill_value = 255.0
layer = GridMask(
ratio_factor=keras_cv.ConstantFactorSampler(0.5),
rotation_factor=0.5,
fill_mode="constant",
fill_value=fill_value,
)
@tf.function
def augment(x):
return layer(x, training=True)
xs = augment(xs)
# Some pixels should be replaced with fill_value
self.assertTrue(tf.math.reduce_any(xs[0] == float(fill_value)))
self.assertTrue(tf.math.reduce_any(xs[0] == 2.0))
self.assertTrue(tf.math.reduce_any(xs[1] == float(fill_value)))
self.assertTrue(tf.math.reduce_any(xs[1] == 1.0))
def test_in_single_image(self):
xs = tf.cast(
tf.ones((512, 512, 1)),
dtype=tf.float32,
)
layer = GridMask(ratio_factor=(0.5, 0.5), fill_mode="constant", fill_value=0.0)
xs = layer(xs, training=True)
self.assertTrue(tf.math.reduce_any(xs == 0.0))
self.assertTrue(tf.math.reduce_any(xs == 1.0))
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from keras_cv.layers.preprocessing.base_image_augmentation_layer import (
BaseImageAugmentationLayer,
)
@tf.keras.utils.register_keras_serializable(package="keras_cv")
class MaybeApply(BaseImageAugmentationLayer):
"""Apply provided layer to random elements in a batch.
Args:
        layer: a keras `Layer` or `BaseImageAugmentationLayer`. This layer will be
            applied to randomly chosen samples in a batch. The layer should not
            modify the size of provided inputs.
rate: controls the frequency of applying the layer. 1.0 means all elements in
a batch will be modified. 0.0 means no elements will be modified.
Defaults to 0.5.
auto_vectorize: bool, whether to use tf.vectorized_map or tf.map_fn for
batched input. Setting this to True might give better performance but
currently doesn't work with XLA. Defaults to False.
seed: integer, controls random behaviour.
Example usage:
```
# Let's declare an example layer that will set all image pixels to zero.
zero_out = tf.keras.layers.Lambda(lambda x: {"images": 0 * x["images"]})
# Create a small batch of random, single-channel, 2x2 images:
images = tf.random.stateless_uniform(shape=(5, 2, 2, 1), seed=[0, 1])
print(images[..., 0])
# <tf.Tensor: shape=(5, 2, 2), dtype=float32, numpy=
# array([[[0.08216608, 0.40928006],
# [0.39318466, 0.3162533 ]],
#
# [[0.34717774, 0.73199546],
# [0.56369007, 0.9769211 ]],
#
# [[0.55243933, 0.13101244],
# [0.2941643 , 0.5130266 ]],
#
# [[0.38977218, 0.80855536],
# [0.6040567 , 0.10502195]],
#
# [[0.51828027, 0.12730157],
# [0.288486 , 0.252975 ]]], dtype=float32)>
# Apply the layer with 50% probability:
maybe_apply = MaybeApply(layer=zero_out, rate=0.5, seed=1234)
outputs = maybe_apply(images)
print(outputs[..., 0])
# <tf.Tensor: shape=(5, 2, 2), dtype=float32, numpy=
# array([[[0. , 0. ],
# [0. , 0. ]],
#
# [[0.34717774, 0.73199546],
# [0.56369007, 0.9769211 ]],
#
# [[0.55243933, 0.13101244],
# [0.2941643 , 0.5130266 ]],
#
# [[0.38977218, 0.80855536],
# [0.6040567 , 0.10502195]],
#
# [[0. , 0. ],
# [0. , 0. ]]], dtype=float32)>
# We can observe that the layer has been randomly applied to 2 out of 5 samples.
```
"""
def __init__(self, layer, rate=0.5, auto_vectorize=False, seed=None, **kwargs):
super().__init__(seed=seed, **kwargs)
if not (0 <= rate <= 1.0):
raise ValueError(f"rate must be in range [0, 1]. Received rate: {rate}")
self._layer = layer
self._rate = rate
self.auto_vectorize = auto_vectorize
self.seed = seed
def _augment(self, inputs):
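        # u ~ U[0, 1); `u > 1 - rate` holds with probability `rate`, so each
        # sample is augmented independently with probability `rate`.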
if self._random_generator.random_uniform(shape=()) > 1.0 - self._rate:
return self._layer(inputs)
else:
return inputs
def get_config(self):
config = super().get_config()
config.update(
{
"rate": self._rate,
"layer": self._layer,
"seed": self.seed,
"auto_vectorize": self.auto_vectorize,
}
)
return config
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from absl.testing import parameterized
from keras_cv.layers.preprocessing.base_image_augmentation_layer import (
BaseImageAugmentationLayer,
)
from keras_cv.layers.preprocessing.maybe_apply import MaybeApply
class ZeroOut(BaseImageAugmentationLayer):
"""Zero out all entries, for testing purposes."""
def __init__(self):
        super().__init__()
def augment_image(self, image, transformation=None, **kwargs):
return 0 * image
def augment_label(self, label, transformation=None, **kwargs):
return 0 * label
class MaybeApplyTest(tf.test.TestCase, parameterized.TestCase):
rng = tf.random.Generator.from_seed(seed=1234)
@parameterized.parameters([-0.5, 1.7])
def test_raises_error_on_invalid_rate_parameter(self, invalid_rate):
with self.assertRaises(ValueError):
MaybeApply(rate=invalid_rate, layer=ZeroOut())
def test_works_with_batched_input(self):
batch_size = 32
dummy_inputs = self.rng.uniform(shape=(batch_size, 224, 224, 3))
layer = MaybeApply(rate=0.5, layer=ZeroOut(), seed=1234)
outputs = layer(dummy_inputs)
num_zero_inputs = self._num_zero_batches(dummy_inputs)
num_zero_outputs = self._num_zero_batches(outputs)
self.assertEqual(num_zero_inputs, 0)
self.assertLess(num_zero_outputs, batch_size)
self.assertGreater(num_zero_outputs, 0)
@staticmethod
def _num_zero_batches(images):
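        # The inner count_nonzero yields per-image non-zero pixel counts; the
        # outer one counts images that still contain any non-zero content.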
num_batches = tf.shape(images)[0]
num_non_zero_batches = tf.math.count_nonzero(
tf.math.count_nonzero(images, axis=[1, 2, 3]), dtype=tf.int32
)
return num_batches - num_non_zero_batches
def test_inputs_unchanged_with_zero_rate(self):
dummy_inputs = self.rng.uniform(shape=(32, 224, 224, 3))
layer = MaybeApply(rate=0.0, layer=ZeroOut())
outputs = layer(dummy_inputs)
self.assertAllClose(outputs, dummy_inputs)
def test_all_inputs_changed_with_rate_equal_to_one(self):
dummy_inputs = self.rng.uniform(shape=(32, 224, 224, 3))
layer = MaybeApply(rate=1.0, layer=ZeroOut())
outputs = layer(dummy_inputs)
self.assertAllEqual(outputs, tf.zeros_like(dummy_inputs))
def test_works_with_single_image(self):
dummy_inputs = self.rng.uniform(shape=(224, 224, 3))
layer = MaybeApply(rate=1.0, layer=ZeroOut())
outputs = layer(dummy_inputs)
self.assertAllEqual(outputs, tf.zeros_like(dummy_inputs))
def test_can_modify_label(self):
dummy_inputs = self.rng.uniform(shape=(32, 224, 224, 3))
dummy_labels = tf.ones(shape=(32, 2))
layer = MaybeApply(rate=1.0, layer=ZeroOut())
outputs = layer({"images": dummy_inputs, "labels": dummy_labels})
self.assertAllEqual(outputs["labels"], tf.zeros_like(dummy_labels))
def test_works_with_native_keras_layers(self):
dummy_inputs = self.rng.uniform(shape=(32, 224, 224, 3))
zero_out = tf.keras.layers.Lambda(lambda x: {"images": 0 * x["images"]})
layer = MaybeApply(rate=1.0, layer=zero_out)
outputs = layer(dummy_inputs)
self.assertAllEqual(outputs, tf.zeros_like(dummy_inputs))
def test_works_with_xla(self):
dummy_inputs = self.rng.uniform(shape=(32, 224, 224, 3))
# auto_vectorize=True will crash XLA
layer = MaybeApply(rate=0.5, layer=ZeroOut(), auto_vectorize=False)
@tf.function(jit_compile=True)
def apply(x):
return layer(x)
apply(dummy_inputs)
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from keras_cv.layers.preprocessing.base_image_augmentation_layer import (
BaseImageAugmentationLayer,
)
@tf.keras.utils.register_keras_serializable(package="keras_cv")
class MixUp(BaseImageAugmentationLayer):
"""MixUp implements the MixUp data augmentation technique.
Args:
alpha: Float between 0 and 1. Inverse scale parameter for the gamma
distribution. This controls the shape of the distribution from which the
            smoothing values are sampled. Defaults to 0.2, which is a recommended
            value when training an imagenet1k classification model.
seed: Integer. Used to create a random seed.
References:
- [MixUp paper](https://arxiv.org/abs/1710.09412).
- [MixUp for Object Detection paper](https://arxiv.org/pdf/1902.04103).
Sample usage:
```python
    (images, labels), _ = tf.keras.datasets.cifar10.load_data()
    labels = tf.cast(tf.squeeze(tf.one_hot(labels, 10)), tf.float32)
    mixup = keras_cv.layers.preprocessing.MixUp(alpha=0.2)
    output = mixup({'images': images, 'labels': labels})
    # output == {'images': updated_images, 'labels': updated_labels}
```
"""
def __init__(self, alpha=0.2, seed=None, **kwargs):
super().__init__(seed=seed, **kwargs)
self.alpha = alpha
self.seed = seed
def _sample_from_beta(self, alpha, beta, shape):
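        # A Beta sample can be built from two gamma samples:
        # if X ~ Gamma(a) and Y ~ Gamma(b), then X / (X + Y) ~ Beta(a, b).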
sample_alpha = tf.random.gamma(
shape, 1.0, beta=alpha, seed=self._random_generator.make_legacy_seed()
)
sample_beta = tf.random.gamma(
shape, 1.0, beta=beta, seed=self._random_generator.make_legacy_seed()
)
return sample_alpha / (sample_alpha + sample_beta)
def _batch_augment(self, inputs):
self._validate_inputs(inputs)
images = inputs.get("images", None)
labels = inputs.get("labels", None)
bounding_boxes = inputs.get("bounding_boxes", None)
images, lambda_sample, permutation_order = self._mixup(images)
if labels is not None:
labels = self._update_labels(labels, lambda_sample, permutation_order)
inputs["labels"] = labels
if bounding_boxes is not None:
bounding_boxes = self._update_bounding_boxes(
bounding_boxes, permutation_order
)
inputs["bounding_boxes"] = bounding_boxes
inputs["images"] = images
return inputs
def _augment(self, inputs):
raise ValueError(
"MixUp received a single image to `call`. The layer relies on "
"combining multiple examples, and as such will not behave as "
"expected. Please call the layer with 2 or more samples."
)
def _mixup(self, images):
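        # MixUp pairs each image with a randomly permuted partner and blends
        # them: x_tilde = lambda * x_i + (1 - lambda) * x_j.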
batch_size = tf.shape(images)[0]
permutation_order = tf.random.shuffle(tf.range(0, batch_size), seed=self.seed)
lambda_sample = self._sample_from_beta(self.alpha, self.alpha, (batch_size,))
lambda_sample = tf.reshape(lambda_sample, [-1, 1, 1, 1])
mixup_images = tf.gather(images, permutation_order)
images = lambda_sample * images + (1.0 - lambda_sample) * mixup_images
return images, tf.squeeze(lambda_sample), permutation_order
def _update_labels(self, labels, lambda_sample, permutation_order):
labels_for_mixup = tf.gather(labels, permutation_order)
lambda_sample = tf.reshape(lambda_sample, [-1, 1])
labels = lambda_sample * labels + (1.0 - lambda_sample) * labels_for_mixup
return labels
def _update_bounding_boxes(self, bounding_boxes, permutation_order):
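        # Per the object-detection MixUp reference, boxes from both images are
        # kept (concatenated) rather than having their coordinates interpolated.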
boxes_for_mixup = tf.gather(bounding_boxes, permutation_order)
bounding_boxes = tf.concat([bounding_boxes, boxes_for_mixup], axis=1)
return bounding_boxes
def _validate_inputs(self, inputs):
images = inputs.get("images", None)
labels = inputs.get("labels", None)
bounding_boxes = inputs.get("bounding_boxes", None)
if images is None or (labels is None and bounding_boxes is None):
raise ValueError(
"MixUp expects inputs in a dictionary with format "
'{"images": images, "labels": labels}. or'
'{"images": images, "bounding_boxes": bounding_boxes}'
f"Got: inputs = {inputs}"
)
if labels is not None and not labels.dtype.is_floating:
raise ValueError(
f"MixUp received labels with type {labels.dtype}. "
"Labels must be of type float."
)
def get_config(self):
config = {
"alpha": self.alpha,
"seed": self.seed,
}
base_config = super().get_config()
return dict(list(base_config.items()) + list(config.items()))
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from keras_cv.layers.preprocessing.mix_up import MixUp
classes = 10
class MixUpTest(tf.test.TestCase):
def test_return_shapes(self):
xs = tf.ones((2, 512, 512, 3))
# randomly sample labels
ys_labels = tf.random.categorical(tf.math.log([[0.5, 0.5]]), 2)
ys_labels = tf.squeeze(ys_labels)
ys_labels = tf.one_hot(ys_labels, classes)
# randomly sample bounding boxes
ys_bounding_boxes = tf.random.uniform((2, 3, 5), 0, 1)
layer = MixUp()
# mixup on labels
outputs = layer(
{"images": xs, "labels": ys_labels, "bounding_boxes": ys_bounding_boxes}
)
xs, ys_labels, ys_bounding_boxes = (
outputs["images"],
outputs["labels"],
outputs["bounding_boxes"],
)
self.assertEqual(xs.shape, [2, 512, 512, 3])
self.assertEqual(ys_labels.shape, [2, 10])
self.assertEqual(ys_bounding_boxes.shape, [2, 6, 5])
def test_mix_up_call_results(self):
xs = tf.cast(
tf.stack(
[2 * tf.ones((4, 4, 3)), tf.ones((4, 4, 3))],
axis=0,
),
tf.float32,
)
ys = tf.one_hot(tf.constant([0, 1]), 2)
layer = MixUp()
outputs = layer({"images": xs, "labels": ys})
xs, ys = outputs["images"], outputs["labels"]
        # The mixed images should no longer be all-close to either original value
self.assertNotAllClose(xs, 1.0)
self.assertNotAllClose(xs, 2.0)
# No labels should still be close to their originals
self.assertNotAllClose(ys, 1.0)
self.assertNotAllClose(ys, 0.0)
def test_in_tf_function(self):
xs = tf.cast(
tf.stack(
[2 * tf.ones((4, 4, 3)), tf.ones((4, 4, 3))],
axis=0,
),
tf.float32,
)
ys = tf.one_hot(tf.constant([0, 1]), 2)
layer = MixUp()
@tf.function
def augment(x, y):
return layer({"images": x, "labels": y})
outputs = augment(xs, ys)
xs, ys = outputs["images"], outputs["labels"]
        # The mixed images should no longer be all-close to either original value
self.assertNotAllClose(xs, 1.0)
self.assertNotAllClose(xs, 2.0)
# No labels should still be close to their originals
self.assertNotAllClose(ys, 1.0)
self.assertNotAllClose(ys, 0.0)
def test_image_input_only(self):
xs = tf.cast(
tf.stack([2 * tf.ones((100, 100, 1)), tf.ones((100, 100, 1))], axis=0),
tf.float32,
)
layer = MixUp()
        with self.assertRaisesRegex(ValueError, "expects inputs in a dictionary"):
_ = layer(xs)
def test_single_image_input(self):
xs = tf.ones((512, 512, 3))
ys = tf.one_hot(tf.constant([1]), 2)
inputs = {"images": xs, "labels": ys}
layer = MixUp()
        with self.assertRaisesRegex(
ValueError, "MixUp received a single image to `call`"
):
_ = layer(inputs)
def test_int_labels(self):
xs = tf.ones((2, 512, 512, 3))
ys = tf.one_hot(tf.constant([1, 0]), 2, dtype=tf.int32)
inputs = {"images": xs, "labels": ys}
layer = MixUp()
        with self.assertRaisesRegex(ValueError, "MixUp received labels with type"):
_ = layer(inputs)
def test_image_input(self):
xs = tf.ones((2, 512, 512, 3))
layer = MixUp()
        with self.assertRaisesRegex(
ValueError, "MixUp expects inputs in a dictionary with format"
):
_ = layer(xs)
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from keras_cv import bounding_box
from keras_cv.layers.preprocessing.base_image_augmentation_layer import (
BaseImageAugmentationLayer,
)
from keras_cv.utils import preprocessing
@tf.keras.utils.register_keras_serializable(package="keras_cv")
class Mosaic(BaseImageAugmentationLayer):
"""Mosaic implements the mosaic data augmentation technique.
    Mosaic data augmentation first takes 4 images from the batch and arranges them
    in a 2x2 grid. Then, based on the sampled offset, a crop is taken to form the
    mosaic image. Labels are mixed in proportion to the area each source image
    occupies in the output image, and bounding boxes are translated according to
    the position of the 4 images.
Args:
offset: A tuple of two floats, a single float or `keras_cv.FactorSampler`.
`offset` is used to determine the offset of the mosaic center from the
top-left corner of the mosaic. If a tuple is used, the x and y coordinates
of the mosaic center are sampled between the two values for every image
augmented. If a single float is used, a value between `0.0` and the passed
float is sampled. In order to ensure the value is always the same, please
pass a tuple with two identical floats: `(0.5, 0.5)`. Defaults to
(0.25, 0.75).
bounding_box_format: a case-insensitive string (for example, "xyxy") to be
passed if bounding boxes are being augmented by this layer.
Each bounding box is defined by at least these 4 values. The inputs
may contain additional information such as classes and confidence after
these 4 values but these values will be ignored and returned as is. For
detailed information on the supported formats, see the
[KerasCV bounding box documentation](https://keras.io/api/keras_cv/bounding_box/formats/).
            Defaults to None.
seed: Integer. Used to create a random seed.
References:
- [Yolov4 paper](https://arxiv.org/pdf/2004.10934).
- [Yolov5 implementation](https://github.com/ultralytics/yolov5).
- [YoloX implementation](https://github.com/Megvii-BaseDetection/YOLOX)
Sample usage:
```python
(images, labels), _ = tf.keras.datasets.cifar10.load_data()
    labels = tf.one_hot(labels, 10)
labels = tf.cast(tf.squeeze(labels), tf.float32)
mosaic = keras_cv.layers.preprocessing.Mosaic()
output = mosaic({'images': images, 'labels': labels})
# output == {'images': updated_images, 'labels': updated_labels}
```
"""
def __init__(
self, offset=(0.25, 0.75), bounding_box_format=None, seed=None, **kwargs
):
super().__init__(seed=seed, **kwargs)
self.offset = offset
self.bounding_box_format = bounding_box_format
self.center_sampler = preprocessing.parse_factor(offset)
self.seed = seed
def _batch_augment(self, inputs):
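        # Each output image is a mosaic built from itself plus 3 images
        # randomly sampled (with replacement) from the same batch.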
self._validate_inputs(inputs)
images = inputs.get("images", None)
labels = inputs.get("labels", None)
bounding_boxes = inputs.get("bounding_boxes", None)
batch_size = tf.shape(images)[0]
        # pick 3 additional indices for every image in the batch to build its mosaic
permutation_order = tf.random.uniform(
(batch_size, 3),
minval=0,
maxval=batch_size,
dtype=tf.int32,
seed=self._random_generator.make_legacy_seed(),
)
        # prepend each image's own index so every row holds the 4 source images of one mosaic
permutation_order = tf.concat(
[tf.expand_dims(tf.range(batch_size), axis=-1), permutation_order], axis=-1
)
input_height, input_width, _ = images.shape[1:]
        mosaic_centers_x = (
            self.center_sampler(shape=tf.expand_dims(batch_size, axis=0)) * input_width
        )
        mosaic_centers_y = (
            self.center_sampler(shape=tf.expand_dims(batch_size, axis=0)) * input_height
        )
mosaic_centers = tf.stack((mosaic_centers_x, mosaic_centers_y), axis=-1)
        # build one mosaic per image in the batch
images = tf.vectorized_map(
lambda index: self._update_image(
images, permutation_order, mosaic_centers, index
),
tf.range(batch_size),
)
if labels is not None:
labels = tf.vectorized_map(
lambda index: self._update_label(
images, labels, permutation_order, mosaic_centers, index
),
tf.range(batch_size),
)
inputs["labels"] = labels
if bounding_boxes is not None:
# values to translate the boxes by in the mosaic image
translate_x = tf.stack(
[
mosaic_centers_x - input_width,
mosaic_centers_x,
mosaic_centers_x - input_width,
mosaic_centers_x,
],
axis=-1,
)
translate_y = tf.stack(
[
mosaic_centers_y - input_height,
mosaic_centers_y - input_height,
mosaic_centers_y,
mosaic_centers_y,
],
axis=-1,
)
bounding_boxes = tf.vectorized_map(
lambda index: self._update_bounding_box(
images,
bounding_boxes,
permutation_order,
translate_x,
translate_y,
index,
),
tf.range(batch_size),
)
inputs["bounding_boxes"] = bounding_boxes
inputs["images"] = images
return inputs
def _augment(self, inputs):
raise ValueError(
"Mosaic received a single image to `call`. The layer relies on "
"combining multiple examples, and as such will not behave as "
"expected. Please call the layer with 4 or more samples."
)
def _update_image(self, images, permutation_order, mosaic_centers, index):
# forms mosaic for one image from the batch
input_height, input_width, _ = images.shape[1:]
mosaic_images = tf.gather(images, permutation_order[index])
top = tf.concat([mosaic_images[0], mosaic_images[1]], axis=1)
bottom = tf.concat([mosaic_images[2], mosaic_images[3]], axis=1)
output = tf.concat([top, bottom], axis=0)
# cropping coordinates for the mosaic
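        # tf.image.crop_and_resize takes normalized box coordinates where 1.0
        # maps to (dimension - 1); the stacked grid is (2 * input_height, 2 * input_width)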
x1 = (input_width - mosaic_centers[index][0]) / (input_width * 2 - 1)
y1 = (input_height - mosaic_centers[index][1]) / (input_height * 2 - 1)
x2 = x1 + (input_width) / (input_width * 2 - 1)
y2 = y1 + (input_height) / (input_height * 2 - 1)
# helps avoid retracing caused by slicing, inspired by RRC implementation
output = tf.image.crop_and_resize(
tf.expand_dims(output, axis=0),
[[y1, x1, y2, x2]],
[0],
[input_height, input_width],
)
return tf.squeeze(output)
def _update_label(self, images, labels, permutation_order, mosaic_centers, index):
# updates labels for one output mosaic
input_height, input_width, _ = images.shape[1:]
labels_for_mosaic = tf.gather(labels, permutation_order[index])
center_x = mosaic_centers[index][0]
center_y = mosaic_centers[index][1]
area = input_height * input_width
# labels are in the same ratio as the area of the images
top_left_ratio = (center_x * center_y) / area
top_right_ratio = ((input_width - center_x) * center_y) / area
bottom_left_ratio = (center_x * (input_height - center_y)) / area
bottom_right_ratio = (
(input_width - center_x) * (input_height - center_y)
) / area
label = (
labels_for_mosaic[0] * top_left_ratio
+ labels_for_mosaic[1] * top_right_ratio
+ labels_for_mosaic[2] * bottom_left_ratio
+ labels_for_mosaic[3] * bottom_right_ratio
)
return label
def _update_bounding_box(
self, images, bounding_boxes, permutation_order, translate_x, translate_y, index
):
# updates bboxes for one output mosaic
bounding_boxes = bounding_box.convert_format(
bounding_boxes,
source=self.bounding_box_format,
target="xyxy",
images=images,
)
boxes_for_mosaic = tf.gather(bounding_boxes, permutation_order[index])
boxes_for_mosaic, rest = tf.split(
boxes_for_mosaic, [4, bounding_boxes.shape[-1] - 4], axis=-1
)
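        # only the 4 coordinates are translated; extra fields in `rest`
        # (e.g. class, confidence) pass through unchanged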
# stacking translate values such that the shape is (4, 1, 4) or (num_images, broadcast dim, coordinates)
translate_values = tf.stack(
[
translate_x[index],
translate_y[index],
translate_x[index],
translate_y[index],
],
axis=-1,
)
translate_values = tf.expand_dims(translate_values, axis=1)
# translating boxes
boxes_for_mosaic = boxes_for_mosaic + translate_values
boxes_for_mosaic = tf.concat([boxes_for_mosaic, rest], axis=-1)
boxes_for_mosaic = tf.reshape(boxes_for_mosaic, [-1, bounding_boxes.shape[-1]])
boxes_for_mosaic = bounding_box.clip_to_image(
boxes_for_mosaic,
bounding_box_format="xyxy",
images=images[index],
)
boxes_for_mosaic = bounding_box.convert_format(
boxes_for_mosaic,
source="xyxy",
target=self.bounding_box_format,
images=images[index],
)
return boxes_for_mosaic
def _validate_inputs(self, inputs):
images = inputs.get("images", None)
labels = inputs.get("labels", None)
bounding_boxes = inputs.get("bounding_boxes", None)
if images is None or (labels is None and bounding_boxes is None):
raise ValueError(
"Mosaic expects inputs in a dictionary with format "
'{"images": images, "labels": labels}. or'
'{"images": images, "bounding_boxes": bounding_boxes}'
f"Got: inputs = {inputs}"
)
if labels is not None and not labels.dtype.is_floating:
raise ValueError(
f"Mosaic received labels with type {labels.dtype}. "
"Labels must be of type float."
)
if bounding_boxes is not None and self.bounding_box_format is None:
raise ValueError(
"Mosaic received bounding boxes but no bounding_box_format. "
"Please pass a bounding_box_format from the supported list."
)
def get_config(self):
config = {
"offset": self.offset,
"bounding_box_format": self.bounding_box_format,
"seed": self.seed,
}
base_config = super().get_config()
return dict(list(base_config.items()) + list(config.items()))
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
import keras_cv
from keras_cv.layers.preprocessing.mosaic import Mosaic
classes = 10
class MosaicTest(tf.test.TestCase):
def DISABLED_test_integration_retina_net(self):
train_ds, train_dataset_info = keras_cv.datasets.pascal_voc.load(
bounding_box_format="xywh", split="train", batch_size=9
)
mosaic = keras_cv.layers.Mosaic(bounding_box_format="xywh")
train_ds = train_ds.map(mosaic, num_parallel_calls=tf.data.AUTOTUNE)
train_ds = train_ds.map(
lambda inputs: (inputs["images"], inputs["bounding_boxes"]),
num_parallel_calls=tf.data.AUTOTUNE,
)
model = keras_cv.models.RetinaNet(
classes=20,
bounding_box_format="xywh",
backbone="resnet50",
backbone_weights="imagenet",
include_rescaling=True,
evaluate_train_time_metrics=False,
)
model.backbone.trainable = False
optimizer = tf.optimizers.SGD(global_clipnorm=10.0)
model.compile(
run_eagerly=True,
classification_loss=keras_cv.losses.FocalLoss(
from_logits=True, reduction="none"
),
box_loss=keras_cv.losses.SmoothL1Loss(l1_cutoff=1.0, reduction="none"),
optimizer=optimizer,
)
callbacks = [
tf.keras.callbacks.ReduceLROnPlateau(patience=5),
tf.keras.callbacks.TerminateOnNaN(),
]
history = model.fit(train_ds, epochs=20, callbacks=callbacks)
for loss in history.history["loss"]:
self.assertFalse(tf.math.is_nan(loss))
def test_return_shapes(self):
xs = tf.ones((2, 512, 512, 3))
# randomly sample labels
ys_labels = tf.random.categorical(tf.math.log([[0.5, 0.5]]), 2)
ys_labels = tf.squeeze(ys_labels)
ys_labels = tf.one_hot(ys_labels, classes)
# randomly sample bounding boxes
ys_bounding_boxes = tf.random.uniform((2, 3, 5), 0, 1)
layer = Mosaic(bounding_box_format="xywh")
# mosaic on labels
outputs = layer(
{"images": xs, "labels": ys_labels, "bounding_boxes": ys_bounding_boxes}
)
xs, ys_labels, ys_bounding_boxes = (
outputs["images"],
outputs["labels"],
outputs["bounding_boxes"],
)
self.assertEqual(xs.shape, [2, 512, 512, 3])
self.assertEqual(ys_labels.shape, [2, 10])
self.assertEqual(ys_bounding_boxes.shape, [2, 12, 5])
def test_in_tf_function(self):
xs = tf.cast(
tf.stack(
[2 * tf.ones((4, 4, 3)), tf.ones((4, 4, 3))],
axis=0,
),
tf.float32,
)
ys = tf.one_hot(tf.constant([0, 1]), 2)
layer = Mosaic()
@tf.function
def augment(x, y):
return layer({"images": x, "labels": y})
outputs = augment(xs, ys)
xs, ys = outputs["images"], outputs["labels"]
self.assertEqual(xs.shape, [2, 4, 4, 3])
self.assertEqual(ys.shape, [2, 2])
def test_image_input_only(self):
xs = tf.cast(
tf.stack([2 * tf.ones((100, 100, 1)), tf.ones((100, 100, 1))], axis=0),
tf.float32,
)
layer = Mosaic()
        with self.assertRaisesRegex(ValueError, "expects inputs in a dictionary"):
_ = layer(xs)
def test_single_image_input(self):
xs = tf.ones((512, 512, 3))
ys = tf.one_hot(tf.constant([1]), 2)
inputs = {"images": xs, "labels": ys}
layer = Mosaic()
        with self.assertRaisesRegex(
ValueError, "Mosaic received a single image to `call`"
):
_ = layer(inputs)
def test_int_labels(self):
xs = tf.ones((2, 512, 512, 3))
ys = tf.one_hot(tf.constant([1, 0]), 2, dtype=tf.int32)
inputs = {"images": xs, "labels": ys}
layer = Mosaic()
        with self.assertRaisesRegex(ValueError, "Mosaic received labels with type"):
_ = layer(inputs)
def test_image_input(self):
xs = tf.ones((2, 512, 512, 3))
layer = Mosaic()
        with self.assertRaisesRegex(
ValueError, "Mosaic expects inputs in a dictionary with format"
):
_ = layer(xs)
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from keras_cv.layers.preprocessing.base_image_augmentation_layer import (
BaseImageAugmentationLayer,
)
from keras_cv.utils.preprocessing import transform_value_range
@tf.keras.utils.register_keras_serializable(package="keras_cv")
class Posterization(BaseImageAugmentationLayer):
"""Reduces the number of bits for each color channel.
References:
- [AutoAugment: Learning Augmentation Policies from Data](
https://arxiv.org/abs/1805.09501
)
- [RandAugment: Practical automated data augmentation with a reduced search space](
https://arxiv.org/abs/1909.13719
)
Args:
value_range: a tuple or a list of two elements. The first value represents
the lower bound for values in passed images, the second represents the
upper bound. Images passed to the layer should have values within
`value_range`. Defaults to `(0, 255)`.
bits: integer. The number of bits to keep for each channel. Must be a value
            between 1 and 8.
Usage:
```python
(images, labels), _ = tf.keras.datasets.cifar10.load_data()
print(images[0, 0, 0])
# [59 62 63]
# Note that images are Tensors with values in the range [0, 255] and uint8 dtype
posterization = Posterization(bits=4, value_range=[0, 255])
images = posterization(images)
print(images[0, 0, 0])
# [48., 48., 48.]
# NOTE: the layer will output values in tf.float32, regardless of input dtype.
```
Call arguments:
inputs: input tensor in two possible formats:
1. single 3D (HWC) image or 4D (NHWC) batch of images.
2. A dict of tensors where the images are under `"images"` key.
"""
def __init__(self, value_range, bits, **kwargs):
super().__init__(**kwargs)
        if len(value_range) != 2:
raise ValueError(
"value_range must be a sequence of two elements. "
f"Received: {value_range}"
)
if not (0 < bits < 9):
raise ValueError(f"Bits value must be between 1-8. Received bits: {bits}.")
self._shift = 8 - bits
self._value_range = value_range
def augment_image(self, image, **kwargs):
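        # map the image into [0, 255], quantize in uint8, then map the result
        # back to the caller's value range in the layer's compute dtype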
image = transform_value_range(
images=image,
original_range=self._value_range,
target_range=[0, 255],
)
image = tf.cast(image, tf.uint8)
image = self._posterize(image)
image = tf.cast(image, self.compute_dtype)
return transform_value_range(
images=image,
original_range=[0, 255],
target_range=self._value_range,
)
def augment_bounding_boxes(self, bounding_boxes, **kwargs):
return bounding_boxes
def augment_segmentation_mask(self, segmentation_mask, transformation, **kwargs):
return segmentation_mask
def _batch_augment(self, inputs):
# Skip the use of vectorized_map or map_fn as the implementation is already
# vectorized
return self._augment(inputs)
def _posterize(self, image):
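        # keep only the `bits` most significant bits of each channel: the right
        # shift discards the low bits and the left shift restores the scale,
        # e.g. with bits=4 (shift=4): 59 -> 48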
return tf.bitwise.left_shift(
tf.bitwise.right_shift(image, self._shift), self._shift
)
def augment_label(self, label, transformation=None, **kwargs):
return label
def get_config(self):
config = {"bits": 8 - self._shift, "value_range": self._value_range}
base_config = super().get_config()
return dict(list(base_config.items()) + list(config.items()))