Commit 0016b0a7 authored by sunxx1's avatar sunxx1
Browse files

Merge branch 'dtk22.04' into 'main'

Dtk22.04

See merge request dcutoolkit/deeplearing/dlexamples_new!49
parents 17bc28d5 7a382d5d
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import tensorflow as tf
from keras_cv.layers.preprocessing.posterization import Posterization
class PosterizationTest(tf.test.TestCase):
    """Unit tests for the `Posterization` preprocessing layer."""

    # Shared non-deterministic RNG used to draw random test images and bit counts.
    rng = tf.random.Generator.from_non_deterministic_state()

    def test_raises_error_on_invalid_bits_parameter(self):
        # `bits` outside [1, 8] must be rejected at construction time.
        invalid_values = [-1, 0, 9, 24]
        for value in invalid_values:
            with self.assertRaises(ValueError):
                Posterization(bits=value, value_range=[0, 1])

    def test_raises_error_on_invalid_value_range(self):
        # `value_range` must be a two-element (low, high) pair.
        invalid_ranges = [(1,), [1, 2, 3]]
        for value_range in invalid_ranges:
            with self.assertRaises(ValueError):
                Posterization(bits=1, value_range=value_range)

    def test_single_image(self):
        bits = self._get_random_bits()
        dummy_input = self.rng.uniform(shape=(224, 224, 3), maxval=256)
        expected_output = self._calc_expected_output(dummy_input, bits=bits)
        layer = Posterization(bits=bits, value_range=[0, 255])
        output = layer(dummy_input)
        self.assertAllEqual(output, expected_output)

    def _get_random_bits(self):
        # Draws a valid `bits` value uniformly from [1, 8] (maxval is exclusive).
        return int(self.rng.uniform(shape=(), minval=1, maxval=9, dtype=tf.int32))

    def test_single_image_rescaled(self):
        # Inputs in [0, 1] should match the [0, 255] reference after rescaling.
        bits = self._get_random_bits()
        dummy_input = self.rng.uniform(shape=(224, 224, 3), maxval=1.0)
        expected_output = self._calc_expected_output(dummy_input * 255, bits=bits) / 255
        layer = Posterization(bits=bits, value_range=[0, 1])
        output = layer(dummy_input)
        self.assertAllClose(output, expected_output)

    def test_batched_input(self):
        bits = self._get_random_bits()
        dummy_input = self.rng.uniform(shape=(2, 224, 224, 3), maxval=256)
        expected_output = []
        for image in dummy_input:
            expected_output.append(self._calc_expected_output(image, bits=bits))
        expected_output = tf.stack(expected_output)
        layer = Posterization(bits=bits, value_range=[0, 255])
        output = layer(dummy_input)
        self.assertAllEqual(output, expected_output)

    def test_works_with_xla(self):
        # Smoke test: the layer must be traceable under XLA JIT compilation.
        dummy_input = self.rng.uniform(shape=(2, 224, 224, 3))
        layer = Posterization(bits=4, value_range=[0, 1])

        @tf.function(jit_compile=True)
        def apply(x):
            return layer(x)

        apply(dummy_input)

    @staticmethod
    def _calc_expected_output(image, bits):
        """Posterization in numpy, based on Albumentations:
        The algorithm is basically:
        1. create a lookup table of all possible input pixel values to pixel values
        after posterize
        2. map each pixel in the input to created lookup table.
        Source:
        https://github.com/albumentations-team/albumentations/blob/89a675cbfb2b76f6be90e7049cd5211cb08169a5/albumentations/augmentations/functional.py#L407
        """
        dtype = image.dtype
        image = tf.cast(image, tf.uint8)
        lookup_table = np.arange(0, 256, dtype=np.uint8)
        # Zero out the lowest (8 - bits) bits of every table entry.
        mask = ~np.uint8(2 ** (8 - bits) - 1)
        lookup_table &= mask
        return tf.cast(lookup_table[image], dtype)
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from keras_cv import core
from keras_cv.layers import preprocessing as cv_preprocessing
from keras_cv.layers.preprocessing.random_augmentation_pipeline import (
RandomAugmentationPipeline,
)
from keras_cv.utils import preprocessing as preprocessing_utils
@tf.keras.utils.register_keras_serializable(package="keras_cv")
class RandAugment(RandomAugmentationPipeline):
    """RandAugment performs the Rand Augment operation on input images.

    This layer can be thought of as an all in one image augmentation layer. The policy
    implemented by this layer has been benchmarked extensively and is effective on a
    wide variety of datasets.

    The policy operates as follows:
    For each augmentation in the range `[0, augmentations_per_image]`,
    the policy selects a random operation from a list of operations.
    It then samples a random number and if that number is less than
    `rate` applies it to the given image.

    References:
        - [RandAugment](https://arxiv.org/abs/1909.13719)

    Args:
        value_range: the range of values the incoming images will have.
            Represented as a two number tuple written [low, high].
            This is typically either `[0, 1]` or `[0, 255]` depending
            on how your preprocessing pipeline is setup.
        augmentations_per_image: the number of layers to use in the rand augment policy.
            Defaults to `3`.
        magnitude: magnitude is the mean of the normal distribution used to sample the
            magnitude used for each data augmentation. magnitude should
            be a float in the range `[0, 1]`. A magnitude of `0` indicates that the
            augmentations are as weak as possible (not recommended), while a value of
            `1.0` implies use of the strongest possible augmentation. All magnitudes
            are clipped to the range `[0, 1]` after sampling. Defaults to `0.5`.
        magnitude_stddev: the standard deviation to use when drawing values
            for the perturbations. Keep in mind magnitude will still be clipped to the
            range `[0, 1]` after samples are drawn from the normal distribution.
            Defaults to `0.15`.
        rate: the rate at which to apply each augmentation. This parameter is applied
            on a per-distortion layer, per image. Should be in the range `[0, 1]`.
            To reproduce the original RandAugment paper results, set this to `10/11`.
            The original `RandAugment` paper includes an Identity transform. By setting
            the rate to 10/11 in our implementation, the behavior is identical to
            sampling an Identity augmentation 10/11th of the time.
            Defaults to `1.0`.
        geometric: whether or not to include geometric augmentations. This should be
            set to False when performing object detection. Defaults to True.

    Usage:
    ```python
    (x_test, y_test), _ = tf.keras.datasets.cifar10.load_data()
    rand_augment = keras_cv.layers.RandAugment(
        value_range=(0, 255), augmentations_per_image=3, magnitude=0.5
    )
    x_test = rand_augment(x_test)
    ```
    """

    def __init__(
        self,
        value_range,
        augmentations_per_image=3,
        magnitude=0.5,
        magnitude_stddev=0.15,
        rate=10 / 11,
        geometric=True,
        seed=None,
        **kwargs,
    ):
        # As an optimization RandAugment makes all internal layers operate on
        # (0, 255) and handles the value-range transformation at the
        # `_augment` level.
        if magnitude < 0.0 or magnitude > 1:
            raise ValueError(
                f"`magnitude` must be in the range [0, 1], got `magnitude={magnitude}`"
            )
        if magnitude_stddev < 0.0 or magnitude_stddev > 1:
            raise ValueError(
                "`magnitude_stddev` must be in the range [0, 1], got "
                # Bug fix: previously this interpolated `magnitude` instead of
                # `magnitude_stddev`, producing a misleading error message.
                f"`magnitude_stddev={magnitude_stddev}`"
            )
        super().__init__(
            layers=RandAugment.get_standard_policy(
                (0, 255), magnitude, magnitude_stddev, geometric=geometric, seed=seed
            ),
            augmentations_per_image=augmentations_per_image,
            rate=rate,
            **kwargs,
            seed=seed,
        )
        self.magnitude = float(magnitude)
        self.value_range = value_range
        self.seed = seed
        self.geometric = geometric
        self.magnitude_stddev = float(magnitude_stddev)

    def _augment(self, sample):
        # Normalize to the internal (0, 255) range, run the pipeline, then map
        # the result back to the caller's value range.
        sample["images"] = preprocessing_utils.transform_value_range(
            sample["images"], self.value_range, (0, 255)
        )
        result = super()._augment(sample)
        result["images"] = preprocessing_utils.transform_value_range(
            result["images"], (0, 255), self.value_range
        )
        return result

    @staticmethod
    def get_standard_policy(
        value_range, magnitude, magnitude_stddev, geometric=True, seed=None
    ):
        """Build the standard list of RandAugment layers for a given magnitude.

        Returns photometric layers always, plus shear/translation layers when
        `geometric=True`.
        """
        policy = create_rand_augment_policy(magnitude, magnitude_stddev)
        auto_contrast = cv_preprocessing.AutoContrast(
            **policy["auto_contrast"], value_range=value_range, seed=seed
        )
        equalize = cv_preprocessing.Equalization(
            **policy["equalize"], value_range=value_range, seed=seed
        )
        solarize = cv_preprocessing.Solarization(
            **policy["solarize"], value_range=value_range, seed=seed
        )
        color = cv_preprocessing.RandomColorDegeneration(**policy["color"], seed=seed)
        contrast = cv_preprocessing.RandomContrast(**policy["contrast"], seed=seed)
        brightness = cv_preprocessing.RandomBrightness(
            **policy["brightness"], value_range=value_range, seed=seed
        )
        layers = [
            auto_contrast,
            equalize,
            solarize,
            color,
            contrast,
            brightness,
        ]
        if geometric:
            shear_x = cv_preprocessing.RandomShear(**policy["shear_x"], seed=seed)
            shear_y = cv_preprocessing.RandomShear(**policy["shear_y"], seed=seed)
            translate_x = cv_preprocessing.RandomTranslation(
                **policy["translate_x"], seed=seed
            )
            translate_y = cv_preprocessing.RandomTranslation(
                **policy["translate_y"], seed=seed
            )
            layers += [shear_x, shear_y, translate_x, translate_y]
        return layers

    def get_config(self):
        config = super().get_config()
        config.update(
            {
                "value_range": self.value_range,
                "augmentations_per_image": self.augmentations_per_image,
                "magnitude": self.magnitude,
                "magnitude_stddev": self.magnitude_stddev,
                "rate": self.rate,
                "geometric": self.geometric,
                "seed": self.seed,
            }
        )
        # layers is recreated in the constructor
        del config["layers"]
        return config
def auto_contrast_policy(magnitude, magnitude_stddev):
    """AutoContrast takes no magnitude-dependent keyword arguments."""
    del magnitude, magnitude_stddev  # unused
    return {}
def equalize_policy(magnitude, magnitude_stddev):
    """Equalization takes no magnitude-dependent keyword arguments."""
    del magnitude, magnitude_stddev  # unused
    return {}
def solarize_policy(magnitude, magnitude_stddev):
    """Build `Solarization` kwargs from the policy magnitude.

    Additions are capped at 110, because adding more than 110 would nearly
    nullify the information contained in the image, making the model train
    on noise.
    """
    addition_cap = 110
    return {
        "addition_factor": core.NormalFactorSampler(
            mean=magnitude * addition_cap,
            stddev=magnitude_stddev * addition_cap,
            min_value=0,
            max_value=addition_cap,
        ),
        "threshold_factor": core.NormalFactorSampler(
            mean=255 - (magnitude * 255),
            stddev=magnitude_stddev * 255,
            min_value=0,
            max_value=255,
        ),
    }
def color_policy(magnitude, magnitude_stddev):
    """Build `RandomColorDegeneration` kwargs from the policy magnitude."""
    return {
        "factor": core.NormalFactorSampler(
            mean=magnitude,
            stddev=magnitude_stddev,
            min_value=0,
            max_value=1,
        )
    }
def contrast_policy(magnitude, magnitude_stddev):
    """Build `RandomContrast` kwargs; `magnitude_stddev` is currently unused."""
    # TODO(lukewood): should we integrate RandomContrast with `factor`?
    # RandomContrast layer errors when factor=0, so clamp to a small positive value.
    return {"factor": magnitude if magnitude > 0.001 else 0.001}
def brightness_policy(magnitude, magnitude_stddev):
    """Build `RandomBrightness` kwargs; `magnitude_stddev` is currently unused."""
    # TODO(lukewood): should we integrate RandomBrightness with `factor`?
    del magnitude_stddev  # unused
    return dict(factor=magnitude)
def shear_x_policy(magnitude, magnitude_stddev):
    """Build `RandomShear` kwargs that shear along the x axis only."""
    x_sampler = core.NormalFactorSampler(
        mean=magnitude,
        stddev=magnitude_stddev,
        min_value=0,
        max_value=1,
    )
    return {"x_factor": x_sampler, "y_factor": 0}
def shear_y_policy(magnitude, magnitude_stddev):
    """Build `RandomShear` kwargs that shear along the y axis only."""
    y_sampler = core.NormalFactorSampler(
        mean=magnitude,
        stddev=magnitude_stddev,
        min_value=0,
        max_value=1,
    )
    return {"x_factor": 0, "y_factor": y_sampler}
def translate_x_policy(magnitude, magnitude_stddev):
    """Build `RandomTranslation` kwargs that translate horizontally only."""
    # TODO(lukewood): should we integrate RandomTranslation with `factor`?
    del magnitude_stddev  # unused
    return {"height_factor": 0, "width_factor": magnitude}
def translate_y_policy(magnitude, magnitude_stddev):
    """Build `RandomTranslation` kwargs that translate vertically only."""
    # TODO(lukewood): should we integrate RandomTranslation with `factor`?
    del magnitude_stddev  # unused
    return {"height_factor": magnitude, "width_factor": 0}
# Maps each standard-policy augmentation name to the function that builds the
# keyword arguments for its layer from (magnitude, magnitude_stddev).
POLICY_PAIRS = {
    "auto_contrast": auto_contrast_policy,
    "equalize": equalize_policy,
    "solarize": solarize_policy,
    "color": color_policy,
    "contrast": contrast_policy,
    "brightness": brightness_policy,
    "shear_x": shear_x_policy,
    "shear_y": shear_y_policy,
    "translate_x": translate_x_policy,
    "translate_y": translate_y_policy,
}
def create_rand_augment_policy(magnitude, magnitude_stddev):
    """Evaluate every policy function in `POLICY_PAIRS` at the given magnitude."""
    return {
        name: policy_fn(magnitude, magnitude_stddev)
        for name, policy_fn in POLICY_PAIRS.items()
    }
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from absl.testing import parameterized
from keras_cv import layers
class RandAugmentTest(tf.test.TestCase, parameterized.TestCase):
    """Unit tests for the `RandAugment` preprocessing layer."""

    @parameterized.named_parameters(
        ("0", 0),
        ("20", 0.2),
        ("55", 0.55),
        ("10", 1.0),
    )
    def test_runs_with_magnitude(self, magnitude):
        rand_augment = layers.RandAugment(
            value_range=(0, 255), rate=0.5, magnitude=magnitude
        )
        xs = tf.ones((2, 512, 512, 3))
        ys = rand_augment(xs)
        self.assertEqual(ys.shape, (2, 512, 512, 3))

    @parameterized.named_parameters(
        ("0_255", 0, 255),
        ("neg_1_1", -1, 1),
        ("0_1", 0, 1),
    )
    def test_runs_with_value_range(self, low, high):
        # Augmented outputs must stay within the declared value range.
        rand_augment = layers.RandAugment(
            augmentations_per_image=3, magnitude=0.5, rate=1.0, value_range=(low, high)
        )
        xs = tf.random.uniform((2, 512, 512, 3), low, high, dtype=tf.float32)
        ys = rand_augment(xs)
        self.assertTrue(tf.math.reduce_all(tf.logical_and(ys >= low, ys <= high)))

    @parameterized.named_parameters(
        ("float32", tf.float32),
        ("int32", tf.int32),
        ("uint8", tf.uint8),
    )
    def test_runs_with_dtype_input(self, dtype):
        rand_augment = layers.RandAugment(value_range=(0, 255))
        xs = tf.ones((2, 512, 512, 3), dtype=dtype)
        ys = rand_augment(xs)
        self.assertEqual(ys.shape, (2, 512, 512, 3))

    @parameterized.named_parameters(
        ("0_255", 0, 255),
        ("neg1_1", -1, 1),
        ("0_1", 0, 1),
    )
    def test_standard_policy_respects_value_range(self, lower, upper):
        # Same range check as above, but running the standard policy layers
        # through a raw RandomAugmentationPipeline.
        my_layers = layers.RandAugment.get_standard_policy(
            value_range=(lower, upper), magnitude=1.0, magnitude_stddev=0.2
        )
        rand_augment = layers.RandomAugmentationPipeline(
            layers=my_layers, augmentations_per_image=3
        )
        xs = tf.random.uniform((2, 512, 512, 3), lower, upper, dtype=tf.float32)
        ys = rand_augment(xs)
        self.assertLessEqual(tf.math.reduce_max(ys), upper)
        self.assertGreaterEqual(tf.math.reduce_min(ys), lower)

    def test_runs_unbatched(self):
        rand_augment = layers.RandAugment(
            augmentations_per_image=3, magnitude=0.5, rate=1.0, value_range=(0, 255)
        )
        xs = tf.random.uniform((512, 512, 3), 0, 255, dtype=tf.float32)
        ys = rand_augment(xs)
        self.assertEqual(xs.shape, ys.shape)

    def test_runs_no_geo(self):
        # With geometric=False no shear/translation layers should be present.
        rand_augment = layers.RandAugment(
            augmentations_per_image=2,
            magnitude=0.5,
            rate=1.0,
            geometric=False,
            value_range=(0, 255),
        )
        self.assertFalse(
            any([isinstance(x, layers.RandomTranslation) for x in rand_augment.layers])
        )
        self.assertFalse(
            any([isinstance(x, layers.RandomShear) for x in rand_augment.layers])
        )
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from keras_cv.layers import preprocessing
from keras_cv.layers.preprocessing.base_image_augmentation_layer import (
BaseImageAugmentationLayer,
)
@tf.keras.utils.register_keras_serializable(package="keras_cv")
class RandomAugmentationPipeline(BaseImageAugmentationLayer):
    """RandomAugmentationPipeline constructs a pipeline based on provided arguments.

    The implemented policy does the following: for each inputs provided in `call`(), the
    policy first inputs a random number, if the number is < rate, the policy then
    selects a random layer from the provided list of `layers`. It then calls the
    `layer()` on the inputs. This is done `augmentations_per_image` times.

    This layer can be used to create custom policies resembling `RandAugment` or
    `AutoAugment`.

    Usage:
    ```python
    # construct a list of layers
    layers = keras_cv.layers.RandAugment.get_standard_policy(
        value_range=(0, 255), magnitude=0.75, magnitude_stddev=0.3
    )
    layers = layers[:4] # slice out some layers you don't want for whatever reason
    layers = layers + [keras_cv.layers.GridMask()]
    # create the pipeline.
    pipeline = keras_cv.layers.RandomAugmentationPipeline(
        layers=layers, augmentations_per_image=3
    )
    augmented_images = pipeline(images)
    ```

    Args:
        layers: a list of `keras.Layers`. These are randomly inputs during
            augmentation to augment the inputs passed in `call()`. The layers passed
            should subclass `BaseImageAugmentationLayer`. Passing `layers=[]`
            would result in a no-op.
        augmentations_per_image: the number of layers to apply to each inputs in the
            `call()` method.
        rate: the rate at which to apply each augmentation. This is applied on a per
            augmentation bases, so if `augmentations_per_image=3` and `rate=0.5`, the
            odds an image will receive no augmentations is 0.5^3, or 0.5*0.5*0.5.
        auto_vectorize: whether to use `tf.vectorized_map` or `tf.map_fn` to
            apply the augmentations. This offers a significant performance boost, but
            can only be used if all the layers provided to the `layers` argument
            support auto vectorization.
        seed: Integer. Used to create a random seed.
    """

    def __init__(
        self,
        layers,
        augmentations_per_image,
        rate=1.0,
        auto_vectorize=False,
        seed=None,
        **kwargs,
    ):
        super().__init__(**kwargs, seed=seed, force_generator=True)
        self.augmentations_per_image = augmentations_per_image
        self.rate = rate
        self.layers = list(layers)
        self.auto_vectorize = auto_vectorize
        self.seed = seed
        self._random_choice = preprocessing.RandomChoice(
            layers=layers, auto_vectorize=auto_vectorize, seed=seed
        )

    def _augment(self, inputs):
        """Apply `augmentations_per_image` randomly-chosen layers to `inputs`."""
        # An empty layer list makes the pipeline a no-op.
        if not self.layers:
            return inputs
        result = inputs
        for _ in range(self.augmentations_per_image):
            skip_augment = self._random_generator.random_uniform(
                shape=(), minval=0.0, maxval=1.0, dtype=tf.float32
            )
            # Bug fix: the skip branch previously returned the original
            # `inputs`, discarding augmentations applied by earlier loop
            # iterations whenever a later step was skipped. Both branches now
            # carry `result` forward. `result` is bound as a default argument
            # to guard against Python's late-binding closure semantics.
            result = tf.cond(
                skip_augment > self.rate,
                lambda result=result: result,
                lambda result=result: self._random_choice(result),
            )
        return result

    def get_config(self):
        config = super().get_config()
        config.update(
            {
                "augmentations_per_image": self.augmentations_per_image,
                "auto_vectorize": self.auto_vectorize,
                "rate": self.rate,
                "layers": self.layers,
                "seed": self.seed,
            }
        )
        return config
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from absl.testing import parameterized
from keras_cv import layers
class AddOneToInputs(tf.keras.layers.Layer):
    """Test helper layer that increments the "images" entry of its input dict."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def call(self, inputs):
        outputs = dict(inputs)
        outputs["images"] = inputs["images"] + 1
        return outputs
class RandomAugmentationPipelineTest(tf.test.TestCase, parameterized.TestCase):
    """Unit tests for the `RandomAugmentationPipeline` layer."""

    @parameterized.named_parameters(("1", 1), ("3", 3), ("5", 5))
    def test_calls_layers_augmentations_per_image_times(self, augmentations_per_image):
        # With rate=1.0 the single layer runs exactly augmentations_per_image
        # times, so each pixel is incremented by that amount.
        layer = AddOneToInputs()
        pipeline = layers.RandomAugmentationPipeline(
            layers=[layer], augmentations_per_image=augmentations_per_image, rate=1.0
        )
        xs = tf.random.uniform((2, 5, 5, 3), 0, 100, dtype=tf.float32)
        os = pipeline(xs)
        self.assertAllClose(xs + augmentations_per_image, os)

    def test_supports_empty_layers_argument(self):
        # An empty layer list should make the pipeline a no-op.
        pipeline = layers.RandomAugmentationPipeline(
            layers=[], augmentations_per_image=1, rate=1.0
        )
        xs = tf.random.uniform((2, 5, 5, 3), 0, 100, dtype=tf.float32)
        os = pipeline(xs)
        self.assertAllClose(xs, os)

    def test_calls_layers_augmentations_in_graph(self):
        layer = AddOneToInputs()
        pipeline = layers.RandomAugmentationPipeline(
            layers=[layer], augmentations_per_image=3, rate=1.0
        )

        @tf.function()
        def call_pipeline(xs):
            return pipeline(xs)

        xs = tf.random.uniform((2, 5, 5, 3), 0, 100, dtype=tf.float32)
        os = call_pipeline(xs)
        self.assertAllClose(xs + 3, os)

    @parameterized.named_parameters(("1", 1), ("3", 3), ("5", 5))
    def test_calls_layers_augmentations_per_image_times_single_image(
        self, augmentations_per_image
    ):
        layer = AddOneToInputs()
        pipeline = layers.RandomAugmentationPipeline(
            layers=[layer], augmentations_per_image=augmentations_per_image, rate=1.0
        )
        xs = tf.random.uniform((5, 5, 3), 0, 100, dtype=tf.float32)
        os = pipeline(xs)
        self.assertAllClose(xs + augmentations_per_image, os)

    @parameterized.named_parameters(("1", 1), ("3", 3), ("5", 5))
    def test_respects_rate(self, augmentations_per_image):
        # With rate=0.0 every augmentation step is skipped.
        layer = AddOneToInputs()
        pipeline = layers.RandomAugmentationPipeline(
            layers=[layer], augmentations_per_image=augmentations_per_image, rate=0.0
        )
        xs = tf.random.uniform((2, 5, 5, 3), 0, 100, dtype=tf.float32)
        os = pipeline(xs)
        self.assertAllClose(xs, os)
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from keras_cv.layers.preprocessing.base_image_augmentation_layer import (
BaseImageAugmentationLayer,
)
from keras_cv.utils import preprocessing
@tf.keras.utils.register_keras_serializable(package="keras_cv")
class RandomChannelShift(BaseImageAugmentationLayer):
    """Randomly shift values for each channel of the input image(s).

    The input images should have values in the `[0-255]` or `[0-1]` range.

    Input shape:
        3D (unbatched) or 4D (batched) tensor with shape:
        `(..., height, width, channels)`, in `channels_last` format.
    Output shape:
        3D (unbatched) or 4D (batched) tensor with shape:
        `(..., height, width, channels)`, in `channels_last` format.

    Args:
        value_range: The range of values the incoming images will have.
            Represented as a two number tuple written [low, high].
            This is typically either `[0, 1]` or `[0, 255]` depending
            on how your preprocessing pipeline is setup.
        factor: A scalar value, or tuple/list of two floating values in
            the range `[0.0, 1.0]`. If `factor` is a single value, it will
            interpret as equivalent to the tuple `(0.0, factor)`. The `factor`
            will sampled between its range for every image to augment.
        channels: integer, the number of channels to shift. Defaults to 3 which
            corresponds to an RGB shift. In some cases, there may ber more or less
            channels.
        seed: Integer. Used to create a random seed.

    Usage:
    ```python
    (images, labels), _ = tf.keras.datasets.cifar10.load_data()
    rgb_shift = keras_cv.layers.RandomChannelShift(value_range=(0, 255), factor=0.5)
    augmented_images = rgb_shift(images)
    ```
    """

    def __init__(self, value_range, factor, channels=3, seed=None, **kwargs):
        super().__init__(**kwargs, seed=seed)
        self.seed = seed
        self.value_range = value_range
        self.channels = channels
        # parse_factor normalizes a scalar / 2-tuple `factor` into a sampler.
        self.factor = preprocessing.parse_factor(factor, seed=self.seed)

    def get_random_transformation(
        self, image=None, label=None, bounding_boxes=None, **kwargs
    ):
        """Sample one signed shift per channel for a single image."""
        shifts = []
        for _ in range(self.channels):
            shifts.append(self._get_shift())
        return shifts

    def _get_shift(self):
        # Random sign times half the sampled factor, so shifts lie in
        # [-factor/2, factor/2] in the normalized (0, 1) space.
        invert = preprocessing.random_inversion(self._random_generator)
        return invert * self.factor() * 0.5

    def augment_image(self, image, transformation=None, **kwargs):
        """Apply the sampled per-channel shifts in normalized (0, 1) space."""
        image = preprocessing.transform_value_range(image, self.value_range, (0, 1))
        unstack_rgb = tf.unstack(image, axis=-1)
        result = []
        for c_i in range(self.channels):
            result.append(unstack_rgb[c_i] + transformation[c_i])
        result = tf.stack(
            result,
            axis=-1,
        )
        # Clip in normalized space before mapping back to the caller's range.
        result = tf.clip_by_value(result, 0.0, 1.0)
        image = preprocessing.transform_value_range(result, (0, 1), self.value_range)
        return image

    def augment_bounding_boxes(self, bounding_boxes, **kwargs):
        # Channel shifts do not move pixels, so boxes are unchanged.
        return bounding_boxes

    def augment_label(self, label, transformation=None, **kwargs):
        return label

    def augment_segmentation_mask(self, segmentation_mask, transformation, **kwargs):
        return segmentation_mask

    def get_config(self):
        config = super().get_config()
        config.update(
            {
                "factor": self.factor,
                "channels": self.channels,
                "value_range": self.value_range,
                "seed": self.seed,
            }
        )
        return config
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import tensorflow as tf
from absl.testing import parameterized
from keras_cv.layers import preprocessing
class RandomChannelShiftTest(tf.test.TestCase, parameterized.TestCase):
    """Unit tests for the `RandomChannelShift` preprocessing layer."""

    def test_return_shapes(self):
        xs = tf.ones((2, 512, 512, 3))
        layer = preprocessing.RandomChannelShift(factor=1.0, value_range=(0, 255))
        xs = layer(xs, training=True)
        self.assertEqual(xs.shape, [2, 512, 512, 3])

    def test_non_square_image(self):
        xs = tf.cast(
            tf.stack(
                [2 * tf.ones((1024, 512, 3)), tf.ones((1024, 512, 3))],
                axis=0,
            ),
            dtype=tf.float32,
        )
        layer = preprocessing.RandomChannelShift(
            factor=[0.1, 0.3], value_range=(0, 255)
        )
        xs = layer(xs, training=True)
        # Every pixel should have moved away from its constant input value.
        self.assertFalse(tf.math.reduce_any(xs[0] == 2.0))
        self.assertFalse(tf.math.reduce_any(xs[1] == 1.0))

    def test_in_tf_function(self):
        xs = tf.cast(
            tf.stack([2 * tf.ones((100, 100, 3)), tf.ones((100, 100, 3))], axis=0),
            dtype=tf.float32,
        )
        layer = preprocessing.RandomChannelShift(factor=0.3, value_range=(0, 255))

        @tf.function
        def augment(x):
            return layer(x, training=True)

        xs = augment(xs)
        self.assertFalse(tf.math.reduce_any(xs[0] == 2.0))
        self.assertFalse(tf.math.reduce_any(xs[1] == 1.0))

    def test_5_channels(self):
        xs = tf.cast(
            tf.ones((512, 512, 5)),
            dtype=tf.float32,
        )
        layer = preprocessing.RandomChannelShift(
            factor=0.4, channels=5, value_range=(0, 255)
        )
        xs = layer(xs, training=True)
        self.assertFalse(tf.math.reduce_any(xs == 1.0))

    def test_1_channel(self):
        xs = tf.cast(
            tf.ones((512, 512, 1)),
            dtype=tf.float32,
        )
        layer = preprocessing.RandomChannelShift(
            factor=0.4, channels=1, value_range=(0, 255)
        )
        xs = layer(xs, training=True)
        self.assertFalse(tf.math.reduce_any(xs == 1.0))

    def test_in_single_image(self):
        xs = tf.cast(
            tf.ones((512, 512, 3)),
            dtype=tf.float32,
        )
        layer = preprocessing.RandomChannelShift(factor=0.4, value_range=(0, 255))
        xs = layer(xs, training=True)
        self.assertFalse(tf.math.reduce_any(xs == 1.0))

    def test_config(self):
        # Round-trips the layer through get_config/from_config.
        layer = preprocessing.RandomChannelShift(
            factor=[0.1, 0.5], value_range=(0, 255), seed=101
        )
        config = layer.get_config()
        self.assertEqual(config["factor"].get_config()["lower"], 0.1)
        self.assertEqual(config["factor"].get_config()["upper"], 0.5)
        self.assertEqual(config["value_range"], (0, 255))
        self.assertEqual(config["channels"], 3)
        self.assertEqual(config["seed"], 101)
        reconstructed_layer = preprocessing.RandomChannelShift.from_config(config)
        self.assertEqual(reconstructed_layer.factor, layer.factor)
        self.assertEqual(reconstructed_layer.value_range, layer.value_range)
        self.assertEqual(reconstructed_layer.seed, layer.seed)
        self.assertEqual(reconstructed_layer.channels, layer.channels)

    def test_inference(self):
        # With training=False the layer must be an identity transform.
        layer = preprocessing.RandomChannelShift(factor=0.8, value_range=(0, 255))
        inputs = np.random.randint(0, 255, size=(224, 224, 3))
        output = layer(inputs, training=False)
        self.assertAllClose(inputs, output)
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from keras_cv.layers.preprocessing.base_image_augmentation_layer import (
BaseImageAugmentationLayer,
)
@tf.keras.utils.register_keras_serializable(package="keras_cv")
class RandomChoice(BaseImageAugmentationLayer):
    """RandomChoice constructs a pipeline based on provided arguments.

    The implemented policy does the following: for each inputs provided in `call`(), the
    policy selects a random layer from the provided list of `layers`. It then calls the
    `layer()` on the inputs.

    Usage:
    ```python
    # construct a list of layers
    layers = keras_cv.layers.RandAugment.get_standard_policy(
        value_range=(0, 255), magnitude=0.75, magnitude_stddev=0.3
    )
    layers = layers[:4] # slice out some layers you don't want for whatever reason
    layers = layers + [keras_cv.layers.GridMask()]
    # create the pipeline.
    pipeline = keras_cv.layers.RandomChoice(layers=layers)
    augmented_images = pipeline(images)
    ```

    Args:
        layers: a list of `keras.Layers`. These are randomly inputs during
            augmentation to augment the inputs passed in `call()`. The layers passed
            should subclass `BaseImageAugmentationLayer`.
        auto_vectorize: whether to use `tf.vectorized_map` or `tf.map_fn` to
            apply the augmentations. This offers a significant performance boost, but
            can only be used if all the layers provided to the `layers` argument
            support auto vectorization.
        seed: Integer. Used to create a random seed.
    """

    def __init__(
        self,
        layers,
        auto_vectorize=False,
        seed=None,
        **kwargs,
    ):
        super().__init__(**kwargs, seed=seed, force_generator=True)
        # Materialize to a list for consistency with RandomAugmentationPipeline
        # and so `len()`/`enumerate()` in `_augment` work for any iterable.
        self.layers = list(layers)
        self.auto_vectorize = auto_vectorize
        self.seed = seed

    def _curry_call_layer(self, inputs, layer):
        # Binds `inputs` and `layer` eagerly; see the warning in `_augment`.
        def call_layer():
            return layer(inputs)

        return call_layer

    def _augment(self, inputs, *args, **kwargs):
        """Pick one layer uniformly at random and apply it to `inputs`."""
        selected_op = self._random_generator.random_uniform(
            (), minval=0, maxval=len(self.layers), dtype=tf.int32
        )
        # Warning:
        # Do not replace the currying function with a lambda.
        # Originally we used a lambda, but due to Python's
        # lack of loop level scope this causes unexpected
        # behavior running outside of graph mode.
        #
        # Autograph has an edge case where the behavior of Python for loop
        # variables is inconsistent between Python and graph execution.
        # By using a list comprehension and currying, we mitigate
        # our code against both of these cases.
        branch_fns = [
            (i, self._curry_call_layer(inputs, layer))
            for (i, layer) in enumerate(self.layers)
        ]
        return tf.switch_case(
            branch_index=selected_op,
            branch_fns=branch_fns,
            default=lambda: inputs,
        )

    def get_config(self):
        config = super().get_config()
        config.update(
            {
                "layers": self.layers,
                "auto_vectorize": self.auto_vectorize,
                "seed": self.seed,
            }
        )
        return config
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from absl.testing import parameterized
from keras_cv import layers
class AddOneToInputs(tf.keras.layers.Layer):
    """Test helper layer that adds 1 to `inputs["images"]`.

    It also counts how many times `call` ran, so tests can verify that the
    pipeline dispatched to exactly one layer per input.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Mutable counter; incremented on every invocation of call().
        self.call_counter = tf.Variable(initial_value=0)

    def call(self, inputs):
        outputs = inputs.copy()
        outputs["images"] = inputs["images"] + 1
        self.call_counter.assign_add(1)
        return outputs
class RandomAugmentationPipelineTest(tf.test.TestCase, parameterized.TestCase):
    """Tests that `layers.RandomChoice` applies exactly one candidate layer."""

    def test_calls_layer_augmentation_per_image(self):
        add_one = AddOneToInputs()
        pipeline = layers.RandomChoice(layers=[add_one])
        inputs = tf.random.uniform((2, 5, 5, 3), 0, 100, dtype=tf.float32)
        outputs = pipeline(inputs)
        self.assertAllClose(inputs + 1, outputs)

    def test_calls_layer_augmentation_in_graph(self):
        add_one = AddOneToInputs()
        pipeline = layers.RandomChoice(layers=[add_one])

        @tf.function()
        def call_pipeline(images):
            return pipeline(images)

        inputs = tf.random.uniform((2, 5, 5, 3), 0, 100, dtype=tf.float32)
        outputs = call_pipeline(inputs)
        self.assertAllClose(inputs + 1, outputs)

    def test_calls_layer_augmentation_single_image(self):
        add_one = AddOneToInputs()
        pipeline = layers.RandomChoice(layers=[add_one])
        inputs = tf.random.uniform((5, 5, 3), 0, 100, dtype=tf.float32)
        outputs = pipeline(inputs)
        self.assertAllClose(inputs + 1, outputs)

    def test_calls_choose_one_layer_augmentation(self):
        batch_size = 10
        pipeline = layers.RandomChoice(layers=[AddOneToInputs(), AddOneToInputs()])
        inputs = tf.random.uniform((batch_size, 5, 5, 3), 0, 100, dtype=tf.float32)
        outputs = pipeline(inputs)
        self.assertAllClose(inputs + 1, outputs)
        # Exactly one of the two layers must have been invoked per input.
        total_calls = (
            pipeline.layers[0].call_counter + pipeline.layers[1].call_counter
        )
        self.assertEqual(total_calls, batch_size)
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from keras_cv.layers.preprocessing.base_image_augmentation_layer import (
BaseImageAugmentationLayer,
)
from keras_cv.utils import preprocessing
@tf.keras.utils.register_keras_serializable(package="keras_cv")
class RandomColorDegeneration(BaseImageAugmentationLayer):
    """Randomly performs the color degeneration operation on given images.

    The color degeneration operation first converts an image to gray scale,
    then back to color. It then takes a weighted average between the original
    image and the degenerated image. This makes colors appear more dull.

    Args:
        factor: A tuple of two floats, a single float or a
            `keras_cv.FactorSampler`. `factor` controls the extent to which the
            image colors are degenerated. `factor=0.0` makes this layer perform
            a no-op operation, while a value of 1.0 uses the degenerated result
            entirely. Values between 0 and 1 result in linear interpolation
            between the original image and the degenerated image.
            Values should be between `0.0` and `1.0`. If a tuple is used, a
            `factor` is sampled between the two values for every image
            augmented. If a single float is used, a value between `0.0` and the
            passed float is sampled. In order to ensure the value is always the
            same, please pass a tuple with two identical floats: `(0.5, 0.5)`.
        seed: Integer. Used to create a random seed.
    """

    def __init__(
        self,
        factor,
        seed=None,
        **kwargs,
    ):
        # Bug fix: `seed` used to be stored but never forwarded, so neither
        # the base layer's RNG nor the factor sampler was reproducible.
        # Forward it to both, matching the sibling preprocessing layers
        # (e.g. RandomCutout, RandomCropAndResize).
        super().__init__(seed=seed, **kwargs)
        self.factor = preprocessing.parse_factor(
            factor,
            param_name="factor",
            seed=seed,
        )
        self.seed = seed

    def get_random_transformation(self, **kwargs):
        # Sample the per-image blend weight.
        return self.factor()

    def augment_image(self, image, transformation=None, **kwargs):
        """Blend `image` with its grayscale version using `transformation`."""
        degenerate = tf.image.grayscale_to_rgb(tf.image.rgb_to_grayscale(image))
        return preprocessing.blend(image, degenerate, transformation)

    def augment_bounding_boxes(self, bounding_boxes, **kwargs):
        # Color-only augmentation: geometry is untouched.
        return bounding_boxes

    def augment_label(self, label, transformation=None, **kwargs):
        return label

    def augment_segmentation_mask(self, segmentation_mask, transformation, **kwargs):
        return segmentation_mask

    def get_config(self):
        config = super().get_config()
        config.update({"factor": self.factor, "seed": self.seed})
        return config
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from keras_cv.layers import preprocessing
class RandomColorDegenerationTest(tf.test.TestCase):
    """Tests for `preprocessing.RandomColorDegeneration`."""

    def test_random_color_degeneration_base_case(self):
        # factor=0.0 is a no-op; only the shape contract is exercised here.
        shape = (50, 50, 3)
        images = tf.stack([2 * tf.ones(shape), tf.ones(shape)], axis=0)
        layer = preprocessing.RandomColorDegeneration(0.0)
        outputs = layer(images)
        self.assertEqual(images.shape, outputs.shape)

    def test_color_degeneration_full_factor(self):
        shape = (50, 50, 1)
        images = tf.concat(
            [tf.ones(shape), 2 * tf.ones(shape), 3 * tf.ones(shape)], axis=-1
        )
        layer = preprocessing.RandomColorDegeneration(factor=(1, 1))
        outputs = layer(images)
        # Color degeneration uses standard luma conversion for RGB->Grayscale.
        # The formula for luma is result = 0.2989*r + 0.5870*g + 0.1140*b
        expected = 0.2989 + 2 * 0.5870 + 3 * 0.1140
        self.assertAllClose(outputs, tf.ones_like(outputs) * expected)

    def test_color_degeneration_70p_factor(self):
        shape = (50, 50, 1)
        images = tf.concat(
            [tf.ones(shape), 2 * tf.ones(shape), 3 * tf.ones(shape)], axis=-1
        )
        layer = preprocessing.RandomColorDegeneration(factor=(0.7, 0.7))
        outputs = layer(images)
        # Luma of the (1, 2, 3) pixel, then blended 70/30 with the original.
        luma = 0.2989 + 2 * 0.5870 + 3 * 0.1140
        expected_per_channel = [luma * 0.7 + c * 0.3 for c in (1, 2, 3)]
        for channel, expected in enumerate(expected_per_channel):
            actual = outputs[..., channel]
            self.assertAllClose(actual, tf.ones_like(actual) * expected)
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from keras_cv.layers import preprocessing
from keras_cv.layers.preprocessing.base_image_augmentation_layer import (
BaseImageAugmentationLayer,
)
from keras_cv.utils import preprocessing as preprocessing_utils
@tf.keras.utils.register_keras_serializable(package="keras_cv")
class RandomColorJitter(BaseImageAugmentationLayer):
    """RandomColorJitter class randomly apply brightness, contrast, saturation
    and hue image processing operation sequentially and randomly on the
    input. It expects input as RGB image. The expected image should be
    `(0-255)` pixel ranges.

    Input shape:
        3D (unbatched) or 4D (batched) tensor with shape:
        `(..., height, width, channels)`, in `channels_last` format
    Output shape:
        3D (unbatched) or 4D (batched) tensor with shape:
        `(..., height, width, channels)`, in `channels_last` format

    Args:
        value_range: the range of values the incoming images will have.
            Represented as a two number tuple written [low, high].
            This is typically either `[0, 1]` or `[0, 255]` depending
            on how your preprocessing pipeline is setup.
        brightness_factor: Float or a list/tuple of 2 floats between -1.0
            and 1.0. The factor is used to determine the lower bound and
            upper bound of the brightness adjustment. A float value will be
            chosen randomly between the limits. When -1.0 is chosen, the
            output image will be black, and when 1.0 is chosen, the image
            will be fully white. When only one float is provided, eg, 0.2,
            then -0.2 will be used for lower bound and 0.2 will be used for
            upper bound.
        contrast_factor: A positive float represented as fraction of value,
            or a tuple of size 2 representing lower and upper bound. When
            represented as a single float, lower = upper. The contrast factor
            will be randomly picked between `[1.0 - lower, 1.0 + upper]`.
        saturation_factor: Either a tuple of two floats or a single float.
            `factor` controls the extent to which the image saturation is
            impacted. `factor=0.5` makes this layer perform a no-op operation.
            `factor=0.0` makes the image to be fully grayscale. `factor=1.0`
            makes the image to be fully saturated.
        hue_factor: A tuple of two floats, a single float or
            `keras_cv.FactorSampler`. `factor` controls the extent to which the
            image hue is impacted. `factor=0.0` makes this layer perform
            a no-op operation, while a value of 1.0 performs the most aggressive
            hue adjustment available. If a tuple is used, a `factor` is sampled
            between the two values for every image augmented. If a single float
            is used, a value between `0.0` and the passed float is sampled.
            In order to ensure the value is always the same, please pass a tuple
            with two identical floats: `(0.5, 0.5)`.
        seed: Integer. Used to create a random seed.

    Usage:
    ```python
    (images, labels), _ = tf.keras.datasets.cifar10.load_data()
    color_jitter = keras_cv.layers.RandomColorJitter(
        value_range=(0, 255),
        brightness_factor=(-0.2, 0.5),
        contrast_factor=(0.5, 0.9),
        saturation_factor=(0.5, 0.9),
        hue_factor=(0.5, 0.9),
    )
    augmented_images = color_jitter(images)
    ```
    """

    def __init__(
        self,
        value_range,
        brightness_factor,
        contrast_factor,
        saturation_factor,
        hue_factor,
        seed=None,
        **kwargs,
    ):
        super().__init__(**kwargs)
        self.value_range = value_range
        self.brightness_factor = brightness_factor
        self.contrast_factor = contrast_factor
        self.saturation_factor = saturation_factor
        self.hue_factor = hue_factor
        self.seed = seed
        # The four sub-layers all operate in the (0, 255) range;
        # augment_image converts into and out of that range around them.
        self.random_brightness = preprocessing.RandomBrightness(
            factor=self.brightness_factor, value_range=(0, 255), seed=self.seed
        )
        self.random_contrast = preprocessing.RandomContrast(
            factor=self.contrast_factor, seed=self.seed
        )
        self.random_saturation = preprocessing.RandomSaturation(
            factor=self.saturation_factor, seed=self.seed
        )
        self.random_hue = preprocessing.RandomHue(
            factor=self.hue_factor, value_range=(0, 255), seed=self.seed
        )

    def augment_image(self, image, transformation=None, **kwargs):
        """Apply brightness, contrast, saturation and hue, in that order."""
        image = preprocessing_utils.transform_value_range(
            image,
            original_range=self.value_range,
            target_range=(0, 255),
            dtype=image.dtype,
        )
        image = self.random_brightness(image)
        image = self.random_contrast(image)
        image = self.random_saturation(image)
        image = self.random_hue(image)
        # Map the result back into the caller's value range.
        image = preprocessing_utils.transform_value_range(
            image,
            original_range=(0, 255),
            target_range=self.value_range,
            dtype=image.dtype,
        )
        return image

    def augment_bounding_boxes(self, bounding_boxes, **kwargs):
        # Color-only augmentation: geometry is untouched.
        return bounding_boxes

    def augment_label(self, label, transformation=None, **kwargs):
        return label

    def augment_segmentation_mask(self, segmentation_mask, transformation, **kwargs):
        return segmentation_mask

    def get_config(self):
        """Serialize constructor arguments so the layer can be re-created."""
        config = super().get_config()
        config.update(
            {
                "value_range": self.value_range,
                "brightness_factor": self.brightness_factor,
                "contrast_factor": self.contrast_factor,
                "saturation_factor": self.saturation_factor,
                "hue_factor": self.hue_factor,
                "seed": self.seed,
            }
        )
        return config
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import tensorflow as tf
from absl.testing import parameterized
from keras_cv.layers import preprocessing
class RandomColorJitterTest(tf.test.TestCase, parameterized.TestCase):
    """Tests for `preprocessing.RandomColorJitter`."""

    # Test 1: Check input and output shape. It should match.
    def test_return_shapes(self):
        batch_input = tf.ones((2, 512, 512, 3))
        non_square_batch_input = tf.ones((2, 1024, 512, 3))
        unbatch_input = tf.ones((512, 512, 3))
        layer = preprocessing.RandomColorJitter(
            value_range=(0, 255),
            brightness_factor=0.5,
            contrast_factor=(0.5, 0.9),
            saturation_factor=(0.5, 0.9),
            hue_factor=0.5,
        )
        batch_output = layer(batch_input, training=True)
        non_square_batch_output = layer(non_square_batch_input, training=True)
        unbatch_output = layer(unbatch_input, training=True)
        self.assertEqual(batch_output.shape, [2, 512, 512, 3])
        self.assertEqual(non_square_batch_output.shape, [2, 1024, 512, 3])
        self.assertEqual(unbatch_output.shape, [512, 512, 3])

    # Test 2: Check if the factor ranges are set properly.
    def test_factor_range(self):
        layer = preprocessing.RandomColorJitter(
            value_range=(0, 255),
            brightness_factor=(-0.2, 0.5),
            contrast_factor=(0.5, 0.9),
            saturation_factor=(0.5, 0.9),
            hue_factor=(0.5, 0.9),
        )
        self.assertEqual(layer.brightness_factor, (-0.2, 0.5))
        self.assertEqual(layer.contrast_factor, (0.5, 0.9))
        self.assertEqual(layer.saturation_factor, (0.5, 0.9))
        self.assertEqual(layer.hue_factor, (0.5, 0.9))

    # Test 3: Test if it is OK to run on graph mode.
    def test_in_tf_function(self):
        inputs = tf.ones((2, 512, 512, 3))
        layer = preprocessing.RandomColorJitter(
            value_range=(0, 255),
            brightness_factor=0.5,
            contrast_factor=(0.5, 0.9),
            saturation_factor=(0.5, 0.9),
            hue_factor=0.5,
        )

        @tf.function
        def augment(x):
            return layer(x, training=True)

        outputs = augment(inputs)
        self.assertNotAllClose(inputs, outputs)

    # Test 4: Check if get_config and from_config work as expected.
    def test_config(self):
        layer = preprocessing.RandomColorJitter(
            value_range=(0, 255),
            brightness_factor=0.5,
            contrast_factor=(0.5, 0.9),
            saturation_factor=(0.5, 0.9),
            hue_factor=0.5,
        )
        config = layer.get_config()
        self.assertEqual(config["brightness_factor"], 0.5)
        self.assertEqual(config["contrast_factor"], (0.5, 0.9))
        self.assertEqual(config["saturation_factor"], (0.5, 0.9))
        self.assertEqual(config["hue_factor"], 0.5)
        reconstructed_layer = preprocessing.RandomColorJitter.from_config(config)
        self.assertEqual(reconstructed_layer.brightness_factor, layer.brightness_factor)
        self.assertEqual(reconstructed_layer.contrast_factor, layer.contrast_factor)
        self.assertEqual(reconstructed_layer.saturation_factor, layer.saturation_factor)
        self.assertEqual(reconstructed_layer.hue_factor, layer.hue_factor)

    # Test 5: Check if inference model is OK.
    def test_inference(self):
        layer = preprocessing.RandomColorJitter(
            value_range=(0, 255),
            brightness_factor=0.5,
            contrast_factor=(0.5, 0.9),
            saturation_factor=(0.5, 0.9),
            hue_factor=0.5,
        )
        inputs = np.random.randint(0, 255, size=(224, 224, 3))
        # training=False must be a no-op.
        output = layer(inputs, training=False)
        self.assertAllClose(inputs, output)
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from keras_cv import core
from keras_cv.layers.preprocessing.base_image_augmentation_layer import (
BaseImageAugmentationLayer,
)
from keras_cv.utils import preprocessing
@tf.keras.utils.register_keras_serializable(package="keras_cv")
class RandomCropAndResize(BaseImageAugmentationLayer):
    """Randomly crops a part of an image and resizes it to provided size.

    This implementation takes an intuitive approach, where we crop the images to
    a random height and width, and then resize them. To do this, we first sample
    a random value for area using `crop_area_factor` and a value for aspect
    ratio using `aspect_ratio_factor`. Further we get the new height and width
    by dividing and multiplying the old height and width by the random area
    respectively. We then sample offsets for height and width and clip them such
    that the cropped area does not exceed image boundaries. Finally we do the
    actual cropping operation and resize the image to `target_size`.

    Args:
        target_size: A tuple of two integers used as the target size to
            ultimately crop images to.
        crop_area_factor: A tuple of two floats, ConstantFactorSampler or
            UniformFactorSampler. The ratio of area of the cropped part to
            that of original image is sampled using this factor. Represents the
            lower and upper bounds for the area relative to the original image
            of the cropped image before resizing it to `target_size`. For
            self-supervised pretraining a common value for this parameter is
            `(0.08, 1.0)`. For fine tuning and classification a common value for
            this is `0.8, 1.0`.
        aspect_ratio_factor: A tuple of two floats, ConstantFactorSampler or
            UniformFactorSampler. Aspect ratio means the ratio of width to
            height of the cropped image. In the context of this layer, the
            aspect ratio sampled represents a value to distort the aspect ratio
            by. Represents the lower and upper bound for the aspect ratio of the
            cropped image before resizing it to `target_size`. For most tasks,
            this should be `(3/4, 4/3)`. To perform a no-op provide the value
            `(1.0, 1.0)`.
        interpolation: (Optional) A string specifying the sampling method for
            resizing. Defaults to "bilinear".
        seed: (Optional) Used to create a random seed. Defaults to None.
    """

    def __init__(
        self,
        target_size,
        crop_area_factor,
        aspect_ratio_factor,
        interpolation="bilinear",
        seed=None,
        **kwargs,
    ):
        super().__init__(seed=seed, **kwargs)
        # Validate before constructing samplers so errors name the bad arg.
        self._check_class_arguments(target_size, crop_area_factor, aspect_ratio_factor)
        self.target_size = target_size
        self.aspect_ratio_factor = preprocessing.parse_factor(
            aspect_ratio_factor,
            min_value=0.0,
            max_value=None,
            param_name="aspect_ratio_factor",
            seed=seed,
        )
        self.crop_area_factor = preprocessing.parse_factor(
            crop_area_factor,
            max_value=1.0,
            param_name="crop_area_factor",
            seed=seed,
        )
        self.interpolation = interpolation
        self.seed = seed

    def get_random_transformation(
        self, image=None, label=None, bounding_box=None, **kwargs
    ):
        """Sample a crop box `[[y1, x1, y2, x2]]` in normalized coordinates.

        The box is expressed as fractions of image height/width, matching the
        format expected by `tf.image.crop_and_resize` in `_crop_and_resize`.
        """
        crop_area_factor = self.crop_area_factor()
        aspect_ratio = self.aspect_ratio_factor()
        # Fractional crop height/width derived from area and aspect ratio,
        # clipped so the box never exceeds the full image.
        new_height = tf.clip_by_value(
            tf.sqrt(crop_area_factor / aspect_ratio), 0.0, 1.0
        )  # to avoid unwanted/unintuitive effects
        new_width = tf.clip_by_value(tf.sqrt(crop_area_factor * aspect_ratio), 0.0, 1.0)
        # Offsets are drawn from [0, 1 - new_size] so the box stays in bounds
        # (the min/max guards handle the degenerate new_size == 1.0 case).
        height_offset = self._random_generator.random_uniform(
            (),
            minval=tf.minimum(0.0, 1.0 - new_height),
            maxval=tf.maximum(0.0, 1.0 - new_height),
            dtype=tf.float32,
        )
        width_offset = self._random_generator.random_uniform(
            (),
            minval=tf.minimum(0.0, 1.0 - new_width),
            maxval=tf.maximum(0.0, 1.0 - new_width),
            dtype=tf.float32,
        )
        y1 = height_offset
        y2 = height_offset + new_height
        x1 = width_offset
        x2 = width_offset + new_width
        return [[y1, x1, y2, x2]]

    def call(self, inputs, training=True):
        """At inference time (training=False) only resize to `target_size`."""
        if training:
            return super().call(inputs, training)
        else:
            inputs = self._ensure_inputs_are_compute_dtype(inputs)
            inputs, meta_data = self._format_inputs(inputs)
            output = inputs
            # self._resize() returns valid results for both batched and
            # unbatched
            output["images"] = self._resize(inputs["images"])
            if "segmentation_masks" in inputs:
                output["segmentation_masks"] = self._resize(
                    inputs["segmentation_masks"], interpolation="nearest"
                )
            return self._format_output(output, meta_data)

    def augment_image(self, image, transformation, **kwargs):
        return self._crop_and_resize(image, transformation)

    def augment_target(self, target, **kwargs):
        # Labels/targets are unaffected by cropping.
        return target

    def _resize(self, image, **kwargs):
        """Resize to `target_size` preserving aspect ratio via smart_resize."""
        outputs = tf.keras.preprocessing.image.smart_resize(
            image, self.target_size, **kwargs
        )
        # smart_resize will always output float32, so we need to re-cast.
        return tf.cast(outputs, self.compute_dtype)

    def _check_class_arguments(
        self, target_size, crop_area_factor, aspect_ratio_factor
    ):
        """Raise ValueError for malformed constructor arguments."""
        if (
            not isinstance(target_size, (tuple, list))
            or len(target_size) != 2
            or not isinstance(target_size[0], int)
            or not isinstance(target_size[1], int)
            or isinstance(target_size, int)
        ):
            raise ValueError(
                "`target_size` must be tuple of two integers. "
                f"Received target_size={target_size}"
            )
        # Scalars are rejected: both factors must describe a range or sampler.
        if (
            not isinstance(crop_area_factor, (tuple, list, core.FactorSampler))
            or isinstance(crop_area_factor, float)
            or isinstance(crop_area_factor, int)
        ):
            raise ValueError(
                "`crop_area_factor` must be tuple of two positive floats less than "
                "or equal to 1 or keras_cv.core.FactorSampler instance. Received "
                f"crop_area_factor={crop_area_factor}"
            )
        if (
            not isinstance(aspect_ratio_factor, (tuple, list, core.FactorSampler))
            or isinstance(aspect_ratio_factor, float)
            or isinstance(aspect_ratio_factor, int)
        ):
            raise ValueError(
                "`aspect_ratio_factor` must be tuple of two positive floats or "
                "keras_cv.core.FactorSampler instance. Received "
                f"aspect_ratio_factor={aspect_ratio_factor}"
            )

    def augment_segmentation_mask(self, segmentation_mask, transformation, **kwargs):
        # Nearest-neighbor so class ids are never interpolated.
        return self._crop_and_resize(
            segmentation_mask, transformation, method="nearest"
        )

    def get_config(self):
        """Serialize constructor arguments so the layer can be re-created."""
        config = super().get_config()
        config.update(
            {
                "target_size": self.target_size,
                "crop_area_factor": self.crop_area_factor,
                "aspect_ratio_factor": self.aspect_ratio_factor,
                "interpolation": self.interpolation,
                "seed": self.seed,
            }
        )
        return config

    def _crop_and_resize(self, image, transformation, method=None):
        """Crop one unbatched image by `transformation` and resize it."""
        image = tf.expand_dims(image, axis=0)
        boxes = transformation
        # See bit.ly/tf_crop_resize for more details
        augmented_image = tf.image.crop_and_resize(
            image,  # image shape: [B, H, W, C]
            boxes,  # boxes: (1, 4) in this case; represents area
            # to be cropped from the original image
            [0],  # box_indices: maps boxes to images along batch axis
            # [0] since there is only one image
            self.target_size,  # output size
            method=method or self.interpolation,
        )
        return tf.squeeze(augmented_image, axis=0)
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import tensorflow as tf
from absl.testing import parameterized
from keras_cv.layers import preprocessing
class RandomCropAndResizeTest(tf.test.TestCase, parameterized.TestCase):
    """Tests for `preprocessing.RandomCropAndResize`."""

    # Shared fixture constants: input geometry, batch size, output size, and a
    # fixed seed for reproducibility.
    height, width = 300, 300
    batch_size = 4
    target_size = (224, 224)
    seed = 42

    def test_train_augments_image(self):
        # Checks if original and augmented images are different
        input_image_shape = (self.batch_size, self.height, self.width, 3)
        image = tf.random.uniform(shape=input_image_shape, seed=self.seed)
        layer = preprocessing.RandomCropAndResize(
            target_size=self.target_size,
            aspect_ratio_factor=(3 / 4, 4 / 3),
            crop_area_factor=(0.8, 1.0),
            seed=self.seed,
        )
        output = layer(image, training=True)
        input_image_resized = tf.image.resize(image, self.target_size)
        self.assertNotAllClose(output, input_image_resized)

    def test_grayscale(self):
        # Single-channel input must be augmented and keep one channel.
        input_image_shape = (self.batch_size, self.height, self.width, 1)
        image = tf.random.uniform(shape=input_image_shape)
        layer = preprocessing.RandomCropAndResize(
            target_size=self.target_size,
            aspect_ratio_factor=(3 / 4, 4 / 3),
            crop_area_factor=(0.8, 1.0),
        )
        output = layer(image, training=True)
        input_image_resized = tf.image.resize(image, self.target_size)
        self.assertAllEqual(output.shape, (4, 224, 224, 1))
        self.assertNotAllClose(output, input_image_resized)

    def test_preserves_image(self):
        # With training=False the layer only resizes (no random crop).
        image_shape = (self.batch_size, self.height, self.width, 3)
        image = tf.random.uniform(shape=image_shape)
        layer = preprocessing.RandomCropAndResize(
            target_size=self.target_size,
            aspect_ratio_factor=(3 / 4, 4 / 3),
            crop_area_factor=(0.8, 1.0),
        )
        input_resized = tf.image.resize(image, self.target_size)
        output = layer(image, training=False)
        self.assertAllClose(output, input_resized)

    @parameterized.named_parameters(
        ("Not tuple or list", dict()),
        ("Length not equal to 2", [1, 2, 3]),
        ("Members not int", (2.3, 4.5)),
        ("Single integer", 5),
    )
    def test_target_size_errors(self, target_size):
        with self.assertRaisesRegex(
            ValueError,
            "`target_size` must be tuple of two integers. Received target_size=(.*)",
        ):
            _ = preprocessing.RandomCropAndResize(
                target_size=target_size,
                aspect_ratio_factor=(3 / 4, 4 / 3),
                crop_area_factor=(0.8, 1.0),
            )

    @parameterized.named_parameters(
        ("Not tuple or list", dict()),
        ("Single integer", 5),
        ("Single float", 5.0),
    )
    def test_aspect_ratio_factor_errors(self, aspect_ratio_factor):
        with self.assertRaisesRegex(
            ValueError,
            "`aspect_ratio_factor` must be tuple of two positive floats or "
            "keras_cv.core.FactorSampler instance. Received aspect_ratio_factor=(.*)",
        ):
            _ = preprocessing.RandomCropAndResize(
                target_size=(224, 224),
                aspect_ratio_factor=aspect_ratio_factor,
                crop_area_factor=(0.8, 1.0),
            )

    @parameterized.named_parameters(
        ("Not tuple or list", dict()),
        ("Single integer", 5),
        ("Single float", 5.0),
    )
    def test_crop_area_factor_errors(self, crop_area_factor):
        with self.assertRaisesRegex(
            ValueError,
            "`crop_area_factor` must be tuple of two positive floats less than or "
            "equal to 1 or keras_cv.core.FactorSampler instance. Received "
            "crop_area_factor=(.*)",
        ):
            _ = preprocessing.RandomCropAndResize(
                target_size=(224, 224),
                aspect_ratio_factor=(3 / 4, 4 / 3),
                crop_area_factor=crop_area_factor,
            )

    def test_augment_sparse_segmentation_mask(self):
        classes = 8
        input_image_shape = (1, self.height, self.width, 3)
        mask_shape = (1, self.height, self.width, 1)
        image = tf.random.uniform(shape=input_image_shape, seed=self.seed)
        # Binary mask scaled to class ids {0, 7}.
        mask = np.random.randint(2, size=mask_shape) * (classes - 1)
        inputs = {"images": image, "segmentation_masks": mask}
        # Crop-only to exactly 1/2 of the size
        layer = preprocessing.RandomCropAndResize(
            target_size=(150, 150),
            aspect_ratio_factor=(1, 1),
            crop_area_factor=(1, 1),
            seed=self.seed,
        )
        input_mask_resized = tf.image.crop_and_resize(
            mask, [[0, 0, 1, 1]], [0], (150, 150), "nearest"
        )
        output = layer(inputs, training=True)
        self.assertAllClose(output["segmentation_masks"], input_mask_resized)
        # Crop to an arbitrary size and make sure we don't do bad interpolation
        layer = preprocessing.RandomCropAndResize(
            target_size=(233, 233),
            aspect_ratio_factor=(3 / 4, 4 / 3),
            crop_area_factor=(0.8, 1.0),
            seed=self.seed,
        )
        output = layer(inputs, training=True)
        # Nearest-neighbor must never invent intermediate class ids.
        self.assertAllInSet(output["segmentation_masks"], [0, 7])

    def test_augment_one_hot_segmentation_mask(self):
        classes = 8
        input_image_shape = (1, self.height, self.width, 3)
        mask_shape = (1, self.height, self.width, 1)
        image = tf.random.uniform(shape=input_image_shape, seed=self.seed)
        mask = tf.one_hot(
            tf.squeeze(np.random.randint(2, size=mask_shape) * (classes - 1), axis=-1),
            classes,
        )
        inputs = {"images": image, "segmentation_masks": mask}
        # Crop-only to exactly 1/2 of the size
        layer = preprocessing.RandomCropAndResize(
            target_size=(150, 150),
            aspect_ratio_factor=(1, 1),
            crop_area_factor=(1, 1),
            seed=self.seed,
        )
        input_mask_resized = tf.image.crop_and_resize(
            mask, [[0, 0, 1, 1]], [0], (150, 150), "nearest"
        )
        output = layer(inputs, training=True)
        self.assertAllClose(output["segmentation_masks"], input_mask_resized)
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from keras_cv.layers.preprocessing.base_image_augmentation_layer import (
BaseImageAugmentationLayer,
)
from keras_cv.utils import fill_utils
from keras_cv.utils import preprocessing
@tf.keras.utils.register_keras_serializable(package="keras_cv")
class RandomCutout(BaseImageAugmentationLayer):
"""Randomly cut out rectangles from images and fill them.
Args:
height_factor: A tuple of two floats, a single float or a
`keras_cv.FactorSampler`. `height_factor` controls the size of the
cutouts. `height_factor=0.0` means the rectangle will be of size 0% of the
image height, `height_factor=0.1` means the rectangle will have a size of
10% of the image height, and so forth.
Values should be between `0.0` and `1.0`. If a tuple is used, a
`height_factor` is sampled between the two values for every image augmented.
If a single float is used, a value between `0.0` and the passed float is
sampled. In order to ensure the value is always the same, please pass a
tuple with two identical floats: `(0.5, 0.5)`.
width_factor: A tuple of two floats, a single float or a
`keras_cv.FactorSampler`. `width_factor` controls the size of the
cutouts. `width_factor=0.0` means the rectangle will be of size 0% of the
image height, `width_factor=0.1` means the rectangle will have a size of 10%
of the image width, and so forth.
Values should be between `0.0` and `1.0`. If a tuple is used, a
`width_factor` is sampled between the two values for every image augmented.
If a single float is used, a value between `0.0` and the passed float is
sampled. In order to ensure the value is always the same, please pass a
tuple with two identical floats: `(0.5, 0.5)`.
fill_mode: Pixels inside the patches are filled according to the given
mode (one of `{"constant", "gaussian_noise"}`).
- *constant*: Pixels are filled with the same constant value.
- *gaussian_noise*: Pixels are filled with random gaussian noise.
fill_value: a float represents the value to be filled inside the patches
when `fill_mode="constant"`.
seed: Integer. Used to create a random seed.
Sample usage:
```python
(images, labels), _ = tf.keras.datasets.cifar10.load_data()
random_cutout = keras_cv.layers.preprocessing.RandomCutout(0.5, 0.5)
augmented_images = random_cutout(images)
```
"""
def __init__(
self,
height_factor,
width_factor,
fill_mode="constant",
fill_value=0.0,
seed=None,
**kwargs,
):
super().__init__(seed=seed, **kwargs)
self.height_factor = preprocessing.parse_factor(
height_factor, param_name="height_factor", seed=seed
)
self.width_factor = preprocessing.parse_factor(
width_factor, param_name="width_factor", seed=seed
)
self.fill_mode = fill_mode
self.fill_value = fill_value
self.seed = seed
if fill_mode not in ["gaussian_noise", "constant"]:
raise ValueError(
'`fill_mode` should be "gaussian_noise" '
f'or "constant". Got `fill_mode`={fill_mode}'
)
def _parse_bounds(self, factor):
if isinstance(factor, (tuple, list)):
return factor[0], factor[1]
else:
return type(factor)(0), factor
def get_random_transformation(self, image=None, **kwargs):
center_x, center_y = self._compute_rectangle_position(image)
rectangle_height, rectangle_width = self._compute_rectangle_size(image)
return center_x, center_y, rectangle_height, rectangle_width
def augment_image(self, image, transformation=None, **kwargs):
"""Apply random cutout."""
inputs = tf.expand_dims(image, 0)
center_x, center_y, rectangle_height, rectangle_width = transformation
rectangle_fill = self._compute_rectangle_fill(inputs)
inputs = fill_utils.fill_rectangle(
inputs,
center_x,
center_y,
rectangle_width,
rectangle_height,
rectangle_fill,
)
return inputs[0]
def augment_label(self, label, transformation=None, **kwargs):
return label
def _compute_rectangle_position(self, inputs):
input_shape = tf.shape(inputs)
image_height, image_width = (
input_shape[0],
input_shape[1],
)
center_x = self._random_generator.random_uniform(
[1], 0, image_width, dtype=tf.int32
)
center_y = self._random_generator.random_uniform(
[1], 0, image_height, dtype=tf.int32
)
return center_x, center_y
def _compute_rectangle_size(self, inputs):
input_shape = tf.shape(inputs)
image_height, image_width = (
input_shape[0],
input_shape[1],
)
height = self.height_factor()
width = self.width_factor()
height = height * tf.cast(image_height, tf.float32)
width = width * tf.cast(image_width, tf.float32)
height = tf.cast(tf.math.ceil(height), tf.int32)
width = tf.cast(tf.math.ceil(width), tf.int32)
height = tf.minimum(height, image_height)
width = tf.minimum(width, image_width)
return tf.expand_dims(height, axis=0), tf.expand_dims(width, axis=0)
def _compute_rectangle_fill(self, inputs):
input_shape = tf.shape(inputs)
if self.fill_mode == "constant":
fill_value = tf.fill(input_shape, self.fill_value)
else:
# gaussian noise
fill_value = tf.random.normal(input_shape)
return fill_value
def get_config(self):
config = {
"height_factor": self.height_factor,
"width_factor": self.width_factor,
"fill_mode": self.fill_mode,
"fill_value": self.fill_value,
"seed": self.seed,
}
base_config = super().get_config()
return dict(list(base_config.items()) + list(config.items()))
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from keras_cv.layers import preprocessing
class RandomCutoutTest(tf.test.TestCase):
    """Unit tests for the `RandomCutout` preprocessing layer."""

    def _assert_cutout_applied(self, xs, fill_value):
        """Check each image contains both filled and untouched pixels.

        `xs[0]` started as all 2.0 and `xs[1]` as all 1.0, so a correct
        cutout leaves a mix of the fill value and the original value in
        each image.
        """
        self.assertTrue(tf.math.reduce_any(xs[0] == fill_value))
        self.assertTrue(tf.math.reduce_any(xs[0] == 2.0))
        self.assertTrue(tf.math.reduce_any(xs[1] == fill_value))
        self.assertTrue(tf.math.reduce_any(xs[1] == 1.0))

    def _run_test(self, height_factor, width_factor):
        """Build a 2-image batch, apply cutout, and verify pixels changed."""
        img_shape = (40, 40, 3)
        xs = tf.stack(
            [2 * tf.ones(img_shape), tf.ones(img_shape)],
            axis=0,
        )
        xs = tf.cast(xs, tf.float32)
        fill_value = 0.0
        layer = preprocessing.RandomCutout(
            height_factor=height_factor,
            width_factor=width_factor,
            fill_mode="constant",
            fill_value=fill_value,
            seed=1,
        )
        xs = layer(xs)
        # Some pixels should be replaced with fill value
        self._assert_cutout_applied(xs, fill_value)

    def test_return_shapes(self):
        xs = tf.ones((2, 512, 512, 3))
        layer = preprocessing.RandomCutout(height_factor=0.5, width_factor=0.5, seed=1)
        xs = layer(xs)
        self.assertEqual(xs.shape, [2, 512, 512, 3])

    def test_return_shapes_single_element(self):
        xs = tf.ones((512, 512, 3))
        layer = preprocessing.RandomCutout(height_factor=0.5, width_factor=0.5, seed=1)
        xs = layer(xs)
        self.assertEqual(xs.shape, [512, 512, 3])

    def test_random_cutout_single_float(self):
        self._run_test(0.5, 0.5)

    def test_random_cutout_tuple_float(self):
        self._run_test((0.4, 0.9), (0.1, 0.3))

    def test_random_cutout_fail_mix_bad_param_values(self):
        # A width factor far above 1.0 must be rejected.
        fn = lambda: self._run_test(0.5, (15.0, 30))
        self.assertRaises(ValueError, fn)

    def test_random_cutout_fail_reverse_lower_upper_float(self):
        # A lower bound greater than the upper bound must be rejected.
        fn = lambda: self._run_test(0.5, (0.9, 0.4))
        self.assertRaises(ValueError, fn)

    def test_random_cutout_call_results_one_channel(self):
        xs = tf.cast(
            tf.stack(
                [2 * tf.ones((40, 40, 1)), tf.ones((40, 40, 1))],
                axis=0,
            ),
            tf.float32,
        )
        patch_value = 0.0
        layer = preprocessing.RandomCutout(
            height_factor=0.5,
            width_factor=0.5,
            fill_mode="constant",
            fill_value=patch_value,
            seed=1,
        )
        xs = layer(xs)
        # Some pixels should be replaced with fill value
        self._assert_cutout_applied(xs, patch_value)

    def test_random_cutout_call_tiny_image(self):
        img_shape = (4, 4, 3)
        xs = tf.stack(
            [2 * tf.ones(img_shape), tf.ones(img_shape)],
            axis=0,
        )
        xs = tf.cast(xs, tf.float32)
        fill_value = 0.0
        layer = preprocessing.RandomCutout(
            height_factor=(0.4, 0.9),
            width_factor=(0.1, 0.3),
            fill_mode="constant",
            fill_value=fill_value,
            seed=1,
        )
        xs = layer(xs)
        # Some pixels should be replaced with fill value
        self._assert_cutout_applied(xs, fill_value)

    def test_in_tf_function(self):
        xs = tf.cast(
            tf.stack([2 * tf.ones((100, 100, 1)), tf.ones((100, 100, 1))], axis=0),
            tf.float32,
        )
        patch_value = 0.0
        layer = preprocessing.RandomCutout(
            height_factor=0.5,
            width_factor=0.5,
            fill_mode="constant",
            fill_value=patch_value,
            seed=1,
        )

        @tf.function
        def augment(x):
            return layer(x)

        xs = augment(xs)
        # Some pixels should be replaced with fill value
        self._assert_cutout_applied(xs, patch_value)
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from keras_cv import bounding_box
from keras_cv.layers.preprocessing.base_image_augmentation_layer import (
BaseImageAugmentationLayer,
)
# In order to support both unbatched and batched inputs, the horizontal
# and vertical axis is reverse indexed
H_AXIS = -3  # height axis of a `(..., height, width, channels)` tensor
W_AXIS = -2  # width axis of a `(..., height, width, channels)` tensor
# Defining modes for random flipping
HORIZONTAL = "horizontal"  # left-right flips only
VERTICAL = "vertical"  # top-bottom flips only
HORIZONTAL_AND_VERTICAL = "horizontal_and_vertical"  # both flip axes enabled
@tf.keras.utils.register_keras_serializable(package="keras_cv")
class RandomFlip(BaseImageAugmentationLayer):
    """A preprocessing layer which randomly flips images during training.

    This layer will flip the images horizontally and or vertically based on the
    `mode` attribute. During inference time, the output will be identical to
    input. Call the layer with `training=True` to flip the input.

    Input shape:
      3D (unbatched) or 4D (batched) tensor with shape:
      `(..., height, width, channels)`, in `"channels_last"` format.
    Output shape:
      3D (unbatched) or 4D (batched) tensor with shape:
      `(..., height, width, channels)`, in `"channels_last"` format.

    Arguments:
      mode: String indicating which flip mode to use. Can be `"horizontal"`,
        `"vertical"`, or `"horizontal_and_vertical"`. Defaults to
        `"horizontal"`. `"horizontal"` is a left-right flip and `"vertical"` is
        a top-bottom flip.
      seed: Integer. Used to create a random seed.
      bounding_box_format: format of the bounding boxes passed to the layer.
        Required whenever the layer is called with bounding boxes.
    """

    def __init__(self, mode=HORIZONTAL, seed=None, bounding_box_format=None, **kwargs):
        super().__init__(seed=seed, force_generator=True, **kwargs)
        self.mode = mode
        self.seed = seed
        # Translate the mode string into two independent boolean switches.
        if mode == HORIZONTAL:
            self.horizontal = True
            self.vertical = False
        elif mode == VERTICAL:
            self.horizontal = False
            self.vertical = True
        elif mode == HORIZONTAL_AND_VERTICAL:
            self.horizontal = True
            self.vertical = True
        else:
            raise ValueError(
                "RandomFlip layer {name} received an unknown mode="
                "{arg}".format(name=self.name, arg=mode)
            )
        self.auto_vectorize = True
        self.bounding_box_format = bounding_box_format

    def augment_label(self, label, transformation, **kwargs):
        """Labels are unaffected by flipping; return them unchanged."""
        return label

    def augment_image(self, image, transformation, **kwargs):
        """Flip one image according to the sampled transformation."""
        return RandomFlip._flip_image(image, transformation)

    def get_random_transformation(self, **kwargs):
        """Independently sample whether to flip along each enabled axis."""
        flip_horizontal = False
        flip_vertical = False
        if self.horizontal:
            flip_horizontal = self._random_generator.random_uniform(shape=[]) > 0.5
        if self.vertical:
            flip_vertical = self._random_generator.random_uniform(shape=[]) > 0.5
        return {
            "flip_horizontal": tf.cast(flip_horizontal, dtype=tf.bool),
            "flip_vertical": tf.cast(flip_vertical, dtype=tf.bool),
        }

    # Fix: these helpers are invoked as `RandomFlip._helper(...)` and take no
    # `self`; mark them `@staticmethod` so an instance-bound call would not
    # mis-bind the first argument.
    @staticmethod
    def _flip_image(image, transformation):
        """Apply the sampled horizontal/vertical flips to an image tensor."""
        flipped_output = tf.cond(
            transformation["flip_horizontal"],
            lambda: tf.image.flip_left_right(image),
            lambda: image,
        )
        flipped_output = tf.cond(
            transformation["flip_vertical"],
            lambda: tf.image.flip_up_down(flipped_output),
            lambda: flipped_output,
        )
        # tf.cond loses static shape information; restore it explicitly.
        flipped_output.set_shape(image.shape)
        return flipped_output

    @staticmethod
    def _flip_bounding_boxes_horizontal(bounding_boxes):
        """Mirror `rel_xyxy` boxes left-right: x -> 1 - x, swapping x1/x2."""
        x1, x2, x3, x4, rest = tf.split(
            bounding_boxes, [1, 1, 1, 1, bounding_boxes.shape[-1] - 4], axis=-1
        )
        output = tf.stack(
            [
                1 - x3,
                x2,
                1 - x1,
                x4,
                rest,
            ],
            axis=-1,
        )
        output = tf.squeeze(output, axis=1)
        return output

    @staticmethod
    def _flip_bounding_boxes_vertical(bounding_boxes):
        """Mirror `rel_xyxy` boxes top-bottom: y -> 1 - y, swapping y1/y2."""
        x1, x2, x3, x4, rest = tf.split(
            bounding_boxes, [1, 1, 1, 1, bounding_boxes.shape[-1] - 4], axis=-1
        )
        output = tf.stack(
            [
                x1,
                1 - x4,
                x3,
                1 - x2,
                rest,
            ],
            axis=-1,
        )
        output = tf.squeeze(output, axis=1)
        return output

    def augment_bounding_boxes(
        self, bounding_boxes, transformation=None, image=None, **kwargs
    ):
        """Flip bounding boxes consistently with the image transformation.

        Boxes are converted to `rel_xyxy`, flipped, clipped to the image,
        and converted back to the caller's format.

        Raises:
            ValueError: if no `bounding_box_format` was configured.
        """
        if self.bounding_box_format is None:
            raise ValueError(
                "`RandomFlip()` was called with bounding boxes,"
                "but no `bounding_box_format` was specified in the constructor."
                "Please specify a bounding box format in the constructor. i.e."
                "`RandomFlip(bounding_box_format='xyxy')`"
            )
        bounding_boxes = bounding_box.convert_format(
            bounding_boxes,
            source=self.bounding_box_format,
            target="rel_xyxy",
            images=image,
        )
        bounding_boxes = tf.cond(
            transformation["flip_horizontal"],
            lambda: RandomFlip._flip_bounding_boxes_horizontal(bounding_boxes),
            lambda: bounding_boxes,
        )
        bounding_boxes = tf.cond(
            transformation["flip_vertical"],
            lambda: RandomFlip._flip_bounding_boxes_vertical(bounding_boxes),
            lambda: bounding_boxes,
        )
        bounding_boxes = bounding_box.clip_to_image(
            bounding_boxes,
            bounding_box_format="rel_xyxy",
            images=image,
        )
        bounding_boxes = bounding_box.convert_format(
            bounding_boxes,
            source="rel_xyxy",
            target=self.bounding_box_format,
            dtype=self.compute_dtype,
            images=image,
        )
        return bounding_boxes

    def augment_segmentation_mask(
        self, segmentation_mask, transformation=None, **kwargs
    ):
        """Flip the segmentation mask exactly like the image."""
        return RandomFlip._flip_image(segmentation_mask, transformation)

    def compute_output_shape(self, input_shape):
        return input_shape

    def get_config(self):
        """Return the layer configuration for serialization."""
        config = {
            "mode": self.mode,
            "seed": self.seed,
            "bounding_box_format": self.bounding_box_format,
        }
        base_config = super().get_config()
        return dict(list(base_config.items()) + list(config.items()))
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import numpy as np
import tensorflow as tf
from absl.testing import parameterized
from keras_cv import bounding_box
from keras_cv.layers.preprocessing.random_flip import RandomFlip
class RandomFlipTest(tf.test.TestCase, parameterized.TestCase):
    """Unit tests for the `RandomFlip` preprocessing layer.

    Randomness is controlled by patching the layer's
    `_random_generator.random_uniform` with a fixed `side_effect` sequence;
    a sampled value > 0.5 triggers a flip, so `0.6` forces flips.
    """
    def test_horizontal_flip(self):
        # Horizontal flip mirrors the width axis (axis=2 of an NHWC batch).
        np.random.seed(1337)
        mock_random = [0.6, 0.6]
        inp = np.random.random((2, 5, 8, 3))
        expected_output = np.flip(inp, axis=2)
        layer = RandomFlip("horizontal")
        with unittest.mock.patch.object(
            layer._random_generator,
            "random_uniform",
            side_effect=mock_random,
        ):
            actual_output = layer(inp, training=True)
            self.assertAllClose(expected_output, actual_output)
    def test_vertical_flip(self):
        # Vertical flip mirrors the height axis (axis=1 of an NHWC batch).
        np.random.seed(1337)
        mock_random = [0.6, 0.6]
        inp = np.random.random((2, 5, 8, 3))
        expected_output = np.flip(inp, axis=1)
        layer = RandomFlip("vertical")
        with unittest.mock.patch.object(
            layer._random_generator,
            "random_uniform",
            side_effect=mock_random,
        ):
            actual_output = layer(inp, training=True)
            self.assertAllClose(expected_output, actual_output)
    def test_flip_both(self):
        np.random.seed(1337)
        mock_random = [0.6, 0.6, 0.6, 0.6]
        inp = np.random.random((2, 5, 8, 3))
        # Both flips applied: width axis then height axis.
        expected_output = np.flip(inp, axis=2)
        expected_output = np.flip(expected_output, axis=1)
        layer = RandomFlip("horizontal_and_vertical")
        with unittest.mock.patch.object(
            layer._random_generator,
            "random_uniform",
            side_effect=mock_random,
        ):
            actual_output = layer(inp, training=True)
            self.assertAllClose(expected_output, actual_output)
    def test_random_flip_inference(self):
        # With training=False the layer must behave as an identity.
        input_images = np.random.random((2, 5, 8, 3)).astype(np.float32)
        expected_output = input_images
        layer = RandomFlip()
        actual_output = layer(input_images, training=False)
        self.assertAllClose(expected_output, actual_output)
    def test_random_flip_default(self):
        # The default mode is "horizontal" (width-axis flip).
        input_images = np.random.random((2, 5, 8, 3)).astype(np.float32)
        expected_output = np.flip(input_images, axis=2)
        mock_random = [0.6, 0.6, 0.6, 0.6]
        layer = RandomFlip()
        with unittest.mock.patch.object(
            layer._random_generator,
            "random_uniform",
            side_effect=mock_random,
        ):
            actual_output = layer(input_images, training=True)
            self.assertAllClose(expected_output, actual_output)
    def test_config_with_custom_name(self):
        # A get_config/from_config round trip must preserve the layer name.
        layer = RandomFlip(name="image_preproc")
        config = layer.get_config()
        layer_1 = RandomFlip.from_config(config)
        self.assertEqual(layer_1.name, layer.name)
    def test_random_flip_unbatched_image(self):
        # For unbatched (3D) input the height axis is axis=0.
        input_image = np.random.random((4, 4, 1)).astype(np.float32)
        expected_output = np.flip(input_image, axis=0)
        mock_random = [0.6, 0.6, 0.6, 0.6]
        layer = RandomFlip("vertical")
        with unittest.mock.patch.object(
            layer._random_generator,
            "random_uniform",
            side_effect=mock_random,
        ):
            actual_output = layer(input_image, training=True)
            self.assertAllClose(expected_output, actual_output)
    def test_output_dtypes(self):
        inputs = np.array([[[1], [2]], [[3], [4]]], dtype="float64")
        # Default compute dtype is float32; an explicit dtype is honored.
        layer = RandomFlip()
        self.assertAllEqual(layer(inputs).dtype, "float32")
        layer = RandomFlip(dtype="uint8")
        self.assertAllEqual(layer(inputs).dtype, "uint8")
    def test_augment_bbox_batched_input(self):
        image = tf.zeros([20, 20, 3])
        bboxes = tf.convert_to_tensor(
            [[[0, 0, 10, 10], [4, 4, 12, 12]], [[4, 4, 12, 12], [0, 0, 10, 10]]]
        )
        bboxes = bounding_box.add_class_id(bboxes)
        input = {"images": [image, image], "bounding_boxes": bboxes}
        mock_random = [0.6, 0.6, 0.6, 0.6]
        layer = RandomFlip("horizontal_and_vertical", bounding_box_format="xyxy")
        with unittest.mock.patch.object(
            layer._random_generator,
            "random_uniform",
            side_effect=mock_random,
        ):
            output = layer(input, training=True)
        # Boxes flipped in both axes of a 20x20 image; trailing 0 is class id.
        expected_output = np.asarray(
            [
                [[10, 10, 20, 20, 0], [8, 8, 16, 16, 0]],
                [[8, 8, 16, 16, 0], [10, 10, 20, 20, 0]],
            ]
        )
        expected_output = np.reshape(expected_output, (2, 2, 5))
        self.assertAllClose(expected_output, output["bounding_boxes"])
    def test_augment_bbox_ragged(self):
        image = tf.zeros([2, 20, 20, 3])
        bboxes = tf.ragged.constant(
            [[[0, 0, 10, 10], [4, 4, 12, 12]], [[0, 0, 10, 10]]], dtype=tf.float32
        )
        bboxes = bounding_box.add_class_id(bboxes)
        input = {"images": image, "bounding_boxes": bboxes}
        mock_random = [0.6, 0.6, 0.6]
        layer = RandomFlip("horizontal_and_vertical", bounding_box_format="xyxy")
        with unittest.mock.patch.object(
            layer._random_generator,
            "random_uniform",
            side_effect=mock_random,
        ):
            output = layer(input, training=True)
        # Ragged rows keep their lengths after augmentation.
        expected_output = tf.ragged.constant(
            [
                [[10, 10, 20, 20, 0], [8, 8, 16, 16, 0]],
                [[10, 10, 20, 20, 0]],
            ],
            dtype=tf.float32,
            ragged_rank=1,
        )
        self.assertAllClose(expected_output, output["bounding_boxes"])
    def test_augment_segmentation_mask(self):
        np.random.seed(1337)
        image = np.random.random((1, 20, 20, 3)).astype(np.float32)
        mask = np.random.randint(2, size=(1, 20, 20, 1)).astype(np.float32)
        input = {"images": image, "segmentation_masks": mask}
        # Flip both vertically and horizontally
        mock_random = [0.6, 0.6]
        layer = RandomFlip("horizontal_and_vertical")
        with unittest.mock.patch.object(
            layer._random_generator,
            "random_uniform",
            side_effect=mock_random,
        ):
            output = layer(input, training=True)
        # The mask must be flipped exactly like the image.
        expected_mask = np.flip(np.flip(mask, axis=1), axis=2)
        self.assertAllClose(expected_mask, output["segmentation_masks"])
    def test_ragged_bounding_boxes(self):
        # Smoke test: ragged bounding boxes should not raise.
        input_image = np.random.random((2, 512, 512, 3)).astype(np.float32)
        bboxes = tf.ragged.constant(
            [
                [[200, 200, 400, 400], [100, 100, 300, 300]],
                [[200, 200, 400, 400]],
            ],
            dtype=tf.float32,
        )
        bboxes = bounding_box.add_class_id(bboxes)
        input = {"images": input_image, "bounding_boxes": bboxes}
        layer = RandomFlip(bounding_box_format="xyxy")
        _ = layer(input)
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from keras_cv.layers.preprocessing.base_image_augmentation_layer import (
BaseImageAugmentationLayer,
)
from keras_cv.utils import preprocessing
@tf.keras.utils.register_keras_serializable(package="keras_cv")
class RandomGaussianBlur(BaseImageAugmentationLayer):
    """Applies a Gaussian Blur with random strength to an image.

    Args:
        kernel_size: int, 2 element tuple or 2 element list. x and y dimensions
            for the kernel used. If tuple or list, first element is used for the
            x dimension and second element is used for y dimension. If int,
            kernel will be squared.
        factor: A tuple of two floats, a single float or a
            `keras_cv.FactorSampler`. `factor` controls the extent to which the
            image is blurred. Mathematically, `factor` represents the `sigma`
            value in a gaussian blur. `factor=0.0` makes this layer perform a
            no-op operation, and high values make the blur stronger. In order
            to ensure the value is always the same, please pass a tuple with
            two identical floats: `(0.5, 0.5)`.
    """

    def __init__(self, kernel_size, factor, **kwargs):
        super().__init__(**kwargs)
        self.factor = preprocessing.parse_factor(
            factor, min_value=0.0, max_value=None, param_name="factor"
        )
        self.kernel_size = kernel_size
        if isinstance(kernel_size, (tuple, list)):
            # Separate kernel extents for the x and y dimensions.
            self.x = kernel_size[0]
            self.y = kernel_size[1]
        elif isinstance(kernel_size, int):
            # A single int produces a square kernel.
            self.x = self.y = kernel_size
        else:
            raise ValueError(
                "`kernel_size` must be list, tuple or integer "
                ", got {} ".format(type(self.kernel_size))
            )

    def get_random_transformation(self, **kwargs):
        """Sample a sigma and build the separable 1-D blur kernels."""
        sigma = self.factor()
        vertical = RandomGaussianBlur.get_kernel(sigma, self.y)
        horizontal = RandomGaussianBlur.get_kernel(sigma, self.x)
        vertical = tf.reshape(vertical, [self.y, 1, 1, 1])
        horizontal = tf.reshape(horizontal, [1, self.x, 1, 1])
        return (vertical, horizontal)

    def augment_image(self, image, transformation=None, **kwargs):
        """Blur one image with two depthwise 1-D convolutions."""
        blur_v, blur_h = transformation
        batched = tf.expand_dims(image, axis=0)
        channels = tf.shape(batched)[-1]
        # Replicate the single-channel kernels across every image channel.
        blur_h = tf.tile(blur_h, [1, 1, channels, 1])
        blur_v = tf.tile(blur_v, [1, 1, channels, 1])
        result = tf.nn.depthwise_conv2d(
            batched, blur_h, strides=[1, 1, 1, 1], padding="SAME"
        )
        result = tf.nn.depthwise_conv2d(
            result, blur_v, strides=[1, 1, 1, 1], padding="SAME"
        )
        return tf.squeeze(result, axis=0)

    def augment_bounding_boxes(self, bounding_boxes, **kwargs):
        """Blurring does not move pixels; boxes pass through unchanged."""
        return bounding_boxes

    def augment_label(self, label, transformation=None, **kwargs):
        """Labels are unaffected by blurring."""
        return label

    def augment_segmentation_mask(self, segmentation_mask, transformation, **kwargs):
        """Segmentation masks are returned unblurred."""
        return segmentation_mask

    @staticmethod
    def get_kernel(factor, filter_size):
        """Return a normalized 1-D Gaussian kernel of length `filter_size`.

        NOTE(review): a `factor` (sigma) of exactly 0 divides by zero here
        and produces NaNs — confirm callers never pass 0 despite the class
        docstring describing 0 as a no-op.
        """
        offsets = tf.cast(
            tf.range(-filter_size // 2 + 1, filter_size // 2 + 1), dtype=tf.float32
        )
        sigma = tf.cast(factor, dtype=tf.float32)
        kernel = tf.exp(-tf.pow(offsets, 2.0) / (2.0 * tf.pow(sigma, 2.0)))
        return kernel / tf.reduce_sum(kernel)

    def get_config(self):
        """Return the layer configuration for serialization."""
        cfg = super().get_config()
        cfg.update({"factor": self.factor, "kernel_size": self.kernel_size})
        return cfg
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment