Commit 1a3c83d6 authored by zhanggzh's avatar zhanggzh
Browse files

增加keras-cv模型及训练代码

parent 9846958a
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from tensorflow.keras.__internal__.layers import BaseRandomLayer
from keras_cv.utils import conv_utils
@tf.keras.utils.register_keras_serializable(package="keras_cv")
class DropBlock2D(BaseRandomLayer):
    """Applies DropBlock regularization to input features.

    DropBlock is a form of structured dropout, where units in a contiguous
    region of a feature map are dropped together. DropBlock works better than
    dropout on convolutional layers due to the fact that activation units in
    convolutional layers are spatially correlated.

    It is advised to use DropBlock after activation in
    Conv -> BatchNorm -> Activation blocks in further layers of the network.
    For example, the paper mentions using DropBlock in the 3rd and 4th group
    of ResNet blocks.

    Reference:
        - [DropBlock: A regularization method for convolutional networks](
            https://arxiv.org/abs/1810.12890
        )

    Args:
        rate: float. Probability of dropping a unit. Must be between 0 and 1.
            For best results, the value should be between 0.05-0.25.
        block_size: integer, or tuple of integers. The size of the block to be
            dropped. In case of an integer a square block will be dropped. In
            case of a tuple, the numbers are the block's (height, width).
            Must be bigger than 0, and should not be bigger than the input
            feature map size. The paper authors use `block_size=7` for input
            features of size `14x14xchannels`. If this value is greater or
            equal to the input feature map size you will encounter `nan`
            values.
        seed: integer. To use as random seed.
        name: string. The name of the layer.

    Usage:
    DropBlock2D can be used inside a `tf.keras.Model`:
    ```python
    # (...)
    x = Conv2D(32, (1, 1))(x)
    x = BatchNormalization()(x)
    x = ReLU()(x)
    x = DropBlock2D(0.1, block_size=7)(x)
    # (...)
    ```
    When used directly, the layer will zero-out some inputs in a contiguous
    region and normalize the remaining values:
    ```python
    features = tf.random.stateless_uniform((1, 4, 4, 1), seed=[0, 1])
    layer = DropBlock2D(0.1, block_size=2, seed=1234)
    output = layer(features, training=True)
    # A 2x2 block is zeroed out and the surviving values are slightly
    # scaled up to account for the missing activations. The number of
    # dropped blocks varies per channel - sometimes none, sometimes
    # several overlapping ones.
    ```
    """

    def __init__(
        self,
        rate,
        block_size,
        seed=None,
        **kwargs,
    ):
        super().__init__(seed=seed, **kwargs)
        if not 0.0 <= rate <= 1.0:
            raise ValueError(
                f"rate must be a number between 0 and 1. " f"Received: {rate}"
            )
        self._rate = rate
        (
            self._dropblock_height,
            self._dropblock_width,
        ) = conv_utils.normalize_tuple(
            value=block_size, n=2, name="block_size", allow_zero=False
        )
        self.seed = seed

    def call(self, x, training=None):
        # Identity in inference mode or when dropping is disabled.
        if not training or self._rate == 0.0:
            return x

        _, height, width, _ = tf.split(tf.shape(x), 4)

        # Unnest scalar values
        height = tf.squeeze(height)
        width = tf.squeeze(width)

        # Clamp the block to the feature-map size to avoid degenerate masks.
        dropblock_height = tf.math.minimum(self._dropblock_height, height)
        dropblock_width = tf.math.minimum(self._dropblock_width, width)

        # gamma is the per-position seed probability chosen so that the
        # expected fraction of dropped units matches `rate` (eq. 1 in the
        # paper).
        gamma = (
            self._rate
            * tf.cast(width * height, dtype=tf.float32)
            / tf.cast(dropblock_height * dropblock_width, dtype=tf.float32)
            / tf.cast(
                (width - self._dropblock_width + 1)
                * (height - self._dropblock_height + 1),
                tf.float32,
            )
        )

        # Forces the block to be inside the feature map.
        w_i, h_i = tf.meshgrid(tf.range(width), tf.range(height))
        valid_block = tf.logical_and(
            tf.logical_and(
                w_i >= int(dropblock_width // 2),
                w_i < width - (dropblock_width - 1) // 2,
            ),
            tf.logical_and(
                h_i >= int(dropblock_height // 2),
                # Fix: the vertical coordinate must be bounded by `height`,
                # not `width`; the original comparison corrupted the mask
                # for non-square feature maps.
                h_i < height - (dropblock_height - 1) // 2,
            ),
        )

        valid_block = tf.reshape(valid_block, [1, height, width, 1])

        random_noise = self._random_generator.random_uniform(
            tf.shape(x), dtype=tf.float32
        )
        valid_block = tf.cast(valid_block, dtype=tf.float32)
        seed_keep_rate = tf.cast(1 - gamma, dtype=tf.float32)
        # A position becomes a drop-seed when it is a valid block center AND
        # its noise falls below gamma.
        block_pattern = (1 - valid_block + seed_keep_rate + random_noise) >= 1
        block_pattern = tf.cast(block_pattern, dtype=tf.float32)

        window_size = [1, self._dropblock_height, self._dropblock_width, 1]

        # Double negative and max_pool is essentially min_pooling: expand
        # each seed into a full zeroed block.
        block_pattern = -tf.nn.max_pool(
            -block_pattern,
            ksize=window_size,
            strides=[1, 1, 1, 1],
            padding="SAME",
        )

        # Slightly scale the values, to account for magnitude change
        percent_ones = tf.cast(tf.reduce_sum(block_pattern), tf.float32) / tf.cast(
            tf.size(block_pattern), tf.float32
        )
        return x / tf.cast(percent_ones, x.dtype) * tf.cast(block_pattern, x.dtype)

    def get_config(self):
        config = {
            "rate": self._rate,
            "block_size": (self._dropblock_height, self._dropblock_width),
            "seed": self.seed,
        }
        base_config = super().get_config()
        return dict(list(base_config.items()) + list(config.items()))
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from keras_cv.layers.regularization.dropblock_2d import DropBlock2D
class DropBlock2DTest(tf.test.TestCase):
    """Unit tests for the DropBlock2D regularization layer."""

    FEATURE_SHAPE = (1, 14, 14, 256)  # Shape of ResNet block group 3
    rng = tf.random.Generator.from_non_deterministic_state()

    def test_layer_not_created_with_invalid_block_size(self):
        for bad_size in [0, -10, (5, -2), (0, 7), (1, 2, 3, 4)]:
            with self.assertRaises(ValueError):
                DropBlock2D(block_size=bad_size, rate=0.1)

    def test_layer_not_created_with_invalid_rate(self):
        for bad_rate in (1.1, -0.1):
            with self.assertRaises(ValueError):
                DropBlock2D(rate=bad_rate, block_size=7)

    def test_input_unchanged_in_eval_mode(self):
        features = self.rng.uniform(shape=self.FEATURE_SHAPE)
        layer = DropBlock2D(rate=0.1, block_size=7)
        self.assertAllClose(features, layer(features, training=False))

    def test_input_unchanged_with_rate_equal_to_zero(self):
        features = self.rng.uniform(shape=self.FEATURE_SHAPE)
        layer = DropBlock2D(rate=0.0, block_size=7)
        self.assertAllClose(features, layer(features, training=True))

    def test_input_gets_partially_zeroed_out_in_train_mode(self):
        features = self.rng.uniform(shape=self.FEATURE_SHAPE)
        layer = DropBlock2D(rate=0.1, block_size=7)
        self._assert_training_adds_zeros(layer, features)

    def test_batched_input_gets_partially_zeroed_out_in_train_mode(self):
        features = self.rng.uniform(shape=(4, *self.FEATURE_SHAPE[1:]))
        layer = DropBlock2D(rate=0.1, block_size=7)
        self._assert_training_adds_zeros(layer, features)

    def test_input_gets_partially_zeroed_out_with_non_square_block_size(self):
        features = self.rng.uniform(shape=self.FEATURE_SHAPE)
        layer = DropBlock2D(rate=0.1, block_size=(7, 10))
        self._assert_training_adds_zeros(layer, features)

    def test_works_with_xla(self):
        features = self.rng.uniform(shape=self.FEATURE_SHAPE)
        layer = DropBlock2D(rate=0.1, block_size=7)

        @tf.function(jit_compile=True)
        def apply(x):
            return layer(x, training=True)

        apply(features)

    def _assert_training_adds_zeros(self, layer, features):
        # In training mode the output must contain strictly more zeros
        # than the (random, hence almost surely zero-free) input.
        output = layer(features, training=True)
        self.assertGreater(
            self._count_zeros(output), self._count_zeros(features)
        )

    @staticmethod
    def _count_zeros(tensor: tf.Tensor) -> tf.Tensor:
        return tf.size(tensor) - tf.math.count_nonzero(tensor, dtype=tf.int32)
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from tensorflow.keras import layers
@tf.keras.utils.register_keras_serializable(package="keras_cv")
class SqueezeAndExcite2D(layers.Layer):
    """Implements a Squeeze-and-Excite block.

    See [Squeeze-and-Excitation Networks](https://arxiv.org/pdf/1709.01507.pdf).

    The block computes content-aware, channel-wise weights: feature maps are
    squeezed to one value per channel via global average pooling, passed
    through two 1x1 convolutions acting as fully-connected layers (the first
    shrinks the channel count by `ratio`, the second restores it), and the
    resulting per-channel gates are multiplied back onto the input.

    Args:
        filters: Number of input and output filters. The number of input and
            output filters is same.
        ratio: Ratio for bottleneck filters. Number of bottleneck filters =
            filters * ratio. Defaults to 0.25.
        squeeze_activation: (Optional) String, callable (or
            tf.keras.layers.Layer) or tf.keras.activations.Activation instance
            denoting activation to be applied after squeeze convolution.
            Defaults to `relu`.
        excite_activation: (Optional) String, callable (or
            tf.keras.layers.Layer) or tf.keras.activations.Activation instance
            denoting activation to be applied after excite convolution.
            Defaults to `sigmoid`.

    Usage:
    ```python
    # (...)
    input = tf.ones((1, 5, 5, 16), dtype=tf.float32)
    x = tf.keras.layers.Conv2D(16, (3, 3))(input)
    output = keras_cv.layers.SqueezeAndExciteBlock(16)(x)
    # (...)
    ```
    """

    def __init__(
        self,
        filters,
        ratio=0.25,
        squeeze_activation="relu",
        excite_activation="sigmoid",
        **kwargs,
    ):
        super().__init__(**kwargs)
        self.filters = filters

        if ratio <= 0.0 or ratio >= 1.0:
            raise ValueError(f"`ratio` should be a float between 0 and 1. Got {ratio}")

        if filters <= 0 or not isinstance(filters, int):
            raise ValueError(f"`filters` should be a positive integer. Got {filters}")

        self.ratio = ratio
        self.bottleneck_filters = int(self.filters * self.ratio)

        self.squeeze_activation = squeeze_activation
        self.excite_activation = excite_activation

        # Squeeze path: spatial dims collapse to 1x1 so the convs below act
        # as fully-connected layers over channels.
        self.global_average_pool = layers.GlobalAveragePooling2D(keepdims=True)
        self.squeeze_conv = layers.Conv2D(
            self.bottleneck_filters,
            (1, 1),
            activation=self.squeeze_activation,
        )
        self.excite_conv = layers.Conv2D(
            self.filters, (1, 1), activation=self.excite_activation
        )

    def call(self, inputs, training=True):
        # (batch, h, w, filters) -> (batch, 1, 1, filters)
        pooled = self.global_average_pool(inputs)
        # Bottleneck then restore: (batch, 1, 1, filters)
        gate = self.excite_conv(self.squeeze_conv(pooled))
        # Broadcast the per-channel gates over the spatial dimensions.
        return tf.math.multiply(gate, inputs)

    def get_config(self):
        config = {
            "filters": self.filters,
            "ratio": self.ratio,
            "squeeze_activation": self.squeeze_activation,
            "excite_activation": self.excite_activation,
        }
        base_config = super().get_config()
        return dict(list(base_config.items()) + list(config.items()))
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from keras_cv.layers import SqueezeAndExcite2D
class SqueezeAndExcite2DTest(tf.test.TestCase):
    """Unit tests for the SqueezeAndExcite2D layer."""

    def test_maintains_shape(self):
        input_shape = (1, 4, 4, 8)
        inputs = tf.random.uniform(input_shape)

        layer = SqueezeAndExcite2D(8, ratio=0.25)
        outputs = layer(inputs)
        # `assertEquals` is a deprecated unittest alias; use `assertEqual`.
        self.assertEqual(inputs.shape, outputs.shape)

    def test_custom_activation(self):
        def custom_activation(x):
            return x * tf.random.uniform(x.shape, seed=42)

        input_shape = (1, 4, 4, 8)
        inputs = tf.random.uniform(input_shape)

        layer = SqueezeAndExcite2D(
            8,
            ratio=0.25,
            squeeze_activation=custom_activation,
            excite_activation=custom_activation,
        )
        outputs = layer(inputs)
        self.assertEqual(inputs.shape, outputs.shape)

    def test_raises_invalid_ratio_error(self):
        with self.assertRaisesRegex(
            ValueError, "`ratio` should be a float" " between 0 and 1. Got (.*?)"
        ):
            _ = SqueezeAndExcite2D(8, ratio=1.1)

    def test_raises_invalid_filters_error(self):
        with self.assertRaisesRegex(
            ValueError, "`filters` should be a positive" " integer. Got (.*?)"
        ):
            _ = SqueezeAndExcite2D(-8.7)
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
@tf.keras.utils.register_keras_serializable(package="keras_cv")
class StochasticDepth(tf.keras.layers.Layer):
    """Implements the Stochastic Depth layer.

    It randomly drops residual branches in residual architectures. It is used
    as a drop-in replacement for the addition operation. Note that this layer
    DOES NOT drop a residual block across individual samples but across the
    entire batch.

    Reference:
        - [Deep Networks with Stochastic Depth](https://arxiv.org/abs/1603.09382).
        - Docstring taken from [stochastic_depth.py](https://tinyurl.com/mr3y2af6)

    Args:
        rate: float, the probability of the residual branch being dropped.

    Usage:
    `StochasticDepth` can be used in a residual network as follows:
    ```python
    # (...)
    input = tf.ones((1, 3, 3, 1), dtype=tf.float32)
    residual = tf.keras.layers.Conv2D(1, 1)(input)
    output = keras_cv.layers.StochasticDepth()([input, residual])
    # (...)
    ```

    At train time, StochasticDepth returns:
    $$
    x[0] + b_l * x[1],
    $$
    where $b_l$ is a random Bernoulli variable with survival probability
    $P(b_l = 1) = 1 - rate$ (the code samples with `p=1 - rate`, i.e. the
    branch is *dropped* with probability `rate`). At test time,
    StochasticDepth rescales the activations of the residual branch based on
    the survival probability:
    $$
    x[0] + (1 - rate) * x[1]
    $$
    """

    def __init__(self, rate=0.5, **kwargs):
        super().__init__(**kwargs)
        self.rate = rate
        # Probability that the residual branch is kept during training, and
        # the scale applied to it at inference time.
        self.survival_probability = 1.0 - self.rate

    def call(self, x, training=None):
        """Combines `x = [shortcut, residual]` with stochastic branch drop.

        Args:
            x: list of exactly two tensors: the shortcut and the residual.

        Returns:
            The combined tensor.

        Raises:
            ValueError: if `x` does not have exactly two elements.
        """
        if len(x) != 2:
            raise ValueError(
                f"""Input must be a list of length 2. """
                f"""Got input with length={len(x)}."""
            )

        shortcut, residual = x

        if training:
            # Sample the keep/drop coin only in training mode, so that
            # inference neither wastes the computation nor advances the
            # global RNG state.
            b_l = tf.keras.backend.random_bernoulli(
                [], p=self.survival_probability
            )
            return shortcut + b_l * residual
        else:
            return shortcut + self.survival_probability * residual

    def get_config(self):
        config = {"rate": self.rate}
        base_config = super().get_config()
        return dict(list(base_config.items()) + list(config.items()))
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from keras_cv.layers.regularization.stochastic_depth import StochasticDepth
class StochasticDepthTest(tf.test.TestCase):
    """Unit tests for the StochasticDepth layer."""

    FEATURE_SHAPE = (1, 14, 14, 256)

    def test_inputs_have_two_elements(self):
        inputs = tf.random.uniform(self.FEATURE_SHAPE, 0, 1)
        inputs = [inputs, inputs, inputs]

        with self.assertRaisesRegex(
            ValueError, "Input must be a list of length 2. " "Got input with length=3."
        ):
            StochasticDepth()(inputs)

    def test_eval_mode(self):
        inputs = tf.random.uniform(self.FEATURE_SHAPE, 0, 1)
        inputs = [inputs, inputs]
        rate = 0.5

        outputs = StochasticDepth(rate=rate)(inputs, training=False)

        # With shortcut == residual, eval output is x + (1 - rate) * x,
        # i.e. x * (2 - rate). The previous expected value `1 + rate` was
        # only correct by coincidence at rate == 0.5.
        self.assertAllClose(inputs[0] * (2 - rate), outputs)

    def test_training_mode(self):
        inputs = tf.random.uniform(self.FEATURE_SHAPE, 0, 1)
        inputs = [inputs, inputs]
        rate = 0.5

        outputs = StochasticDepth(rate=rate)(inputs, training=True)

        # The branch is either fully dropped (sum == sum(x)) or fully kept
        # (sum == 2 * sum(x)).
        outputs_sum = tf.math.reduce_sum(outputs)
        inputs_sum = tf.math.reduce_sum(inputs[0])

        self.assertIn(outputs_sum, [inputs_sum, inputs_sum * 2])
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import inspect
import tensorflow as tf
from absl.testing import parameterized
from keras_cv import core
from keras_cv import layers as cv_layers
from keras_cv.models.segmentation.__internal__ import SegmentationHead
def exhaustive_compare(obj1, obj2):
    """Recursively compare two config values for structural equality.

    Handles lists/tuples (element-wise), dicts (via `config_equals`), Keras
    objects that support `get_config`, and plain functions (via Keras
    serialization); everything else falls back to `==`.
    """
    classes_supporting_get_config = (
        core.FactorSampler,
        tf.keras.layers.Layer,
        cv_layers.BaseImageAugmentationLayer,
    )

    # If both objects are either one of list or tuple then their individual
    # elements also must be checked exhaustively.
    if isinstance(obj1, (list, tuple)) and isinstance(obj2, (list, tuple)):
        # Length based checks.
        if len(obj1) != len(obj2):
            return False
        # Fix: compare *every* element pair. The original returned inside
        # the loop, so only the first pair was ever checked.
        return all(exhaustive_compare(v1, v2) for v1, v2 in zip(obj1, obj2))

    # If the objects are dicts then we simply call the `config_equals`
    # function which supports dicts.
    elif isinstance(obj1, dict) and isinstance(obj2, dict):
        # Fix: compare the actual arguments; the original referenced
        # undefined locals `v1`/`v2`, raising NameError on any dict input.
        return config_equals(obj1, obj2)

    # If both objects are subclasses of Keras classes that support
    # `get_config` method, then we compare their individual attributes using
    # `config_equals`.
    elif isinstance(obj1, classes_supporting_get_config) and isinstance(
        obj2, classes_supporting_get_config
    ):
        return config_equals(obj1.get_config(), obj2.get_config())

    # Following checks are if either of the objects are _functions_, not
    # methods or callables, since Layers and other unforeseen objects may also
    # fit into this category. Specifically for Keras activation functions.
    elif inspect.isfunction(obj1) and inspect.isfunction(obj2):
        return tf.keras.utils.serialize_keras_object(
            obj1
        ) == tf.keras.utils.serialize_keras_object(obj2)
    elif inspect.isfunction(obj1) and not inspect.isfunction(obj2):
        return tf.keras.utils.serialize_keras_object(obj1) == obj2
    elif inspect.isfunction(obj2) and not inspect.isfunction(obj1):
        return obj1 == tf.keras.utils.serialize_keras_object(obj2)

    # Lastly check for primitive datatypes and objects that don't need
    # additional preprocessing.
    else:
        return obj1 == obj2
def config_equals(config1, config2):
    """Return True iff two config dicts have the same keys and exhaustively
    equal values (compared via `exhaustive_compare`)."""
    # Key sets must match exactly before any value comparison.
    if config1.keys() != config2.keys():
        return False
    # Every entry must compare equal, recursing into nested structures.
    return all(
        exhaustive_compare(config1[key], config2[key]) for key in config1
    )
class SerializationTest(tf.test.TestCase, parameterized.TestCase):
    """Checks that every listed layer serializes and round-trips through a
    Keras model config without losing any of its init parameters."""

    @parameterized.named_parameters(
        ("Augmenter", cv_layers.Augmenter, {"layers": [cv_layers.Grayscale()]}),
        ("AutoContrast", cv_layers.AutoContrast, {"value_range": (0, 255)}),
        ("ChannelShuffle", cv_layers.ChannelShuffle, {"seed": 1}),
        ("CutMix", cv_layers.CutMix, {"seed": 1}),
        ("Equalization", cv_layers.Equalization, {"value_range": (0, 255)}),
        ("Grayscale", cv_layers.Grayscale, {}),
        ("GridMask", cv_layers.GridMask, {"seed": 1}),
        ("MixUp", cv_layers.MixUp, {"seed": 1}),
        ("Mosaic", cv_layers.Mosaic, {"seed": 1}),
        (
            "RandomChannelShift",
            cv_layers.RandomChannelShift,
            {"value_range": (0, 255), "factor": 0.5},
        ),
        (
            "Posterization",
            cv_layers.Posterization,
            {"bits": 3, "value_range": (0, 255)},
        ),
        (
            "RandomColorDegeneration",
            cv_layers.RandomColorDegeneration,
            {"factor": 0.5, "seed": 1},
        ),
        (
            "RandomCutout",
            cv_layers.RandomCutout,
            {"height_factor": 0.2, "width_factor": 0.2, "seed": 1},
        ),
        (
            "RandomHue",
            cv_layers.RandomHue,
            {"factor": 0.5, "value_range": (0, 255), "seed": 1},
        ),
        (
            "RandomSaturation",
            cv_layers.RandomSaturation,
            {"factor": 0.5, "seed": 1},
        ),
        (
            "RandomSharpness",
            cv_layers.RandomSharpness,
            {"factor": 0.5, "value_range": (0, 255), "seed": 1},
        ),
        (
            "RandomShear",
            cv_layers.RandomShear,
            # Fix: the dict previously contained the duplicate key
            # `"x_factor"` twice, so `y_factor` was never exercised.
            {"x_factor": 0.3, "y_factor": 0.3, "seed": 1},
        ),
        ("Solarization", cv_layers.Solarization, {"value_range": (0, 255)}),
        (
            "RandAugment",
            cv_layers.RandAugment,
            {
                "value_range": (0, 255),
                "magnitude": 0.5,
                "augmentations_per_image": 3,
                "rate": 0.3,
                "magnitude_stddev": 0.1,
            },
        ),
        (
            "RandomAugmentationPipeline",
            cv_layers.RandomAugmentationPipeline,
            {
                "layers": [
                    cv_layers.RandomSaturation(factor=0.5),
                    cv_layers.RandomColorDegeneration(factor=0.5),
                ],
                "augmentations_per_image": 1,
                "rate": 1.0,
            },
        ),
        (
            "RandomChoice",
            cv_layers.RandomChoice,
            {"layers": [], "seed": 3, "auto_vectorize": False},
        ),
        (
            "RandomColorJitter",
            cv_layers.RandomColorJitter,
            {
                "value_range": (0, 255),
                "brightness_factor": (-0.2, 0.5),
                "contrast_factor": (0.5, 0.9),
                "saturation_factor": (0.5, 0.9),
                "hue_factor": (0.5, 0.9),
                "seed": 1,
            },
        ),
        (
            "RandomCropAndResize",
            cv_layers.RandomCropAndResize,
            {
                "target_size": (224, 224),
                "crop_area_factor": (0.8, 1.0),
                "aspect_ratio_factor": (3 / 4, 4 / 3),
            },
        ),
        (
            "RandomlyZoomedCrop",
            cv_layers.RandomlyZoomedCrop,
            {
                "height": 224,
                "width": 224,
                "zoom_factor": (0.8, 1.0),
                "aspect_ratio_factor": (3 / 4, 4 / 3),
            },
        ),
        (
            "DropBlock2D",
            cv_layers.DropBlock2D,
            {"rate": 0.1, "block_size": (7, 7), "seed": 1234},
        ),
        (
            "StochasticDepth",
            cv_layers.StochasticDepth,
            {"rate": 0.1},
        ),
        (
            "SqueezeAndExcite2D",
            cv_layers.SqueezeAndExcite2D,
            {
                "filters": 16,
                "ratio": 0.25,
                "squeeze_activation": tf.keras.layers.ReLU(),
                "excite_activation": tf.keras.activations.relu,
            },
        ),
        (
            "DropPath",
            cv_layers.DropPath,
            {
                "rate": 0.2,
            },
        ),
        (
            "MaybeApply",
            cv_layers.MaybeApply,
            {
                "rate": 0.5,
                "layer": None,
                "seed": 1234,
            },
        ),
        (
            "RandomJpegQuality",
            cv_layers.RandomJpegQuality,
            {"factor": (75, 100)},
        ),
        (
            "AugMix",
            cv_layers.AugMix,
            {
                "value_range": (0, 255),
                "severity": 0.3,
                "num_chains": 3,
                "chain_depth": -1,
                "alpha": 1.0,
                "seed": 1,
            },
        ),
        (
            "NonMaxSuppression",
            cv_layers.NonMaxSuppression,
            {
                "classes": 5,
                "bounding_box_format": "xyxy",
                "confidence_threshold": 0.5,
                "iou_threshold": 0.5,
                "max_detections": 100,
                "max_detections_per_class": 100,
            },
        ),
        (
            "RandomRotation",
            cv_layers.RandomRotation,
            {
                "factor": 0.5,
            },
        ),
        (
            "SegmentationHead",
            SegmentationHead,
            {
                "classes": 11,
                "convs": 3,
                "filters": 256,
                "activations": tf.keras.activations.relu,
                "output_scale_factor": None,
            },
        ),
        (
            "SpatialPyramidPooling",
            cv_layers.SpatialPyramidPooling,
            {
                "level": 3,
                "dilation_rates": [6, 12, 18],
                "num_channels": 256,
                "activation": "relu",
                "dropout": 0.1,
            },
        ),
    )
    def test_layer_serialization(self, layer_cls, init_args):
        """Round-trip a layer through a Sequential model config and compare
        the reconstructed layer's config with the original's."""
        layer = layer_cls(**init_args)
        config = layer.get_config()
        self.assertAllInitParametersAreInConfig(layer_cls, config)

        model = tf.keras.models.Sequential([layer])
        model_config = model.get_config()

        # `from_config` is a classmethod; call it on the class instead of
        # constructing a throwaway instance first.
        reconstructed_model = tf.keras.Sequential.from_config(model_config)
        reconstructed_layer = reconstructed_model.layers[0]

        self.assertTrue(
            config_equals(layer.get_config(), reconstructed_layer.get_config())
        )

    def assertAllInitParametersAreInConfig(self, layer_cls, config):
        """Assert every named `__init__` parameter appears in `config`."""
        excluded_name = ["args", "kwargs", "*"]
        parameter_names = {
            v
            for v in inspect.signature(layer_cls).parameters.keys()
            if v not in excluded_name
        }

        intersection_with_config = {v for v in config.keys() if v in parameter_names}

        self.assertSetEqual(parameter_names, intersection_with_config)
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Any
from typing import List
from typing import Mapping
import tensorflow as tf
@tf.keras.utils.register_keras_serializable(package="keras_cv")
class SpatialPyramidPooling(tf.keras.layers.Layer):
    """Implements the Atrous Spatial Pyramid Pooling.

    References:
        [Rethinking Atrous Convolution for Semantic Image Segmentation](
            https://arxiv.org/pdf/1706.05587.pdf)
        [Encoder-Decoder with Atrous Separable Convolution for Semantic Image
            Segmentation](https://arxiv.org/pdf/1802.02611.pdf)

    Usage:
    ```python
    inp = tf.keras.layers.Input((384, 384, 3))
    backbone = tf.keras.applications.EfficientNetB0(input_tensor=inp, include_top=False)
    layer_names = ['block2b_add', 'block3b_add', 'block5c_add', 'top_activation']

    backbone_outputs = {}
    for i, layer_name in enumerate(layer_names):
        backbone_outputs[i+2] = backbone.get_layer(layer_name).output

    # output_dict is a dict with 4 as keys, since it only process the level 4 backbone
    # inputs
    output_dict = keras_cv.layers.SpatialPyramidPooling(
        level=4, dilation_rates=[6, 12, 18])(backbone_outputs)
    # output[4].shape = [None, 16, 16, 256]
    ```
    """

    def __init__(
        self,
        level: int,
        dilation_rates: List[int],
        num_channels: int = 256,
        activation: str = "relu",
        dropout: float = 0.0,
        **kwargs,
    ):
        """Initializes an Atrous Spatial Pyramid Pooling layer.

        Args:
            level: An `int` level to apply spatial pyramid pooling. This will be used to
                get the exact input tensor from the input dict in `call()`.
            dilation_rates: A `list` of integers for parallel dilated conv. Usually a
                sample choice of rates are [6, 12, 18].
            num_channels: An `int` number of output channels. Default to 256.
            activation: A `str` activation to be used. Default to 'relu'.
            dropout: A `float` for the dropout rate of the final projection output after
                the activations and batch norm. Default to 0.0, which means no dropout is
                applied to the output.
            **kwargs: Additional keyword arguments to be passed.
        """
        super().__init__(**kwargs)
        self.level = level
        self.dilation_rates = dilation_rates
        self.num_channels = num_channels
        self.activation = activation
        self.dropout = dropout

    def build(self, input_shape):
        """Creates the parallel ASPP branches and the projection head.

        `input_shape` must be a dict keyed by feature level; only the entry
        at `self.level` is used. Note that the pooling branch hard-codes the
        spatial `height`/`width` seen here, so inputs at call time are
        expected to have that same static spatial shape.

        Raises:
            ValueError: if `input_shape` is not a dict or lacks `self.level`.
        """
        # Retrieve the input at the level so that we can get the exact shape.
        if not isinstance(input_shape, dict):
            raise ValueError(
                "SpatialPyramidPooling expects input features to be a dict with int keys, "
                f"received {input_shape}"
            )
        if self.level not in input_shape:
            raise ValueError(
                f"SpatialPyramidPooling expect the input dict to contain key {self.level}, "
                f"received {input_shape}"
            )
        input_shape_at_level = input_shape[self.level]
        height = input_shape_at_level[1]
        width = input_shape_at_level[2]
        channels = input_shape_at_level[3]

        # This is the parallel networks that process the input features with different
        # dilation rates. The output from each channel will be merged together and feed
        # to the output.
        self.aspp_parallel_channels = []

        # Channel1 with Conv2D and 1x1 kernel size.
        conv_sequential = tf.keras.Sequential(
            [
                tf.keras.layers.Conv2D(
                    filters=self.num_channels, kernel_size=(1, 1), use_bias=False
                ),
                tf.keras.layers.BatchNormalization(),
                tf.keras.layers.Activation(self.activation),
            ]
        )
        self.aspp_parallel_channels.append(conv_sequential)

        # Channel 2 and afterwards are based on self.dilation_rates, and each of them
        # will have conv2D with 3x3 kernel size.
        for dilation_rate in self.dilation_rates:
            conv_sequential = tf.keras.Sequential(
                [
                    tf.keras.layers.Conv2D(
                        filters=self.num_channels,
                        kernel_size=(3, 3),
                        padding="same",
                        dilation_rate=dilation_rate,
                        use_bias=False,
                    ),
                    tf.keras.layers.BatchNormalization(),
                    tf.keras.layers.Activation(self.activation),
                ]
            )
            self.aspp_parallel_channels.append(conv_sequential)

        # Last channel is the global average pooling with conv2D 1x1 kernel.
        # The pooled 1x1 feature is resized back to (height, width) so it can
        # be concatenated with the other branches.
        pool_sequential = tf.keras.Sequential(
            [
                tf.keras.layers.GlobalAveragePooling2D(),
                tf.keras.layers.Reshape((1, 1, channels)),
                tf.keras.layers.Conv2D(
                    filters=self.num_channels, kernel_size=(1, 1), use_bias=False
                ),
                tf.keras.layers.BatchNormalization(),
                tf.keras.layers.Activation(self.activation),
                tf.keras.layers.Resizing(height, width, interpolation="bilinear"),
            ]
        )
        self.aspp_parallel_channels.append(pool_sequential)

        # Final projection layers: fuse the concatenated branch outputs back
        # down to `num_channels`.
        self.projection = tf.keras.Sequential(
            [
                tf.keras.layers.Conv2D(
                    filters=self.num_channels, kernel_size=(1, 1), use_bias=False
                ),
                tf.keras.layers.BatchNormalization(),
                tf.keras.layers.Activation(self.activation),
                tf.keras.layers.Dropout(rate=self.dropout),
            ]
        )

    def call(self, inputs, training=None):
        """Calls the Atrous Spatial Pyramid Pooling layer on an input.

        The input the of the layer will be a dict of {`level`, `tf.Tensor`}, and layer
        will pick the actual input based on the `level` from its init args.
        The output of the layer will be a dict of {`level`, `tf.Tensor`} with only one level.

        Args:
            inputs: A `dict` of `tf.Tensor` where
                - key: A `int` of the level of the multilevel feature maps.
                - values: A `tf.Tensor` of shape [batch, height_l, width_l,
                  filter_size].

        Returns:
            A `dict` of `tf.Tensor` where
                - key: A `int` of the level of the multilevel feature maps.
                - values: A `tf.Tensor` of output of SpatialPyramidPooling module. The shape
                  of the output should be [batch, height_l, width_l, num_channels]

        Raises:
            ValueError: if `inputs` is not a dict or lacks `self.level`.
        """
        if not isinstance(inputs, dict):
            raise ValueError(
                "SpatialPyramidPooling expects input features to be a dict with int keys, "
                f"received {inputs}"
            )
        if self.level not in inputs:
            raise ValueError(
                f"SpatialPyramidPooling expect the input dict to contain key {self.level}, "
                f"received {inputs}"
            )
        input_at_level = inputs[self.level]
        result = []
        for channel in self.aspp_parallel_channels:
            # Cast back to the input dtype in case a branch (e.g. under mixed
            # precision) produced a different compute dtype.
            result.append(
                tf.cast(
                    channel(input_at_level, training=training), input_at_level.dtype
                )
            )
        result = tf.concat(result, axis=-1)
        result = self.projection(result, training=training)
        return {self.level: result}

    def get_config(self) -> Mapping[str, Any]:
        """Returns the serializable config of this layer."""
        config = {
            "level": self.level,
            "dilation_rates": self.dilation_rates,
            "num_channels": self.num_channels,
            "activation": self.activation,
            "dropout": self.dropout,
        }
        base_config = super().get_config()
        return dict(list(base_config.items()) + list(config.items()))
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from keras_cv.layers import SpatialPyramidPooling
class SpatialPyramidPoolingTest(tf.test.TestCase):
    """Tests for the SpatialPyramidPooling (ASPP) layer."""

    def test_return_type_and_shape(self):
        layer = SpatialPyramidPooling(level=4, dilation_rates=[6, 12, 18])
        c2 = tf.ones([2, 64, 64, 3])
        c3 = tf.ones([2, 32, 32, 3])
        c4 = tf.ones([2, 16, 16, 3])
        c5 = tf.ones([2, 8, 8, 3])
        inputs = {2: c2, 3: c3, 4: c4, 5: c5}
        output = layer(inputs, training=True)
        self.assertTrue(isinstance(output, dict))
        self.assertLen(output, 1)
        # Fix: assertEquals is a deprecated alias (removed in Python 3.12);
        # use assertEqual instead.
        self.assertEqual(output[4].shape, [2, 16, 16, 256])

    def test_with_keras_tensor(self):
        layer = SpatialPyramidPooling(level=4, dilation_rates=[6, 12, 18])
        c2 = tf.keras.layers.Input([64, 64, 3])
        c3 = tf.keras.layers.Input([32, 32, 3])
        c4 = tf.keras.layers.Input([16, 16, 3])
        c5 = tf.keras.layers.Input([8, 8, 3])
        inputs = {2: c2, 3: c3, 4: c4, 5: c5}
        output = layer(inputs, training=True)
        self.assertTrue(isinstance(output, dict))
        self.assertLen(output, 1)
        self.assertEqual(output[4].shape, [None, 16, 16, 256])

    def test_invalid_input_type(self):
        layer = SpatialPyramidPooling(level=4, dilation_rates=[6, 12, 18])
        c4 = tf.keras.layers.Input([16, 16, 3])
        # Fix: assertRaisesRegexp is the deprecated spelling of
        # assertRaisesRegex (removed in Python 3.12).
        with self.assertRaisesRegex(
            ValueError,
            "SpatialPyramidPooling expects input features to be a dict with int keys",
        ):
            layer(c4, training=True)

    def test_invalid_input_level(self):
        layer = SpatialPyramidPooling(level=4, dilation_rates=[6, 12, 18])
        c2 = tf.keras.layers.Input([64, 64, 3])
        c3 = tf.keras.layers.Input([32, 32, 3])
        c5 = tf.keras.layers.Input([8, 8, 3])
        inputs = {2: c2, 3: c3, 5: c5}
        with self.assertRaisesRegex(
            ValueError, "SpatialPyramidPooling expect the input dict to contain key 4"
        ):
            layer(inputs, training=True)
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from keras_cv.losses.focal import FocalLoss
from keras_cv.losses.iou_loss import IoULoss
from keras_cv.losses.simclr_loss import SimCLRLoss
from keras_cv.losses.smooth_l1 import SmoothL1Loss
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
import tensorflow.keras.backend as K
@tf.keras.utils.register_keras_serializable(package="keras_cv")
class FocalLoss(tf.keras.losses.Loss):
    """Implements Focal loss

    Focal loss is a modified cross-entropy designed to perform better with
    class imbalance. For this reason, it's commonly used with object detectors.

    Args:
        alpha: a float value between 0 and 1 representing a weighting factor
            used to deal with class imbalance. Positive classes and negative
            classes have alpha and (1 - alpha) as their weighting factors
            respectively. Defaults to 0.25.
        gamma: a positive float value representing the tunable focusing
            parameter. Defaults to 2.
        from_logits: Whether `y_pred` is expected to be a logits tensor. By
            default, `y_pred` is assumed to encode a probability distribution.
            Default to `False`.
        label_smoothing: Float in `[0, 1]`. If higher than 0 then smooth the
            labels by squeezing them towards `0.5`, i.e., using `1. - 0.5 * label_smoothing`
            for the target class and `0.5 * label_smoothing` for the non-target
            class.

    References:
        - [Focal Loss paper](https://arxiv.org/abs/1708.02002)

    Standalone usage:
    ```python
    y_true = tf.random.uniform([10], 0, maxval=4)
    y_pred = tf.random.uniform([10], 0, maxval=4)
    loss = FocalLoss()
    loss(y_true, y_pred).numpy()
    ```
    Usage with the `compile()` API:
    ```python
    model.compile(optimizer='adam', loss=keras_cv.losses.FocalLoss())
    ```
    """

    def __init__(
        self,
        alpha=0.25,
        gamma=2,
        from_logits=False,
        label_smoothing=0,
        **kwargs,
    ):
        super().__init__(**kwargs)
        self._alpha = float(alpha)
        self._gamma = float(gamma)
        self.from_logits = from_logits
        self.label_smoothing = label_smoothing

    def _smooth_labels(self, y_true):
        # Squeeze labels towards 0.5: targets become 1 - 0.5 * smoothing,
        # non-targets become 0.5 * smoothing.
        return y_true * (1.0 - self.label_smoothing) + 0.5 * self.label_smoothing

    def call(self, y_true, y_pred):
        y_pred = tf.convert_to_tensor(y_pred)
        y_true = tf.cast(y_true, y_pred.dtype)
        if self.label_smoothing:
            y_true = self._smooth_labels(y_true)
        if self.from_logits:
            y_pred = tf.nn.sigmoid(y_pred)
        cross_entropy = K.binary_crossentropy(y_true, y_pred)
        # Positive examples are weighted by alpha, negatives by (1 - alpha).
        alpha = tf.where(tf.equal(y_true, 1.0), self._alpha, (1.0 - self._alpha))
        # pt is the probability the model assigns to the ground-truth class.
        pt = y_true * y_pred + (1.0 - y_true) * (1.0 - y_pred)
        loss = alpha * tf.pow(1.0 - pt, self._gamma) * cross_entropy
        # In most losses you mean over the final axis to achieve a scalar
        # Focal loss however is a special case in that it is meant to focus on
        # a small number of hard examples in a batch. Most of the time this
        # comes in the form of thousands of background class boxes and a few
        # positive boxes.
        # If you mean over the final axis you will get a number close to 0,
        # which will encourage your model to exclusively predict background
        # class boxes.
        return K.sum(loss, axis=-1)

    def get_config(self):
        config = super().get_config()
        config.update(
            {
                # Fix: __init__ stores these as self._alpha / self._gamma; the
                # previous code read self.alpha / self.gamma, which do not
                # exist and raised AttributeError on serialization.
                "alpha": self._alpha,
                "gamma": self._gamma,
                "from_logits": self.from_logits,
                "label_smoothing": self.label_smoothing,
            }
        )
        return config
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from keras_cv.losses import FocalLoss
class FocalTest(tf.test.TestCase):
    """Shape and logits-handling tests for keras_cv's FocalLoss."""

    def _random_binary_labels(self, shape):
        # Draw 0/1 integer labels and cast them to float32.
        return tf.cast(
            tf.random.uniform(shape=shape, minval=0, maxval=2, dtype=tf.int32),
            tf.float32,
        )

    def test_output_shape(self):
        labels = self._random_binary_labels([2, 5])
        probs = tf.random.uniform(shape=[2, 5], minval=0, maxval=1, dtype=tf.float32)
        loss_fn = FocalLoss(reduction="sum")
        # "sum" reduction collapses the loss to a scalar.
        self.assertAllEqual(loss_fn(labels, probs).shape, [])

    def test_output_shape_reduction_none(self):
        labels = self._random_binary_labels([2, 5])
        probs = tf.random.uniform(shape=[2, 5], minval=0, maxval=1, dtype=tf.float32)
        loss_fn = FocalLoss(reduction="none")
        # Without reduction, one loss value per batch element remains.
        self.assertAllEqual(loss_fn(labels, probs).shape, [2])

    def test_output_shape_from_logits(self):
        labels = self._random_binary_labels([2, 5])
        logits = tf.random.uniform(
            shape=[2, 5], minval=-10, maxval=10, dtype=tf.float32
        )
        loss_fn = FocalLoss(reduction="none", from_logits=True)
        self.assertAllEqual(loss_fn(labels, logits).shape, [2])

    def test_from_logits_argument(self):
        labels = tf.random.uniform((2, 8, 10))
        logits = tf.random.uniform((2, 8, 10), minval=-1000, maxval=1000)
        probs = tf.nn.sigmoid(logits)
        # Feeding logits with from_logits=True must match feeding
        # sigmoid(logits) with the default configuration.
        self.assertAllClose(
            FocalLoss(from_logits=True)(labels, logits), FocalLoss()(labels, probs)
        )
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from keras_cv import bounding_box
class IoULoss(tf.keras.losses.Loss):
    """Implements the IoU Loss

    IoU loss is commonly used for object detection. This loss aims to directly
    optimize the IoU score between true boxes and predicted boxes. The length of the
    last dimension should be at least 4 to represent the bounding boxes.

    Args:
        bounding_box_format: a case-insensitive string which is one of `"xyxy"`,
            `"rel_xyxy"`, `"xyWH"`, `"center_xyWH"`, `"yxyx"`, `"rel_yxyx"`.
            Each bounding box is defined by at least these 4 values. The inputs
            may contain additional information such as classes and confidence after
            these 4 values but these values will be ignored while calculating
            this loss. For detailed information on the supported formats, see the
            [KerasCV bounding box documentation](https://keras.io/api/keras_cv/bounding_box/formats/).
        mode: must be one of
            - `"linear"`. The loss will be calculated as 1 - iou
            - `"square"`. The loss will be calculated as 1 - iou<sup>2</sup>
            - `"log"`. The loss will be calculated as -ln(iou)
            Defaults to "log".

    References:
        - [UnitBox paper](https://arxiv.org/pdf/1608.01471)

    Sample Usage:
    ```python
    y_true = tf.random.uniform((5, 10, 5), minval=0, maxval=10, dtype=tf.dtypes.int32)
    y_pred = tf.random.uniform((5, 10, 4), minval=0, maxval=10, dtype=tf.dtypes.int32)
    loss = IoULoss(bounding_box_format = "xyWH")
    loss(y_true, y_pred).numpy()
    ```
    Usage with the `compile()` API:
    ```python
    model.compile(optimizer='adam', loss=keras_cv.losses.IoULoss())
    ```
    """

    def __init__(self, bounding_box_format, mode="log", **kwargs):
        super().__init__(**kwargs)
        self.bounding_box_format = bounding_box_format
        self.mode = mode
        # Fix: the docstring previously advertised mode "squared", but the
        # accepted value (validated here and used in call()) is "square".
        if self.mode not in ["linear", "square", "log"]:
            raise ValueError(
                "IoULoss expects mode to be one of 'linear', 'square' or 'log' "
                f"Received mode={self.mode}, "
            )

    def call(self, y_true, y_pred):
        y_pred = tf.convert_to_tensor(y_pred)
        y_true = tf.cast(y_true, y_pred.dtype)
        if y_pred.shape[-1] < 4:
            raise ValueError(
                "IoULoss expects y_pred.shape[-1] to be at least 4 to represent "
                f"the bounding boxes. Received y_pred.shape[-1]={y_pred.shape[-1]}."
            )
        if y_true.shape[-1] < 4:
            raise ValueError(
                "IoULoss expects y_true.shape[-1] to be at least 4 to represent "
                f"the bounding boxes. Received y_true.shape[-1]={y_true.shape[-1]}."
            )
        ious = bounding_box.compute_iou(y_true, y_pred, self.bounding_box_format)
        # Mean over the last two axes of compute_iou's output, yielding one
        # IoU value per leading (batch) dimension.
        mean_iou = tf.reduce_mean(ious, axis=[-2, -1])
        if self.mode == "linear":
            loss = 1 - mean_iou
        elif self.mode == "square":
            loss = 1 - mean_iou**2
        elif self.mode == "log":
            loss = -tf.math.log(mean_iou)
        return loss

    def get_config(self):
        config = super().get_config()
        config.update(
            {
                "bounding_box_format": self.bounding_box_format,
                "mode": self.mode,
            }
        )
        return config
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from keras_cv.losses.iou_loss import IoULoss
class IoUTest(tf.test.TestCase):
    """Shape and value tests for keras_cv's IoULoss."""

    def test_output_shape(self):
        true_boxes = tf.random.uniform(
            shape=(2, 2, 4), minval=0, maxval=10, dtype=tf.int32
        )
        pred_boxes = tf.random.uniform(
            shape=(2, 2, 4), minval=0, maxval=20, dtype=tf.int32
        )
        loss_fn = IoULoss(bounding_box_format="xywh")
        # The default reduction collapses the loss to a scalar.
        self.assertAllEqual(loss_fn(true_boxes, pred_boxes).shape, ())

    def test_output_shape_reduction_none(self):
        true_boxes = tf.random.uniform(
            shape=(2, 2, 4), minval=0, maxval=10, dtype=tf.int32
        )
        pred_boxes = tf.random.uniform(
            shape=(2, 2, 4), minval=0, maxval=20, dtype=tf.int32
        )
        loss_fn = IoULoss(bounding_box_format="xywh", reduction="none")
        # Without reduction, one loss value per batch element remains.
        self.assertAllEqual(loss_fn(true_boxes, pred_boxes).shape, [2])

    def test_output_shape_relative(self):
        true_boxes = [
            [0.0, 0.0, 0.1, 0.1, 4, 0.9],
            [0.0, 0.0, 0.2, 0.3, 4, 0.76],
            [0.4, 0.5, 0.5, 0.6, 3, 0.89],
            [0.2, 0.2, 0.3, 0.3, 6, 0.04],
        ]
        pred_boxes = [
            [0.0, 0.0, 0.5, 0.6, 4, 0.9],
            [0.0, 0.0, 0.7, 0.3, 1, 0.76],
            [0.4, 0.5, 0.5, 0.6, 4, 0.04],
            [0.2, 0.1, 0.3, 0.3, 7, 0.48],
        ]
        loss_fn = IoULoss(bounding_box_format="rel_xyxy")
        self.assertAllEqual(loss_fn(true_boxes, pred_boxes).shape, ())

    def test_output_value(self):
        true_boxes = [
            [0, 0, 1, 1, 4, 0.9],
            [0, 0, 2, 3, 4, 0.76],
            [4, 5, 3, 6, 3, 0.89],
            [2, 2, 3, 3, 6, 0.04],
        ]
        pred_boxes = [
            [0, 0, 5, 6, 4, 0.9],
            [0, 0, 7, 3, 1, 0.76],
            [4, 5, 5, 6, 4, 0.04],
            [2, 1, 3, 3, 7, 0.48],
        ]
        loss_fn = IoULoss(bounding_box_format="xywh")
        # -log(compute_iou(y_true, y_pred)) = 2.0311017
        self.assertAllClose(loss_fn(true_boxes, pred_boxes), 2.0311017)
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import tensorflow as tf
from absl.testing import parameterized
from keras_cv.losses import FocalLoss
class ModelGardenFocalLoss(tf.keras.losses.Loss):
    """Reference focal-loss implementation used to cross-check keras_cv's FocalLoss.

    Expects `y_pred` to be logits: cross-entropy is computed with
    `sigmoid_cross_entropy_with_logits` and probabilities are derived via
    `tf.sigmoid` internally.
    """
    def __init__(
        self, alpha, gamma, reduction=tf.keras.losses.Reduction.AUTO, name=None
    ):
        # alpha: weight for the positive class; gamma: focusing exponent.
        self._alpha = alpha
        self._gamma = gamma
        super().__init__(reduction=reduction, name=name)
    def call(self, y_true, y_pred):
        with tf.name_scope("focal_loss"):
            y_true = tf.cast(y_true, dtype=tf.float32)
            y_pred = tf.cast(y_pred, dtype=tf.float32)
            positive_label_mask = tf.equal(y_true, 1.0)
            # Cross-entropy straight from logits for numerical stability.
            cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(
                labels=y_true, logits=y_pred
            )
            probs = tf.sigmoid(y_pred)
            # Probability assigned to the ground-truth class for each element.
            probs_gt = tf.where(positive_label_mask, probs, 1.0 - probs)
            # With small gamma, the implementation could produce NaN during back prop.
            modulator = tf.pow(1.0 - probs_gt, self._gamma)
            loss = modulator * cross_entropy
            # Positives weighted by alpha, negatives by (1 - alpha).
            weighted_loss = tf.where(
                positive_label_mask, self._alpha * loss, (1.0 - self._alpha) * loss
            )
        return weighted_loss
class FocalLossModelGardenComparisonTest(tf.test.TestCase, parameterized.TestCase):
    """Checks keras_cv's FocalLoss against the Model Garden reference."""

    @parameterized.named_parameters(
        ("sum", "sum"),
    )
    def test_model_garden_implementation_has_same_outputs(self, reduction):
        keras_cv_loss = FocalLoss(
            alpha=0.25, gamma=2.0, from_logits=True, reduction=reduction
        )
        reference_loss = ModelGardenFocalLoss(
            alpha=0.25, gamma=2.0, reduction=reduction
        )
        # Compare both implementations on several random one-hot batches.
        for _ in range(10):
            class_ids = np.random.randint(size=(200,), low=0, high=10)
            one_hot_labels = tf.cast(tf.one_hot(class_ids, depth=10), tf.float32)
            logits = tf.random.uniform((200, 10), dtype=tf.float32)
            self.assertAllClose(
                keras_cv_loss(one_hot_labels, logits),
                reference_loss(one_hot_labels, logits),
            )
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from tensorflow import keras
# Large additive constant subtracted from self-similarity logits in SimCLRLoss
# so a sample can never be selected as its own positive.
LARGE_NUM = 1e9
class SimCLRLoss(tf.keras.losses.Loss):
    """Implements SimCLR Cosine Similarity loss.
    SimCLR loss is used for contrastive self-supervised learning.
    Args:
        temperature: a float value between 0 and 1, used as a scaling factor for cosine similarity.
    References:
        - [SimCLR paper](https://arxiv.org/pdf/2002.05709)
    """
    def __init__(self, temperature, **kwargs):
        super().__init__(**kwargs)
        # Scaling factor applied to every similarity logit below.
        self.temperature = temperature
    def call(self, projections_1, projections_2):
        """Computes SimCLR loss for a pair of projections in a contrastive learning trainer.
        Note that unlike most loss functions, this should not be called with y_true and y_pred,
        but with two unlabeled projections. It can otherwise be treated as a normal loss function.
        Args:
            projections_1: a tensor with the output of the first projection model in a contrastive learning trainer
            projections_2: a tensor with the output of the second projection model in a contrastive learning trainer
        Returns:
            A tensor with the SimCLR loss computed from the input projections
        """
        # Normalize the projections so the matmuls below yield cosine similarities.
        projections_1 = tf.math.l2_normalize(projections_1, axis=1)
        projections_2 = tf.math.l2_normalize(projections_2, axis=1)
        # Produce artificial labels, 1 for each image in the batch.
        # labels[i] marks index i as the positive among 2 * batch_size logits;
        # masks[i] marks sample i's own entry within a single view.
        batch_size = tf.shape(projections_1)[0]
        labels = tf.one_hot(tf.range(batch_size), batch_size * 2)
        masks = tf.one_hot(tf.range(batch_size), batch_size)
        # Compute logits
        # Within-view similarities; subtracting masks * LARGE_NUM drives each
        # sample's similarity with itself to a very negative value so it is
        # effectively excluded from the softmax.
        logits_11 = (
            tf.matmul(projections_1, projections_1, transpose_b=True) / self.temperature
        )
        logits_11 = logits_11 - masks * LARGE_NUM
        logits_22 = (
            tf.matmul(projections_2, projections_2, transpose_b=True) / self.temperature
        )
        logits_22 = logits_22 - masks * LARGE_NUM
        # Cross-view similarities (view 1 vs view 2 and vice versa).
        logits_12 = (
            tf.matmul(projections_1, projections_2, transpose_b=True) / self.temperature
        )
        logits_21 = (
            tf.matmul(projections_2, projections_1, transpose_b=True) / self.temperature
        )
        # Softmax cross-entropy in each direction; the positive target is the
        # matching sample in the other view (first block of the concat).
        loss_a = keras.losses.categorical_crossentropy(
            labels, tf.concat([logits_12, logits_11], 1), from_logits=True
        )
        loss_b = keras.losses.categorical_crossentropy(
            labels, tf.concat([logits_21, logits_22], 1), from_logits=True
        )
        return loss_a + loss_b
    def get_config(self):
        config = super().get_config()
        config.update({"temperature": self.temperature})
        return config
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from keras_cv.losses.simclr_loss import SimCLRLoss
class SimCLRLossTest(tf.test.TestCase):
    """Shape and golden-value tests for SimCLRLoss."""

    def test_output_shape(self):
        proj_a = tf.random.uniform(
            shape=(10, 128), minval=0, maxval=10, dtype=tf.float32
        )
        proj_b = tf.random.uniform(
            shape=(10, 128), minval=0, maxval=10, dtype=tf.float32
        )
        loss_fn = SimCLRLoss(temperature=1)
        # The default reduction collapses the loss to a scalar.
        self.assertAllEqual(loss_fn(proj_a, proj_b).shape, ())

    def test_output_shape_reduction_none(self):
        proj_a = tf.random.uniform(
            shape=(10, 128), minval=0, maxval=10, dtype=tf.float32
        )
        proj_b = tf.random.uniform(
            shape=(10, 128), minval=0, maxval=10, dtype=tf.float32
        )
        loss_fn = SimCLRLoss(temperature=1, reduction="none")
        # Without reduction, one loss value per batch element remains.
        self.assertAllEqual(loss_fn(proj_a, proj_b).shape, (10,))

    def test_output_value(self):
        proj_a = [
            [1.0, 2.0, 3.0, 4.0],
            [2.0, 3.0, 4.0, 5.0],
            [3.0, 4.0, 5.0, 6.0],
        ]
        proj_b = [
            [6.0, 5.0, 4.0, 3.0],
            [5.0, 4.0, 3.0, 2.0],
            [4.0, 3.0, 2.0, 1.0],
        ]
        # Golden values for two temperatures on fixed projections.
        for temperature, expected in ((0.5, 3.566689), (0.1, 5.726100)):
            loss_fn = SimCLRLoss(temperature=temperature)
            self.assertAllClose(loss_fn(proj_a, proj_b), expected)
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
# --- Implementing Smooth L1 loss and Focal Loss as keras custom losses ---
class SmoothL1Loss(tf.keras.losses.Loss):
    """Implements Smooth L1 loss.

    Differences whose magnitude is below `l1_cutoff` are penalized
    quadratically (`0.5 * diff**2`); larger differences are penalized
    linearly (`|diff| - 0.5`). The per-element loss is averaged over the
    last axis.

    Args:
        l1_cutoff: threshold on `|y_true - y_pred|` above which the loss
            switches from the squared to the linear regime. Defaults to 1.0.
    """

    def __init__(self, l1_cutoff=1.0, **kwargs):
        super().__init__(**kwargs)
        self.l1_cutoff = l1_cutoff

    def call(self, y_true, y_pred):
        delta = y_true - y_pred
        abs_delta = tf.abs(delta)
        quadratic = 0.5 * delta**2
        linear = abs_delta - 0.5
        # Quadratic branch only where the difference is small.
        per_element = tf.where(abs_delta < self.l1_cutoff, quadratic, linear)
        return tf.keras.backend.mean(per_element, axis=-1)

    def get_config(self):
        """Returns the serializable config, base keys first."""
        config = dict(super().get_config())
        config["l1_cutoff"] = self.l1_cutoff
        return config
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from absl.testing import parameterized
import keras_cv
class SmoothL1LossTest(tf.test.TestCase, parameterized.TestCase):
    """Output-shape tests for SmoothL1Loss across reduction modes."""

    @parameterized.named_parameters(
        ("none", "none", (20,)),
        ("sum", "sum", ()),
        ("sum_over_batch_size", "sum_over_batch_size", ()),
    )
    def test_proper_output_shapes(self, reduction, target_size):
        # The loss output shape must follow the configured reduction.
        loss_fn = keras_cv.losses.SmoothL1Loss(l1_cutoff=0.5, reduction=reduction)
        labels = tf.random.uniform((20, 300))
        predictions = tf.random.uniform((20, 300))
        self.assertEqual(loss_fn(y_true=labels, y_pred=predictions).shape, target_size)
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from keras_cv.metrics.coco.mean_average_precision import COCOMeanAveragePrecision
from keras_cv.metrics.coco.recall import COCORecall
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment