"StyleText/fonts/ch_standard.ttf" did not exist on "f2d98c5e76b8b2fa9d912ccfe3b51e8d4df8211e"
Commit 1a3c83d6 authored by zhanggzh

Add keras-cv models and training code

parent 9846958a
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from tensorflow.keras import layers
@tf.keras.utils.register_keras_serializable(package="keras_cv")
class PredictionHead(layers.Layer):
"""The class/box predictions head.
Arguments:
output_filters: Number of convolution filters in the final layer.
bias_initializer: Bias initializer for the final convolution layer.
num_conv_layers: Number of intermediate convolution layers. Defaults to 3.
Returns:
A layer representing either the classification or the box regression
head, depending on `output_filters`.
"""
def __init__(self, output_filters, bias_initializer, num_conv_layers=3, **kwargs):
super().__init__(**kwargs)
self.output_filters = output_filters
self.bias_initializer = bias_initializer
self.num_conv_layers = num_conv_layers
self.conv_layers = [
layers.Conv2D(
256,
kernel_size=3,
padding="same",
kernel_initializer=tf.keras.initializers.Orthogonal(),
activation="relu",
)
for _ in range(num_conv_layers)
]
self.prediction_layer = layers.Conv2D(
self.output_filters,
kernel_size=3,
strides=1,
padding="same",
kernel_initializer=tf.keras.initializers.Orthogonal(),
bias_initializer=self.bias_initializer,
)
def call(self, x, training=False):
for layer in self.conv_layers:
x = layer(x, training=training)
x = self.prediction_layer(x, training=training)
return x
def get_config(self):
config = {
"bias_initializer": self.bias_initializer,
"output_filters": self.output_filters,
"num_conv_layers": self.num_conv_layers,
}
base_config = super().get_config()
return dict(list(base_config.items()) + list(config.items()))
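# Example usage (a minimal sketch): a RetinaNet-style classification head for
# 20 classes with 9 anchors per location; the shapes are illustrative only.
#
#   head = PredictionHead(output_filters=9 * 20, bias_initializer="zeros")
#   features = tf.random.uniform((2, 32, 32, 256))
#   outputs = head(features)  # -> shape (2, 32, 32, 180)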
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import tensorflow as tf
from tensorflow import keras
import keras_cv
from keras_cv import bounding_box
from keras_cv import layers as cv_layers
from keras_cv.models.object_detection.object_detection_base_model import (
ObjectDetectionBaseModel,
)
from keras_cv.models.object_detection.retina_net.__internal__ import (
layers as layers_lib,
)
# TODO(lukewood): update docstring to include documentation on creating a custom label
# decoder/etc.
# TODO(lukewood): link to keras.io guide on creating custom backbone and FPN.
class RetinaNet(ObjectDetectionBaseModel):
"""A Keras model implementing the RetinaNet architecture.
Implements the RetinaNet architecture for object detection. The constructor
requires `classes`, `bounding_box_format` and a `backbone`. Optionally, a
custom label encoder, feature pyramid network, and prediction decoder may all be
provided.
Usage:
```python
retina_net = keras_cv.models.RetinaNet(
classes=20,
bounding_box_format="xywh",
backbone="resnet50",
backbone_weights="imagenet",
include_rescaling=True,
)
```
Args:
classes: the number of classes in your dataset excluding the background
class. Classes should be represented by integers in the range
[0, classes).
bounding_box_format: The format of bounding boxes of input dataset. Refer
[to the keras.io docs](https://keras.io/api/keras_cv/bounding_box/formats/)
for more details on supported bounding box formats.
backbone: Either `"resnet50"` or a custom backbone model.
include_rescaling: Required if the provided backbone is a pre-configured model.
If set to `True`, inputs will be passed through a `Rescaling(1/255.0)`
layer.
backbone_weights: (Optional) if using a KerasCV provided backbone, the
underlying backbone model will be loaded using the weights provided in this
argument. Can be a model checkpoint path, or a string from the supported
weight sets in the underlying model.
anchor_generator: (Optional) a `keras_cv.layers.AnchorGenerator`. If provided,
the anchor generator will be passed to both the `label_encoder` and the
`prediction_decoder`. Only to be used when both `label_encoder` and
`prediction_decoder` are `None`. Defaults to an anchor generator with
the parameterization: `strides=[2**i for i in range(3, 8)]`,
`scales=[2**x for x in [0, 1 / 3, 2 / 3]]`,
`sizes=[32.0, 64.0, 128.0, 256.0, 512.0]`,
and `aspect_ratios=[0.5, 1.0, 2.0]`.
label_encoder: (Optional) a keras.Layer that accepts an image Tensor and a
bounding box Tensor to its `call()` method, and returns RetinaNet training
targets. By default, a KerasCV standard LabelEncoder is created and used.
Results of this `call()` method are passed to the `loss` object passed into
`compile()` as the `y_true` argument.
prediction_decoder: (Optional) A `keras.layer` that is responsible for
transforming RetinaNet predictions into usable bounding box Tensors. If
not provided, a default is provided. The default `prediction_decoder`
layer uses a `NonMaxSuppression` operation for box pruning.
feature_pyramid: (Optional) A `keras.Model` representing a feature pyramid
network (FPN). The feature pyramid network is called on the outputs of the
`backbone`. The KerasCV default backbones return three outputs in a list,
but custom backbones may be written and used with custom feature pyramid
networks. If not provided, a default feature pyramid network is produced
by the library. The default feature pyramid network is compatible with all
standard keras_cv backbones.
classification_head: (Optional) A `keras.Layer` that performs classification of
the bounding boxes. If not provided, a simple ConvNet with 1 layer will be
used.
box_head: (Optional) A `keras.Layer` that performs regression of
the bounding boxes. If not provided, a simple ConvNet with 1 layer will be
used.
evaluate_train_time_metrics: (Optional) whether or not to evaluate metrics
passed in `compile()` inside of the `train_step()`. This is NOT
recommended, as it dramatically reduces performance due to the synchronous
label decoding and COCO metric evaluation. For example, on a single GPU on
the PascalVOC dataset epoch time goes from 3 minutes to 30 minutes with this
set to `True`. Defaults to `False`.
"""
def __init__(
self,
classes,
bounding_box_format,
backbone,
include_rescaling=None,
backbone_weights=None,
anchor_generator=None,
label_encoder=None,
prediction_decoder=None,
feature_pyramid=None,
classification_head=None,
box_head=None,
evaluate_train_time_metrics=False,
name="RetinaNet",
**kwargs,
):
if anchor_generator is not None and (prediction_decoder or label_encoder):
raise ValueError(
"`anchor_generator` is only to be provided when "
"both `label_encoder` and `prediction_decoder` are both `None`. "
f"Received `anchor_generator={anchor_generator}` "
f"`label_encoder={label_encoder}`, "
f"`prediction_decoder={prediction_decoder}`. To customize the behavior of "
"the anchor_generator inside of a custom `label_encoder` or custom "
"`prediction_decoder` you should provide both to `RetinaNet`, and ensure "
"that the `anchor_generator` provided to both is identical"
)
anchor_generator = anchor_generator or RetinaNet.default_anchor_generator(
bounding_box_format
)
label_encoder = label_encoder or cv_layers.RetinaNetLabelEncoder(
bounding_box_format=bounding_box_format, anchor_generator=anchor_generator
)
super().__init__(
bounding_box_format=bounding_box_format,
label_encoder=label_encoder,
name=name,
**kwargs,
)
self.evaluate_train_time_metrics = evaluate_train_time_metrics
self.label_encoder = label_encoder
self.anchor_generator = anchor_generator
if bounding_box_format.lower() != "xywh":
raise ValueError(
"`keras_cv.models.RetinaNet` only supports the 'xywh' "
"`bounding_box_format`. In future releases, more formats will be "
"supported. For now, please pass `bounding_box_format='xywh'`. "
f"Received `bounding_box_format={bounding_box_format}`"
)
self.bounding_box_format = bounding_box_format
self.classes = classes
self.backbone = _parse_backbone(backbone, include_rescaling, backbone_weights)
self._prediction_decoder = prediction_decoder or cv_layers.NmsPredictionDecoder(
bounding_box_format=bounding_box_format,
anchor_generator=anchor_generator,
classes=classes,
)
# initialize trainable networks
self.feature_pyramid = feature_pyramid or layers_lib.FeaturePyramid()
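# Per the RetinaNet paper, bias the classification head so every anchor starts
# out predicting foreground with probability ~0.01, which stabilizes the focal
# loss early in training: sigmoid(-log((1 - 0.01) / 0.01)) == 0.01.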
prior_probability = tf.constant_initializer(-np.log((1 - 0.01) / 0.01))
self.classification_head = classification_head or layers_lib.PredictionHead(
output_filters=9 * classes, bias_initializer=prior_probability
)
self.box_head = box_head or layers_lib.PredictionHead(
output_filters=9 * 4, bias_initializer="zeros"
)
self._metrics_bounding_box_format = None
self.loss_metric = tf.keras.metrics.Mean(name="loss")
self.classification_loss_metric = tf.keras.metrics.Mean(
name="classification_loss"
)
self.box_loss_metric = tf.keras.metrics.Mean(name="box_loss")
self.regularization_loss_metric = tf.keras.metrics.Mean(
name="regularization_loss"
)
# Construction should run in eager mode so the `box_variance` values below can
# be compared via `.numpy()`
if any(
self.prediction_decoder.box_variance.numpy()
!= self.label_encoder.box_variance.numpy()
):
raise ValueError(
"`prediction_decoder` and `label_encoder` must "
"have matching `box_variance` arguments. Did you customize the "
"`box_variance` in either `prediction_decoder` or `label_encoder`? "
"If so, please also customize the other. Received: "
f"`prediction_decoder.box_variance={prediction_decoder.box_variance}`, "
f"`label_encoder.box_variance={label_encoder.box_variance}`."
)
@property
def prediction_decoder(self):
return self._prediction_decoder
@prediction_decoder.setter
def prediction_decoder(self, prediction_decoder):
self._prediction_decoder = prediction_decoder
self.make_predict_function(force=True)
self.make_test_function(force=True)
self.make_train_function(force=True)
@staticmethod
def default_anchor_generator(bounding_box_format):
strides = [2**i for i in range(3, 8)]
scales = [2**x for x in [0, 1 / 3, 2 / 3]]
sizes = [32.0, 64.0, 128.0, 256.0, 512.0]
aspect_ratios = [0.5, 1.0, 2.0]
return cv_layers.AnchorGenerator(
bounding_box_format=bounding_box_format,
sizes=sizes,
aspect_ratios=aspect_ratios,
scales=scales,
strides=strides,
clip_boxes=True,
)
@property
def metrics(self):
return super().metrics + self.train_metrics
@property
def train_metrics(self):
return [
self.loss_metric,
self.classification_loss_metric,
self.regularization_loss_metric,
self.box_loss_metric,
]
def call(self, x, training=False):
backbone_outputs = self.backbone(x, training=training)
features = self.feature_pyramid(backbone_outputs, training=training)
N = tf.shape(x)[0]
cls_outputs = []
box_outputs = []
for feature in features:
box_outputs.append(
tf.reshape(self.box_head(feature, training=training), [N, -1, 4])
)
cls_outputs.append(
tf.reshape(
self.classification_head(feature, training=training),
[N, -1, self.classes],
)
)
cls_outputs = tf.concat(cls_outputs, axis=1)
box_outputs = tf.concat(box_outputs, axis=1)
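# Predictions are packed per anchor as [box_offsets (4), class_logits (classes)]
# along the last axis, matching the slicing performed in `compute_losses()`.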
return tf.concat([box_outputs, cls_outputs], axis=-1)
def decode_training_predictions(self, x, train_predictions):
# no-op if default decoder is used.
pred_for_inference = bounding_box.convert_format(
train_predictions,
source=self.bounding_box_format,
target=self.prediction_decoder.bounding_box_format,
images=x,
)
pred_for_inference = self.prediction_decoder(x, pred_for_inference)
return bounding_box.convert_format(
pred_for_inference,
source=self.prediction_decoder.bounding_box_format,
target=self.bounding_box_format,
images=x,
)
def compile(
self, box_loss=None, classification_loss=None, loss=None, metrics=None, **kwargs
):
super().compile(metrics=metrics, **kwargs)
if loss is not None:
raise ValueError(
"`RetinaNet` does not accept a `loss` to `compile()`. "
"Instead, please pass `box_loss` and `classification_loss`. "
"`loss` will be ignored during training."
)
box_loss = _parse_box_loss(box_loss)
classification_loss = _parse_classification_loss(classification_loss)
metrics = metrics or []
if hasattr(classification_loss, "from_logits"):
if not classification_loss.from_logits:
raise ValueError(
"RetinaNet.compile() expects `from_logits` to be True for "
"`classification_loss`. Got "
"`classification_loss.from_logits="
f"{classification_loss.from_logits}`"
)
if hasattr(box_loss, "bounding_box_format"):
if box_loss.bounding_box_format != self.bounding_box_format:
raise ValueError(
"Wrong `bounding_box_format` passed to `box_loss` in "
"`RetinaNet.compile()`. "
f"Got `box_loss.bounding_box_format={box_loss.bounding_box_format}`, "
f"want `box_loss.bounding_box_format={self.bounding_box_format}`"
)
self.box_loss = box_loss
self.classification_loss = classification_loss
if len(metrics) != 0:
self._metrics_bounding_box_format = metrics[0].bounding_box_format
else:
self._metrics_bounding_box_format = self.bounding_box_format
any_wrong_format = any(
[
m.bounding_box_format != self._metrics_bounding_box_format
for m in metrics
]
)
if metrics and any_wrong_format:
raise ValueError(
"All metrics passed to RetinaNet.compile() must have "
"the same `bounding_box_format` attribute. For example, if one metric "
"uses 'xyxy', all other metrics must use 'xyxy'. Received "
f"metrics={metrics}."
)
def compute_losses(self, y_true, y_pred):
if y_true.shape[-1] != 5:
raise ValueError(
"y_true should have shape (None, None, 5). Got "
f"y_true.shape={tuple(y_true.shape)}"
)
if y_pred.shape[-1] != self.classes + 4:
raise ValueError(
"y_pred should have shape (None, None, classes + 4). "
f"Got y_pred.shape={tuple(y_pred.shape)}. Does your model's `classes` "
"parameter match your losses `classes` parameter?"
)
box_labels = y_true[:, :, :4]
box_predictions = y_pred[:, :, :4]
cls_labels = tf.one_hot(
tf.cast(y_true[:, :, 4], dtype=tf.int32),
depth=self.classes,
dtype=tf.float32,
)
cls_predictions = y_pred[:, :, 4:]
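# The class column of `y_true` follows the label encoder's convention:
# >= 0 for matched (positive) anchors, -1.0 for background, -2.0 for ignored.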
positive_mask = tf.cast(tf.greater(y_true[:, :, 4], -1.0), dtype=tf.float32)
ignore_mask = tf.cast(tf.equal(y_true[:, :, 4], -2.0), dtype=tf.float32)
classification_loss = self.classification_loss(cls_labels, cls_predictions)
box_loss = self.box_loss(box_labels, box_predictions)
if len(classification_loss.shape) != 2:
raise ValueError(
"RetinaNet expects the output shape of `classification_loss` to be "
"`(batch_size, num_anchor_boxes)`. Expected "
f"classification_loss(predictions)={box_predictions.shape[:2]}, got "
f"classification_loss(predictions)={classification_loss.shape}. "
"Try passing `reduction='none'` to your classification_loss's "
"constructor."
)
if len(box_loss.shape) != 2:
raise ValueError(
"RetinaNet expects the output shape of `box_loss` to be "
"`(batch_size, num_anchor_boxes)`. Expected "
f"box_loss(predictions)={box_predictions.shape[:2]}, got "
f"box_loss(predictions)={box_loss.shape}. "
"Try passing `reduction='none'` to your box_loss's "
"constructor."
)
classification_loss = tf.where(
tf.equal(ignore_mask, 1.0), 0.0, classification_loss
)
box_loss = tf.where(tf.equal(positive_mask, 1.0), box_loss, 0.0)
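# As in the RetinaNet paper, normalize both losses by the number of positive
# (matched) anchors in each image.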
normalizer = tf.reduce_sum(positive_mask, axis=-1)
classification_loss = tf.math.divide_no_nan(
tf.reduce_sum(classification_loss, axis=-1), normalizer
)
box_loss = tf.math.divide_no_nan(tf.reduce_sum(box_loss, axis=-1), normalizer)
classification_loss = tf.reduce_sum(classification_loss, axis=-1)
box_loss = tf.reduce_sum(box_loss, axis=-1)
# ensure losses are scalars
# only runs at trace time
if tuple(classification_loss.shape) != ():
raise ValueError(
"Expected `classification_loss` to be a scalar by the "
"end of `compute_losses()`, instead got "
f"`classification_loss.shape={classification_loss.shape}`"
)
if tuple(box_loss.shape) != ():
raise ValueError(
"Expected `box_loss` to be a scalar by the "
"end of `compute_losses()`, instead got "
f"`box_loss.shape={box_loss.shape}`"
)
return classification_loss, box_loss
def _backward(self, y_true, y_pred):
classification_loss, box_loss = self.compute_losses(
y_true,
y_pred,
)
regularization_loss = 0.0
for loss in self.losses:
regularization_loss += tf.nn.scale_regularization_loss(loss)
loss = classification_loss + box_loss + regularization_loss
self.classification_loss_metric.update_state(classification_loss)
self.box_loss_metric.update_state(box_loss)
self.regularization_loss_metric.update_state(regularization_loss)
self.loss_metric.update_state(loss)
return loss
def train_step(self, data):
x, y = data
y_for_metrics, y_training_target = y
with tf.GradientTape() as tape:
y_pred = self(x, training=True)
loss = self._backward(y_training_target, y_pred)
# Training specific code
trainable_vars = self.trainable_variables
gradients = tape.gradient(loss, trainable_vars)
self.optimizer.apply_gradients(zip(gradients, trainable_vars))
# Early exit for no train time metrics
if not self.evaluate_train_time_metrics:
# To minimize GPU transfers, we update metrics AFTER we take grads and apply
# them.
return {m.name: m.result() for m in self.train_metrics}
predictions = self.decode_training_predictions(x, y_pred)
self._update_metrics(y_for_metrics, predictions)
return {m.name: m.result() for m in self.metrics}
def test_step(self, data):
x, y = data
y_for_metrics, y_training_target = y
y_pred = self(x, training=False)
_ = self._backward(y_training_target, y_pred)
predictions = self.decode_training_predictions(x, y_pred)
self._update_metrics(y_for_metrics, predictions)
return {m.name: m.result() for m in self.metrics}
def predict_step(self, x):
predictions = super().predict_step(x)
return self.decode_training_predictions(x, predictions)
def _update_metrics(self, y_true, y_pred):
y_true = bounding_box.convert_format(
y_true,
source=self.bounding_box_format,
target=self._metrics_bounding_box_format,
)
y_pred = bounding_box.convert_format(
y_pred,
source=self.bounding_box_format,
target=self._metrics_bounding_box_format,
)
self.compiled_metrics.update_state(y_true, y_pred)
def _parse_backbone(backbone, include_rescaling, backbone_weights):
if isinstance(backbone, str) and include_rescaling is None:
raise ValueError(
"When using a preconfigured backbone, please do provide a "
"`include_rescaling` parameter. `include_rescaling` is passed to the "
"Keras application constructor for the provided backbone. When "
"`include_rescaling=True`, image inputs are passed through a "
"`layers.Rescaling(1/255.0)` layer. When `include_rescaling=False`, no "
"downscaling is performed. "
f"Received backbone={backbone}, include_rescaling={include_rescaling}."
)
if isinstance(backbone, str):
if backbone == "resnet50":
return _resnet50_backbone(include_rescaling, backbone_weights)
else:
raise ValueError(
"backbone expected to be one of ['resnet50', keras.Model]. "
f"Received backbone={backbone}."
)
if include_rescaling or backbone_weights:
raise ValueError(
"When a custom backbone is used, include_rescaling and "
f"backbone_weights are not supported. Received backbone={backbone}, "
f"include_rescaling={include_rescaling}, and "
f"backbone_weights={backbone_weights}."
)
if not isinstance(backbone, keras.Model):
raise ValueError(
"Custom backbones should be subclasses of a keras.Model. "
f"Received backbone={backbone}."
)
return backbone
def _parse_box_loss(loss):
if not isinstance(loss, str):
# support arbitrary callables
return loss
# case insensitive comparison
if loss.lower() == "smoothl1":
return keras_cv.losses.SmoothL1Loss(l1_cutoff=1.0, reduction="none")
if loss.lower() == "huber":
return keras.losses.Huber(reduction="none")
raise ValueError(
"Expected `box_loss` to be either a Keras Loss, "
f"callable, or the string 'SmoothL1'. Got loss={loss}."
)
def _parse_classification_loss(loss):
if not isinstance(loss, str):
# support arbitrary callables
return loss
# case insensitive comparison
if loss.lower() == "focal":
return keras_cv.losses.FocalLoss(from_logits=True, reduction="none")
raise ValueError(
"Expected `classification_loss` to be either a Keras Loss, "
f"callable, or the string 'Focal'. Got loss={loss}."
)
def _resnet50_backbone(include_rescaling, backbone_weights):
inputs = keras.layers.Input(shape=(None, None, 3))
x = inputs
if include_rescaling:
x = keras.applications.resnet.preprocess_input(x)
# TODO(lukewood): this should really be calling keras_cv.models.ResNet50
backbone = keras.applications.ResNet50(
include_top=False, input_tensor=x, weights=backbone_weights
)
c3_output, c4_output, c5_output = [
backbone.get_layer(layer_name).output
for layer_name in ["conv3_block4_out", "conv4_block6_out", "conv5_block3_out"]
]
return keras.Model(inputs=inputs, outputs=[c3_output, c4_output, c5_output])
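# Example end-to-end usage (a minimal sketch assuming a PascalVOC-style setup
# with 20 classes and a `tf.data` pipeline `train_ds` yielding (images, boxes)):
#
#   model = RetinaNet(
#       classes=20,
#       bounding_box_format="xywh",
#       backbone="resnet50",
#       backbone_weights="imagenet",
#       include_rescaling=True,
#   )
#   model.compile(
#       optimizer="adam",
#       classification_loss="focal",
#       box_loss="smoothl1",
#   )
#   model.fit(train_ds, epochs=1)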
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import tempfile
import pytest
import tensorflow as tf
import keras_cv
@pytest.mark.skipif(
"INTEGRATION" not in os.environ or os.environ["INTEGRATION"] != "true",
reason="Takes a long time to run, only runs when INTEGRATION "
"environment variable is set. To run the test please run: \n"
"`INTEGRATION=true pytest keras_cv/",
)
class RetinaNetTest(tf.test.TestCase):
@pytest.fixture(autouse=True)
def cleanup_global_session(self):
# Code before yield runs before the test
yield
tf.keras.backend.clear_session()
def test_weight_setting(self):
x, y = _create_bounding_box_dataset(bounding_box_format="xywh")
pretrained_retina_net, new_retina_net = _create_retina_nets(x, y, epochs=1)
new_retina_net.set_weights(pretrained_retina_net.get_weights())
# check if all weights that show up via `get_weights()` are loaded
for retina_net_weight, post_load_weight in zip(
pretrained_retina_net.get_weights(), new_retina_net.get_weights()
):
self.assertAllEqual(retina_net_weight, post_load_weight)
for layer_original, layer_new in zip(
# manually check layers to make sure nothing is missed
_get_retina_net_layers(pretrained_retina_net),
_get_retina_net_layers(new_retina_net),
):
for weight, weight_new in zip(
layer_original.get_weights(), layer_new.get_weights()
):
self.assertAllEqual(weight, weight_new)
def test_decoder_doesnt_get_updated(self):
x, y = _create_bounding_box_dataset(bounding_box_format="xywh")
pretrained_retina_net, new_retina_net = _create_retina_nets(
x, y, epochs=1, custom_decoder=True
)
new_retina_net.set_weights(pretrained_retina_net.get_weights())
# check if all weights that show up via `get_weights()` are loaded
for retina_net_weight, post_load_weight in zip(
pretrained_retina_net.get_weights(), new_retina_net.get_weights()
):
self.assertAllEqual(retina_net_weight, post_load_weight)
pretrained_decoder = pretrained_retina_net.prediction_decoder
new_decoder = new_retina_net.prediction_decoder
self.assertEqual(new_decoder.suppression_layer.iou_threshold, 0.75)
self.assertNotEqual(
new_decoder.suppression_layer.iou_threshold,
pretrained_decoder.suppression_layer.iou_threshold,
)
@pytest.mark.skipif(os.name == "nt", reason="tempfile does not work on windows")
def test_savedmodel_creation(self):
x, y = _create_bounding_box_dataset(bounding_box_format="xywh")
pretrained_retina_net, new_retina_net = _create_retina_nets(x, y, epochs=1)
tmp = tempfile.mkdtemp()
pretrained_retina_net.save(f"{tmp}/checkpoint/")
load_model = tf.saved_model.load(f"{tmp}/checkpoint/")
_ = load_model(x)
@pytest.mark.skipif(os.name == "nt", reason="tempfile does not work on windows")
def test_savedmodel_format_weight_loading(self):
x, y = _create_bounding_box_dataset(bounding_box_format="xywh")
pretrained_retina_net, new_retina_net = _create_retina_nets(x, y, epochs=1)
tmp = tempfile.mkdtemp()
pretrained_retina_net.save_weights(f"{tmp}/checkpoint/")
new_retina_net.load_weights(f"{tmp}/checkpoint/")
for layer_original, layer_new in zip(
pretrained_retina_net.layers, new_retina_net.layers
):
for weight, weight_new in zip(
layer_original.get_weights(), layer_new.get_weights()
):
self.assertAllEqual(weight, weight_new)
def test_set_prediction_decoder(self):
x, y = _create_bounding_box_dataset(bounding_box_format="xywh")
pretrained_retina_net, _ = _create_retina_nets(x, y, epochs=0)
prediction_decoder = keras_cv.layers.NmsPredictionDecoder(
bounding_box_format="xywh",
anchor_generator=keras_cv.models.RetinaNet.default_anchor_generator(
bounding_box_format="xywh"
),
suppression_layer=keras_cv.layers.NonMaxSuppression(
iou_threshold=0.75,
bounding_box_format="xywh",
classes=20,
confidence_threshold=0.85,
),
)
pretrained_retina_net.prediction_decoder = prediction_decoder
_ = pretrained_retina_net.predict(x)
@pytest.mark.skipif(os.name == "nt", reason="tempfile does not work on windows")
def test_weight_loading(self):
x, y = _create_bounding_box_dataset(bounding_box_format="xywh")
pretrained_retina_net, new_retina_net = _create_retina_nets(x, y, epochs=1)
tmp = tempfile.mkdtemp()
pretrained_retina_net.save_weights(f"{tmp}/checkpoint.h5")
new_retina_net.load_weights(f"{tmp}/checkpoint.h5")
# manually check layers to make sure nothing is missed
for layer_original, layer_new in zip(
_get_retina_net_layers(pretrained_retina_net),
_get_retina_net_layers(new_retina_net),
):
for weight, weight_new in zip(
layer_original.get_weights(), layer_new.get_weights()
):
self.assertAllEqual(weight, weight_new)
# manually check layers to make sure nothing is missed in `get_weights()`
for layer_original, layer_new in zip(
pretrained_retina_net.layers, new_retina_net.layers
):
for weight, weight_new in zip(
layer_original.get_weights(), layer_new.get_weights()
):
self.assertAllEqual(weight, weight_new)
def test_weight_loading_via_metrics(self):
x, y = _create_bounding_box_dataset(bounding_box_format="xywh")
pretrained_retina_net, new_retina_net = _create_retina_nets(x, y, epochs=30)
tmp = tempfile.mkdtemp()
pretrained_retina_net.save_weights(f"{tmp}/checkpoint.h5")
new_retina_net.load_weights(f"{tmp}/checkpoint.h5")
metrics = pretrained_retina_net.evaluate(x, y, return_dict=True)
new_metrics = new_retina_net.evaluate(x, y, return_dict=True)
for key in metrics:
self.assertEqual(metrics[key], new_metrics[key])
def _get_retina_net_layers(model):
return [
model.backbone,
model.feature_pyramid,
model.prediction_decoder,
model.anchor_generator,
model.label_encoder,
model.classification_head,
model.box_head,
]
def _create_retina_nets(x, y, epochs=1, custom_decoder=False):
pretrained_retina_net = keras_cv.models.RetinaNet(
classes=20,
bounding_box_format="xywh",
backbone="resnet50",
backbone_weights="imagenet",
include_rescaling=True,
)
pretrained_retina_net.compile(
classification_loss=keras_cv.losses.FocalLoss(
from_logits=True,
reduction="none",
),
box_loss=keras_cv.losses.SmoothL1Loss(l1_cutoff=1.0, reduction="none"),
optimizer="adam",
metrics=[
keras_cv.metrics.COCOMeanAveragePrecision(
class_ids=range(20),
bounding_box_format="xyxy",
name="MaP",
),
keras_cv.metrics.COCORecall(
class_ids=range(20),
bounding_box_format="xyxy",
name="Recall",
),
],
)
pretrained_retina_net.build((None, None, None, 3))
# we need to fit the pretrained retina net to ensure the classification_head and
# regression head get updated.
if epochs != 0:
pretrained_retina_net.fit(x, y, epochs=epochs)
# New RetinaNet is constructed with a custom prediction decoder, and no
# pretrained backbone weights
prediction_decoder = None
if custom_decoder:
prediction_decoder = keras_cv.layers.NmsPredictionDecoder(
bounding_box_format="xywh",
anchor_generator=keras_cv.models.RetinaNet.default_anchor_generator(
bounding_box_format="xywh"
),
suppression_layer=keras_cv.layers.NonMaxSuppression(
iou_threshold=0.75,
bounding_box_format="xywh",
classes=20,
confidence_threshold=0.85,
),
)
new_retina_net = keras_cv.models.RetinaNet(
classes=20,
bounding_box_format="xywh",
backbone="resnet50",
backbone_weights=None,
include_rescaling=True,
prediction_decoder=prediction_decoder,
)
new_retina_net.compile(
classification_loss=keras_cv.losses.FocalLoss(
from_logits=True,
reduction="none",
),
box_loss=keras_cv.losses.SmoothL1Loss(l1_cutoff=1.0, reduction="none"),
optimizer="adam",
metrics=[
keras_cv.metrics.COCOMeanAveragePrecision(
class_ids=range(20),
bounding_box_format="xyxy",
name="MaP",
),
keras_cv.metrics.COCORecall(
class_ids=range(20),
bounding_box_format="xyxy",
name="Recall",
),
],
)
new_retina_net.build((None, None, None, 3))
return pretrained_retina_net, new_retina_net
def _create_bounding_box_dataset(bounding_box_format):
# Just about the easiest dataset you can have: all classes are 0 and all boxes
# are identical, [0.25, 0.25, 0.1, 0.1] in rel_xywh coordinates.
xs = tf.ones((10, 512, 512, 3), dtype=tf.float32)
y_classes = tf.zeros((10, 10, 1), dtype=tf.float32)
ys = tf.constant([0.25, 0.25, 0.1, 0.1], dtype=tf.float32)
ys = tf.expand_dims(ys, axis=0)
ys = tf.expand_dims(ys, axis=0)
ys = tf.tile(ys, [10, 10, 1])
ys = tf.concat([ys, y_classes], axis=-1)
ys = keras_cv.bounding_box.convert_format(
ys, source="rel_xywh", target=bounding_box_format, images=xs, dtype=tf.float32
)
return xs, ys
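# The resulting toy dataset: `xs` has shape (10, 512, 512, 3) and `ys` has
# shape (10, 10, 5), where the final column holds the class id (all zeros here).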
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import statistics
import pytest
import tensorflow as tf
from tensorflow.keras import optimizers
import keras_cv
class RetinaNetTest(tf.test.TestCase):
@pytest.fixture(autouse=True)
def cleanup_global_session(self):
# Code before yield runs before the test
yield
tf.keras.backend.clear_session()
def test_retina_net_construction(self):
retina_net = keras_cv.models.RetinaNet(
classes=20,
bounding_box_format="xywh",
backbone="resnet50",
backbone_weights=None,
include_rescaling=True,
)
retina_net.compile(
classification_loss="focal",
box_loss="smoothl1",
optimizer="adam",
metrics=[
keras_cv.metrics.COCOMeanAveragePrecision(
class_ids=range(20),
bounding_box_format="xyxy",
name="Standard MaP",
),
],
)
# TODO(lukewood) uncomment when using keras_cv.models.ResNet50
# self.assertIsNotNone(retina_net.backbone.get_layer(name="rescaling"))
# TODO(lukewood): test compile with the FocalLoss class
def test_retina_net_include_rescaling_required_with_default_backbone(self):
with self.assertRaises(ValueError):
_ = keras_cv.models.RetinaNet(
classes=20,
bounding_box_format="xywh",
backbone="resnet50",
backbone_weights=None,
# Note no include_rescaling is provided
)
@pytest.mark.skipif(
"INTEGRATION" not in os.environ or os.environ["INTEGRATION"] != "true",
reason="Takes a long time to run, only runs when INTEGRATION "
"environment variable is set. To run the test please run: \n"
"`INTEGRATION=true pytest keras_cv/",
)
def test_retina_net_call(self):
retina_net = keras_cv.models.RetinaNet(
classes=20,
bounding_box_format="xywh",
backbone="resnet50",
backbone_weights=None,
include_rescaling=True,
)
images = tf.random.uniform((2, 512, 512, 3))
_ = retina_net(images)
_ = retina_net.predict(images)
def test_all_metric_formats_must_match(self):
retina_net = keras_cv.models.RetinaNet(
classes=20,
bounding_box_format="xywh",
backbone="resnet50",
backbone_weights=None,
include_rescaling=True,
)
# all metric formats must match
with self.assertRaises(ValueError):
retina_net.compile(
optimizer="adam",
metrics=[
keras_cv.metrics.COCOMeanAveragePrecision(
class_ids=range(20),
bounding_box_format="xyxy",
name="Standard MaP",
),
keras_cv.metrics.COCOMeanAveragePrecision(
class_ids=range(20),
bounding_box_format="rel_xyxy",
name="Standard MaP",
),
],
)
def test_loss_output_shape_error_messages(self):
retina_net = keras_cv.models.RetinaNet(
classes=20,
bounding_box_format="xywh",
backbone="resnet50",
backbone_weights=None,
include_rescaling=True,
)
xs, ys = _create_bounding_box_dataset("xywh")
# all metric formats must match
retina_net.compile(
optimizer="adam",
box_loss=keras_cv.losses.SmoothL1Loss(reduction="none"),
classification_loss=keras_cv.losses.FocalLoss(
from_logits=True, reduction="sum"
),
)
with self.assertRaisesRegex(
ValueError, "output shape of `classification_loss`"
):
retina_net.fit(x=xs, y=ys, epochs=1)
# all metric formats must match
retina_net.compile(
optimizer="adam",
box_loss=keras_cv.losses.SmoothL1Loss(reduction="sum"),
classification_loss=keras_cv.losses.FocalLoss(
from_logits=True, reduction="none"
),
)
with self.assertRaisesRegex(ValueError, "output shape of `box_loss`"):
retina_net.fit(x=xs, y=ys, epochs=1)
def test_wrong_logits(self):
retina_net = keras_cv.models.RetinaNet(
classes=2,
bounding_box_format="xywh",
backbone="resnet50",
backbone_weights=None,
include_rescaling=False,
)
with self.assertRaisesRegex(
ValueError,
"from_logits",
):
retina_net.compile(
optimizer=optimizers.SGD(learning_rate=0.25),
classification_loss=keras_cv.losses.FocalLoss(
from_logits=False, reduction="none"
),
box_loss=keras_cv.losses.SmoothL1Loss(l1_cutoff=1.0, reduction="none"),
)
def test_no_metrics(self):
retina_net = keras_cv.models.RetinaNet(
classes=2,
bounding_box_format="xywh",
backbone="resnet50",
backbone_weights=None,
include_rescaling=False,
)
retina_net.compile(
optimizer=optimizers.SGD(learning_rate=0.25),
classification_loss=keras_cv.losses.FocalLoss(
from_logits=True, reduction="none"
),
box_loss=keras_cv.losses.SmoothL1Loss(l1_cutoff=1.0, reduction="none"),
)
def test_weights_contained_in_trainable_variables(self):
bounding_box_format = "xywh"
retina_net = keras_cv.models.RetinaNet(
classes=1,
bounding_box_format=bounding_box_format,
backbone="resnet50",
backbone_weights=None,
include_rescaling=False,
evaluate_train_time_metrics=False,
)
retina_net.backbone.trainable = False
retina_net.compile(
optimizer=optimizers.Adam(),
classification_loss=keras_cv.losses.FocalLoss(
from_logits=True, reduction="none"
),
box_loss=keras_cv.losses.SmoothL1Loss(l1_cutoff=1.0, reduction="none"),
metrics=[],
)
xs, ys = _create_bounding_box_dataset(bounding_box_format)
# call once
_ = retina_net(xs)
variable_names = [x.name for x in retina_net.trainable_variables]
# classification_head
self.assertIn("RetinaNet/prediction_head/conv2d_8/kernel:0", variable_names)
# box_head
self.assertIn("RetinaNet/prediction_head_1/conv2d_12/kernel:0", variable_names)
def test_weights_change(self):
bounding_box_format = "xywh"
retina_net = keras_cv.models.RetinaNet(
classes=1,
bounding_box_format=bounding_box_format,
backbone="resnet50",
backbone_weights=None,
include_rescaling=False,
evaluate_train_time_metrics=False,
)
retina_net.compile(
optimizer=optimizers.Adam(),
classification_loss=keras_cv.losses.FocalLoss(
from_logits=True, reduction="none"
),
box_loss=keras_cv.losses.SmoothL1Loss(l1_cutoff=1.0, reduction="none"),
metrics=[],
)
xs, ys = _create_bounding_box_dataset(bounding_box_format)
# call once
_ = retina_net(xs)
original_fpn_weights = retina_net.feature_pyramid.get_weights()
original_box_head_weights = retina_net.box_head.get_weights()
original_classification_head_weights = (
retina_net.classification_head.get_weights()
)
retina_net.fit(x=xs, y=ys, epochs=1)
fpn_after_fit = retina_net.feature_pyramid.get_weights()
box_head_after_fit_weights = retina_net.box_head.get_weights()
classification_head_after_fit_weights = (
retina_net.classification_head.get_weights()
)
for w1, w2 in zip(
original_classification_head_weights, classification_head_after_fit_weights
):
self.assertNotAllClose(w1, w2)
for w1, w2 in zip(original_box_head_weights, box_head_after_fit_weights):
self.assertNotAllClose(w1, w2)
for w1, w2 in zip(original_fpn_weights, fpn_after_fit):
self.assertNotAllClose(w1, w2)
# TODO(lukewood): configure for other coordinate systems.
@pytest.mark.skipif(
"INTEGRATION" not in os.environ or os.environ["INTEGRATION"] != "true",
reason="Takes a long time to run, only runs when INTEGRATION "
"environment variable is set. To run the test please run: \n"
"`INTEGRATION=true pytest "
"keras_cv/models/object_detection/retina_net/retina_net_test.py -k "
"test_fit_coco_metrics -s`",
)
def test_fit_coco_metrics(self):
bounding_box_format = "xywh"
retina_net = keras_cv.models.RetinaNet(
classes=1,
bounding_box_format=bounding_box_format,
backbone="resnet50",
backbone_weights=None,
include_rescaling=False,
evaluate_train_time_metrics=True,
)
retina_net.compile(
optimizer=optimizers.Adam(),
classification_loss=keras_cv.losses.FocalLoss(
from_logits=True, reduction="none"
),
box_loss=keras_cv.losses.SmoothL1Loss(l1_cutoff=1.0, reduction="none"),
metrics=[
keras_cv.metrics.COCOMeanAveragePrecision(
class_ids=range(1),
bounding_box_format=bounding_box_format,
name="MaP",
),
keras_cv.metrics.COCORecall(
class_ids=range(1),
bounding_box_format=bounding_box_format,
name="Recall",
),
],
)
xs, ys = _create_bounding_box_dataset(bounding_box_format)
for _ in range(50):
history = retina_net.fit(x=xs, y=ys, epochs=10)
metrics = history.history
metrics = [metrics["Recall"], metrics["MaP"]]
metrics = [statistics.mean(metric) for metric in metrics]
minimum = 0.3
nonzero = [x > minimum for x in metrics]
if all(nonzero):
return
raise ValueError(
f"Did not achieve better than {minimum} for all metrics in 50 epochs"
)
def _create_bounding_box_dataset(bounding_box_format):
# Just about the easiest dataset you can have: all classes are 0 and all boxes
# are identical, [0.25, 0.25, 0.1, 0.1] in rel_xywh coordinates.
xs = tf.ones((10, 512, 512, 3), dtype=tf.float32)
y_classes = tf.zeros((10, 10, 1), dtype=tf.float32)
ys = tf.constant([0.25, 0.25, 0.1, 0.1], dtype=tf.float32)
ys = tf.expand_dims(ys, axis=0)
ys = tf.expand_dims(ys, axis=0)
ys = tf.tile(ys, [10, 10, 1])
ys = tf.concat([ys, y_classes], axis=-1)
ys = keras_cv.bounding_box.convert_format(
ys, source="rel_xywh", target=bounding_box_format, images=xs, dtype=tf.float32
)
return xs, ys
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""ResNet models for KerasCV.
Reference:
- [Deep Residual Learning for Image Recognition](https://arxiv.org/abs/1512.03385) (CVPR 2015)
- [Based on the original keras.applications ResNet](https://github.com/keras-team/keras/blob/master/keras/applications/resnet.py)
"""
import tensorflow as tf
from tensorflow.keras import backend
from tensorflow.keras import layers
from keras_cv.models import utils
MODEL_CONFIGS = {
"ResNet18": {
"stackwise_filters": [64, 128, 256, 512],
"stackwise_blocks": [2, 2, 2, 2],
"stackwise_strides": [1, 2, 2, 2],
},
"ResNet34": {
"stackwise_filters": [64, 128, 256, 512],
"stackwise_blocks": [3, 4, 6, 3],
"stackwise_strides": [1, 2, 2, 2],
},
"ResNet50": {
"stackwise_filters": [64, 128, 256, 512],
"stackwise_blocks": [3, 4, 6, 3],
"stackwise_strides": [1, 2, 2, 2],
},
"ResNet101": {
"stackwise_filters": [64, 128, 256, 512],
"stackwise_blocks": [3, 4, 23, 3],
"stackwise_strides": [1, 2, 2, 2],
},
"ResNet152": {
"stackwise_filters": [64, 128, 256, 512],
"stackwise_blocks": [3, 8, 36, 3],
"stackwise_strides": [1, 2, 2, 2],
},
}
BN_AXIS = 3
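# BN_AXIS = 3 is the channel axis under the channels_last (NHWC) data format
# assumed throughout these models.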
BASE_DOCSTRING = """Instantiates the {name} architecture.
Reference:
- [Deep Residual Learning for Image Recognition](https://arxiv.org/abs/1512.03385)
- [Identity Mappings in Deep Residual Networks](https://arxiv.org/abs/1603.05027) (ECCV 2016)
This function returns a Keras {name} model.
The difference between ResNet and ResNetV2 lies in the structure of their
individual building blocks. In ResNetV2, the batch normalization and
ReLU activation precede the convolution layers, whereas in ResNetV1 the
batch normalization and ReLU activation are applied after the
convolution layers.
For transfer learning use cases, make sure to read the [guide to transfer
learning & fine-tuning](https://keras.io/guides/transfer_learning/).
Args:
include_rescaling: whether or not to rescale the inputs. If set to `True`,
inputs will be passed through a `Rescaling(1/255.0)` layer.
include_top: whether to include the fully-connected layer at the top of the
network. If `True`, `classes` must be provided.
classes: optional number of classes to classify images into, only to be
specified if `include_top` is True.
weights: one of `None` (random initialization), or a pretrained weight file
path.
input_shape: optional shape tuple, defaults to (None, None, 3).
input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
to use as image input for the model.
pooling: optional pooling mode for feature extraction
when `include_top` is `False`.
- `None` means that the output of the model will be the 4D tensor output
of the last convolutional block.
- `avg` means that global average pooling will be applied to the output
of the last convolutional block, and thus the output of the model will
be a 2D tensor.
- `max` means that global max pooling will be applied.
name: (Optional) name to pass to the model. Defaults to "{name}".
classifier_activation: A `str` or callable. The activation function to use
on the "top" layer. Ignored unless `include_top=True`. Set
`classifier_activation=None` to return the logits of the "top" layer.
Returns:
A `keras.Model` instance.
"""
def BasicBlock(filters, kernel_size=3, stride=1, conv_shortcut=True, name=None):
"""A basic residual block.
Args:
filters: integer, filters of the basic layer.
kernel_size: default 3, kernel size of the basic layer.
stride: default 1, stride of the first layer.
conv_shortcut: default True, use convolution shortcut if True,
otherwise identity shortcut.
name: string, block label.
Returns:
A function that takes an input tensor and returns the output tensor of
the residual block.
"""
if name is None:
name = f"v1_basic_block_{backend.get_uid('v1_basic_block_')}"
def apply(x):
if conv_shortcut:
shortcut = layers.Conv2D(
filters, 1, strides=stride, use_bias=False, name=name + "_0_conv"
)(x)
shortcut = layers.BatchNormalization(
axis=BN_AXIS, epsilon=1.001e-5, name=name + "_0_bn"
)(shortcut)
else:
shortcut = x
x = layers.Conv2D(
filters,
kernel_size,
padding="SAME",
strides=stride,
use_bias=False,
name=name + "_1_conv",
)(x)
x = layers.BatchNormalization(
axis=BN_AXIS, epsilon=1.001e-5, name=name + "_1_bn"
)(x)
x = layers.Activation("relu", name=name + "_1_relu")(x)
x = layers.Conv2D(
filters, kernel_size, padding="SAME", use_bias=False, name=name + "_2_conv"
)(x)
x = layers.BatchNormalization(
axis=BN_AXIS, epsilon=1.001e-5, name=name + "_2_bn"
)(x)
x = layers.Add(name=name + "_add")([shortcut, x])
x = layers.Activation("relu", name=name + "_out")(x)
return x
return apply
def Block(filters, kernel_size=3, stride=1, conv_shortcut=True, name=None):
"""A residual block.
Args:
filters: integer, filters of the bottleneck layer.
kernel_size: default 3, kernel size of the bottleneck layer.
stride: default 1, stride of the first layer.
conv_shortcut: default True, use convolution shortcut if True,
otherwise identity shortcut.
name: string, block label.
Returns:
A function that takes an input tensor and returns the output tensor of
the residual block.
"""
if name is None:
name = f"v1_block_{backend.get_uid('v1_block')}"
def apply(x):
if conv_shortcut:
shortcut = layers.Conv2D(
4 * filters, 1, strides=stride, use_bias=False, name=name + "_0_conv"
)(x)
shortcut = layers.BatchNormalization(
axis=BN_AXIS, epsilon=1.001e-5, name=name + "_0_bn"
)(shortcut)
else:
shortcut = x
x = layers.Conv2D(
filters, 1, strides=stride, use_bias=False, name=name + "_1_conv"
)(x)
x = layers.BatchNormalization(
axis=BN_AXIS, epsilon=1.001e-5, name=name + "_1_bn"
)(x)
x = layers.Activation("relu", name=name + "_1_relu")(x)
x = layers.Conv2D(
filters, kernel_size, padding="SAME", use_bias=False, name=name + "_2_conv"
)(x)
x = layers.BatchNormalization(
axis=BN_AXIS, epsilon=1.001e-5, name=name + "_2_bn"
)(x)
x = layers.Activation("relu", name=name + "_2_relu")(x)
x = layers.Conv2D(4 * filters, 1, use_bias=False, name=name + "_3_conv")(x)
x = layers.BatchNormalization(
axis=BN_AXIS, epsilon=1.001e-5, name=name + "_3_bn"
)(x)
x = layers.Add(name=name + "_add")([shortcut, x])
x = layers.Activation("relu", name=name + "_out")(x)
return x
return apply
def Stack(filters, blocks, stride=2, name=None, block_fn=Block, first_shortcut=True):
"""A set of stacked residual blocks.
Args:
filters: integer, filters of the layers in a block.
blocks: integer, blocks in the stacked blocks.
stride: default 2, stride of the first layer in the first block.
name: string, stack label.
block_fn: callable, `Block` or `BasicBlock`, the block function to stack.
first_shortcut: default True, use convolution shortcut if True,
otherwise identity shortcut.
Returns:
A function that takes an input tensor and returns the output tensor of
the stacked blocks.
"""
if name is None:
name = f"v1_stack_{backend.get_uid('v1_stack')}"
def apply(x):
x = block_fn(
filters, stride=stride, name=name + "_block1", conv_shortcut=first_shortcut
)(x)
for i in range(2, blocks + 1):
x = block_fn(filters, conv_shortcut=False, name=name + "_block" + str(i))(x)
return x
return apply
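# Example (a minimal sketch, name chosen for illustration): the first ResNet50
# stage can be built as
#
#   x = Stack(filters=64, blocks=3, stride=1, name="conv2")(x)
#
# which chains one projection `Block` (convolution shortcut) followed by two
# identity `Block`s.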
def ResNet(
stackwise_filters,
stackwise_blocks,
stackwise_strides,
include_rescaling,
include_top,
name="ResNet",
weights=None,
input_shape=(None, None, 3),
input_tensor=None,
pooling=None,
classes=None,
classifier_activation="softmax",
block_fn=Block,
**kwargs,
):
"""Instantiates the ResNet architecture.
Args:
stackwise_filters: number of filters for each stack in the model.
stackwise_blocks: number of blocks for each stack in the model.
stackwise_strides: stride for each stack in the model.
include_rescaling: whether or not to rescale the inputs. If set to `True`,
inputs will be passed through a `Rescaling(1/255.0)` layer.
name: string, model name.
include_top: whether to include the fully-connected
layer at the top of the network.
weights: one of `None` (random initialization),
or the path to the weights file to be loaded.
input_shape: optional shape tuple, defaults to (None, None, 3).
input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
to use as image input for the model.
pooling: optional pooling mode for feature extraction
when `include_top` is `False`.
- `None` means that the output of the model will be
the 4D tensor output of the
last convolutional layer.
- `avg` means that global average pooling
will be applied to the output of the
last convolutional layer, and thus
the output of the model will be a 2D tensor.
- `max` means that global max pooling will
be applied.
classes: optional number of classes to classify images
into, only to be specified if `include_top` is True.
classifier_activation: A `str` or callable. The activation function to use
on the "top" layer. Ignored unless `include_top=True`. Set
`classifier_activation=None` to return the logits of the "top" layer.
block_fn: callable, `Block` or `BasicBlock`, the block function to stack.
Use 'basic_block' for ResNet18 and ResNet34.
**kwargs: Pass-through keyword arguments to `tf.keras.Model`.
Returns:
A `keras.Model` instance.
"""
if weights and not tf.io.gfile.exists(weights):
raise ValueError(
"The `weights` argument should be either `None` or the path to the "
"weights file to be loaded. Weights file not found at location: {weights}"
)
if include_top and not classes:
raise ValueError(
"If `include_top` is True, you should specify `classes`. "
f"Received: classes={classes}"
)
if include_top and pooling:
raise ValueError(
f"`pooling` must be `None` when `include_top=True`."
f"Received pooling={pooling} and include_top={include_top}. "
)
inputs = utils.parse_model_inputs(input_shape, input_tensor)
x = inputs
if include_rescaling:
x = layers.Rescaling(1 / 255.0)(x)
x = layers.Conv2D(
64, 7, strides=2, use_bias=False, padding="same", name="conv1_conv"
)(x)
x = layers.BatchNormalization(axis=BN_AXIS, epsilon=1.001e-5, name="conv1_bn")(x)
x = layers.Activation("relu", name="conv1_relu")(x)
x = layers.MaxPooling2D(3, strides=2, padding="same", name="pool1_pool")(x)
num_stacks = len(stackwise_filters)
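# Bottleneck `Block`s always need a projection shortcut in the first block of a
# stack (the residual branch widens the channel count 4x), while `BasicBlock`s
# preserve it, so only stacks after the first need the convolution shortcut.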
for stack_index in range(num_stacks):
x = Stack(
filters=stackwise_filters[stack_index],
blocks=stackwise_blocks[stack_index],
stride=stackwise_strides[stack_index],
block_fn=block_fn,
first_shortcut=block_fn == Block or stack_index > 0,
)(x)
if include_top:
x = layers.GlobalAveragePooling2D(name="avg_pool")(x)
x = layers.Dense(classes, activation=classifier_activation, name="predictions")(
x
)
else:
if pooling == "avg":
x = layers.GlobalAveragePooling2D(name="avg_pool")(x)
elif pooling == "max":
x = layers.GlobalMaxPooling2D(name="max_pool")(x)
# Create model.
model = tf.keras.Model(inputs, x, name=name, **kwargs)
if weights is not None:
model.load_weights(weights)
return model
def ResNet18(
include_rescaling,
include_top,
classes=None,
weights=None,
input_shape=(None, None, 3),
input_tensor=None,
pooling=None,
classifier_activation="softmax",
name="resnet18",
**kwargs,
):
"""Instantiates the ResNet18 architecture."""
return ResNet(
stackwise_filters=MODEL_CONFIGS["ResNet18"]["stackwise_filters"],
stackwise_blocks=MODEL_CONFIGS["ResNet18"]["stackwise_blocks"],
stackwise_strides=MODEL_CONFIGS["ResNet18"]["stackwise_strides"],
include_rescaling=include_rescaling,
include_top=include_top,
name=name,
weights=weights,
input_shape=input_shape,
input_tensor=input_tensor,
pooling=pooling,
classes=classes,
classifier_activation=classifier_activation,
block_fn=BasicBlock,
**kwargs,
)
def ResNet34(
include_rescaling,
include_top,
classes=None,
weights=None,
input_shape=(None, None, 3),
input_tensor=None,
pooling=None,
classifier_activation="softmax",
name="resnet34",
**kwargs,
):
"""Instantiates the ResNet34 architecture."""
return ResNet(
stackwise_filters=MODEL_CONFIGS["ResNet34"]["stackwise_filters"],
stackwise_blocks=MODEL_CONFIGS["ResNet34"]["stackwise_blocks"],
stackwise_strides=MODEL_CONFIGS["ResNet34"]["stackwise_strides"],
include_rescaling=include_rescaling,
include_top=include_top,
name=name,
weights=weights,
input_shape=input_shape,
input_tensor=input_tensor,
pooling=pooling,
classes=classes,
classifier_activation=classifier_activation,
block_fn=BasicBlock,
**kwargs,
)
def ResNet50(
include_rescaling,
include_top,
classes=None,
weights=None,
input_shape=(None, None, 3),
input_tensor=None,
pooling=None,
classifier_activation="softmax",
name="resnet50",
**kwargs,
):
"""Instantiates the ResNet50 architecture."""
return ResNet(
stackwise_filters=MODEL_CONFIGS["ResNet50"]["stackwise_filters"],
stackwise_blocks=MODEL_CONFIGS["ResNet50"]["stackwise_blocks"],
stackwise_strides=MODEL_CONFIGS["ResNet50"]["stackwise_strides"],
include_rescaling=include_rescaling,
include_top=include_top,
name=name,
weights=weights,
input_shape=input_shape,
input_tensor=input_tensor,
pooling=pooling,
classes=classes,
classifier_activation=classifier_activation,
**kwargs,
)
def ResNet101(
include_rescaling,
include_top,
classes=None,
weights=None,
input_shape=(None, None, 3),
input_tensor=None,
pooling=None,
classifier_activation="softmax",
name="resnet101",
**kwargs,
):
"""Instantiates the ResNet101 architecture."""
return ResNet(
stackwise_filters=MODEL_CONFIGS["ResNet101"]["stackwise_filters"],
stackwise_blocks=MODEL_CONFIGS["ResNet101"]["stackwise_blocks"],
stackwise_strides=MODEL_CONFIGS["ResNet101"]["stackwise_strides"],
name=name,
include_rescaling=include_rescaling,
include_top=include_top,
weights=weights,
input_shape=input_shape,
input_tensor=input_tensor,
pooling=pooling,
classes=classes,
classifier_activation=classifier_activation,
**kwargs,
)
def ResNet152(
include_rescaling,
include_top,
classes=None,
weights=None,
input_shape=(None, None, 3),
input_tensor=None,
pooling=None,
classifier_activation="softmax",
name="resnet152",
**kwargs,
):
"""Instantiates the ResNet152 architecture."""
return ResNet(
stackwise_filters=MODEL_CONFIGS["ResNet152"]["stackwise_filters"],
stackwise_blocks=MODEL_CONFIGS["ResNet152"]["stackwise_blocks"],
stackwise_strides=MODEL_CONFIGS["ResNet152"]["stackwise_strides"],
include_rescaling=include_rescaling,
include_top=include_top,
name=name,
weights=weights,
input_shape=input_shape,
input_tensor=input_tensor,
pooling=pooling,
classes=classes,
classifier_activation=classifier_activation,
**kwargs,
)
setattr(ResNet18, "__doc__", BASE_DOCSTRING.format(name="ResNet18"))
setattr(ResNet34, "__doc__", BASE_DOCSTRING.format(name="ResNet34"))
setattr(ResNet50, "__doc__", BASE_DOCSTRING.format(name="ResNet50"))
setattr(ResNet101, "__doc__", BASE_DOCSTRING.format(name="ResNet101"))
setattr(ResNet152, "__doc__", BASE_DOCSTRING.format(name="ResNet152"))
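# Example usage (a minimal sketch):
#
#   model = ResNet50(include_rescaling=True, include_top=True, classes=10)
#   images = tf.random.uniform((1, 224, 224, 3), maxval=255.0)
#   preds = model(images)  # -> shape (1, 10)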
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from absl.testing import parameterized
from keras_cv.models import resnet_v1
from .models_test import ModelsTest
MODEL_LIST = [
(resnet_v1.ResNet18, 512, {}),
(resnet_v1.ResNet34, 512, {}),
(resnet_v1.ResNet50, 2048, {}),
(resnet_v1.ResNet101, 2048, {}),
(resnet_v1.ResNet152, 2048, {}),
]
class ResNetV1Test(ModelsTest, tf.test.TestCase, parameterized.TestCase):
@parameterized.parameters(*MODEL_LIST)
def test_application_base(self, app, _, args):
super()._test_application_base(app, _, args)
@parameterized.parameters(*MODEL_LIST)
def test_application_with_rescaling(self, app, last_dim, args):
super()._test_application_with_rescaling(app, last_dim, args)
@parameterized.parameters(*MODEL_LIST)
def test_application_pooling(self, app, last_dim, args):
super()._test_application_pooling(app, last_dim, args)
@parameterized.parameters(*MODEL_LIST)
def test_application_variable_input_channels(self, app, last_dim, args):
super()._test_application_variable_input_channels(app, last_dim, args)
@parameterized.parameters(*MODEL_LIST)
def test_model_can_be_used_as_backbone(self, app, last_dim, args):
super()._test_model_can_be_used_as_backbone(app, last_dim, args)
if __name__ == "__main__":
tf.test.main()
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""ResNet models for Keras.
Reference:
- [Identity Mappings in Deep Residual Networks](https://arxiv.org/abs/1603.05027) (ECCV 2016)
- [Based on the original keras.applications ResNet](https://github.com/keras-team/keras/blob/master/keras/applications/resnet_v2.py)
"""
import types
import tensorflow as tf
from tensorflow.keras import backend
from tensorflow.keras import layers
from keras_cv.models import utils
from keras_cv.models.weights import parse_weights
MODEL_CONFIGS = {
"ResNet18V2": {
"stackwise_filters": [64, 128, 256, 512],
"stackwise_blocks": [2, 2, 2, 2],
"stackwise_strides": [1, 2, 2, 2],
},
"ResNet34V2": {
"stackwise_filters": [64, 128, 256, 512],
"stackwise_blocks": [3, 4, 6, 3],
"stackwise_strides": [1, 2, 2, 2],
},
"ResNet50V2": {
"stackwise_filters": [64, 128, 256, 512],
"stackwise_blocks": [3, 4, 6, 3],
"stackwise_strides": [1, 2, 2, 2],
},
"ResNet101V2": {
"stackwise_filters": [64, 128, 256, 512],
"stackwise_blocks": [3, 4, 23, 3],
"stackwise_strides": [1, 2, 2, 2],
},
"ResNet152V2": {
"stackwise_filters": [64, 128, 256, 512],
"stackwise_blocks": [3, 8, 36, 3],
"stackwise_strides": [1, 2, 2, 2],
},
}
BN_AXIS = 3
BASE_DOCSTRING = """Instantiates the {name} architecture.
Reference:
- [Identity Mappings in Deep Residual Networks](https://arxiv.org/abs/1603.05027) (ECCV 2016)
This function returns a Keras {name} model.
The difference between ResNetV1 and ResNetV2 rests in the structure of their
individual building blocks. In ResNetV2, the batch normalization and
ReLU activation precede the convolution layers, whereas in ResNetV1 they
are applied after the convolution layers.
For transfer learning use cases, make sure to read the [guide to transfer
learning & fine-tuning](https://keras.io/guides/transfer_learning/).
Args:
include_rescaling: whether or not to Rescale the inputs. If set to True,
inputs will be passed through a `Rescaling(1/255.0)` layer.
include_top: whether to include the fully-connected layer at the top of the
network. If True, `classes` must be provided.
classes: optional number of classes to classify images into, only to be
specified if `include_top` is True.
weights: one of `None` (random initialization), a pretrained weight file
path, or a reference to pre-trained weights (e.g. 'imagenet/classification')
(see available pre-trained weights in weights.py)
input_shape: optional shape tuple, defaults to (None, None, 3).
input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
to use as image input for the model.
pooling: optional pooling mode for feature extraction
when `include_top` is `False`.
- `None` means that the output of the model will be the 4D tensor output
of the last convolutional block.
- `avg` means that global average pooling will be applied to the output
of the last convolutional block, and thus the output of the model will
be a 2D tensor.
- `max` means that global max pooling will be applied.
name: (Optional) name to pass to the model. Defaults to "{name}".
classifier_activation: A `str` or callable. The activation function to use
on the "top" layer. Ignored unless `include_top=True`. Set
`classifier_activation=None` to return the logits of the "top" layer.
Returns:
A `keras.Model` instance.
"""
def BasicBlock(filters, kernel_size=3, stride=1, conv_shortcut=False, name=None):
"""A basic residual block (v2).
Args:
filters: integer, filters of the basic layer.
kernel_size: default 3, kernel size of the basic layer.
stride: default 1, stride of the first layer.
conv_shortcut: default False, use convolution shortcut if True,
otherwise identity shortcut.
name: string, block label.
Returns:
Output tensor for the residual block.
"""
if name is None:
name = f"v2_basic_block_{backend.get_uid('v2_basic_block')}"
def apply(x):
use_preactivation = layers.BatchNormalization(
axis=BN_AXIS, epsilon=1.001e-5, name=name + "_use_preactivation_bn"
)(x)
use_preactivation = layers.Activation(
"relu", name=name + "_use_preactivation_relu"
)(use_preactivation)
if conv_shortcut:
shortcut = layers.Conv2D(filters, 1, strides=stride, name=name + "_0_conv")(
use_preactivation
)
else:
shortcut = (
layers.MaxPooling2D(1, strides=stride, name=name + "_0_max_pooling")(x)
if stride > 1
else x
)
x = layers.Conv2D(
filters,
kernel_size,
padding="SAME",
strides=1,
use_bias=False,
name=name + "_1_conv",
)(use_preactivation)
x = layers.BatchNormalization(
axis=BN_AXIS, epsilon=1.001e-5, name=name + "_1_bn"
)(x)
x = layers.Activation("relu", name=name + "_1_relu")(x)
x = layers.ZeroPadding2D(padding=((1, 1), (1, 1)), name=name + "_2_pad")(x)
x = layers.Conv2D(
filters,
kernel_size,
strides=stride,
use_bias=False,
name=name + "_2_conv",
)(x)
x = layers.Add(name=name + "_out")([shortcut, x])
return x
return apply
def Block(filters, kernel_size=3, stride=1, conv_shortcut=False, name=None):
"""A residual block (v2).
Args:
filters: integer, filters of the bottleneck layer.
kernel_size: default 3, kernel size of the bottleneck layer.
stride: default 1, stride of the first layer.
conv_shortcut: default False, use convolution shortcut if True,
otherwise identity shortcut.
name: string, block label.
Returns:
Output tensor for the residual block.
"""
if name is None:
name = f"v2_block_{backend.get_uid('v2_block')}"
def apply(x):
use_preactivation = layers.BatchNormalization(
axis=BN_AXIS, epsilon=1.001e-5, name=name + "_use_preactivation_bn"
)(x)
use_preactivation = layers.Activation(
"relu", name=name + "_use_preactivation_relu"
)(use_preactivation)
if conv_shortcut:
shortcut = layers.Conv2D(
4 * filters, 1, strides=stride, name=name + "_0_conv"
)(use_preactivation)
else:
shortcut = (
layers.MaxPooling2D(1, strides=stride, name=name + "_0_max_pooling")(x)
if stride > 1
else x
)
x = layers.Conv2D(filters, 1, strides=1, use_bias=False, name=name + "_1_conv")(
use_preactivation
)
x = layers.BatchNormalization(
axis=BN_AXIS, epsilon=1.001e-5, name=name + "_1_bn"
)(x)
x = layers.Activation("relu", name=name + "_1_relu")(x)
x = layers.ZeroPadding2D(padding=((1, 1), (1, 1)), name=name + "_2_pad")(x)
x = layers.Conv2D(
filters,
kernel_size,
strides=stride,
use_bias=False,
name=name + "_2_conv",
)(x)
x = layers.BatchNormalization(
axis=BN_AXIS, epsilon=1.001e-5, name=name + "_2_bn"
)(x)
x = layers.Activation("relu", name=name + "_2_relu")(x)
x = layers.Conv2D(4 * filters, 1, name=name + "_3_conv")(x)
x = layers.Add(name=name + "_out")([shortcut, x])
return x
return apply
def Stack(
filters,
blocks,
stride=2,
name=None,
block_fn=Block,
first_shortcut=True,
stack_index=1,
):
"""A set of stacked blocks.
Args:
filters: integer, filters of the layer in a block.
blocks: integer, blocks in the stacked blocks.
stride: default 2, stride of the first layer in the first block.
name: string, stack label.
block_fn: callable, `Block` or `BasicBlock`, the block function to stack.
first_shortcut: default True, use convolution shortcut if True,
otherwise identity shortcut.
stack_index: default 1, index of the stack, used to build the default name.
Returns:
Output tensor for the stacked blocks.
"""
if name is None:
name = f"v2_stack_{stack_index}"
def apply(x):
x = block_fn(filters, conv_shortcut=first_shortcut, name=name + "_block1")(x)
for i in range(2, blocks):
x = block_fn(filters, name=name + "_block" + str(i))(x)
x = block_fn(filters, stride=stride, name=name + "_block" + str(blocks))(x)
return x
return apply
def ResNetV2(
stackwise_filters,
stackwise_blocks,
stackwise_strides,
include_rescaling,
include_top,
name="ResNetV2",
weights=None,
input_shape=(None, None, 3),
input_tensor=None,
pooling=None,
classes=None,
classifier_activation="softmax",
block_fn=Block,
**kwargs,
):
"""Instantiates the ResNetV2 architecture.
Args:
stackwise_filters: number of filters for each stack in the model.
stackwise_blocks: number of blocks for each stack in the model.
stackwise_strides: stride for each stack in the model.
include_rescaling: whether or not to Rescale the inputs. If set to True,
inputs will be passed through a `Rescaling(1/255.0)` layer.
name: string, model name.
include_top: whether to include the fully-connected
layer at the top of the network.
weights: one of `None` (random initialization),
or the path to the weights file to be loaded.
input_shape: optional shape tuple, defaults to (None, None, 3).
input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
to use as image input for the model.
pooling: optional pooling mode for feature extraction
when `include_top` is `False`.
- `None` means that the output of the model will be
the 4D tensor output of the
last convolutional layer.
- `avg` means that global average pooling
will be applied to the output of the
last convolutional layer, and thus
the output of the model will be a 2D tensor.
- `max` means that global max pooling will
be applied.
classes: optional number of classes to classify images
into, only to be specified if `include_top` is True.
classifier_activation: A `str` or callable. The activation function to use
on the "top" layer. Ignored unless `include_top=True`. Set
`classifier_activation=None` to return the logits of the "top" layer.
block_fn: callable, `Block` or `BasicBlock`, the block function to stack.
Use `BasicBlock` for ResNet18V2 and ResNet34V2.
**kwargs: Pass-through keyword arguments to `tf.keras.Model`.
Returns:
A `keras.Model` instance.
"""
if weights and not tf.io.gfile.exists(weights):
raise ValueError(
"The `weights` argument should be either `None` or the path to the "
f"weights file to be loaded. Weights file not found at location: {weights}"
)
if include_top and not classes:
raise ValueError(
"If `include_top` is True, you should specify `classes`. "
f"Received: classes={classes}"
)
if include_top and pooling:
raise ValueError(
"`pooling` must be `None` when `include_top=True`. "
f"Received pooling={pooling} and include_top={include_top}."
)
inputs = utils.parse_model_inputs(input_shape, input_tensor)
x = inputs
if include_rescaling:
x = layers.Rescaling(1 / 255.0)(x)
x = layers.Conv2D(
64, 7, strides=2, use_bias=True, padding="same", name="conv1_conv"
)(x)
x = layers.MaxPooling2D(3, strides=2, padding="same", name="pool1_pool")(x)
num_stacks = len(stackwise_filters)
stack_level_outputs = {}
for stack_index in range(num_stacks):
x = Stack(
filters=stackwise_filters[stack_index],
blocks=stackwise_blocks[stack_index],
stride=stackwise_strides[stack_index],
block_fn=block_fn,
first_shortcut=block_fn == Block or stack_index > 0,
stack_index=stack_index,
)(x)
stack_level_outputs[stack_index + 2] = x
x = layers.BatchNormalization(axis=BN_AXIS, epsilon=1.001e-5, name="post_bn")(x)
x = layers.Activation("relu", name="post_relu")(x)
if include_top:
x = layers.GlobalAveragePooling2D(name="avg_pool")(x)
x = layers.Dense(classes, activation=classifier_activation, name="predictions")(
x
)
else:
if pooling == "avg":
x = layers.GlobalAveragePooling2D(name="avg_pool")(x)
elif pooling == "max":
x = layers.GlobalMaxPooling2D(name="max_pool")(x)
# Create model.
model = tf.keras.Model(inputs, x, name=name, **kwargs)
if weights is not None:
model.load_weights(weights)
# Set this private attribute for recreate backbone model with outputs at each of the
# resolution level.
model._backbone_level_outputs = stack_level_outputs
# TODO(scottzhu): Extract this into a standalone util function.
def as_backbone(self, min_level=None, max_level=None):
"""Convert the ResNet application model into a backbone for other tasks.
The backbone model will usually take the same inputs as the original
application model, but produce multiple outputs, one for each feature level.
Those outputs can be fed to downstream networks, like an FPN or RPN.
The output of the backbone model will be a dict with int as key and tensor as
value. The int key represents the level of the feature output.
A typical feature pyramid has five levels corresponding to scales P3, P4, P5,
P6, P7 in the backbone. Scale Pn represents a feature map 2^n times smaller in
width and height than the input image.
Args:
min_level: optional int, the lowest level of feature to be included in the
output. Defaults to the model's lowest feature level (based on the model structure).
max_level: optional int, the highest level of feature to be included in the
output. Defaults to the model's highest feature level (based on the model structure).
Returns:
a `tf.keras.Model` which has dict as outputs.
Raises:
ValueError: When the model lacks feature level information and can't
be converted to a backbone model, or when the min_level/max_level param is
out of range based on the model structure.
"""
if hasattr(self, "_backbone_level_outputs"):
backbone_level_outputs = self._backbone_level_outputs
model_levels = list(sorted(backbone_level_outputs.keys()))
if min_level is not None:
if min_level < model_levels[0]:
raise ValueError(
f"The min_level provided: {min_level} should be in "
f"the range of {model_levels}"
)
else:
min_level = model_levels[0]
if max_level is not None:
if max_level > model_levels[-1]:
raise ValueError(
f"The max_level provided: {max_level} should be in "
f"the range of {model_levels}"
)
else:
max_level = model_levels[-1]
outputs = {}
for level in range(min_level, max_level + 1):
outputs[level] = backbone_level_outputs[level]
return tf.keras.Model(inputs=self.inputs, outputs=outputs)
else:
raise ValueError(
"The current model doesn't have any feature level "
"information and can't be converted to a backbone model."
)
# Bind the `to_backbone_model` method to the application model.
model.as_backbone = types.MethodType(as_backbone, model)
return model
def ResNet18V2(
include_rescaling,
include_top,
classes=None,
weights=None,
input_shape=(None, None, 3),
input_tensor=None,
pooling=None,
classifier_activation="softmax",
name="resnet18",
**kwargs,
):
"""Instantiates the ResNet18V2 architecture."""
return ResNetV2(
stackwise_filters=MODEL_CONFIGS["ResNet18V2"]["stackwise_filters"],
stackwise_blocks=MODEL_CONFIGS["ResNet18V2"]["stackwise_blocks"],
stackwise_strides=MODEL_CONFIGS["ResNet18V2"]["stackwise_strides"],
include_rescaling=include_rescaling,
include_top=include_top,
name=name,
weights=weights,
input_shape=input_shape,
input_tensor=input_tensor,
pooling=pooling,
classes=classes,
classifier_activation=classifier_activation,
block_fn=BasicBlock,
**kwargs,
)
def ResNet34V2(
include_rescaling,
include_top,
classes=None,
weights=None,
input_shape=(None, None, 3),
input_tensor=None,
pooling=None,
classifier_activation="softmax",
name="resnet34",
**kwargs,
):
"""Instantiates the ResNet34V2 architecture."""
return ResNetV2(
stackwise_filters=MODEL_CONFIGS["ResNet34V2"]["stackwise_filters"],
stackwise_blocks=MODEL_CONFIGS["ResNet34V2"]["stackwise_blocks"],
stackwise_strides=MODEL_CONFIGS["ResNet34V2"]["stackwise_strides"],
include_rescaling=include_rescaling,
include_top=include_top,
name=name,
weights=weights,
input_shape=input_shape,
input_tensor=input_tensor,
pooling=pooling,
classes=classes,
classifier_activation=classifier_activation,
block_fn=BasicBlock,
**kwargs,
)
def ResNet50V2(
include_rescaling,
include_top,
classes=None,
weights=None,
input_shape=(None, None, 3),
input_tensor=None,
pooling=None,
classifier_activation="softmax",
name="resnet50v2",
**kwargs,
):
"""Instantiates the ResNet50V2 architecture."""
return ResNetV2(
stackwise_filters=MODEL_CONFIGS["ResNet50V2"]["stackwise_filters"],
stackwise_blocks=MODEL_CONFIGS["ResNet50V2"]["stackwise_blocks"],
stackwise_strides=MODEL_CONFIGS["ResNet50V2"]["stackwise_strides"],
include_rescaling=include_rescaling,
include_top=include_top,
name=name,
weights=parse_weights(weights, include_top, "resnet50v2"),
input_shape=input_shape,
input_tensor=input_tensor,
pooling=pooling,
classes=classes,
classifier_activation=classifier_activation,
**kwargs,
)
def ResNet101V2(
include_rescaling,
include_top,
classes=None,
weights=None,
input_shape=(None, None, 3),
input_tensor=None,
pooling=None,
classifier_activation="softmax",
name="resnet101v2",
**kwargs,
):
"""Instantiates the ResNet101V2 architecture."""
return ResNetV2(
stackwise_filters=MODEL_CONFIGS["ResNet101V2"]["stackwise_filters"],
stackwise_blocks=MODEL_CONFIGS["ResNet101V2"]["stackwise_blocks"],
stackwise_strides=MODEL_CONFIGS["ResNet101V2"]["stackwise_strides"],
name=name,
include_rescaling=include_rescaling,
include_top=include_top,
weights=weights,
input_shape=input_shape,
input_tensor=input_tensor,
pooling=pooling,
classes=classes,
classifier_activation=classifier_activation,
**kwargs,
)
def ResNet152V2(
include_rescaling,
include_top,
classes=None,
weights=None,
input_shape=(None, None, 3),
input_tensor=None,
pooling=None,
classifier_activation="softmax",
name="resnet152v2",
**kwargs,
):
"""Instantiates the ResNet152V2 architecture."""
return ResNetV2(
stackwise_filters=MODEL_CONFIGS["ResNet152V2"]["stackwise_filters"],
stackwise_blocks=MODEL_CONFIGS["ResNet152V2"]["stackwise_blocks"],
stackwise_strides=MODEL_CONFIGS["ResNet152V2"]["stackwise_strides"],
include_rescaling=include_rescaling,
include_top=include_top,
name=name,
weights=weights,
input_shape=input_shape,
input_tensor=input_tensor,
pooling=pooling,
classes=classes,
classifier_activation=classifier_activation,
**kwargs,
)
setattr(ResNet18V2, "__doc__", BASE_DOCSTRING.format(name="ResNet18V2"))
setattr(ResNet34V2, "__doc__", BASE_DOCSTRING.format(name="ResNet34V2"))
setattr(ResNet50V2, "__doc__", BASE_DOCSTRING.format(name="ResNet50V2"))
setattr(ResNet101V2, "__doc__", BASE_DOCSTRING.format(name="ResNet101V2"))
setattr(ResNet152V2, "__doc__", BASE_DOCSTRING.format(name="ResNet152V2"))
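# An illustrative sketch, not part of the original module: it builds
# ResNet50V2 without the classification top and converts it into a
# multi-level backbone via the `as_backbone` method bound above. The
# 256x256 input size mirrors the unit tests and is otherwise arbitrary.
if __name__ == "__main__":
    model = ResNet50V2(
        include_rescaling=True,
        include_top=False,
        input_shape=(256, 256, 3),
    )
    backbone = model.as_backbone(min_level=3, max_level=5)
    features = backbone(tf.random.uniform((2, 256, 256, 3)))
    for level, feature in sorted(features.items()):
        print(level, feature.shape)  # e.g. level 3 -> (2, 32, 32, 512)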
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from absl.testing import parameterized
from keras_cv.models import resnet_v2
from .models_test import ModelsTest
MODEL_LIST = [
(resnet_v2.ResNet18V2, 512, {}),
(resnet_v2.ResNet34V2, 512, {}),
(resnet_v2.ResNet50V2, 2048, {}),
(resnet_v2.ResNet101V2, 2048, {}),
(resnet_v2.ResNet152V2, 2048, {}),
]
class ResNetV2Test(ModelsTest, tf.test.TestCase, parameterized.TestCase):
@parameterized.parameters(*MODEL_LIST)
def test_application_base(self, app, _, args):
super()._test_application_base(app, _, args)
@parameterized.parameters(*MODEL_LIST)
def test_application_with_rescaling(self, app, last_dim, args):
super()._test_application_with_rescaling(app, last_dim, args)
@parameterized.parameters(*MODEL_LIST)
def test_application_pooling(self, app, last_dim, args):
super()._test_application_pooling(app, last_dim, args)
@parameterized.parameters(*MODEL_LIST)
def test_application_variable_input_channels(self, app, last_dim, args):
super()._test_application_variable_input_channels(app, last_dim, args)
@parameterized.parameters(*MODEL_LIST)
def test_model_can_be_used_as_backbone(self, app, last_dim, args):
super()._test_model_can_be_used_as_backbone(app, last_dim, args)
def test_model_backbone_layer_names_stability(self):
model = resnet_v2.ResNet50V2(
include_rescaling=False,
include_top=False,
classes=2048,
input_shape=[256, 256, 3],
)
model_2 = resnet_v2.ResNet50V2(
include_rescaling=False,
include_top=False,
classes=2048,
input_shape=[256, 256, 3],
)
layers_1 = model.layers
layers_2 = model_2.layers
for i in range(len(layers_1)):
if "input" in layers_1[i].name:
continue
self.assertEqual(layers_1[i].name, layers_2[i].name)
def test_create_backbone_model_from_application_model(self):
model = resnet_v2.ResNet50V2(
include_rescaling=False,
include_top=False,
classes=2048,
input_shape=[256, 256, 3],
)
backbone_model = model.as_backbone()
inputs = tf.keras.Input(shape=[256, 256, 3])
outputs = backbone_model(inputs)
# The ResNet50 backbone has 4 levels of features (2 ~ 5)
self.assertLen(outputs, 4)
self.assertEqual(list(outputs.keys()), [2, 3, 4, 5])
self.assertEqual(outputs[2].shape, [None, 64, 64, 256])
self.assertEqual(outputs[3].shape, [None, 32, 32, 512])
self.assertEqual(outputs[4].shape, [None, 16, 16, 1024])
self.assertEqual(outputs[5].shape, [None, 8, 8, 2048])
def test_create_backbone_model_with_level_config(self):
model = resnet_v2.ResNet50V2(
include_rescaling=False,
include_top=False,
classes=2048,
input_shape=[256, 256, 3],
)
backbone_model = model.as_backbone(min_level=3, max_level=4)
inputs = tf.keras.Input(shape=[256, 256, 3])
outputs = backbone_model(inputs)
self.assertLen(outputs, 2)
self.assertEqual(list(outputs.keys()), [3, 4])
self.assertEqual(outputs[3].shape, [None, 32, 32, 512])
self.assertEqual(outputs[4].shape, [None, 16, 16, 1024])
if __name__ == "__main__":
tf.test.main()
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from keras_cv.models.segmentation.__internal__ import SegmentationHead
from keras_cv.models.segmentation.deeplab import DeepLabV3
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from keras_cv.models.segmentation.__internal__.segmentation_head import SegmentationHead
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from tensorflow.keras import layers
@tf.keras.utils.register_keras_serializable(package="keras_cv")
class SegmentationHead(layers.Layer):
"""Prediction head for the segmentation model.
The head takes the output from the decoder (e.g. FPN or ASPP) and produces a
segmentation mask (pixel-level classifications) as the output for the model.
Args:
classes: int, the number of output classes for the prediction. This should
include all the classes (e.g. background) for the model to predict.
convs: int, the number of conv2D layers that are stacked before the final
classification layer. Defaults to 2.
filters: int, the number of filters/channels for the conv2D layers. Defaults
to 256.
activations: str or 'tf.keras.activations', activation functions between the
conv2D layers and the final classification layer. Defaults to 'relu'.
output_scale_factor: int, or a pair of ints, for upsampling the output mask.
This is useful to scale the output mask back to the same size as the input
image. When a single int is provided, the mask will be scaled with the same
ratio on both width and height. When a pair of ints is provided, they will
be parsed as (height_factor, width_factor). Defaults to None, which means
no resize will happen to the output mask tensor.
Sample code
```python
# Mimic a FPN output dict
p3 = tf.ones([2, 32, 32, 3])
p4 = tf.ones([2, 16, 16, 3])
p5 = tf.ones([2, 8, 8, 3])
inputs = {3: p3, 4: p4, 5: p5}
head = SegmentationHead(classes=11)
output = head(inputs)
# The output tensor has shape [2, 32, 32, 11], the same resolution as p3.
```
"""
def __init__(
self,
classes,
convs=2,
filters=256,
activations="relu",
output_scale_factor=None,
**kwargs,
):
super().__init__(**kwargs)
self.classes = classes
self.convs = convs
self.filters = filters
self.activations = activations
self.output_scale_factor = output_scale_factor
self._conv_layers = []
self._bn_layers = []
for i in range(self.convs):
conv_name = "segmentation_head_conv_{}".format(i)
self._conv_layers.append(
tf.keras.layers.Conv2D(
name=conv_name,
filters=self.filters,
kernel_size=3,
padding="same",
use_bias=False,
)
)
norm_name = "segmentation_head_norm_{}".format(i)
self._bn_layers.append(tf.keras.layers.BatchNormalization(name=norm_name))
self._classification_layer = tf.keras.layers.Conv2D(
name="segmentation_output",
filters=self.classes,
kernel_size=1,
padding="same",
# Force the dtype of the classification head to float32 to avoid the NAN loss
# issue when used with mixed precision API.
dtype=tf.float32,
)
def call(self, inputs):
"""Forward pass for the segmentation head.
For now, it accepts only the output from the decoder, which is a dict with int
keys and tensor values (level -> processed feature output). The head uses the
lowest feature level as its input.
"""
if not isinstance(inputs, dict):
raise ValueError(f"Expect the inputs to be a dict, but received {inputs}")
lowest_level = next(iter(sorted(inputs)))
x = inputs[lowest_level]
for conv_layer, bn_layer in zip(self._conv_layers, self._bn_layers):
x = conv_layer(x)
x = bn_layer(x)
x = tf.keras.activations.get(self.activations)(x)
if self.output_scale_factor is not None:
x = tf.keras.layers.UpSampling2D(self.output_scale_factor)(x)
x = self._classification_layer(x)
return x
def get_config(self):
config = {
"classes": self.classes,
"convs": self.convs,
"filters": self.filters,
"activations": self.activations,
"output_scale_factor": self.output_scale_factor,
}
base_config = super().get_config()
return dict(list(base_config.items()) + list(config.items()))
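# An illustrative sketch, not part of the original module: with
# output_scale_factor=4 the lowest-level (32x32) feature map is upsampled to
# 128x128 before the final per-pixel classification. The feature shapes below
# are arbitrary stand-ins for a decoder output.
if __name__ == "__main__":
    features = {3: tf.ones([2, 32, 32, 256]), 4: tf.ones([2, 16, 16, 256])}
    head = SegmentationHead(classes=11, output_scale_factor=4)
    mask = head(features)
    print(mask.shape)  # (2, 128, 128, 11)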
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from keras_cv.models.segmentation.__internal__ import SegmentationHead
class SegmentationHeadTest(tf.test.TestCase):
def test_result_shapes(self):
p3 = tf.ones([2, 32, 32, 3])
p4 = tf.ones([2, 16, 16, 3])
p5 = tf.ones([2, 8, 8, 3])
inputs = {3: p3, 4: p4, 5: p5}
head = SegmentationHead(classes=11)
output = head(inputs)
# Make sure the output shape is the same as p3
self.assertEqual(output.shape, [2, 32, 32, 11])
def test_invalid_input_type(self):
p3 = tf.ones([2, 32, 32, 3])
p4 = tf.ones([2, 16, 16, 3])
p5 = tf.ones([2, 8, 8, 3])
list_input = [p3, p4, p5]
head = SegmentationHead(classes=11)
with self.assertRaisesRegex(ValueError, "Expect the inputs to be a dict"):
head(list_input)
def test_scale_up_output(self):
p3 = tf.ones([2, 32, 32, 3])
p4 = tf.ones([2, 16, 16, 3])
p5 = tf.ones([2, 8, 8, 3])
inputs = {3: p3, 4: p4, 5: p5}
head = SegmentationHead(classes=11, output_scale_factor=4)
output = head(inputs)
# The output shape will scale up 4x
self.assertEqual(output.shape, [2, 32 * 4, 32 * 4, 11])
def test_dtype_for_classification_head(self):
p3 = tf.ones([2, 32, 32, 3])
p4 = tf.ones([2, 16, 16, 3])
p5 = tf.ones([2, 8, 8, 3])
inputs = {3: p3, 4: p4, 5: p5}
try:
tf.keras.mixed_precision.set_global_policy("mixed_float16")
head = SegmentationHead(classes=11, output_scale_factor=4)
_ = head(inputs)
# Make sure the dtype of the classification head is still float32, which
# avoids the NaN loss issue with mixed precision
self.assertEqual(head._classification_layer.dtype, tf.float32)
self.assertEqual(head._classification_layer.compute_dtype, tf.float32)
finally:
tf.keras.mixed_precision.set_global_policy(None)
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
import keras_cv
class DeepLabV3(tf.keras.models.Model):
"""A segmentation model based on DeepLab v3.
Args:
classes: int, the number of classes for the segmentation model. Note that
the classes don't include the background class, and the classes
from the data should be represented by integers in the range
[0, classes).
include_rescaling: boolean, whether to Rescale the inputs. If set to True,
inputs will be passed through a `Rescaling(1/255.0)` layer.
backbone: an optional backbone network for the model. Can be a `tf.keras.layers.Layer`
instance. The supported pre-defined backbone models are:
1. "resnet50_v2", a ResNet50 V2 model
Defaults to 'resnet50_v2'.
decoder: an optional decoder network for the segmentation model, e.g. FPN. The
supported premade decoder is: "fpn". The decoder is called on
the output of the backbone network to up-sample the feature output.
Defaults to 'fpn'.
segmentation_head: an optional `tf.keras.Layer` that predicts the segmentation
mask based on features from the backbone and the decoder.
"""
def __init__(
self,
classes,
include_rescaling,
backbone="resnet50_v2",
decoder="fpn",
segmentation_head=None,
**kwargs,
):
super().__init__(**kwargs)
self.classes = classes
# ================== Backbone and weights. ==================
if isinstance(backbone, str):
supported_premade_backbone = [
"resnet50_v2",
]
if backbone not in supported_premade_backbone:
raise ValueError(
"Supported premade backbones are: "
f'{supported_premade_backbone}, received "{backbone}"'
)
self._backbone_passed = backbone
if backbone == "resnet50_v2":
backbone = keras_cv.models.ResNet50V2(
include_rescaling=include_rescaling, include_top=False
)
backbone = backbone.as_backbone()
self.backbone = backbone
else:
# TODO(scottzhu): Might need to do more assertion about the model
if not isinstance(backbone, tf.keras.layers.Layer):
raise ValueError(
"Backbone needs to be a `tf.keras.layers.Layer`, "
f"received {backbone}"
)
self.backbone = backbone
# ================== decoder ==================
if isinstance(decoder, str):
# TODO(scottzhu): Add ASPP decoder.
supported_premade_decoder = ["fpn"]
if decoder not in supported_premade_decoder:
raise ValueError(
"Supported premade decoders are: "
f'{supported_premade_decoder}, received "{decoder}"'
)
self._decoder_passed = decoder
if decoder == "fpn":
# Infer the FPN levels from the backbone. If users need to customize
# this setting, they should manually create the FPN and backbone.
if not isinstance(backbone.output, dict):
raise ValueError(
"Expect the backbone's output to be dict, "
f"received {backbone.output}"
)
backbone_levels = list(backbone.output.keys())
min_level = backbone_levels[0]
max_level = backbone_levels[-1]
decoder = keras_cv.layers.FeaturePyramid(
min_level=min_level, max_level=max_level
)
# TODO(scottzhu): do more validation for the decoder when we have a common
# interface.
self.decoder = decoder
self._segmentation_head_passed = segmentation_head
if segmentation_head is None:
# Scale up the output when using FPN, to keep the output shape the same
# as the input shape.
if isinstance(self.decoder, keras_cv.layers.FeaturePyramid):
output_scale_factor = pow(2, self.decoder.min_level)
else:
output_scale_factor = None
segmentation_head = (
keras_cv.models.segmentation.__internal__.SegmentationHead(
classes=classes, output_scale_factor=output_scale_factor
)
)
self.segmentation_head = segmentation_head
def call(self, inputs, training=None):
backbone_output = self.backbone(inputs, training=training)
decoder_output = self.decoder(backbone_output, training=training)
return self.segmentation_head(decoder_output, training=training)
def get_config(self):
config = {
"classes": self.classes,
"backbone": self._backbone_passed,
"decoder": self._decoder_passed,
"segmentation_head": self._segmentation_head_passed,
}
base_config = super().get_config()
return dict(list(base_config.items()) + list(config.items()))
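# An illustrative sketch, not part of the original module: the preconfigured
# DeepLabV3 pairs a ResNet50V2 backbone with an FPN decoder and produces
# per-pixel logits at the input resolution. The class count of 11 mirrors the
# unit tests and is otherwise arbitrary.
if __name__ == "__main__":
    model = DeepLabV3(classes=11, include_rescaling=True)
    output = model(tf.random.uniform((2, 256, 256, 3)), training=False)
    print(output.shape)  # (2, 256, 256, 11)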
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import pytest
import tensorflow as tf
import tensorflow_datasets as tfds
from keras_cv import layers
from keras_cv import models
from keras_cv.models import segmentation
class DeeplabTest(tf.test.TestCase):
def test_deeplab_model_construction_with_preconfigured_setting(self):
model = segmentation.DeepLabV3(classes=11, include_rescaling=True)
input_image = tf.random.uniform(shape=[2, 256, 256, 3])
output = model(input_image, training=True)
self.assertEqual(output.shape, [2, 256, 256, 11])
def test_deeplab_model_with_components(self):
backbone = models.ResNet50V2(
include_rescaling=True, include_top=False
).as_backbone(min_level=3, max_level=4)
fpn = layers.FeaturePyramid(min_level=3, max_level=4)
model = segmentation.DeepLabV3(
classes=11, include_rescaling=True, backbone=backbone, decoder=fpn
)
input_image = tf.random.uniform(shape=[2, 256, 256, 3])
output = model(input_image, training=True)
self.assertEqual(output.shape, [2, 256, 256, 11])
def test_mixed_precision(self):
tf.keras.mixed_precision.set_global_policy("mixed_float16")
model = segmentation.DeepLabV3(classes=11, include_rescaling=True)
input_image = tf.random.uniform(shape=[2, 256, 256, 3])
output = model(input_image, training=True)
self.assertEqual(output.dtype, tf.float32)
def test_invalid_backbone_model(self):
with self.assertRaisesRegex(
ValueError, "Supported premade backbones are: .*resnet50_v2"
):
segmentation.DeepLabV3(
classes=11, include_rescaling=True, backbone="resnet_v3"
)
with self.assertRaisesRegex(
ValueError, "Backbone needs to be a `tf.keras.layers.Layer`"
):
segmentation.DeepLabV3(
classes=11, include_rescaling=True, backbone=tf.Module()
)
def test_invalid_decoder(self):
with self.assertRaisesRegex(ValueError, "Supported premade decoders are: .*fpn"):
segmentation.DeepLabV3(classes=11, include_rescaling=True, decoder="aspp")
@pytest.mark.skipif(
"INTEGRATION" not in os.environ or os.environ["INTEGRATION"] != "true",
reason="Takes a long time to run, only runs when INTEGRATION "
"environment variable is set. To run the test please run: \n"
"`INTEGRATION=true pytest keras_cv/`",
)
def test_model_train(self):
model = segmentation.DeepLabV3(classes=1, include_rescaling=True)
gcs_data_pattern = "gs://caltech_birds2011_mask/0.1.1/*.tfrecord*"
features = tfds.features.FeaturesDict(
{
"bbox": tfds.features.BBoxFeature(),
"image": tfds.features.Image(shape=(None, None, 3), dtype=tf.uint8),
"image/filename": tfds.features.Text(),
"label": tfds.features.ClassLabel(num_classes=200),
"label_name": tfds.features.Text(),
"segmentation_mask": tfds.features.Image(
shape=(None, None, 1), dtype=tf.uint8
),
}
)
filenames = tf.io.gfile.glob(gcs_data_pattern)
AUTO = tf.data.AUTOTUNE
ignore_order = tf.data.Options()
ignore_order.experimental_deterministic = False
ds = tf.data.TFRecordDataset(filenames, num_parallel_reads=AUTO)
ds = ds.with_options(ignore_order)
ds = ds.map(features.deserialize_example, num_parallel_calls=AUTO)
target_size = [384, 384]
output_res = [96, 96]
num_images = 11788
image_resizing = tf.keras.layers.Resizing(target_size[1], target_size[0])
labels_resizing = tf.keras.layers.Resizing(output_res[1], output_res[0])
def resize_images_and_masks(data):
image = tf.image.convert_image_dtype(data["image"], dtype=tf.float32)
data["image"] = image_resizing(image)
# WARNING: assumes processing unbatched
mask = data["segmentation_mask"]
mask = tf.image.convert_image_dtype(mask, dtype=tf.float32)
data["segmentation_mask"] = labels_resizing(mask)
return data
def keep_image_and_mask_only(data):
return data["image"], data["segmentation_mask"]
dataset = ds
dataset = dataset.map(resize_images_and_masks)
dataset = dataset.map(keep_image_and_mask_only)
batch_size = 32
training_dataset = dataset.apply(
tf.data.experimental.dense_to_ragged_batch(batch_size)
)
training_dataset = training_dataset.repeat()
epochs = 1
model.compile(
optimizer="adam",
loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
metrics=["accuracy"],
)
model_history = model.fit(
training_dataset, epochs=epochs, steps_per_epoch=num_images // batch_size
)
print(model_history)
if __name__ == "__main__":
tf.test.main()
# Copyright 2022 The KerasCV Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Utility functions for models"""
from tensorflow import keras
from tensorflow.keras import layers
def parse_model_inputs(input_shape, input_tensor):
if input_tensor is None:
return layers.Input(shape=input_shape)
else:
if not keras.backend.is_keras_tensor(input_tensor):
return layers.Input(tensor=input_tensor, shape=input_shape)
else:
return input_tensor
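# An illustrative sketch, not part of the original module: parse_model_inputs
# either creates a fresh `layers.Input` from `input_shape`, or passes a
# user-supplied Keras tensor through unchanged.
if __name__ == "__main__":
    fresh = parse_model_inputs((224, 224, 3), None)
    print(fresh.shape)  # (None, 224, 224, 3)
    existing = layers.Input(shape=(224, 224, 3))
    print(parse_model_inputs((224, 224, 3), existing) is existing)  # True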
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for KerasCV model utils."""
import tensorflow as tf
from keras import layers
from keras_cv.models import utils
class ModelUtilTestCase(tf.test.TestCase):
def test_parse_model_inputs(self):
input_shape = (224, 244, 3)
inputs = utils.parse_model_inputs(input_shape, None)
self.assertEqual(inputs.shape.as_list(), list((None,) + input_shape))
input_tensor = layers.Input(shape=input_shape)
self.assertIs(utils.parse_model_inputs(input_shape, input_tensor), input_tensor)
# Copyright 2022 The KerasCV Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""VGG19 model for KerasCV.
Reference:
- [Very Deep Convolutional Networks for Large-Scale Image Recognition](https://arxiv.org/abs/1409.1556) (ICLR 2015)
"""
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from keras_cv.models import utils
def VGG19(
include_rescaling,
include_top,
classes=None,
weights=None,
input_shape=(224, 224, 3),
input_tensor=None,
pooling=None,
classifier_activation="softmax",
name="VGG19",
**kwargs,
):
"""Instantiates the VGG19 architecture.
Reference:
- [Very Deep Convolutional Networks for Large-Scale Image Recognition](https://arxiv.org/abs/1409.1556) (ICLR 2015)
This function returns a Keras VGG19 model.
Args:
include_rescaling: whether or not to Rescale the inputs. If set to True,
inputs will be passed through a `Rescaling(1/255.0)` layer.
include_top: whether to include the 3 fully-connected
layers at the top of the network. If True, `classes` must be provided.
classes: optional number of classes to classify images into, only to be
specified if `include_top` is True.
weights: one of `None` (random initialization), or a pretrained weight file path.
input_shape: optional shape tuple, defaults to (224, 224, 3).
input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
to use as image input for the model.
pooling: Optional pooling mode for feature extraction
when `include_top` is `False`.
- `None` means that the output of the model will be
the 4D tensor output of the
last convolutional block.
- `avg` means that global average pooling
will be applied to the output of the
last convolutional block, and thus
the output of the model will be a 2D tensor.
- `max` means that global max pooling will
be applied.
classifier_activation: A `str` or callable. The activation function to use
on the "top" layer. Ignored unless `include_top=True`. Set
`classifier_activation=None` to return the logits of the "top" layer.
When loading pretrained weights, `classifier_activation` can only
be `None` or `"softmax"`.
name: (Optional) name to pass to the model. Defaults to "VGG19".
Returns:
A `keras.Model` instance.
"""
if weights and not tf.io.gfile.exists(weights):
raise ValueError(
"The `weights` argument should be either `None` or the path to the "
f"weights file to be loaded. Weights file not found at location: {weights}"
)
if include_top and not classes:
raise ValueError(
"If `include_top` is True, you should specify `classes`. "
f"Received: classes={classes}"
)
inputs = utils.parse_model_inputs(input_shape, input_tensor)
x = inputs
if include_rescaling:
x = layers.Rescaling(1 / 255.0)(x)
# Block 1
x = layers.Conv2D(
64, (3, 3), activation="relu", padding="same", name="block1_conv1"
)(x)
x = layers.Conv2D(
64, (3, 3), activation="relu", padding="same", name="block1_conv2"
)(x)
x = layers.MaxPooling2D((2, 2), strides=(2, 2), name="block1_pool")(x)
# Block 2
x = layers.Conv2D(
128, (3, 3), activation="relu", padding="same", name="block2_conv1"
)(x)
x = layers.Conv2D(
128, (3, 3), activation="relu", padding="same", name="block2_conv2"
)(x)
x = layers.MaxPooling2D((2, 2), strides=(2, 2), name="block2_pool")(x)
# Block 3
x = layers.Conv2D(
256, (3, 3), activation="relu", padding="same", name="block3_conv1"
)(x)
x = layers.Conv2D(
256, (3, 3), activation="relu", padding="same", name="block3_conv2"
)(x)
x = layers.Conv2D(
256, (3, 3), activation="relu", padding="same", name="block3_conv3"
)(x)
x = layers.Conv2D(
256, (3, 3), activation="relu", padding="same", name="block3_conv4"
)(x)
x = layers.MaxPooling2D((2, 2), strides=(2, 2), name="block3_pool")(x)
# Block 4
x = layers.Conv2D(
512, (3, 3), activation="relu", padding="same", name="block4_conv1"
)(x)
x = layers.Conv2D(
512, (3, 3), activation="relu", padding="same", name="block4_conv2"
)(x)
x = layers.Conv2D(
512, (3, 3), activation="relu", padding="same", name="block4_conv3"
)(x)
x = layers.Conv2D(
512, (3, 3), activation="relu", padding="same", name="block4_conv4"
)(x)
x = layers.MaxPooling2D((2, 2), strides=(2, 2), name="block4_pool")(x)
# Block 5
x = layers.Conv2D(
512, (3, 3), activation="relu", padding="same", name="block5_conv1"
)(x)
x = layers.Conv2D(
512, (3, 3), activation="relu", padding="same", name="block5_conv2"
)(x)
x = layers.Conv2D(
512, (3, 3), activation="relu", padding="same", name="block5_conv3"
)(x)
x = layers.Conv2D(
512, (3, 3), activation="relu", padding="same", name="block5_conv4"
)(x)
x = layers.MaxPooling2D((2, 2), strides=(2, 2), name="block5_pool")(x)
if include_top:
x = layers.Flatten(name="flatten")(x)
x = layers.Dense(4096, activation="relu", name="fc1")(x)
x = layers.Dense(4096, activation="relu", name="fc2")(x)
x = layers.Dense(classes, activation=classifier_activation, name="predictions")(
x
)
else:
if pooling == "avg":
x = layers.GlobalAveragePooling2D()(x)
elif pooling == "max":
x = layers.GlobalMaxPooling2D()(x)
model = keras.Model(inputs, x, name=name, **kwargs)
if weights is not None:
model.load_weights(weights)
return model
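# A minimal usage sketch, added for illustration and not part of the original
# module: an untrained VGG19 used as a feature extractor with global average
# pooling. The 224x224 input size matches the function's default.
if __name__ == "__main__":
    model = VGG19(
        include_rescaling=True,
        include_top=False,
        pooling="avg",
        input_shape=(224, 224, 3),
    )
    features = model(tf.random.uniform((2, 224, 224, 3)))
    print(features.shape)  # (2, 512)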
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from absl.testing import parameterized
from keras_cv.models import vgg19
from .models_test import ModelsTest
MODEL_LIST = [
(vgg19.VGG19, 512, {}),
]
class VGG19Test(ModelsTest, tf.test.TestCase, parameterized.TestCase):
@parameterized.parameters(*MODEL_LIST)
def test_application_base(self, app, _, args):
super()._test_application_base(app, _, args)
@parameterized.parameters(*MODEL_LIST)
def test_application_with_rescaling(self, app, last_dim, args):
super()._test_application_with_rescaling(app, last_dim, args)
@parameterized.parameters(*MODEL_LIST)
def test_application_pooling(self, app, last_dim, args):
super()._test_application_pooling(app, last_dim, args)
@parameterized.parameters(*MODEL_LIST)
def test_application_variable_input_channels(self, app, last_dim, args):
super()._test_application_variable_input_channels(app, last_dim, args)
@parameterized.parameters(*MODEL_LIST)
def test_model_can_be_used_as_backbone(self, app, last_dim, args):
super()._test_model_can_be_used_as_backbone(app, last_dim, args)
if __name__ == "__main__":
tf.test.main()
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from keras.utils import data_utils
def parse_weights(weights, include_top, model_type):
if not weights or tf.io.gfile.exists(weights):
return weights
if weights in ALIASES[model_type]:
weights = ALIASES[model_type][weights]
if weights in WEIGHTS_CONFIG[model_type]:
if not include_top:
weights = weights + "-notop"
return data_utils.get_file(
origin=f"{BASE_PATH}/{model_type}/{weights}.h5",
cache_subdir="models",
file_hash=WEIGHTS_CONFIG[model_type][weights],
)
raise ValueError(
"The `weights` argument should be either `None`, the path to the "
"weights file to be loaded, or the name of pre-trained weights from "
"https://github.com/keras-team/keras-cv/blob/master/keras_cv/models/weights.py. "
f"Invalid `weights` argument: {weights}"
)
BASE_PATH = "https://storage.googleapis.com/keras-cv/models"
ALIASES = {
"densenet121": {
"imagenet": "imagenet/classification-v0",
"imagenet/classification": "imagenet/classification-v0",
},
"densenet169": {
"imagenet": "imagenet/classification-v0",
"imagenet/classification": "imagenet/classification-v0",
},
"densenet201": {
"imagenet": "imagenet/classification-v0",
"imagenet/classification": "imagenet/classification-v0",
},
"efficientnetv2b0": {
"imagenet": "imagenet/classification-v0",
"imagenet/classification": "imagenet/classification-v0",
},
"efficientnetv2b1": {
"imagenet": "imagenet/classification-v0",
"imagenet/classification": "imagenet/classification-v0",
},
"efficientnetv2b2": {
"imagenet": "imagenet/classification-v0",
"imagenet/classification": "imagenet/classification-v0",
},
"resnet50v2": {
"imagenet": "imagenet/classification-v2",
"imagenet/classification": "imagenet/classification-v2",
},
}
WEIGHTS_CONFIG = {
"densenet121": {
"imagenet/classification-v0": "13de3d077ad9d9816b9a0acc78215201d9b6e216c7ed8e71d69cc914f8f0775b",
"imagenet/classification-v0-notop": "709afe0321d9f2b2562e562ff9d0dc44cca10ed09e0e2cfba08d783ff4dab6bf",
},
"densenet169": {
"imagenet/classification-v0": "4cd2a661d0cb2378574073b23129ee4d06ea53c895c62a8863c44ee039e236a1",
"imagenet/classification-v0-notop": "a99d1bb2cbe1a59a1cdd1f435fb265453a97c2a7b723d26f4ebee96e5fb49d62",
},
"densenet201": {
"imagenet/classification-v0": "3b6032e744e5e5babf7457abceaaba11fcd449fe2d07016ae5076ac3c3c6cf0c",
"imagenet/classification-v0-notop": "c1189a934f12c1a676a9cf52238e5994401af925e2adfc0365bad8133c052060",
},
"efficientnetv2b0": {
"imagenet/classification-v0": "da7975b6d4200dfdc3f859b0d028774e5e5dd4031d3e998a27dadc492dec4f3e",
"imagenet/classification-v0-notop": "defe635bfa3cc3f2b9e89bfd53bbc3de28a1dc67026b4437a14f44476e7d0549",
},
"efficientnetv2b1": {
"imagenet/classification-v0": "3f92fc9d7b141ec9e85ffe60d301fb49103ba17b148bdd638971a77f1b8db010",
"imagenet/classification-v0-notop": "359aaa5c1e863c8438d94052791e72ef29345d07703d06284e1069829f85932f",
},
"efficientnetv2b2": {
"imagenet/classification-v0": "1667d21b50e6c5b851a69c98503fa5ae707b82dbae8c900fe59ab1a93d60d694",
"imagenet/classification-v0-notop": "e118aadfab7e93ff939fb81c88c189cbd7fb2b7ddd7314fbf2badb7c551aa119",
},
"resnet50v2": {
"imagenet/classification-v0": "11bde945b54d1dca65101be2648048abca8a96a51a42820d87403486389790db",
"imagenet/classification-v0-notop": "5b4aca4932c433d84f6aef58135472a4312ed2fa565d53fedcd6b0c24b54ab4a",
"imagenet/classification-v1": "a32e5d9998e061527f6f947f36d8e794ad54dad71edcd8921cda7804912f3ee7",
"imagenet/classification-v1-notop": "ac46b82c11070ab2f69673c41fbe5039c9eb686cca4f34cd1d79412fd136f1ae",
"imagenet/classification-v2": "5ee5a8ac650aaa59342bc48ffe770e6797a5550bcc35961e1d06685292c15921",
"imagenet/classification-v2-notop": "e711c83d6db7034871f6d345a476c8184eab99dbf3ffcec0c1d8445684890ad9",
},
}
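# An illustrative sketch, not part of the original module: `None` and existing
# file paths pass through `parse_weights` unchanged, while a known alias such
# as "imagenet" resolves to a versioned weights file that is downloaded on
# demand (left commented out here to avoid network access).
if __name__ == "__main__":
    print(parse_weights(None, include_top=True, model_type="resnet50v2"))  # None
    # path = parse_weights("imagenet", include_top=False, model_type="resnet50v2")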
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from keras_cv.ops.iou_3d import IoU3D
from keras_cv.ops.point_cloud import _box_area
from keras_cv.ops.point_cloud import _center_xyzWHD_to_corner_xyz
from keras_cv.ops.point_cloud import _is_on_lefthand_side
from keras_cv.ops.point_cloud import coordinate_transform
from keras_cv.ops.point_cloud import is_within_box2d
from keras_cv.ops.point_cloud import is_within_box3d
from keras_cv.ops.point_cloud import spherical_coordinate_transform