# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""DarkNet models for KerasCV.

Reference:
    - [YoloV3 Paper](https://arxiv.org/abs/1804.02767)
    - [YoloV3 implementation](https://github.com/ultralytics/yolov3)
"""

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

from keras_cv.models import utils
from keras_cv.models.__internal__.darknet_utils import DarknetConvBlock
from keras_cv.models.__internal__.darknet_utils import ResidualBlocks
from keras_cv.models.__internal__.darknet_utils import SpatialPyramidPoolingBottleneck

# Docstring template shared by the fixed-depth constructors below; `{name}` is
# substituted with the constructor name when the docstring is attached.
BASE_DOCSTRING = """Instantiates the {name} architecture.

    Although the {name} architecture is commonly used for detection tasks, it is
    possible to extract the intermediate dark2 to dark5 layers from the model for
    creating a feature pyramid Network.

    Reference:
        - [YoloV3 Paper](https://arxiv.org/abs/1804.02767)
        - [YoloV3 implementation](https://github.com/ultralytics/yolov3)

    For transfer learning use cases, make sure to read the
    [guide to transfer learning & fine-tuning](
        https://keras.io/guides/transfer_learning/).

    Args:
        include_rescaling: whether or not to Rescale the inputs. If set to True,
            inputs will be passed through a `Rescaling(1/255.0)` layer.
        include_top: whether to include the fully-connected layer at the top of
            the network. If provided, `classes` must be provided.
        classes: optional number of classes to classify images into, only to be
            specified if `include_top` is True.
        weights: one of `None` (random initialization), or a pretrained weight
            file path.
        input_shape: optional shape tuple, defaults to (None, None, 3).
        input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
            to use as image input for the model.
        pooling: optional pooling mode for feature extraction when `include_top`
            is `False`.
            - `None` means that the output of the model will be the 4D tensor
                output of the last convolutional block.
            - `avg` means that global average pooling will be applied to the
                output of the last convolutional block, and thus the output of
                the model will be a 2D tensor.
            - `max` means that global max pooling will be applied.
        name: (Optional) name to pass to the model. Defaults to "{name}".

    Returns:
        A `keras.Model` instance.
"""


def DarkNet(
    blocks,
    include_rescaling,
    include_top,
    classes=None,
    weights=None,
    input_shape=(None, None, 3),
    input_tensor=None,
    pooling=None,
    classifier_activation="softmax",
    name=None,
    **kwargs,
):
    """Instantiates the DarkNet architecture.

    Although the DarkNet architecture is commonly used for detection tasks, it is
    possible to extract the intermediate dark2 to dark5 layers from the model for
    creating a feature pyramid Network.

    Reference:
        - [YoloV3 Paper](https://arxiv.org/abs/1804.02767)
        - [YoloV3 implementation](https://github.com/ultralytics/yolov3)

    For transfer learning use cases, make sure to read the
    [guide to transfer learning & fine-tuning](
        https://keras.io/guides/transfer_learning/).

    Args:
        blocks: numbers of building blocks from the layer dark2 to layer dark5.
            Must contain exactly four entries, one per stage.
        include_rescaling: whether or not to Rescale the inputs. If set to True,
            inputs will be passed through a `Rescaling(1/255.0)` layer.
        include_top: whether to include the fully-connected layer at the top of
            the network. If True, `classes` must be provided.
        classes: optional number of classes to classify images into, only to be
            specified if `include_top` is True.
        weights: one of `None` (random initialization), or a pretrained weight
            file path.
        input_shape: optional shape tuple, defaults to (None, None, 3).
        input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
            to use as image input for the model.
        pooling: optional pooling mode for feature extraction when `include_top`
            is `False`.
            - `None` means that the output of the model will be the 4D tensor
                output of the last convolutional block.
            - `avg` means that global average pooling will be applied to the
                output of the last convolutional block, and thus the output of
                the model will be a 2D tensor.
            - `max` means that global max pooling will be applied.
        classifier_activation: A `str` or callable. The activation function to
            use on the "top" layer. Ignored unless `include_top=True`. Set
            `classifier_activation=None` to return the logits of the "top"
            layer.
        name: (Optional) name to pass to the model. Defaults to "DarkNet".
        **kwargs: additional keyword arguments forwarded to `keras.Model`.

    Returns:
        A `keras.Model` instance.

    Raises:
        ValueError: if `weights` is set but no file exists at that path, if
            `include_top` is True without `classes`, or if `blocks` does not
            contain exactly four entries.
    """
    if weights and not tf.io.gfile.exists(weights):
        raise ValueError(
            "The `weights` argument should be either `None` or the path to the "
            f"weights file to be loaded. Weights file not found at location: {weights}"
        )

    if include_top and not classes:
        raise ValueError(
            "If `include_top` is True, you should specify `classes`. Received: "
            f"classes={classes}"
        )

    # filters for the ResidualBlock outputs (dark2 to dark5)
    stage_filters = [128, 256, 512, 1024]

    # `zip` below would silently drop stages (or ignore extra entries) on a
    # length mismatch, producing a wrongly shaped network; fail loudly instead.
    blocks = list(blocks)
    if len(blocks) != len(stage_filters):
        raise ValueError(
            f"`blocks` should contain exactly {len(stage_filters)} entries, one "
            f"for each of the layers dark2 to dark5. Received: blocks={blocks}"
        )

    inputs = utils.parse_model_inputs(input_shape, input_tensor)

    x = inputs
    if include_rescaling:
        x = layers.Rescaling(1 / 255.0)(x)

    # stem
    x = DarknetConvBlock(
        filters=32, kernel_size=3, strides=1, activation="leaky_relu", name="stem_conv"
    )(x)
    x = ResidualBlocks(filters=64, num_blocks=1, name="stem_residual_block")(x)

    # layer_num is used for naming the residual blocks (starts with dark2, hence 2)
    for layer_num, (num_filters, num_blocks) in enumerate(
        zip(stage_filters, blocks), start=2
    ):
        x = ResidualBlocks(
            filters=num_filters,
            num_blocks=num_blocks,
            name=f"dark{layer_num}_residual_block",
        )(x)

    # remaining dark5 layers
    x = DarknetConvBlock(
        filters=512,
        kernel_size=1,
        strides=1,
        activation="leaky_relu",
        name="dark5_conv1",
    )(x)
    x = DarknetConvBlock(
        filters=1024,
        kernel_size=3,
        strides=1,
        activation="leaky_relu",
        name="dark5_conv2",
    )(x)
    x = SpatialPyramidPoolingBottleneck(
        512, activation="leaky_relu", name="dark5_spp"
    )(x)
    x = DarknetConvBlock(
        filters=1024,
        kernel_size=3,
        strides=1,
        activation="leaky_relu",
        name="dark5_conv3",
    )(x)
    x = DarknetConvBlock(
        filters=512,
        kernel_size=1,
        strides=1,
        activation="leaky_relu",
        name="dark5_conv4",
    )(x)

    # The classification head takes precedence over `pooling`; any other
    # `pooling` value leaves the 4D feature map untouched.
    if include_top:
        x = layers.GlobalAveragePooling2D(name="avg_pool")(x)
        x = layers.Dense(
            classes, activation=classifier_activation, name="predictions"
        )(x)
    elif pooling == "avg":
        x = layers.GlobalAveragePooling2D(name="avg_pool")(x)
    elif pooling == "max":
        x = layers.GlobalMaxPooling2D(name="max_pool")(x)

    model = keras.Model(inputs, x, name=name, **kwargs)

    if weights is not None:
        model.load_weights(weights)
    return model


def DarkNet21(
    include_rescaling,
    include_top,
    classes=None,
    weights=None,
    input_shape=(None, None, 3),
    input_tensor=None,
    pooling=None,
    name="DarkNet21",
    **kwargs,
):
    # The docstring is attached at module bottom from BASE_DOCSTRING.
    return DarkNet(
        [1, 2, 2, 1],
        include_rescaling=include_rescaling,
        include_top=include_top,
        classes=classes,
        weights=weights,
        input_shape=input_shape,
        input_tensor=input_tensor,
        pooling=pooling,
        name=name,
        **kwargs,
    )


def DarkNet53(
    include_rescaling,
    include_top,
    classes=None,
    weights=None,
    input_shape=(None, None, 3),
    input_tensor=None,
    pooling=None,
    name="DarkNet53",
    **kwargs,
):
    # The docstring is attached at module bottom from BASE_DOCSTRING.
    return DarkNet(
        [2, 8, 8, 4],
        include_rescaling=include_rescaling,
        include_top=include_top,
        classes=classes,
        weights=weights,
        input_shape=input_shape,
        input_tensor=input_tensor,
        pooling=pooling,
        name=name,
        **kwargs,
    )


# Attach the shared docstring template to each fixed-depth constructor.
DarkNet21.__doc__ = BASE_DOCSTRING.format(name="DarkNet21")
DarkNet53.__doc__ = BASE_DOCSTRING.format(name="DarkNet53")