Initial commit

8de66223 · maming · 8de66223 · 8de66223 · 8de66223 · 8de66223
Commit 8de66223 authored Feb 04, 2026 by maming
20 changed files
--- a/code/keras_contrib/activations/squash.py
+++ b/code/keras_contrib/activations/squash.py
+from keras import backend as K
+
+
+def squash(x, axis=-1):
+    """Squash activation function (generally used in Capsule layers).
+
+    Multiplies `x` by `sqrt(n) / (0.5 + n)`, where `n` is the sum of squares
+    of `x` over `axis` plus `K.epsilon()`.
+
+    # Arguments
+        x: input tensor.
+        axis: axis over which the squared values are summed.
+
+    # Returns
+        `x` rescaled by the factor described above.
+    """
+    squared = K.square(x)
+    # keepdims=True keeps the reduced axis so the factor broadcasts against x.
+    sq_norm = K.sum(squared, axis, keepdims=True) + K.epsilon()
+    norm = K.sqrt(sq_norm)
+    return (norm / (0.5 + sq_norm)) * x
--- a/code/keras_contrib/applications/__init__.py
+++ b/code/keras_contrib/applications/__init__.py
+from .densenet import DenseNet
+from .resnet import ResNet, ResNet18, ResNet34, ResNet50, ResNet101, ResNet152
+from .wide_resnet import WideResidualNetwork
+from .nasnet import NASNet, NASNetLarge, NASNetMobile
--- a/code/keras_contrib/applications/densenet.py
+++ b/code/keras_contrib/applications/densenet.py
--- a/code/keras_contrib/applications/nasnet.py
+++ b/code/keras_contrib/applications/nasnet.py
--- a/code/keras_contrib/applications/resnet.py
+++ b/code/keras_contrib/applications/resnet.py
+"""ResNet v1, v2, and segmentation models for Keras.
+
+# Reference
+
+- [Deep Residual Learning for Image Recognition](https://arxiv.org/abs/1512.03385)
+- [Identity Mappings in Deep Residual Networks](https://arxiv.org/abs/1603.05027)
+
+Reference material for extended functionality:
+
+- [ResNeXt](https://arxiv.org/abs/1611.05431) for Tiny ImageNet support.
+- [Dilated Residual Networks](https://arxiv.org/pdf/1705.09914) for segmentation support
+- [Deep Residual Learning for Instrument Segmentation in
+   Robotic Surgery](https://arxiv.org/abs/1703.08580)
+  for segmentation support.
+
+Implementation Adapted from: github.com/raghakot/keras-resnet
+"""  # pylint: disable=E501
+from __future__ import division
+
+import six
+from keras.models import Model
+from keras.layers import Input
+from keras.layers import Activation
+from keras.layers import Reshape
+from keras.layers import Dense
+from keras.layers import Conv2D
+from keras.layers import MaxPooling2D
+from keras.layers import GlobalMaxPooling2D
+from keras.layers import GlobalAveragePooling2D
+from keras.layers import Dropout
+from keras.layers.merge import add
+from keras.layers.normalization import BatchNormalization
+from keras.regularizers import l2
+from keras import backend as K
+from keras_applications.imagenet_utils import _obtain_input_shape
+
+
+def _bn_relu(x, bn_name=None, relu_name=None):
+    """Apply batch normalization followed by a ReLU activation.
+
+    Normalization runs over the module-level `CHANNEL_AXIS`, which
+    `_handle_dim_ordering` sets.
+
+    # Arguments
+        x: input tensor.
+        bn_name: optional name for the BatchNormalization layer.
+        relu_name: optional name for the Activation layer.
+
+    # Returns
+        The activated tensor.
+    """
+    normalized = BatchNormalization(axis=CHANNEL_AXIS, name=bn_name)(x)
+    relu = Activation("relu", name=relu_name)
+    return relu(normalized)
+
+
+def _conv_bn_relu(**conv_params):
+    """Build a conv -> BN -> relu unit (the original ResNet v1 ordering).
+
+    See https://arxiv.org/abs/1512.03385
+
+    # Arguments
+        **conv_params: `filters` and `kernel_size` are required. Optional keys
+            and their defaults: `strides` (1, 1), `dilation_rate` (1, 1),
+            `padding` "same", `kernel_initializer` "he_normal",
+            `kernel_regularizer` l2(1e-4), and the layer names `conv_name`,
+            `bn_name` and `relu_name` (None). Keys other than these are
+            not read.
+
+    # Returns
+        A function that maps an input tensor to the output of the unit.
+    """
+    filters = conv_params["filters"]
+    kernel_size = conv_params["kernel_size"]
+    conv_kwargs = {
+        "strides": conv_params.get("strides", (1, 1)),
+        "dilation_rate": conv_params.get("dilation_rate", (1, 1)),
+        "padding": conv_params.get("padding", "same"),
+        "kernel_initializer": conv_params.get("kernel_initializer", "he_normal"),
+        "kernel_regularizer": conv_params.get("kernel_regularizer", l2(1.e-4)),
+    }
+    conv_name = conv_params.get("conv_name", None)
+    bn_name = conv_params.get("bn_name", None)
+    relu_name = conv_params.get("relu_name", None)
+
+    def apply(tensor):
+        conv = Conv2D(filters=filters, kernel_size=kernel_size,
+                      name=conv_name, **conv_kwargs)
+        return _bn_relu(conv(tensor), bn_name=bn_name, relu_name=relu_name)
+
+    return apply
+
+
+def _bn_relu_conv(**conv_params):
+    """Build a BN -> relu -> conv unit (full pre-activation, ResNet v2).
+
+    See http://arxiv.org/pdf/1603.05027v2.pdf
+
+    # Arguments
+        **conv_params: `filters` and `kernel_size` are required. Optional keys
+            and their defaults: `strides` (1, 1), `dilation_rate` (1, 1),
+            `padding` "same", `kernel_initializer` "he_normal",
+            `kernel_regularizer` l2(1e-4), and the layer names `conv_name`,
+            `bn_name` and `relu_name` (None). Keys other than these are
+            not read.
+
+    # Returns
+        A function that maps an input tensor to the output of the unit.
+    """
+    filters = conv_params["filters"]
+    kernel_size = conv_params["kernel_size"]
+    conv_kwargs = {
+        "strides": conv_params.get("strides", (1, 1)),
+        "dilation_rate": conv_params.get("dilation_rate", (1, 1)),
+        "padding": conv_params.get("padding", "same"),
+        "kernel_initializer": conv_params.get("kernel_initializer", "he_normal"),
+        "kernel_regularizer": conv_params.get("kernel_regularizer", l2(1.e-4)),
+    }
+    conv_name = conv_params.get("conv_name", None)
+    bn_name = conv_params.get("bn_name", None)
+    relu_name = conv_params.get("relu_name", None)
+
+    def apply(tensor):
+        pre_activated = _bn_relu(tensor, bn_name=bn_name, relu_name=relu_name)
+        conv = Conv2D(filters=filters, kernel_size=kernel_size,
+                      name=conv_name, **conv_kwargs)
+        return conv(pre_activated)
+
+    return apply
+
+
+def _shortcut(input_feature, residual, conv_name_base=None, bn_name_base=None):
+    """Adds a shortcut between input and residual block and merges them with "sum".
+
+    When the two tensors differ in spatial size or channel count, the input is
+    first projected with a strided 1 x 1 convolution followed by batch
+    normalization; otherwise it is used unchanged.
+
+    # Arguments
+        input_feature: tensor entering the residual block.
+        residual: tensor produced by the residual branch.
+        conv_name_base: optional name prefix; the projection convolution is
+            named `conv_name_base + '1'`.
+        bn_name_base: optional name prefix; the projection batch
+            normalization is named `bn_name_base + '1'`.
+
+    # Returns
+        The element-wise sum of the (possibly projected) input and `residual`.
+    """
+    input_shape = K.int_shape(input_feature)
+    residual_shape = K.int_shape(residual)
+    # The ratios should already be whole numbers if the network architecture
+    # is correctly configured; round() only guards against float noise.
+    stride_rows = int(round(input_shape[ROW_AXIS] / residual_shape[ROW_AXIS]))
+    stride_cols = int(round(input_shape[COL_AXIS] / residual_shape[COL_AXIS]))
+    equal_channels = input_shape[CHANNEL_AXIS] == residual_shape[CHANNEL_AXIS]
+
+    shortcut = input_feature
+    # 1 X 1 conv if shape is different. Else identity.
+    if stride_rows > 1 or stride_cols > 1 or not equal_channels:
+        conv_name = None if conv_name_base is None else conv_name_base + '1'
+        bn_name = None if bn_name_base is None else bn_name_base + '1'
+        shortcut = Conv2D(filters=residual_shape[CHANNEL_AXIS],
+                          kernel_size=(1, 1),
+                          strides=(stride_rows, stride_cols),
+                          padding="valid",
+                          kernel_initializer="he_normal",
+                          kernel_regularizer=l2(0.0001),
+                          name=conv_name)(input_feature)
+        shortcut = BatchNormalization(axis=CHANNEL_AXIS,
+                                      name=bn_name)(shortcut)
+
+    return add([shortcut, residual])
+
+
+def _residual_block(block_function, filters, blocks, stage,
+                    transition_strides=None, transition_dilation_rates=None,
+                    dilation_rates=None, is_first_layer=False, dropout=None,
+                    residual_unit=_bn_relu_conv):
+    """Builds a stage made of `blocks` consecutive residual blocks.
+
+       block_function: factory called once per block, e.g. `basic_block` or
+            `bottleneck`
+       filters: filter count forwarded to every block
+       blocks: number of blocks in the stage; the block index is used for
+            generating layer names
+       stage: integer, current stage label, used for generating layer names
+       transition_strides: a list of tuples for the strides of each block,
+            (1, 1) for every block when None
+       transition_dilation_rates: a list of tuples for the dilation rate of
+            each transition; it is given a default here but is not forwarded
+            to `block_function`
+       dilation_rates: per-block dilation rate forwarded to `block_function`,
+            1 for every block when None
+       is_first_layer: True when this is the first stage of the network
+       dropout: forwarded unchanged to every block
+       residual_unit: forwarded unchanged to every block
+    """
+    transition_dilation_rates = ([(1, 1)] * blocks
+                                 if transition_dilation_rates is None
+                                 else transition_dilation_rates)
+    transition_strides = ([(1, 1)] * blocks
+                          if transition_strides is None
+                          else transition_strides)
+    dilation_rates = [1] * blocks if dilation_rates is None else dilation_rates
+
+    def build(tensor):
+        for index in range(blocks):
+            unit = block_function(
+                filters=filters, stage=stage, block=index,
+                transition_strides=transition_strides[index],
+                dilation_rate=dilation_rates[index],
+                is_first_block_of_first_layer=(is_first_layer and index == 0),
+                dropout=dropout,
+                residual_unit=residual_unit)
+            tensor = unit(tensor)
+        return tensor
+
+    return build
+
+
+def _block_name_base(stage, block):
+    """Get the convolution name base and batch normalization name base defined by
+    stage and block.
+
+    The first 26 blocks (indices 0-25) are labeled 'a', 'b', 'c', ... to match
+    the paper and keras; any other index is labeled with its decimal number.
+
+    # Arguments
+        stage: integer stage label.
+        block: integer, zero-based block index within the stage.
+
+    # Returns
+        A tuple `(conv_name_base, bn_name_base)`, e.g.
+        `('res2a_branch', 'bn2a_branch')` for stage 2, block 0.
+    """
+    if 0 <= block < 26:
+        label = chr(ord('a') + block)
+    else:
+        # Indices past 'z' must be stringified before concatenation.
+        label = str(block)
+    suffix = str(stage) + label + '_branch'
+    return 'res' + suffix, 'bn' + suffix
+
+
+def basic_block(filters, stage, block, transition_strides=(1, 1),
+                dilation_rate=(1, 1), is_first_block_of_first_layer=False, dropout=None,
+                residual_unit=_bn_relu_conv):
+    """Basic block of two 3 X 3 convolutions for use on resnets with layers <= 34.
+    Follows improved proposed scheme in http://arxiv.org/pdf/1603.05027v2.pdf
+
+    # Arguments
+        filters: number of filters of both convolutions.
+        stage, block: labels passed to `_block_name_base` for layer names.
+        transition_strides: strides of the first convolution.
+        dilation_rate: dilation rate of the first convolution.
+        is_first_block_of_first_layer: when True the first convolution is a
+            plain Conv2D instead of a `residual_unit`.
+        dropout: optional dropout rate applied after the first convolution.
+        residual_unit: factory for the residual units, e.g. `_bn_relu_conv`.
+
+    # Returns
+        A function mapping the block input tensor to the block output tensor.
+    """
+    def apply(input_features):
+        conv_base, bn_base = _block_name_base(stage, block)
+        first_kwargs = dict(filters=filters, kernel_size=(3, 3),
+                            strides=transition_strides,
+                            dilation_rate=dilation_rate)
+
+        if not is_first_block_of_first_layer:
+            first = residual_unit(conv_name_base=conv_base + '2a',
+                                  bn_name_base=bn_base + '2a',
+                                  **first_kwargs)
+        else:
+            # don't repeat bn->relu since we just did bn->relu->maxpool
+            first = Conv2D(padding="same",
+                           kernel_initializer="he_normal",
+                           kernel_regularizer=l2(1e-4),
+                           name=conv_base + '2a',
+                           **first_kwargs)
+        out = first(input_features)
+
+        if dropout is not None:
+            out = Dropout(dropout)(out)
+
+        second = residual_unit(filters=filters, kernel_size=(3, 3),
+                               conv_name_base=conv_base + '2b',
+                               bn_name_base=bn_base + '2b')
+        out = second(out)
+
+        return _shortcut(input_features, out)
+
+    return apply
+
+
+def bottleneck(filters, stage, block, transition_strides=(1, 1),
+               dilation_rate=(1, 1), is_first_block_of_first_layer=False, dropout=None,
+               residual_unit=_bn_relu_conv):
+    """Bottleneck architecture for > 34 layer resnet.
+    Follows improved proposed scheme in http://arxiv.org/pdf/1603.05027v2.pdf
+
+    The block is a 1 X 1 convolution, a 3 X 3 convolution and a final 1 X 1
+    convolution with `filters * 4` filters, merged with the input through
+    `_shortcut`.
+
+    # Arguments
+        filters: number of filters of the first two convolutions.
+        stage, block: labels passed to `_block_name_base` for layer names.
+        transition_strides: strides of the first convolution.
+        dilation_rate: dilation rate of the first convolution.
+        is_first_block_of_first_layer: when True the first convolution is a
+            plain Conv2D instead of a `residual_unit`.
+        dropout: optional dropout rate applied after the first and second
+            convolutions.
+        residual_unit: factory for the residual units, e.g. `_bn_relu_conv`.
+
+    Returns:
+        A function mapping the block input tensor to the block output tensor.
+    """
+    def apply(input_feature):
+        conv_base, bn_base = _block_name_base(stage, block)
+        first_kwargs = dict(filters=filters, kernel_size=(1, 1),
+                            strides=transition_strides,
+                            dilation_rate=dilation_rate)
+
+        if not is_first_block_of_first_layer:
+            first = residual_unit(conv_name_base=conv_base + '2a',
+                                  bn_name_base=bn_base + '2a',
+                                  **first_kwargs)
+        else:
+            # don't repeat bn->relu since we just did bn->relu->maxpool
+            first = Conv2D(padding="same",
+                           kernel_initializer="he_normal",
+                           kernel_regularizer=l2(1e-4),
+                           name=conv_base + '2a',
+                           **first_kwargs)
+        out = first(input_feature)
+
+        if dropout is not None:
+            out = Dropout(dropout)(out)
+
+        middle = residual_unit(filters=filters, kernel_size=(3, 3),
+                               conv_name_base=conv_base + '2b',
+                               bn_name_base=bn_base + '2b')
+        out = middle(out)
+
+        if dropout is not None:
+            out = Dropout(dropout)(out)
+
+        last = residual_unit(filters=filters * 4, kernel_size=(1, 1),
+                             conv_name_base=conv_base + '2c',
+                             bn_name_base=bn_base + '2c')
+        out = last(out)
+
+        return _shortcut(input_feature, out)
+
+    return apply
+
+
+def _handle_dim_ordering():
+    """Set the module globals ROW_AXIS, COL_AXIS and CHANNEL_AXIS.
+
+    The values depend on `K.image_data_format()`: (1, 2, 3) for
+    'channels_last', otherwise rows/cols are 2/3 and channels is 1.
+    """
+    global ROW_AXIS, COL_AXIS, CHANNEL_AXIS
+    channels_last = K.image_data_format() == 'channels_last'
+    ROW_AXIS, COL_AXIS, CHANNEL_AXIS = (1, 2, 3) if channels_last else (2, 3, 1)
+
+
+def _string_to_function(identifier):
+    """Resolve a string to the module-level object with that name.
+
+    Non-string identifiers are returned unchanged.
+
+    # Raises
+        ValueError: if `identifier` is a string that does not name a truthy
+            object in this module's globals.
+    """
+    if not isinstance(identifier, six.string_types):
+        return identifier
+    resolved = globals().get(identifier)
+    if resolved:
+        return resolved
+    raise ValueError('Invalid {}'.format(identifier))
+
+
+def ResNet(input_shape=None, classes=10, block='bottleneck', residual_unit='v2',
+           repetitions=None, initial_filters=64, activation='softmax', include_top=True,
+           input_tensor=None, dropout=None, transition_dilation_rate=(1, 1),
+           initial_strides=(2, 2), initial_kernel_size=(7, 7), initial_pooling='max',
+           final_pooling=None, top='classification'):
+    """Builds a custom ResNet like architecture. Defaults to ResNet50 v2.
+
+    Args:
+        input_shape: optional shape tuple with exactly 3 dimensions, laid out
+            for the active Keras image data format, e.g. `(224, 224, 3)` with
+            `channels_last`. It is validated by `_obtain_input_shape` with a
+            default size of 32 and a minimum size of 8.
+        classes: The number of outputs of the final layer.
+        block: The block function to use. This is either `'basic'`,
+            `'bottleneck'`, the name of another function defined in this
+            module, or a callable. The original paper used `basic` for
+            layers < 50.
+        residual_unit: the basic residual unit, 'v1' for conv bn relu, 'v2' for bn relu
+            conv, the name of another function defined in this module, or a
+            callable. See [Identity Mappings in
+            Deep Residual Networks](https://arxiv.org/abs/1603.05027)
+            for details.
+        repetitions: Number of blocks in each stage. The number of filters is
+            doubled after every stage. Default of None implies the ResNet50v2
+            values of [3, 4, 6, 3].
+        initial_filters: number of filters of the first convolution and of the
+            first stage.
+        activation: `'softmax'`, `'sigmoid'` or None. `'sigmoid'` is only
+            accepted when `classes` is 1.
+        include_top: whether to add the final layers selected by `top`.
+        input_tensor: optional tensor passed to `Input(tensor=...)`.
+        dropout: None for no dropout, otherwise rate of dropout from 0 to 1.
+            Based on [Wide Residual Networks](https://arxiv.org/pdf/1605.07146) paper.
+        transition_dilation_rate: Dilation rate for transition layers. For semantic
+            segmentation of images use a dilation rate of (2, 2). When it is
+            not (1, 1) the first block of each stage keeps a stride of (1, 1)
+            instead of (2, 2).
+        initial_strides: Stride of the very first convolution and MaxPooling2D call,
+            with default (2, 2), set to (1, 1) for small images like cifar.
+        initial_kernel_size: kernel size of the very first convolution, (7, 7) for
+            imagenet and (3, 3) for small image datasets like tiny imagenet and cifar.
+            See [ResNeXt](https://arxiv.org/abs/1611.05431) paper for details.
+        initial_pooling: Determine if there will be an initial pooling layer,
+            'max' for imagenet and None for small image datasets.
+            See [ResNeXt](https://arxiv.org/abs/1611.05431) paper for details.
+        final_pooling: Optional pooling mode for feature extraction at the final
+            model layer when the top layers are not added.
+            - `None` means that the output of the model
+                will be the 4D tensor output of the
+                last convolutional layer.
+            - `avg` means that global average pooling
+                will be applied to the output of the
+                last convolutional layer, and thus
+                the output of the model will be a
+                2D tensor.
+            - `max` means that global max pooling will
+                be applied.
+        top: Defines final layers to evaluate based on a specific problem type. Options
+            are 'classification' for ImageNet style problems, 'segmentation' for
+            problems like the Pascal VOC dataset, and None to exclude these layers
+            entirely.
+
+    Returns:
+        The keras `Model`.
+
+    Raises:
+        ValueError: if `activation` is not supported, if `'sigmoid'` is used
+            with `classes != 1`, if the input shape does not have 3
+            dimensions, or if `block` / `residual_unit` is an unknown name.
+    """
+    if activation not in ['softmax', 'sigmoid', None]:
+        raise ValueError('activation must be one of "softmax", "sigmoid", or None')
+    if activation == 'sigmoid' and classes != 1:
+        raise ValueError('sigmoid activation can only be used when classes = 1')
+    if repetitions is None:
+        repetitions = [3, 4, 6, 3]
+    # Determine proper input shape. The validated shape is already expressed in
+    # the active data format, so it must not be permuted or validated again.
+    input_shape = _obtain_input_shape(input_shape,
+                                      default_size=32,
+                                      min_size=8,
+                                      data_format=K.image_data_format(),
+                                      require_flatten=include_top)
+    _handle_dim_ordering()
+    if len(input_shape) != 3:
+        raise ValueError("Input shape should be a tuple (nb_channels, nb_rows, nb_cols)")
+
+    # _string_to_function returns non-string identifiers unchanged, so it
+    # covers both "name of a module function" and "callable".
+    if block == 'basic':
+        block_fn = basic_block
+    elif block == 'bottleneck':
+        block_fn = bottleneck
+    else:
+        block_fn = _string_to_function(block)
+
+    if residual_unit == 'v2':
+        residual_unit = _bn_relu_conv
+    elif residual_unit == 'v1':
+        residual_unit = _conv_bn_relu
+    else:
+        residual_unit = _string_to_function(residual_unit)
+
+    img_input = Input(shape=input_shape, tensor=input_tensor)
+    x = _conv_bn_relu(filters=initial_filters, kernel_size=initial_kernel_size,
+                      strides=initial_strides)(img_input)
+    if initial_pooling == 'max':
+        x = MaxPooling2D(pool_size=(3, 3), strides=initial_strides, padding="same")(x)
+
+    filters = initial_filters
+    for i, r in enumerate(repetitions):
+        transition_dilation_rates = [transition_dilation_rate] * r
+        transition_strides = [(1, 1)] * r
+        if transition_dilation_rate == (1, 1):
+            transition_strides[0] = (2, 2)
+        x = _residual_block(block_fn, filters=filters,
+                            stage=i, blocks=r,
+                            is_first_layer=(i == 0),
+                            dropout=dropout,
+                            transition_dilation_rates=transition_dilation_rates,
+                            transition_strides=transition_strides,
+                            residual_unit=residual_unit)(x)
+        filters *= 2
+
+    # Last activation
+    x = _bn_relu(x)
+
+    # Classifier block. Strings are compared with ==, never with `is`.
+    if include_top and top == 'classification':
+        x = GlobalAveragePooling2D()(x)
+        x = Dense(units=classes, activation=activation,
+                  kernel_initializer="he_normal")(x)
+    elif include_top and top == 'segmentation':
+        x = Conv2D(classes, (1, 1), activation='linear', padding='same')(x)
+
+        if K.image_data_format() == 'channels_first':
+            channel, row, col = input_shape
+        else:
+            row, col, channel = input_shape
+
+        x = Reshape((row * col, classes))(x)
+        x = Activation(activation)(x)
+        x = Reshape((row, col, classes))(x)
+    elif final_pooling == 'avg':
+        x = GlobalAveragePooling2D()(x)
+    elif final_pooling == 'max':
+        x = GlobalMaxPooling2D()(x)
+
+    model = Model(inputs=img_input, outputs=x)
+    return model
+
+
+def ResNet18(input_shape, classes):
+    """Build an 18-layer ResNet from basic blocks with the default (v2) units.
+    """
+    stage_repetitions = [2, 2, 2, 2]
+    return ResNet(input_shape=input_shape, classes=classes, block=basic_block,
+                  repetitions=stage_repetitions)
+
+
+def ResNet34(input_shape, classes):
+    """Build a 34-layer ResNet from basic blocks with the default (v2) units.
+    """
+    stage_repetitions = [3, 4, 6, 3]
+    return ResNet(input_shape=input_shape, classes=classes, block=basic_block,
+                  repetitions=stage_repetitions)
+
+
+def ResNet50(input_shape, classes):
+    """Build a 50-layer ResNet from bottleneck blocks with the default (v2) units.
+    """
+    stage_repetitions = [3, 4, 6, 3]
+    return ResNet(input_shape=input_shape, classes=classes, block=bottleneck,
+                  repetitions=stage_repetitions)
+
+
+def ResNet101(input_shape, classes):
+    """Build a 101-layer ResNet from bottleneck blocks with the default (v2) units.
+    """
+    stage_repetitions = [3, 4, 23, 3]
+    return ResNet(input_shape=input_shape, classes=classes, block=bottleneck,
+                  repetitions=stage_repetitions)
+
+
+def ResNet152(input_shape, classes):
+    """Build a 152-layer ResNet from bottleneck blocks with the default (v2) units.
+    """
+    stage_repetitions = [3, 8, 36, 3]
+    return ResNet(input_shape=input_shape, classes=classes, block=bottleneck,
+                  repetitions=stage_repetitions)
--- a/code/keras_contrib/applications/wide_resnet.py
+++ b/code/keras_contrib/applications/wide_resnet.py
+# -*- coding: utf-8 -*-
+"""Wide Residual Network models for Keras.
+
+# Reference
+
+- [Wide Residual Networks](https://arxiv.org/abs/1605.07146)
+
+"""
+from __future__ import print_function
+from __future__ import absolute_import
+from __future__ import division
+
+import warnings
+
+from keras.models import Model
+from keras.layers.core import Dense, Dropout, Activation
+from keras.layers.pooling import MaxPooling2D, GlobalAveragePooling2D
+from keras.layers import Input, Conv2D
+from keras.layers.merge import add
+from keras.layers.normalization import BatchNormalization
+from keras.utils.layer_utils import convert_all_kernels_in_model
+from keras.utils.data_utils import get_file
+from keras.engine.topology import get_source_inputs
+from keras_applications.imagenet_utils import _obtain_input_shape
+import keras.backend as K
+
+TH_WEIGHTS_PATH = ('https://github.com/titu1994/Wide-Residual-Networks/'
+                   'releases/download/v1.2/wrn_28_8_th_kernels_th_dim_ordering.h5')
+TF_WEIGHTS_PATH = ('https://github.com/titu1994/Wide-Residual-Networks/'
+                   'releases/download/v1.2/wrn_28_8_tf_kernels_tf_dim_ordering.h5')
+TH_WEIGHTS_PATH_NO_TOP = ('https://github.com/titu1994/Wide-Residual-Networks/releases/'
+                          'download/v1.2/wrn_28_8_th_kernels_th_dim_ordering_no_top.h5')
+TF_WEIGHTS_PATH_NO_TOP = ('https://github.com/titu1994/Wide-Residual-Networks/releases/'
+                          'download/v1.2/wrn_28_8_tf_kernels_tf_dim_ordering_no_top.h5')
+
+
+def WideResidualNetwork(depth=28, width=8, dropout_rate=0.0,
+                        include_top=True, weights='cifar10',
+                        input_tensor=None, input_shape=None,
+                        classes=10, activation='softmax'):
+    """Instantiate the Wide Residual Network architecture,
+        optionally loading weights pre-trained on CIFAR-10.
+
+        The image data format used by the model is the one
+        specified in your Keras config file.
+
+        # Arguments
+            depth: number of layers in the network; `depth - 4` must be
+                divisible by 6.
+            width: multiplier to the ResNet width (number of filters)
+            dropout_rate: dropout rate
+            include_top: whether to include the fully-connected
+                layer at the top of the network.
+            weights: one of `None` (random initialization) or
+                "cifar10" (pre-training on CIFAR-10). Pre-trained weights
+                are only loaded when `depth` is 28, `width` is 8 and
+                `dropout_rate` is 0.0; otherwise the model keeps its
+                initial weights.
+            input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
+                to use as image input for the model.
+            input_shape: optional shape tuple, only to be specified
+                if `include_top` is False (otherwise the input shape
+                has to be `(32, 32, 3)` (with `channels_last` data format)
+                or `(3, 32, 32)` (with `channels_first` data format).
+                It should have exactly 3 inputs channels,
+                and width and height should be no smaller than 8.
+                E.g. `(200, 200, 3)` would be one valid value.
+            classes: optional number of classes to classify images
+                into, only to be specified if `include_top` is True, and
+                if no `weights` argument is specified.
+            activation: activation of the final dense layer.
+
+        # Returns
+            A Keras model instance.
+
+        # Raises
+            ValueError: on an unsupported `weights` value, on
+                `weights='cifar10'` with `include_top` and `classes != 10`,
+                or when `depth - 4` is not divisible by 6.
+        """
+
+    if weights not in {'cifar10', None}:
+        raise ValueError('The `weights` argument should be either '
+                         '`None` (random initialization) or `cifar10` '
+                         '(pre-training on CIFAR-10).')
+
+    if weights == 'cifar10' and include_top and classes != 10:
+        raise ValueError('If using `weights` as CIFAR 10 with `include_top`'
+                         ' as true, `classes` should be 10')
+
+    if (depth - 4) % 6 != 0:
+        raise ValueError('Depth of the network must be such that (depth - 4) '
+                         'should be divisible by 6.')
+
+    # Use the same data-format API as the block helpers in this module;
+    # _obtain_input_shape is given 'channels_first' / 'channels_last'.
+    data_format = K.image_data_format()
+    channels_first = data_format == 'channels_first'
+
+    # Determine proper input shape
+    input_shape = _obtain_input_shape(input_shape,
+                                      default_size=32,
+                                      min_size=8,
+                                      data_format=data_format,
+                                      require_flatten=include_top)
+
+    if input_tensor is None:
+        img_input = Input(shape=input_shape)
+    else:
+        if not K.is_keras_tensor(input_tensor):
+            img_input = Input(tensor=input_tensor, shape=input_shape)
+        else:
+            img_input = input_tensor
+
+    x = __create_wide_residual_network(classes, img_input, include_top, depth, width,
+                                       dropout_rate, activation)
+
+    # Ensure that the model takes into account
+    # any potential predecessors of `input_tensor`.
+    if input_tensor is not None:
+        inputs = get_source_inputs(input_tensor)
+    else:
+        inputs = img_input
+    # Create model.
+    model = Model(inputs, x, name='wide-resnet')
+
+    # Weights exist only for the default configuration.
+    has_pretrained = (depth == 28) and (width == 8) and (dropout_rate == 0.0)
+    if weights == 'cifar10' and has_pretrained:
+        if channels_first:
+            if include_top:
+                h5_file = 'wide_resnet_28_8_th_dim_ordering_th_kernels.h5'
+                origin = TH_WEIGHTS_PATH
+            else:
+                h5_file = 'wide_resnet_28_8_th_dim_ordering_th_kernels_no_top.h5'
+                origin = TH_WEIGHTS_PATH_NO_TOP
+        else:
+            if include_top:
+                h5_file = 'wide_resnet_28_8_tf_dim_ordering_tf_kernels.h5'
+                origin = TF_WEIGHTS_PATH
+            else:
+                h5_file = 'wide_resnet_28_8_tf_dim_ordering_tf_kernels_no_top.h5'
+                origin = TF_WEIGHTS_PATH_NO_TOP
+
+        weights_path = get_file(h5_file, origin, cache_subdir='models')
+        model.load_weights(weights_path)
+
+        # Kernels are converted when the backend does not match the
+        # convention the weight file was saved with.
+        if channels_first and K.backend() == 'tensorflow':
+            warnings.warn('You are using the TensorFlow backend, yet you '
+                          'are using the Theano '
+                          'image data format convention '
+                          '(`image_data_format="channels_first"`). '
+                          'For best performance, set '
+                          '`image_data_format="channels_last"` in '
+                          'your Keras config '
+                          'at ~/.keras/keras.json.')
+            convert_all_kernels_in_model(model)
+        elif not channels_first and K.backend() == 'theano':
+            convert_all_kernels_in_model(model)
+
+    return model
+
+
+def __conv1_block(input):
+    """Initial 3 x 3 convolution with 16 filters, followed by BN and ReLU."""
+    conv = Conv2D(16, (3, 3), padding='same')(input)
+
+    if K.image_data_format() == 'channels_first':
+        bn_axis = 1
+    else:
+        bn_axis = -1
+
+    normalized = BatchNormalization(axis=bn_axis)(conv)
+    return Activation('relu')(normalized)
+
+
+def __conv2_block(input, k=1, dropout=0.0):
+    """Residual block of two 3 x 3 convolutions with `16 * k` filters.
+
+    # Arguments
+        input: input tensor.
+        k: width multiplier.
+        dropout: dropout rate applied between the two convolutions when
+            greater than 0.0.
+
+    # Returns
+        The sum of the shortcut branch and the convolution branch.
+    """
+    n_filters = 16 * k
+    channel_axis = 1 if K.image_data_format() == 'channels_first' else -1
+
+    # Project the shortcut with a 1 x 1 convolution when the input does not
+    # already have n_filters channels.
+    shortcut = input
+    if shortcut._keras_shape[channel_axis] != n_filters:
+        shortcut = Conv2D(n_filters, (1, 1), activation='linear',
+                          padding='same')(shortcut)
+
+    branch = Conv2D(n_filters, (3, 3), padding='same')(input)
+    branch = BatchNormalization(axis=channel_axis)(branch)
+    branch = Activation('relu')(branch)
+
+    if dropout > 0.0:
+        branch = Dropout(dropout)(branch)
+
+    branch = Conv2D(n_filters, (3, 3), padding='same')(branch)
+    branch = BatchNormalization(axis=channel_axis)(branch)
+    branch = Activation('relu')(branch)
+
+    return add([shortcut, branch])
+
+
+def __conv3_block(input, k=1, dropout=0.0):
+    """Residual block of two 3 x 3 convolutions with `32 * k` filters.
+
+    # Arguments
+        input: input tensor.
+        k: width multiplier.
+        dropout: dropout rate applied between the two convolutions when
+            greater than 0.0.
+
+    # Returns
+        The sum of the shortcut branch and the convolution branch.
+    """
+    n_filters = 32 * k
+    channel_axis = 1 if K.image_data_format() == 'channels_first' else -1
+
+    # Project the shortcut with a 1 x 1 convolution when the input does not
+    # already have n_filters channels.
+    shortcut = input
+    if shortcut._keras_shape[channel_axis] != n_filters:
+        shortcut = Conv2D(n_filters, (1, 1), activation='linear',
+                          padding='same')(shortcut)
+
+    branch = Conv2D(n_filters, (3, 3), padding='same')(input)
+    branch = BatchNormalization(axis=channel_axis)(branch)
+    branch = Activation('relu')(branch)
+
+    if dropout > 0.0:
+        branch = Dropout(dropout)(branch)
+
+    branch = Conv2D(n_filters, (3, 3), padding='same')(branch)
+    branch = BatchNormalization(axis=channel_axis)(branch)
+    branch = Activation('relu')(branch)
+
+    return add([shortcut, branch])
+
+
+def ___conv4_block(input, k=1, dropout=0.0):
+    """Residual block of two 3 x 3 convolutions with `64 * k` filters.
+
+    # Arguments
+        input: input tensor.
+        k: width multiplier.
+        dropout: dropout rate applied between the two convolutions when
+            greater than 0.0.
+
+    # Returns
+        The sum of the shortcut branch and the convolution branch.
+    """
+    init = input
+
+    # Same data-format query as __conv2_block / __conv3_block, instead of the
+    # legacy K.image_dim_ordering() == 'th' check.
+    channel_axis = 1 if K.image_data_format() == 'channels_first' else -1
+
+    # Check if input number of filters is same as 64 * k, else
+    # create convolution2d for this input
+    if init._keras_shape[channel_axis] != 64 * k:
+        init = Conv2D(64 * k, (1, 1), activation='linear', padding='same')(init)
+
+    x = Conv2D(64 * k, (3, 3), padding='same')(input)
+    x = BatchNormalization(axis=channel_axis)(x)
+    x = Activation('relu')(x)
+
+    if dropout > 0.0:
+        x = Dropout(dropout)(x)
+
+    x = Conv2D(64 * k, (3, 3), padding='same')(x)
+    x = BatchNormalization(axis=channel_axis)(x)
+    x = Activation('relu')(x)
+
+    m = add([init, x])
+    return m
+
+
+def __create_wide_residual_network(nb_classes, img_input, include_top, depth=28,
+                                   width=8, dropout=0.0, activation='softmax'):
+    ''' Assembles the layer graph of a Wide Residual Network
+
+    Args:
+        nb_classes: Number of output classes (units of the final dense layer)
+        img_input: Input tensor or layer
+        include_top: When true, append global average pooling and the
+               final dense layer
+        depth: Depth of the network. The number of residual blocks per
+               stage is N = (depth - 4) // 6.
+               For a depth of 16, N = (16 - 4) // 6 = 2
+               For a depth of 28, N = (28 - 4) // 6 = 4
+               For a depth of 40, N = (40 - 4) // 6 = 6
+        width: Width multiplier passed to every residual block.
+        dropout: Adds dropout inside the blocks if greater than 0.0
+        activation: Activation of the final dense layer
+
+    Returns: the output tensor of the network
+    '''
+    blocks_per_stage = (depth - 4) // 6
+
+    net = __conv1_block(img_input)
+
+    for _ in range(blocks_per_stage):
+        net = __conv2_block(net, width, dropout)
+    net = MaxPooling2D((2, 2))(net)
+
+    for _ in range(blocks_per_stage):
+        net = __conv3_block(net, width, dropout)
+    net = MaxPooling2D((2, 2))(net)
+
+    for _ in range(blocks_per_stage):
+        net = ___conv4_block(net, width, dropout)
+
+    if not include_top:
+        return net
+
+    net = GlobalAveragePooling2D()(net)
+    return Dense(nb_classes, activation=activation)(net)
--- a/code/keras_contrib/backend/__init__.py
+++ b/code/keras_contrib/backend/__init__.py
+from keras import backend as K
+
+# We import all keras backend functions here,
+# so that files in this repo can import both
+# core and contrib backend functions with a
+# single import statement.
+
+# Re-export the contrib functions of the module that matches the active
+# Keras backend. Any other backend name falls through every branch, so
+# nothing is imported for it.
+if K.backend() == 'theano':
+    from .theano_backend import *
+elif K.backend() == 'tensorflow':
+    from .tensorflow_backend import *
+elif K.backend() == 'cntk':
+    from .cntk_backend import *
--- a/code/keras_contrib/backend/__pycache__/__init__.cpython-310.pyc
+++ b/code/keras_contrib/backend/__pycache__/__init__.cpython-310.pyc
--- a/code/keras_contrib/backend/__pycache__/tensorflow_backend.cpython-310.pyc
+++ b/code/keras_contrib/backend/__pycache__/tensorflow_backend.cpython-310.pyc
--- a/code/keras_contrib/backend/cntk_backend.py
+++ b/code/keras_contrib/backend/cntk_backend.py
+from keras.backend import cntk_backend as KCN
+
+
+def moments(x, axes, shift=None, keep_dims=False):
+    ''' Returns the mean and variance of the input (CNTK backend).
+
+    Every argument is forwarded unchanged to the private
+    `keras.backend.cntk_backend._moments` helper.
+    '''
+    stats = KCN._moments(x, axes=axes, shift=shift, keep_dims=keep_dims)
+    mean, variance = stats
+    return mean, variance
--- a/code/keras_contrib/backend/numpy_backend.py
+++ b/code/keras_contrib/backend/numpy_backend.py
+import numpy as np
+from keras import backend as K
+
+
+def extract_image_patches(X, ksizes, strides,
+                          padding='valid',
+                          data_format='channels_first'):
+    """Placeholder: patch extraction has no NumPy implementation here.
+
+    # Raises
+        NotImplementedError: always.
+    """
+    raise NotImplementedError
+
+
+def depth_to_space(input, scale, data_format=None):
+    """Placeholder: depth-to-space has no NumPy implementation here.
+
+    # Raises
+        NotImplementedError: always.
+    """
+    raise NotImplementedError
+
+
+def moments(x, axes, shift=None, keep_dims=False):
+    """Return the mean and variance of `x` over `axes` using NumPy.
+
+    # Arguments
+        x: array-like input.
+        axes: int or iterable of ints, the axes to reduce over.
+        shift: unused here; accepted so the signature matches the
+            other backends' `moments`.
+        keep_dims: whether the reduced axes are kept with length 1.
+
+    # Returns
+        A tuple `(mean, variance)`.
+    """
+    # Normalise the axes once: a bare int is accepted, and a one-shot
+    # iterator is not exhausted before the variance is computed.
+    if isinstance(axes, (int, np.integer)):
+        reduce_axes = (int(axes),)
+    else:
+        reduce_axes = tuple(axes)
+    mean_batch = np.mean(x, axis=reduce_axes, keepdims=keep_dims)
+    var_batch = np.var(x, axis=reduce_axes, keepdims=keep_dims)
+    return mean_batch, var_batch
--- a/code/keras_contrib/backend/tensorflow_backend.py
+++ b/code/keras_contrib/backend/tensorflow_backend.py
+import tensorflow as tf
+
+try:
+    from tensorflow.python.ops import ctc_ops as ctc
+except ImportError:
+    import tensorflow.contrib.ctc as ctc
+import keras.backend as K
+
+py_all = all
+
+
+def _preprocess_conv2d_input(x, data_format):
+    """Cast and transpose a tensor before handing it to a TF conv-style op.
+
+    # Arguments
+        x: input tensor.
+        data_format: string, `"channels_last"` or `"channels_first"`.
+
+    # Returns
+        The tensor, cast from float64 to float32 when needed and
+        transposed to channels-last when `data_format` is
+        `"channels_first"`.
+    """
+    if K.dtype(x) == 'float64':
+        x = tf.cast(x, 'float32')
+    if data_format != 'channels_first':
+        return x
+    # Move the channel axis to the end:
+    # (samples, input_depth, rows, cols) -> (samples, rows, cols, input_depth)
+    return tf.transpose(x, (0, 2, 3, 1))
+
+
+def _postprocess_conv2d_output(x, data_format):
+    """Transpose and cast the result of a TF conv-style op if needed.
+
+    # Arguments
+        x: A tensor.
+        data_format: string, `"channels_last"` or `"channels_first"`.
+
+    # Returns
+        The tensor, transposed back to channels-first when requested and
+        cast to float64 when the Keras float type is float64.
+    """
+    channels_first = data_format == 'channels_first'
+    # (samples, rows, cols, depth) -> (samples, depth, rows, cols)
+    out = tf.transpose(x, (0, 3, 1, 2)) if channels_first else x
+
+    if K.floatx() != 'float64':
+        return out
+    return tf.cast(out, 'float64')
+
+
+def _preprocess_padding(padding):
+    """Translate a Keras padding name into the TensorFlow spelling.
+
+    # Arguments
+        padding: string, `"same"` or `"valid"`.
+
+    # Returns
+        a string, `"SAME"` or `"VALID"`.
+
+    # Raises
+        ValueError: if `padding` is neither of the two names.
+    """
+    translations = (('same', 'SAME'), ('valid', 'VALID'))
+    for keras_name, tf_name in translations:
+        if padding == keras_name:
+            return tf_name
+    raise ValueError('Invalid padding:', padding)
+
+
+def conv2d(x, kernel, strides=(1, 1), padding='valid', data_format='channels_first',
+           image_shape=None, filter_shape=None):
+    """2D convolution, delegated to the core Keras backend.
+
+    # Arguments
+        x: Input tensor
+        kernel: kernel tensor.
+        strides: strides tuple.
+        padding: string, "same" or "valid".
+        data_format: 'channels_first' or 'channels_last'.
+            Whether to use Theano or TensorFlow dimension
+            ordering in inputs/kernels/outputs.
+        image_shape: Optional, the input tensor shape.
+            Not used by this backend.
+        filter_shape: Optional, the kernel shape.
+            Not used by this backend.
+
+    # Returns
+        x convolved with the kernel.
+
+    # Raises
+        Whatever `K.conv2d` raises for the given arguments.
+    """
+    return K.conv2d(x, kernel,
+                    strides=strides,
+                    padding=padding,
+                    data_format=data_format)
+
+
+def extract_image_patches(x, ksizes, ssizes, padding='same',
+                          data_format='channels_last'):
+    """Extract sliding patches from a batch of images.
+
+    # Arguments
+        x: The input image
+        ksizes: 2-d tuple with the kernel size
+        ssizes: 2-d tuple with the strides size
+        padding: 'same' or 'valid'
+        data_format: 'channels_last' or 'channels_first'
+
+    # Returns
+        The (k_w,k_h) patches extracted
+        TF ==> (batch_size,w,h,k_w,k_h,c)
+        TH ==> (batch_size,w,h,c,k_w,k_h)
+    """
+    patch_sizes = [1, ksizes[0], ksizes[1], 1]
+    patch_strides = [1, ssizes[0], ssizes[1], 1]
+    tf_padding = _preprocess_padding(padding)
+    if data_format == 'channels_first':
+        # The TF op works on channels-last tensors.
+        x = K.permute_dimensions(x, (0, 2, 3, 1))
+    _, _, _, in_channels = K.int_shape(x)
+    flat = tf.extract_image_patches(x, patch_sizes, patch_strides,
+                                    [1, 1, 1, 1], tf_padding)
+    # Reshaping to fit Theano: split the flattened patch depth into
+    # (positions, channels), put the channels first, then unfold the
+    # positions into the two kernel axes.
+    _, out_w, out_h, flat_depth = K.int_shape(flat)
+    per_position = tf.reshape(
+        flat,
+        [-1, out_w, out_h, tf.floordiv(flat_depth, in_channels), in_channels])
+    channel_major = tf.transpose(per_position, [0, 1, 2, 4, 3])
+    result = tf.reshape(
+        channel_major,
+        [-1, out_w, out_h, in_channels, ksizes[0], ksizes[1]])
+    if data_format == 'channels_last':
+        result = K.permute_dimensions(result, [0, 1, 2, 4, 5, 3])
+    return result
+
+
+def depth_to_space(input, scale, data_format=None):
+    """ Uses phase shift algorithm to convert channels/depth for spatial resolution.
+
+    # Arguments
+        input: Input tensor
+        scale: n `int` that is `>= 2`. The size of the spatial block.
+        data_format: 'channels_first' or 'channels_last'.
+            Whether to use Theano or TensorFlow dimension
+            ordering in inputs/kernels/outputs. Defaults to
+            `K.image_data_format()` when None.
+
+    # Returns
+        The result of `tf.depth_to_space`, laid out in `data_format`.
+    """
+    fmt = K.image_data_format() if data_format is None else data_format
+    fmt = fmt.lower()
+    channels_last_input = _preprocess_conv2d_input(input, fmt)
+    rearranged = tf.depth_to_space(channels_last_input, scale)
+    return _postprocess_conv2d_output(rearranged, fmt)
+
+
+def moments(x, axes, shift=None, keep_dims=False):
+    ''' Returns the result of `tf.nn.moments` for the given arguments.
+
+    All four arguments are forwarded unchanged to the TensorFlow call.
+    '''
+    mean_and_variance = tf.nn.moments(x, axes, shift=shift,
+                                      keep_dims=keep_dims)
+    return mean_and_variance
--- a/code/keras_contrib/backend/theano_backend.py
+++ b/code/keras_contrib/backend/theano_backend.py
+from theano import tensor as T
+from theano.sandbox.neighbours import images2neibs
+
+try:
+    import theano.sparse as th_sparse_module
+except ImportError:
+    th_sparse_module = None
+try:
+    from theano.tensor.nnet.nnet import softsign as T_softsign
+except ImportError:
+    from theano.sandbox.softsign import softsign as T_softsign
+from keras.backend import theano_backend as KTH
+from keras.backend.common import image_data_format
+from keras.backend.theano_backend import _preprocess_conv2d_input
+from keras.backend.theano_backend import _postprocess_conv2d_output
+
+py_all = all
+
+
+def conv2d(x, kernel, strides=(1, 1), padding='valid', data_format='channels_first',
+           image_shape=None, filter_shape=None):
+    '''
+    2D convolution built on `T.nnet.conv2d`.
+
+    x: input tensor.
+    kernel: kernel tensor.
+    strides: strides tuple, passed as `subsample`.
+    padding: string, "same" or "valid".
+    data_format: 'channels_first' or 'channels_last'.
+    image_shape: optional input shape, given in `data_format` order.
+    filter_shape: optional kernel shape, given in `data_format` order.
+
+    Returns the convolution output, laid out in `data_format`.
+    Raises Exception for an unknown `data_format` or `padding`.
+    '''
+    if data_format not in {'channels_first', 'channels_last'}:
+        raise Exception('Unknown data_format ' + str(data_format))
+
+    if data_format == 'channels_last':
+        # TF uses the last dimension as channel dimension,
+        # instead of the 2nd one.
+        # TH input shape: (samples, input_depth, rows, cols)
+        # TF input shape: (samples, rows, cols, input_depth)
+        # TH kernel shape: (depth, input_depth, rows, cols)
+        # TF kernel shape: (rows, cols, input_depth, depth)
+        x = x.dimshuffle((0, 3, 1, 2))
+        kernel = kernel.dimshuffle((3, 2, 0, 1))
+        # Reorder the optional static shapes the same way as the tensors.
+        if image_shape:
+            image_shape = (image_shape[0], image_shape[3],
+                           image_shape[1], image_shape[2])
+        if filter_shape:
+            filter_shape = (filter_shape[3], filter_shape[2],
+                            filter_shape[0], filter_shape[1])
+
+    if padding == 'same':
+        th_padding = 'half'
+        # Evaluate the kernel so its concrete shape is available for the
+        # even-kernel crop applied after the convolution.
+        np_kernel = kernel.eval()
+    elif padding == 'valid':
+        th_padding = 'valid'
+    else:
+        raise Exception('Border mode not supported: ' + str(padding))
+
+    # Theano might not accept long type
+    def int_or_none(value):
+        # Values that `int()` rejects with TypeError become None.
+        try:
+            return int(value)
+        except TypeError:
+            return None
+
+    if image_shape is not None:
+        image_shape = tuple(int_or_none(v) for v in image_shape)
+
+    if filter_shape is not None:
+        filter_shape = tuple(int_or_none(v) for v in filter_shape)
+
+    conv_out = T.nnet.conv2d(x, kernel,
+                             border_mode=th_padding,
+                             subsample=strides,
+                             input_shape=image_shape,
+                             filter_shape=filter_shape)
+
+    if padding == 'same':
+        # For an even kernel size, keep only the first ceil(input / stride)
+        # rows/columns (presumably 'half' padding over-produces by one in
+        # that case -- TODO confirm).
+        if np_kernel.shape[2] % 2 == 0:
+            end = (x.shape[2] + strides[0] - 1) // strides[0]
+            conv_out = conv_out[:, :, :end, :]
+        if np_kernel.shape[3] % 2 == 0:
+            end = (x.shape[3] + strides[1] - 1) // strides[1]
+            conv_out = conv_out[:, :, :, :end]
+
+    if data_format == 'channels_last':
+        # Back to (samples, rows, cols, depth).
+        conv_out = conv_out.dimshuffle((0, 2, 3, 1))
+    return conv_out
+
+
+def extract_image_patches(X, ksizes, strides,
+                          padding='valid',
+                          data_format='channels_first'):
+    '''
+    Extract the patches from an image
+    Parameters
+    ----------
+    X : The input image
+    ksizes : 2-d tuple with the kernel size (rows, cols)
+    strides : 2-d tuple with the strides size (rows, cols)
+    padding : 'same' or 'valid'; 'same' is passed to `images2neibs`
+        as mode 'ignore_borders'
+    data_format : 'channels_last' or 'channels_first'
+    Returns
+    -------
+    The (k_w,k_h) patches extracted
+    TF ==> (batch_size,w,h,k_w,k_h,c)
+    TH ==> (batch_size,w,h,c,k_w,k_h)
+    '''
+    patch_rows, patch_cols = ksizes[0], ksizes[1]
+    if padding == 'same':
+        padding = 'ignore_borders'
+    if data_format == 'channels_last':
+        X = KTH.permute_dimensions(X, [0, 3, 1, 2])
+    # Thanks to https://github.com/awentzonline for the help!
+    xs = KTH.shape(X)
+    batch = xs[0]
+    num_channels = xs[-3]
+    # Each axis uses its own kernel size and stride, so non-square
+    # kernels and strides produce the right patch grid.
+    num_rows = 1 + (xs[-2] - patch_rows) // strides[0]
+    num_cols = 1 + (xs[-1] - patch_cols) // strides[1]
+    patches = images2neibs(X, ksizes, strides, padding)
+    # Theano is sorting by channel
+    new_shape = (batch, num_channels, num_rows * num_cols,
+                 patch_rows, patch_cols)
+    patches = KTH.reshape(patches, new_shape)
+    patches = KTH.permute_dimensions(patches, (0, 2, 1, 3, 4))
+    # arrange in a 2d-grid (rows, cols, channels, px, py)
+    new_shape = (batch, num_rows, num_cols, num_channels,
+                 patch_rows, patch_cols)
+    patches = KTH.reshape(patches, new_shape)
+    if data_format == 'channels_last':
+        patches = KTH.permute_dimensions(patches, [0, 1, 2, 4, 5, 3])
+    return patches
+
+
+def depth_to_space(input, scale, data_format=None):
+    """Rearrange channel/depth data into spatial blocks of size `scale`
+    (phase shift algorithm).
+
+    `data_format` defaults to `image_data_format()` when None.
+    """
+    fmt = image_data_format() if data_format is None else data_format
+    fmt = fmt.lower()
+    input = _preprocess_conv2d_input(input, fmt)
+
+    batch, depth, rows, cols = input.shape
+    out_depth = depth // (scale ** 2)
+    # Split the depth into (scale, scale, out_depth), interleave the two
+    # scale factors with the rows and columns, then merge them.
+    blocks = T.reshape(input, (batch, scale, scale, out_depth, rows, cols))
+    interleaved = T.transpose(blocks, (0, 3, 4, 1, 5, 2))
+    merged = T.reshape(interleaved,
+                       (batch, out_depth, rows * scale, cols * scale))
+
+    return _postprocess_conv2d_output(merged, input, None, None, None, fmt)
+
+
+def moments(x, axes, shift=None, keep_dims=False):
+    ''' Returns the mean and variance of the input over `axes`.
+
+    `shift` is accepted but not used by this backend.
+    '''
+    reduce_kwargs = dict(axis=axes, keepdims=keep_dims)
+    return KTH.mean(x, **reduce_kwargs), KTH.var(x, **reduce_kwargs)
--- a/code/keras_contrib/callbacks/__init__.py
+++ b/code/keras_contrib/callbacks/__init__.py
+from .snapshot import SnapshotCallbackBuilder, SnapshotModelCheckpoint
+from .dead_relu_detector import DeadReluDetector
+from .cyclical_learning_rate import CyclicLR
+from .tensorboard import TensorBoardGrouped
--- a/code/keras_contrib/callbacks/__pycache__/__init__.cpython-310.pyc
+++ b/code/keras_contrib/callbacks/__pycache__/__init__.cpython-310.pyc
--- a/code/keras_contrib/callbacks/__pycache__/cyclical_learning_rate.cpython-310.pyc
+++ b/code/keras_contrib/callbacks/__pycache__/cyclical_learning_rate.cpython-310.pyc
--- a/code/keras_contrib/callbacks/__pycache__/dead_relu_detector.cpython-310.pyc
+++ b/code/keras_contrib/callbacks/__pycache__/dead_relu_detector.cpython-310.pyc
--- a/code/keras_contrib/callbacks/__pycache__/snapshot.cpython-310.pyc
+++ b/code/keras_contrib/callbacks/__pycache__/snapshot.cpython-310.pyc
--- a/code/keras_contrib/callbacks/__pycache__/tensorboard.cpython-310.pyc
+++ b/code/keras_contrib/callbacks/__pycache__/tensorboard.cpython-310.pyc
--- a/code/keras_contrib/callbacks/cyclical_learning_rate.py
+++ b/code/keras_contrib/callbacks/cyclical_learning_rate.py
+from keras.callbacks import Callback
+from keras import backend as K
+import numpy as np
+
+
+class CyclicLR(Callback):
+    """This callback implements a cyclical learning rate policy (CLR).
+    The method cycles the learning rate between two boundaries with
+    some constant frequency.
+    # Arguments
+        base_lr: initial learning rate which is the
+            lower boundary in the cycle.
+        max_lr: upper boundary in the cycle. Functionally,
+            it defines the cycle amplitude (max_lr - base_lr).
+            The lr at any cycle is the sum of base_lr
+            and some scaling of the amplitude; therefore
+            max_lr may not actually be reached depending on
+            scaling function.
+        step_size: number of training iterations per
+            half cycle. Authors suggest setting step_size
+            2-8 x training iterations in epoch.
+        mode: one of {triangular, triangular2, exp_range}.
+            Default 'triangular'.
+            Values correspond to policies detailed above.
+            If scale_fn is not None, this argument is ignored.
+        gamma: constant in 'exp_range' scaling function:
+            gamma**(cycle iterations)
+        scale_fn: Custom scaling policy defined by a single
+            argument lambda function, where
+            0 <= scale_fn(x) <= 1 for all x >= 0.
+            mode parameter is ignored
+        scale_mode: {'cycle', 'iterations'}.
+            Defines whether scale_fn is evaluated on
+            cycle number or cycle iterations (training
+            iterations since start of cycle). Default is 'cycle'.
+
+    The amplitude of the cycle can be scaled on a per-iteration or
+    per-cycle basis.
+    This class has three built-in policies, as put forth in the paper.
+    "triangular":
+        A basic triangular cycle w/ no amplitude scaling.
+    "triangular2":
+        A basic triangular cycle that scales initial amplitude by half each cycle.
+    "exp_range":
+        A cycle that scales initial amplitude by gamma**(cycle iterations) at each
+        cycle iteration.
+    For more detail, please see paper.
+
+    # Example for CIFAR-10 w/ batch size 100:
+        ```python
+            clr = CyclicLR(base_lr=0.001, max_lr=0.006,
+                                step_size=2000., mode='triangular')
+            model.fit(X_train, Y_train, callbacks=[clr])
+        ```
+
+    Class also supports custom scaling functions:
+        ```python
+            clr_fn = lambda x: 0.5*(1+np.sin(x*np.pi/2.))
+            clr = CyclicLR(base_lr=0.001, max_lr=0.006,
+                                step_size=2000., scale_fn=clr_fn,
+                                scale_mode='cycle')
+            model.fit(X_train, Y_train, callbacks=[clr])
+        ```
+
+    # References
+
+      - [Cyclical Learning Rates for Training Neural Networks](
+      https://arxiv.org/abs/1506.01186)
+    """
+
+    def __init__(
+            self,
+            base_lr=0.001,
+            max_lr=0.006,
+            step_size=2000.,
+            mode='triangular',
+            gamma=1.,
+            scale_fn=None,
+            scale_mode='cycle'):
+        """Store the cycle settings and select the scaling policy.
+
+        A custom `scale_fn` (with its `scale_mode`) takes precedence;
+        otherwise the built-in policy named by `mode` is used. `mode` is
+        validated in both cases.
+        """
+        super(CyclicLR, self).__init__()
+
+        if mode not in ('triangular', 'triangular2', 'exp_range'):
+            raise KeyError("mode must be one of 'triangular', "
+                           "'triangular2', or 'exp_range'")
+        self.base_lr = base_lr
+        self.max_lr = max_lr
+        self.step_size = step_size
+        self.mode = mode
+        self.gamma = gamma
+        if scale_fn is not None:
+            self.scale_fn = scale_fn
+            self.scale_mode = scale_mode
+        else:
+            # Built-in policies: (scaling function, what it is evaluated on).
+            builtin_policies = {
+                'triangular': (lambda x: 1., 'cycle'),
+                'triangular2': (lambda x: 1 / (2.**(x - 1)), 'cycle'),
+                'exp_range': (lambda x: gamma ** x, 'iterations'),
+            }
+            self.scale_fn, self.scale_mode = builtin_policies[self.mode]
+        self.clr_iterations = 0.
+        self.trn_iterations = 0.
+        self.history = {}
+
+        self._reset()
+
+    def _reset(self, new_base_lr=None, new_max_lr=None,
+               new_step_size=None):
+        """Restart the cycle counter.
+
+        Any boundary or step size passed as non-None replaces the
+        stored value first.
+        """
+        overrides = (
+            ('base_lr', new_base_lr),
+            ('max_lr', new_max_lr),
+            ('step_size', new_step_size),
+        )
+        for attr_name, new_value in overrides:
+            if new_value is not None:
+                setattr(self, attr_name, new_value)
+        self.clr_iterations = 0.
+
+    def clr(self):
+        """Return the learning rate for the current cycle iteration."""
+        cycle = np.floor(1 + self.clr_iterations / (2 * self.step_size))
+        x = np.abs(self.clr_iterations / self.step_size - 2 * cycle + 1)
+        # Triangular wave over the cycle, scaled by the lr range.
+        amplitude = (self.max_lr - self.base_lr) * np.maximum(0, (1 - x))
+        # The scaling policy is evaluated on the cycle number or on the
+        # iteration count, depending on `scale_mode`.
+        if self.scale_mode == 'cycle':
+            scale_arg = cycle
+        else:
+            scale_arg = self.clr_iterations
+        return self.base_lr + amplitude * self.scale_fn(scale_arg)
+
+    def on_train_begin(self, logs=None):
+        """Set the optimizer's learning rate when training starts.
+
+        Uses `base_lr` when no cycle iteration has run yet, otherwise the
+        rate computed by `clr()`.
+
+        # Arguments
+            logs: optional dict of logs; not used here.
+        """
+        # `None` default instead of a shared mutable `{}`, matching the
+        # other callback hooks of this class.
+        logs = logs or {}
+
+        if self.clr_iterations == 0:
+            K.set_value(self.model.optimizer.lr, self.base_lr)
+        else:
+            K.set_value(self.model.optimizer.lr, self.clr())
+
+    def on_batch_end(self, epoch, logs=None):
+        """Advance the counters, update the learning rate, record history.
+
+        After each batch the new rate is written to the optimizer, then
+        the rate, the training iteration count and every entry of `logs`
+        are appended to `self.history`.
+        """
+        logs = logs or {}
+        self.trn_iterations += 1
+        self.clr_iterations += 1
+
+        optimizer_lr = self.model.optimizer.lr
+        K.set_value(optimizer_lr, self.clr())
+
+        lr_history = self.history.setdefault('lr', [])
+        lr_history.append(K.get_value(optimizer_lr))
+        iteration_history = self.history.setdefault('iterations', [])
+        iteration_history.append(self.trn_iterations)
+
+        for name, value in logs.items():
+            self.history.setdefault(name, []).append(value)
+
+    def on_epoch_end(self, epoch, logs=None):
+        logs = logs or {}
+        logs['lr'] = K.get_value(self.model.optimizer.lr)