Initial commit

8de66223 · maming · 8de66223 · 8de66223 · 8de66223 · 8de66223
Commit 8de66223 authored Feb 04, 2026 by maming
20 changed files
--- a/code/keras_contrib/layers/normalization/__init__.py
+++ b/code/keras_contrib/layers/normalization/__init__.py
--- a/code/keras_contrib/layers/normalization/__pycache__/__init__.cpython-310.pyc
+++ b/code/keras_contrib/layers/normalization/__pycache__/__init__.cpython-310.pyc
--- a/code/keras_contrib/layers/normalization/__pycache__/groupnormalization.cpython-310.pyc
+++ b/code/keras_contrib/layers/normalization/__pycache__/groupnormalization.cpython-310.pyc
--- a/code/keras_contrib/layers/normalization/__pycache__/instancenormalization.cpython-310.pyc
+++ b/code/keras_contrib/layers/normalization/__pycache__/instancenormalization.cpython-310.pyc
--- a/code/keras_contrib/layers/normalization/groupnormalization.py
+++ b/code/keras_contrib/layers/normalization/groupnormalization.py
+from keras.layers import Layer, InputSpec
+from keras import initializers, regularizers, constraints
+from keras import backend as K
+from keras_contrib import backend as KC
+
+
+class GroupNormalization(Layer):
+    """Group normalization layer.
+
+    Group Normalization divides the channels into groups and computes
+    within each group
+    the mean and variance for normalization.
+    Group Normalization's computation is independent
+     of batch sizes, and its accuracy is stable in a wide range of batch sizes.
+
+    Relation to Layer Normalization:
+    If the number of groups is set to 1, then this operation becomes identical to
+    Layer Normalization.
+
+    Relation to Instance Normalization:
+    If the number of groups is set to the
+    input dimension (number of groups is equal
+    to number of channels), then this operation becomes
+    identical to Instance Normalization.
+
+    # Arguments
+        groups: Integer, the number of groups for Group Normalization.
+            Can be in the range [1, N] where N is the input dimension.
+            The input dimension must be divisible by the number of groups.
+        axis: Integer, the axis that should be normalized
+            (typically the features axis).
+            For instance, after a `Conv2D` layer with
+            `data_format="channels_first"`,
+            set `axis=1` in `BatchNormalization`.
+        epsilon: Small float added to variance to avoid dividing by zero.
+        center: If True, add offset of `beta` to normalized tensor.
+            If False, `beta` is ignored.
+        scale: If True, multiply by `gamma`.
+            If False, `gamma` is not used.
+            When the next layer is linear (also e.g. `nn.relu`),
+            this can be disabled since the scaling
+            will be done by the next layer.
+        beta_initializer: Initializer for the beta weight.
+        gamma_initializer: Initializer for the gamma weight.
+        beta_regularizer: Optional regularizer for the beta weight.
+        gamma_regularizer: Optional regularizer for the gamma weight.
+        beta_constraint: Optional constraint for the beta weight.
+        gamma_constraint: Optional constraint for the gamma weight.
+
+    # Input shape
+        Arbitrary. Use the keyword argument `input_shape`
+        (tuple of integers, does not include the samples axis)
+        when using this layer as the first layer in a model.
+
+    # Output shape
+        Same shape as input.
+
+    # References
+        - [Group Normalization](https://arxiv.org/abs/1803.08494)
+    """
+
+    def __init__(self,
+                 groups=32,
+                 axis=-1,
+                 epsilon=1e-5,
+                 center=True,
+                 scale=True,
+                 beta_initializer='zeros',
+                 gamma_initializer='ones',
+                 beta_regularizer=None,
+                 gamma_regularizer=None,
+                 beta_constraint=None,
+                 gamma_constraint=None,
+                 **kwargs):
+        super(GroupNormalization, self).__init__(**kwargs)
+        self.supports_masking = True
+        self.groups = groups
+        self.axis = axis
+        self.epsilon = epsilon
+        self.center = center
+        self.scale = scale
+        self.beta_initializer = initializers.get(beta_initializer)
+        self.gamma_initializer = initializers.get(gamma_initializer)
+        self.beta_regularizer = regularizers.get(beta_regularizer)
+        self.gamma_regularizer = regularizers.get(gamma_regularizer)
+        self.beta_constraint = constraints.get(beta_constraint)
+        self.gamma_constraint = constraints.get(gamma_constraint)
+
+    def build(self, input_shape):
+        dim = input_shape[self.axis]
+
+        if dim is None:
+            raise ValueError('Axis ' + str(self.axis) + ' of '
+                             'input tensor should have a defined dimension '
+                             'but the layer received an input with shape ' +
+                             str(input_shape) + '.')
+
+        if dim < self.groups:
+            raise ValueError('Number of groups (' + str(self.groups) + ') cannot be '
+                             'more than the number of channels (' +
+                             str(dim) + ').')
+
+        if dim % self.groups != 0:
+            raise ValueError('Number of groups (' + str(self.groups) + ') must be a '
+                             'multiple of the number of channels (' +
+                             str(dim) + ').')
+
+        self.input_spec = InputSpec(ndim=len(input_shape),
+                                    axes={self.axis: dim})
+        shape = (dim,)
+
+        if self.scale:
+            self.gamma = self.add_weight(shape=shape,
+                                         name='gamma',
+                                         initializer=self.gamma_initializer,
+                                         regularizer=self.gamma_regularizer,
+                                         constraint=self.gamma_constraint)
+        else:
+            self.gamma = None
+        if self.center:
+            self.beta = self.add_weight(shape=shape,
+                                        name='beta',
+                                        initializer=self.beta_initializer,
+                                        regularizer=self.beta_regularizer,
+                                        constraint=self.beta_constraint)
+        else:
+            self.beta = None
+        self.built = True
+
+    def call(self, inputs, **kwargs):
+        input_shape = K.int_shape(inputs)
+        tensor_input_shape = K.shape(inputs)
+
+        # Prepare broadcasting shape.
+        reduction_axes = list(range(len(input_shape)))
+        del reduction_axes[self.axis]
+        broadcast_shape = [1] * len(input_shape)
+        broadcast_shape[self.axis] = input_shape[self.axis] // self.groups
+        broadcast_shape.insert(1, self.groups)
+
+        reshape_group_shape = K.shape(inputs)
+        group_axes = [reshape_group_shape[i] for i in range(len(input_shape))]
+        group_axes[self.axis] = input_shape[self.axis] // self.groups
+        group_axes.insert(1, self.groups)
+
+        # reshape inputs to new group shape
+        group_shape = [group_axes[0], self.groups] + group_axes[2:]
+        group_shape = K.stack(group_shape)
+        inputs = K.reshape(inputs, group_shape)
+
+        group_reduction_axes = list(range(len(group_axes)))
+        mean, variance = KC.moments(inputs, group_reduction_axes[2:],
+                                    keep_dims=True)
+        inputs = (inputs - mean) / (K.sqrt(variance + self.epsilon))
+
+        # prepare broadcast shape
+        inputs = K.reshape(inputs, group_shape)
+
+        outputs = inputs
+
+        # In this case we must explicitly broadcast all parameters.
+        if self.scale:
+            broadcast_gamma = K.reshape(self.gamma, broadcast_shape)
+            outputs = outputs * broadcast_gamma
+
+        if self.center:
+            broadcast_beta = K.reshape(self.beta, broadcast_shape)
+            outputs = outputs + broadcast_beta
+
+        # finally we reshape the output back to the input shape
+        outputs = K.reshape(outputs, tensor_input_shape)
+
+        return outputs
+
+    def get_config(self):
+        config = {
+            'groups': self.groups,
+            'axis': self.axis,
+            'epsilon': self.epsilon,
+            'center': self.center,
+            'scale': self.scale,
+            'beta_initializer': initializers.serialize(self.beta_initializer),
+            'gamma_initializer': initializers.serialize(self.gamma_initializer),
+            'beta_regularizer': regularizers.serialize(self.beta_regularizer),
+            'gamma_regularizer': regularizers.serialize(self.gamma_regularizer),
+            'beta_constraint': constraints.serialize(self.beta_constraint),
+            'gamma_constraint': constraints.serialize(self.gamma_constraint)
+        }
+        base_config = super(GroupNormalization, self).get_config()
+        return dict(list(base_config.items()) + list(config.items()))
+
+    def compute_output_shape(self, input_shape):
+        return input_shape
--- a/code/keras_contrib/layers/normalization/instancenormalization.py
+++ b/code/keras_contrib/layers/normalization/instancenormalization.py
+from keras.layers import Layer, InputSpec
+from keras import initializers, regularizers, constraints
+from keras import backend as K
+
+
+class InstanceNormalization(Layer):
+    """Instance normalization layer.
+
+    Normalize the activations of the previous layer at each step,
+    i.e. applies a transformation that maintains the mean activation
+    close to 0 and the activation standard deviation close to 1.
+
+    # Arguments
+        axis: Integer, the axis that should be normalized
+            (typically the features axis).
+            For instance, after a `Conv2D` layer with
+            `data_format="channels_first"`,
+            set `axis=1` in `InstanceNormalization`.
+            Setting `axis=None` will normalize all values in each
+            instance of the batch.
+            Axis 0 is the batch dimension. `axis` cannot be set to 0 to avoid errors.
+        epsilon: Small float added to variance to avoid dividing by zero.
+        center: If True, add offset of `beta` to normalized tensor.
+            If False, `beta` is ignored.
+        scale: If True, multiply by `gamma`.
+            If False, `gamma` is not used.
+            When the next layer is linear (also e.g. `nn.relu`),
+            this can be disabled since the scaling
+            will be done by the next layer.
+        beta_initializer: Initializer for the beta weight.
+        gamma_initializer: Initializer for the gamma weight.
+        beta_regularizer: Optional regularizer for the beta weight.
+        gamma_regularizer: Optional regularizer for the gamma weight.
+        beta_constraint: Optional constraint for the beta weight.
+        gamma_constraint: Optional constraint for the gamma weight.
+
+    # Input shape
+        Arbitrary. Use the keyword argument `input_shape`
+        (tuple of integers, does not include the samples axis)
+        when using this layer as the first layer in a Sequential model.
+
+    # Output shape
+        Same shape as input.
+
+    # References
+        - [Layer Normalization](https://arxiv.org/abs/1607.06450)
+        - [Instance Normalization: The Missing Ingredient for Fast Stylization](
+        https://arxiv.org/abs/1607.08022)
+    """
+    def __init__(self,
+                 axis=None,
+                 epsilon=1e-3,
+                 center=True,
+                 scale=True,
+                 beta_initializer='zeros',
+                 gamma_initializer='ones',
+                 beta_regularizer=None,
+                 gamma_regularizer=None,
+                 beta_constraint=None,
+                 gamma_constraint=None,
+                 **kwargs):
+        super(InstanceNormalization, self).__init__(**kwargs)
+        self.supports_masking = True
+        self.axis = axis
+        self.epsilon = epsilon
+        self.center = center
+        self.scale = scale
+        self.beta_initializer = initializers.get(beta_initializer)
+        self.gamma_initializer = initializers.get(gamma_initializer)
+        self.beta_regularizer = regularizers.get(beta_regularizer)
+        self.gamma_regularizer = regularizers.get(gamma_regularizer)
+        self.beta_constraint = constraints.get(beta_constraint)
+        self.gamma_constraint = constraints.get(gamma_constraint)
+
+    def build(self, input_shape):
+        ndim = len(input_shape)
+        if self.axis == 0:
+            raise ValueError('Axis cannot be zero')
+
+        if (self.axis is not None) and (ndim == 2):
+            raise ValueError('Cannot specify axis for rank 1 tensor')
+
+        self.input_spec = InputSpec(ndim=ndim)
+
+        if self.axis is None:
+            shape = (1,)
+        else:
+            shape = (input_shape[self.axis],)
+
+        if self.scale:
+            self.gamma = self.add_weight(shape=shape,
+                                         name='gamma',
+                                         initializer=self.gamma_initializer,
+                                         regularizer=self.gamma_regularizer,
+                                         constraint=self.gamma_constraint)
+        else:
+            self.gamma = None
+        if self.center:
+            self.beta = self.add_weight(shape=shape,
+                                        name='beta',
+                                        initializer=self.beta_initializer,
+                                        regularizer=self.beta_regularizer,
+                                        constraint=self.beta_constraint)
+        else:
+            self.beta = None
+        self.built = True
+
+    def call(self, inputs, training=None):
+        input_shape = K.int_shape(inputs)
+        reduction_axes = list(range(0, len(input_shape)))
+
+        if self.axis is not None:
+            del reduction_axes[self.axis]
+
+        del reduction_axes[0]
+
+        mean = K.mean(inputs, reduction_axes, keepdims=True)
+        stddev = K.std(inputs, reduction_axes, keepdims=True) + self.epsilon
+        normed = (inputs - mean) / stddev
+
+        broadcast_shape = [1] * len(input_shape)
+        if self.axis is not None:
+            broadcast_shape[self.axis] = input_shape[self.axis]
+
+        if self.scale:
+            broadcast_gamma = K.reshape(self.gamma, broadcast_shape)
+            normed = normed * broadcast_gamma
+        if self.center:
+            broadcast_beta = K.reshape(self.beta, broadcast_shape)
+            normed = normed + broadcast_beta
+        return normed
+
+    def get_config(self):
+        config = {
+            'axis': self.axis,
+            'epsilon': self.epsilon,
+            'center': self.center,
+            'scale': self.scale,
+            'beta_initializer': initializers.serialize(self.beta_initializer),
+            'gamma_initializer': initializers.serialize(self.gamma_initializer),
+            'beta_regularizer': regularizers.serialize(self.beta_regularizer),
+            'gamma_regularizer': regularizers.serialize(self.gamma_regularizer),
+            'beta_constraint': constraints.serialize(self.beta_constraint),
+            'gamma_constraint': constraints.serialize(self.gamma_constraint)
+        }
+        base_config = super(InstanceNormalization, self).get_config()
+        return dict(list(base_config.items()) + list(config.items()))
--- a/code/keras_contrib/losses/__init__.py
+++ b/code/keras_contrib/losses/__init__.py
+from .dssim import DSSIMObjective
+from .jaccard import jaccard_distance
+from .crf_losses import crf_loss, crf_nll
--- a/code/keras_contrib/losses/__pycache__/__init__.cpython-310.pyc
+++ b/code/keras_contrib/losses/__pycache__/__init__.cpython-310.pyc
--- a/code/keras_contrib/losses/__pycache__/crf_losses.cpython-310.pyc
+++ b/code/keras_contrib/losses/__pycache__/crf_losses.cpython-310.pyc
--- a/code/keras_contrib/losses/__pycache__/dssim.cpython-310.pyc
+++ b/code/keras_contrib/losses/__pycache__/dssim.cpython-310.pyc
--- a/code/keras_contrib/losses/__pycache__/jaccard.cpython-310.pyc
+++ b/code/keras_contrib/losses/__pycache__/jaccard.cpython-310.pyc
--- a/code/keras_contrib/losses/crf_losses.py
+++ b/code/keras_contrib/losses/crf_losses.py
+from keras import backend as K
+from keras.losses import categorical_crossentropy
+from keras.losses import sparse_categorical_crossentropy
+
+
+def crf_nll(y_true, y_pred):
+    """The negative log-likelihood for linear chain Conditional Random Field (CRF).
+
+    This loss function is only used when the `layers.CRF` layer
+    is trained in the "join" mode.
+
+    # Arguments
+        y_true: tensor with true targets.
+        y_pred: tensor with predicted targets.
+
+    # Returns
+        A scalar representing corresponding to the negative log-likelihood.
+
+    # Raises
+        TypeError: If CRF is not the last layer.
+
+    # About GitHub
+        If you open an issue or a pull request about CRF, please
+        add `cc @lzfelix` to notify Luiz Felix.
+    """
+
+    crf, idx = y_pred._keras_history[:2]
+    if crf._outbound_nodes:
+        raise TypeError('When learn_model="join", CRF must be the last layer.')
+    if crf.sparse_target:
+        y_true = K.one_hot(K.cast(y_true[:, :, 0], 'int32'), crf.units)
+    X = crf._inbound_nodes[idx].input_tensors[0]
+    mask = crf._inbound_nodes[idx].input_masks[0]
+    nloglik = crf.get_negative_log_likelihood(y_true, X, mask)
+    return nloglik
+
+
+def crf_loss(y_true, y_pred):
+    """General CRF loss function depending on the learning mode.
+
+    # Arguments
+        y_true: tensor with true targets.
+        y_pred: tensor with predicted targets.
+
+    # Returns
+        If the CRF layer is being trained in the join mode, returns the negative
+        log-likelihood. Otherwise returns the categorical crossentropy implemented
+        by the underlying Keras backend.
+
+    # About GitHub
+        If you open an issue or a pull request about CRF, please
+        add `cc @lzfelix` to notify Luiz Felix.
+    """
+    crf, idx = y_pred._keras_history[:2]
+    if crf.learn_mode == 'join':
+        return crf_nll(y_true, y_pred)
+    else:
+        if crf.sparse_target:
+            return sparse_categorical_crossentropy(y_true, y_pred)
+        else:
+            return categorical_crossentropy(y_true, y_pred)
--- a/code/keras_contrib/losses/dssim.py
+++ b/code/keras_contrib/losses/dssim.py
+from __future__ import absolute_import
+import keras_contrib.backend as KC
+from keras import backend as K
+
+
+class DSSIMObjective:
+    """Difference of Structural Similarity (DSSIM loss function).
+    Clipped between 0 and 0.5
+
+    Note : You should add a regularization term like a l2 loss in addition to this one.
+    Note : In theano, the `kernel_size` must be a factor of the output size. So 3 could
+           not be the `kernel_size` for an output of 32.
+
+    # Arguments
+        k1: Parameter of the SSIM (default 0.01)
+        k2: Parameter of the SSIM (default 0.03)
+        kernel_size: Size of the sliding window (default 3)
+        max_value: Max value of the output (default 1.0)
+    """
+
+    def __init__(self, k1=0.01, k2=0.03, kernel_size=3, max_value=1.0):
+        self.__name__ = 'DSSIMObjective'
+        self.kernel_size = kernel_size
+        self.k1 = k1
+        self.k2 = k2
+        self.max_value = max_value
+        self.c1 = (self.k1 * self.max_value) ** 2
+        self.c2 = (self.k2 * self.max_value) ** 2
+        self.dim_ordering = K.image_data_format()
+        self.backend = K.backend()
+
+    def __int_shape(self, x):
+        return K.int_shape(x) if self.backend == 'tensorflow' else K.shape(x)
+
+    def __call__(self, y_true, y_pred):
+        # There are additional parameters for this function
+        # Note: some of the 'modes' for edge behavior do not yet have a
+        # gradient definition in the Theano tree
+        #   and cannot be used for learning
+
+        kernel = [self.kernel_size, self.kernel_size]
+        y_true = K.reshape(y_true, [-1] + list(self.__int_shape(y_pred)[1:]))
+        y_pred = K.reshape(y_pred, [-1] + list(self.__int_shape(y_pred)[1:]))
+
+        patches_pred = KC.extract_image_patches(y_pred, kernel, kernel, 'valid',
+                                                self.dim_ordering)
+        patches_true = KC.extract_image_patches(y_true, kernel, kernel, 'valid',
+                                                self.dim_ordering)
+
+        # Reshape to get the var in the cells
+        bs, w, h, c1, c2, c3 = self.__int_shape(patches_pred)
+        patches_pred = K.reshape(patches_pred, [-1, w, h, c1 * c2 * c3])
+        patches_true = K.reshape(patches_true, [-1, w, h, c1 * c2 * c3])
+        # Get mean
+        u_true = K.mean(patches_true, axis=-1)
+        u_pred = K.mean(patches_pred, axis=-1)
+        # Get variance
+        var_true = K.var(patches_true, axis=-1)
+        var_pred = K.var(patches_pred, axis=-1)
+        # Get std dev
+        covar_true_pred = K.mean(patches_true * patches_pred, axis=-1) - u_true * u_pred
+
+        ssim = (2 * u_true * u_pred + self.c1) * (2 * covar_true_pred + self.c2)
+        denom = ((K.square(u_true)
+                  + K.square(u_pred)
+                  + self.c1) * (var_pred + var_true + self.c2))
+        ssim /= denom  # no need for clipping, c1 and c2 make the denom non-zero
+        return K.mean((1.0 - ssim) / 2.0)
--- a/code/keras_contrib/losses/jaccard.py
+++ b/code/keras_contrib/losses/jaccard.py
+from keras import backend as K
+
+
+def jaccard_distance(y_true, y_pred, smooth=100):
+    """Jaccard distance for semantic segmentation.
+
+    Also known as the intersection-over-union loss.
+
+    This loss is useful when you have unbalanced numbers of pixels within an image
+    because it gives all classes equal weight. However, it is not the defacto
+    standard for image segmentation.
+
+    For example, assume you are trying to predict if
+    each pixel is cat, dog, or background.
+    You have 80% background pixels, 10% dog, and 10% cat.
+    If the model predicts 100% background
+    should it be be 80% right (as with categorical cross entropy)
+    or 30% (with this loss)?
+
+    The loss has been modified to have a smooth gradient as it converges on zero.
+    This has been shifted so it converges on 0 and is smoothed to avoid exploding
+    or disappearing gradient.
+
+    Jaccard = (|X & Y|)/ (|X|+ |Y| - |X & Y|)
+            = sum(|A*B|)/(sum(|A|)+sum(|B|)-sum(|A*B|))
+
+    # Arguments
+        y_true: The ground truth tensor.
+        y_pred: The predicted tensor
+        smooth: Smoothing factor. Default is 100.
+
+    # Returns
+        The Jaccard distance between the two tensors.
+
+    # References
+        - [What is a good evaluation measure for semantic segmentation?](
+           http://www.bmva.org/bmvc/2013/Papers/paper0032/paper0032.pdf)
+
+    """
+    intersection = K.sum(K.abs(y_true * y_pred), axis=-1)
+    sum_ = K.sum(K.abs(y_true) + K.abs(y_pred), axis=-1)
+    jac = (intersection + smooth) / (sum_ - intersection + smooth)
+    return (1 - jac) * smooth
--- a/code/keras_contrib/metrics/__init__.py
+++ b/code/keras_contrib/metrics/__init__.py
+from .crf_accuracies import crf_accuracy, crf_marginal_accuracy
+from .crf_accuracies import crf_viterbi_accuracy
--- a/code/keras_contrib/metrics/__pycache__/__init__.cpython-310.pyc
+++ b/code/keras_contrib/metrics/__pycache__/__init__.cpython-310.pyc
--- a/code/keras_contrib/metrics/__pycache__/crf_accuracies.cpython-310.pyc
+++ b/code/keras_contrib/metrics/__pycache__/crf_accuracies.cpython-310.pyc
--- a/code/keras_contrib/metrics/crf_accuracies.py
+++ b/code/keras_contrib/metrics/crf_accuracies.py
+from keras import backend as K
+
+
+def _get_accuracy(y_true, y_pred, mask, sparse_target=False):
+    y_pred = K.argmax(y_pred, -1)
+    if sparse_target:
+        y_true = K.cast(y_true[:, :, 0], K.dtype(y_pred))
+    else:
+        y_true = K.argmax(y_true, -1)
+    judge = K.cast(K.equal(y_pred, y_true), K.floatx())
+    if mask is None:
+        return K.mean(judge)
+    else:
+        mask = K.cast(mask, K.floatx())
+        return K.sum(judge * mask) / K.sum(mask)
+
+
+def crf_viterbi_accuracy(y_true, y_pred):
+    '''Use Viterbi algorithm to get best path, and compute its accuracy.
+    `y_pred` must be an output from CRF.'''
+    crf, idx = y_pred._keras_history[:2]
+    X = crf._inbound_nodes[idx].input_tensors[0]
+    mask = crf._inbound_nodes[idx].input_masks[0]
+    y_pred = crf.viterbi_decoding(X, mask)
+    return _get_accuracy(y_true, y_pred, mask, crf.sparse_target)
+
+
+def crf_marginal_accuracy(y_true, y_pred):
+    '''Use time-wise marginal argmax as prediction.
+    `y_pred` must be an output from CRF with `learn_mode="marginal"`.'''
+    crf, idx = y_pred._keras_history[:2]
+    X = crf._inbound_nodes[idx].input_tensors[0]
+    mask = crf._inbound_nodes[idx].input_masks[0]
+    y_pred = crf.get_marginal_prob(X, mask)
+    return _get_accuracy(y_true, y_pred, mask, crf.sparse_target)
+
+
+def crf_accuracy(y_true, y_pred):
+    '''Ge default accuracy based on CRF `test_mode`.'''
+    crf, idx = y_pred._keras_history[:2]
+    if crf.test_mode == 'viterbi':
+        return crf_viterbi_accuracy(y_true, y_pred)
+    else:
+        return crf_marginal_accuracy(y_true, y_pred)
--- a/code/keras_contrib/optimizers/__init__.py
+++ b/code/keras_contrib/optimizers/__init__.py
+from .ftml import FTML
+from .padam import Padam
+from .yogi import Yogi
+from .lars import LARS
+
+# Lowercase aliases for the FTML and LARS optimizer classes imported above.
+ftml = FTML
+lars = LARS
--- a/code/keras_contrib/optimizers/__pycache__/__init__.cpython-310.pyc
+++ b/code/keras_contrib/optimizers/__pycache__/__init__.cpython-310.pyc