Commit 8de66223 authored by maming's avatar maming
Browse files

Initial commit

parents
Pipeline #3358 canceled with stages
import numpy as np
from keras.callbacks import Callback
from keras import backend as K
class DeadReluDetector(Callback):
    """Reports the number of dead ReLUs after each training epoch.

    A ReLU is considered to be dead if it did not fire once for the
    entire training set.

    # Arguments
        x_train: Training dataset to check whether or not neurons fire.
        verbose: verbosity mode.
            True means that even a single dead neuron triggers a warning
            message.
            False means that only a significant number of dead neurons
            (10% or more) triggers a warning message.
    """

    def __init__(self, x_train, verbose=False):
        super(DeadReluDetector, self).__init__()
        self.x_train = x_train
        self.verbose = verbose
        # Share of dead neurons that triggers a warning in non-verbose mode.
        self.dead_neurons_share_threshold = 0.1

    @staticmethod
    def is_relu_layer(layer):
        # Should work for all layers with relu activation.
        # Tested for Dense and Conv2D.
        return layer.get_config().get('activation', None) == 'relu'

    def get_relu_activations(self):
        """Yield [layer_index, activations, layer_name, weight_shape] for
        every weighted layer, with activations evaluated on the full
        training set.
        """
        model_input = self.model.input
        is_multi_input = isinstance(model_input, list)
        if not is_multi_input:
            model_input = [model_input]
        # Build one backend function per layer that actually carries weights.
        funcs = {}
        for index, layer in enumerate(self.model.layers):
            if not layer.get_weights():
                continue
            funcs[index] = K.function(model_input
                                      + [K.learning_phase()], [layer.output])
        # Learning phase 1. (training mode) is appended to the inputs.
        if is_multi_input:
            list_inputs = []
            list_inputs.extend(self.x_train)
            list_inputs.append(1.)
        else:
            list_inputs = [self.x_train, 1.]
        layer_outputs = {}
        for index, func in funcs.items():
            layer_outputs[index] = func(list_inputs)[0]
        for layer_index, layer_activations in layer_outputs.items():
            if self.is_relu_layer(self.model.layers[layer_index]):
                layer_name = self.model.layers[layer_index].name
                # layer_weight is a list [W] (+ [b])
                layer_weight = self.model.layers[layer_index].get_weights()
                # With kernel and bias the weights are saved as a list [W, b];
                # with only weights it is [W].
                # Fix: isinstance instead of `type(...) is not list`.
                if not isinstance(layer_weight, list):
                    raise ValueError("'Layer_weight' should be a list, "
                                     "but was {}".format(type(layer_weight)))
                # There are no weights for the current layer; skip it.
                # This is only legitimate if the layer is "Activation".
                if len(layer_weight) == 0:
                    continue
                layer_weight_shape = np.shape(layer_weight[0])
                yield [layer_index,
                       layer_activations,
                       layer_name,
                       layer_weight_shape]

    def on_epoch_end(self, epoch, logs=None):
        # Fix: the default used to be the mutable ``logs={}``; ``None`` avoids
        # sharing one dict object across calls and matches Keras convention.
        for relu_activation in self.get_relu_activations():
            layer_index, activation_values, layer_name, layer_weight_shape = \
                relu_activation
            shape_act = activation_values.shape
            weight_len = len(layer_weight_shape)
            act_len = len(shape_act)
            # Should work for both Conv and Flat layers.
            if K.image_data_format() == 'channels_last':
                # Features are in the last axis.
                axis_filter = -1
            else:
                # Features come before the convolution axes; for weight_len
                # the input and output dims have to be subtracted.
                axis_filter = -1 - (weight_len - 2)
            total_featuremaps = shape_act[axis_filter]
            # Sum over every axis except the feature axis (matched under
            # both its negative and its positive index).
            axis = []
            for i in range(act_len):
                if (i != axis_filter) and (i != (len(shape_act) + axis_filter)):
                    axis.append(i)
            axis = tuple(axis)
            dead_neurons = np.sum(np.sum(activation_values, axis=axis) == 0)
            dead_neurons_share = float(dead_neurons) / float(total_featuremaps)
            if ((self.verbose and dead_neurons > 0)
                    or dead_neurons_share >= self.dead_neurons_share_threshold):
                str_warning = ('Layer {} (#{}) has {} '
                               'dead neurons ({:.2%})!').format(layer_name,
                                                                layer_index,
                                                                dead_neurons,
                                                                dead_neurons_share)
                print(str_warning)
from __future__ import absolute_import
from __future__ import print_function
import os
import numpy as np
from keras.callbacks import Callback, ModelCheckpoint, LearningRateScheduler
try:
import requests
except ImportError:
requests = None
class SnapshotModelCheckpoint(Callback):
    """Callback that saves the snapshot weights of the model.

    Saves the model weights on certain epochs (which can be considered the
    snapshot of the model at that epoch).

    Should be used with the cosine annealing learning rate schedule to save
    the weight just before learning rate is sharply increased.

    # Arguments:
        nb_epochs: total number of epochs that the model will be trained for.
        nb_snapshots: number of times the weights of the model will be saved.
        fn_prefix: prefix for the filename of the weights.
    """

    def __init__(self, nb_epochs, nb_snapshots, fn_prefix='Model'):
        super(SnapshotModelCheckpoint, self).__init__()
        # Number of epochs in one snapshot cycle.
        self.check = nb_epochs // nb_snapshots
        self.fn_prefix = fn_prefix

    def on_epoch_end(self, epoch, logs=None):
        # Fix: ``logs=None`` instead of the mutable default ``{}``.
        # Fix: guard ``self.check > 0`` — with nb_snapshots > nb_epochs the
        # integer division yields 0 and the modulo below would raise
        # ZeroDivisionError on every epoch.
        if self.check > 0 and epoch != 0 and (epoch + 1) % self.check == 0:
            filepath = self.fn_prefix + '-%d.h5' % ((epoch + 1) // self.check)
            self.model.save_weights(filepath, overwrite=True)
class SnapshotCallbackBuilder:
    """Callback builder for snapshot ensemble training of a model.

    From the paper "Snapshot Ensembles: Train 1, Get M For Free" (
    https://openreview.net/pdf?id=BJYwwY9ll)

    Creates a list of callbacks, which are provided when training a model
    so as to save the model weights at certain epochs, and then sharply
    increase the learning rate.
    """

    def __init__(self, nb_epochs, nb_snapshots, init_lr=0.1):
        """Initialize a snapshot callback builder.

        # Arguments:
            nb_epochs: total number of epochs that the model will be trained for.
            nb_snapshots: number of times the weights of the model will be saved.
            init_lr: initial learning rate
        """
        self.T = nb_epochs
        self.M = nb_snapshots
        self.alpha_zero = init_lr

    def get_callbacks(self, model_prefix='Model'):
        """Create the callbacks for snapshot-ensemble training.

        Args:
            model_prefix: prefix for the filename of the weights.

        Returns: list of 3 callbacks [ModelCheckpoint, LearningRateScheduler,
            SnapshotModelCheckpoint] which can be provided to the 'fit' function
        """
        if not os.path.exists('weights/'):
            os.makedirs('weights/')
        best_checkpoint = ModelCheckpoint('weights/%s-Best.h5' % model_prefix,
                                          monitor='val_acc',
                                          save_best_only=True,
                                          save_weights_only=True)
        lr_scheduler = LearningRateScheduler(
            schedule=self._cosine_anneal_schedule)
        snapshot_saver = SnapshotModelCheckpoint(
            self.T, self.M, fn_prefix='weights/%s' % model_prefix)
        return [best_checkpoint, lr_scheduler, snapshot_saver]

    def _cosine_anneal_schedule(self, t):
        # Cosine annealing, restarted every T // M epochs.
        cycle_length = self.T // self.M
        angle = np.pi * (t % cycle_length) / cycle_length
        return float(self.alpha_zero / 2 * (np.cos(angle) + 1))
from keras.callbacks import TensorBoard
import numpy as np
import os
class TensorBoardGrouped(TensorBoard):
    """TensorBoard basic visualizations.

    [TensorBoard](https://www.tensorflow.org/guide/summaries_and_tensorboard)
    is a visualization tool provided with TensorFlow.

    This callback is a subclass of `keras.callbacks.TensorBoard`.
    The only difference is that the training and validation logs are
    grouped and written to the same plot.

    It's a drop-in replacement for the keras callback.
    The arguments are the same.
    """

    def __init__(self, log_dir='./logs', *args, **kwargs):
        self.base_log_dir = log_dir
        # Train and validation summaries live in sibling subdirectories so
        # TensorBoard overlays both runs on the same plots.
        self.train_log_dir = os.path.join(log_dir, 'train')
        self.val_log_dir = os.path.join(log_dir, 'val')
        super(TensorBoardGrouped, self).__init__(self.train_log_dir,
                                                 *args,
                                                 **kwargs)

    def set_model(self, model):
        super(TensorBoardGrouped, self).set_model(model)
        import tensorflow as tf
        # The parent class owns self.writer (train); add one for validation.
        self.val_writer = tf.summary.FileWriter(self.val_log_dir)

    def _write_logs(self, logs, index):
        import tensorflow as tf
        for name, value in logs.items():
            if name in ['batch', 'size']:
                continue
            # 'val_*' metrics are routed to the validation writer under the
            # same tag as their training counterpart.
            if name.startswith('val_'):
                writer = self.val_writer
                name = name[4:]  # remove val_
            else:
                writer = self.writer
            summary = tf.Summary()
            summary_value = summary.value.add()
            if isinstance(value, np.ndarray):
                summary_value.simple_value = value.item()
            else:
                summary_value.simple_value = value
            summary_value.tag = name
            writer.add_summary(summary, index)
        self.writer.flush()
        self.val_writer.flush()

    def on_train_end(self, _):
        self.writer.close()
        # Bug fix: the validation writer was only flushed, never closed,
        # leaking the event-file handle at the end of training.
        self.val_writer.close()
from __future__ import absolute_import
from .clip import Clip
# Aliases.
clip = Clip
from __future__ import absolute_import
from keras import backend as K
from keras.constraints import Constraint
class Clip(Constraint):
    """Constrains each weight to the interval [-c, c].

    # Arguments
        c: Clipping parameter.
    """

    def __init__(self, c=0.01):
        self.c = c

    def __call__(self, p):
        # Clip symmetrically around zero.
        clipped = K.clip(p, -self.c, self.c)
        return clipped

    def get_config(self):
        config = {'name': self.__class__.__name__,
                  'c': self.c}
        return config
This diff is collapsed.
from __future__ import print_function
import numpy
from keras.utils.data_utils import get_file
from zipfile import ZipFile
from collections import Counter
from keras.preprocessing.sequence import pad_sequences
def load_data(path='conll2000.zip', min_freq=2):
    """Download and parse the CoNLL-2000 chunking corpus.

    # Arguments
        path: local filename for the cached archive.
        min_freq: minimum word frequency to be kept in the vocabulary;
            rarer words map to '<unk>'.

    # Returns
        Tuple (train, test, (vocab, pos_tags, chunk_tags)) where train and
        test are the processed (x, y_pos, y_chunk) arrays.
    """
    path = get_file(path,
                    origin='https://raw.githubusercontent.com/nltk'
                    '/nltk_data/gh-pages/packages/corpora/conll2000.zip')
    print(path)
    # Fix: use a context manager so the archive is closed even when
    # parsing raises (the original leaked the handle on error).
    with ZipFile(path, 'r') as archive:
        train = _parse_data(archive.open('conll2000/train.txt'))
        test = _parse_data(archive.open('conll2000/test.txt'))
    word_counts = Counter(row[0].lower() for sample in train for row in sample)
    vocab = ['<pad>', '<unk>']
    # Keep only words that occur at least min_freq times.
    vocab += [w for w, f in word_counts.items() if f >= min_freq]
    # in alphabetic order
    pos_tags = sorted(list(set(row[1] for sample in train + test for row in sample)))
    # in alphabetic order
    chunk_tags = sorted(list(set(row[2] for sample in train + test for row in sample)))
    train = _process_data(train, vocab, pos_tags, chunk_tags)
    test = _process_data(test, vocab, pos_tags, chunk_tags)
    return train, test, (vocab, pos_tags, chunk_tags)
def _parse_data(fh):
    """Parse a CoNLL-formatted binary file object.

    Returns a list of samples; each sample is a list of
    [word, pos_tag, chunk_tag] token rows. Closes the file object.
    """
    content = fh.read().decode().strip()
    fh.close()
    # Samples are separated by blank lines; tokens within a sample are
    # one whitespace-separated row per line.
    return [[line.split() for line in sample.split('\n')]
            for sample in content.split('\n\n')]
def _process_data(data, vocab, pos_tags, chunk_tags, maxlen=None, onehot=False):
    """Convert parsed samples into padded index (or one-hot) arrays.

    # Arguments
        data: list of samples, each a list of [word, pos, chunk] rows.
        vocab: word list; index 0 is '<pad>', index 1 is '<unk>'.
        pos_tags: sorted list of POS tag names.
        chunk_tags: sorted list of chunk tag names.
        maxlen: padding length; defaults to the longest sample.
        onehot: if True, return one-hot encoded label arrays instead of
            index arrays with a trailing singleton dimension.

    # Returns
        Tuple (x, y_pos, y_chunk) of numpy arrays.
    """
    if maxlen is None:
        maxlen = max(len(s) for s in data)
    word2idx = dict((w, i) for i, w in enumerate(vocab))
    # set to <unk> (index 1) if not in vocab
    x = [[word2idx.get(w[0].lower(), 1) for w in s] for s in data]
    y_pos = [[pos_tags.index(w[1]) for w in s] for s in data]
    y_chunk = [[chunk_tags.index(w[2]) for w in s] for s in data]
    x = pad_sequences(x, maxlen)  # left padding
    # left padded with -1. Indeed, any integer works as it will be masked
    y_pos = pad_sequences(y_pos, maxlen, value=-1)
    y_chunk = pad_sequences(y_chunk, maxlen, value=-1)
    if onehot:
        # Bug fix: this branch previously indexed an undefined name ``y``
        # and raised NameError whenever onehot=True.
        y_pos = numpy.eye(len(pos_tags), dtype='float32')[y_pos]
        y_chunk = numpy.eye(len(chunk_tags), dtype='float32')[y_chunk]
    else:
        y_pos = numpy.expand_dims(y_pos, 2)
        y_chunk = numpy.expand_dims(y_chunk, 2)
    return x, y_pos, y_chunk
This diff is collapsed.
from __future__ import absolute_import
from .convaware import ConvolutionAware
from __future__ import absolute_import
import numpy as np
from keras import backend as K
from keras.initializers import Initializer, Orthogonal
class ConvolutionAware(Initializer):
    """
    Initializer that generates orthogonal convolution filters in the fourier
    space. If this initializer is passed a shape that is not 3D or 4D,
    orthogonal initialization will be used.
    # Arguments
        eps_std: Standard deviation for the random normal noise used to break
        symmetry in the inverse fourier transform.
        seed: A Python integer. Used to seed the random generator.
    # References
        Armen Aghajanyan, https://arxiv.org/abs/1702.06295
    """

    def __init__(self, eps_std=0.05, seed=None):
        self.eps_std = eps_std
        self.seed = seed
        # Fallback initializer used for ranks this scheme does not cover.
        self.orthogonal = Orthogonal()

    def __call__(self, shape):
        # Build the initial weight tensor for a kernel of the given shape.
        rank = len(shape)
        if self.seed is not None:
            np.random.seed(self.seed)
        fan_in, fan_out = _compute_fans(shape, K.image_data_format())
        # He-style variance (2 / fan_in) used to rescale the filters below.
        variance = 2 / fan_in
        if rank == 3:
            # 1D convolution kernel: (row, stack, filters).
            row, stack_size, filters_size = shape
            transpose_dimensions = (2, 1, 0)
            kernel_shape = (row,)
            # np.fft.irfft takes the output length as its second argument;
            # the default s=[None] mirrors the 2D/3D irfft call signatures.
            correct_ifft = lambda shape, s=[None]: np.fft.irfft(shape, s[0])
            correct_fft = np.fft.rfft
        elif rank == 4:
            # 2D convolution kernel: (row, column, stack, filters).
            row, column, stack_size, filters_size = shape
            transpose_dimensions = (2, 3, 0, 1)
            kernel_shape = (row, column)
            correct_ifft = np.fft.irfft2
            correct_fft = np.fft.rfft2
        elif rank == 5:
            # 3D convolution kernel: (x, y, z, stack, filters).
            x, y, z, stack_size, filters_size = shape
            transpose_dimensions = (3, 4, 0, 1, 2)
            kernel_shape = (x, y, z)
            correct_fft = np.fft.rfftn
            correct_ifft = np.fft.irfftn
        else:
            # Not a supported convolution rank: fall back to orthogonal init.
            # NOTE(review): this branch returns a backend variable while the
            # branches above return a plain numpy array — confirm callers
            # accept both.
            return K.variable(self.orthogonal(shape), dtype=K.floatx())
        # Shape of the kernel in (real) fourier space.
        kernel_fourier_shape = correct_fft(np.zeros(kernel_shape)).shape
        init = []
        for i in range(filters_size):
            # Orthogonal basis across the stack dimension, in fourier space.
            basis = self._create_basis(
                stack_size, np.prod(kernel_fourier_shape))
            basis = basis.reshape((stack_size,) + kernel_fourier_shape)
            # Back to spatial domain, plus small noise to break symmetry.
            filters = [correct_ifft(x, kernel_shape) +
                       np.random.normal(0, self.eps_std, kernel_shape) for
                       x in basis]
            init.append(filters)
        # Format of array is now: filters, stack, row, column
        init = np.array(init)
        init = self._scale_filters(init, variance)
        return init.transpose(transpose_dimensions)

    def _create_basis(self, filters, size):
        """Return a (filters, size) matrix with (near-)orthogonal rows."""
        if size == 1:
            # Degenerate case: orthogonality is meaningless in 1 dimension.
            return np.random.normal(0.0, self.eps_std, (filters, size))
        # Stack enough SVD bases of random symmetric matrices to cover
        # `filters` rows, then truncate.
        nbb = filters // size + 1
        li = []
        for i in range(nbb):
            a = np.random.normal(0.0, 1.0, (size, size))
            a = self._symmetrize(a)
            u, _, v = np.linalg.svd(a)
            li.extend(u.T.tolist())
        p = np.array(li[:filters], dtype=K.floatx())
        return p

    def _symmetrize(self, a):
        # a + a.T double-counts the diagonal; subtract it once.
        return a + a.T - np.diag(a.diagonal())

    def _scale_filters(self, filters, variance):
        # Rescale so the empirical variance matches the target variance.
        c_var = np.var(filters)
        p = np.sqrt(variance / c_var)
        return filters * p

    def get_config(self):
        return {
            'eps_std': self.eps_std,
            'seed': self.seed
        }
def _compute_fans(shape, data_format='channels_last'):
    """Computes the number of input and output units for a weight shape.
    # Arguments
        shape: Integer shape tuple.
        data_format: Image data format to use for convolution kernels.
            Note that all kernels in Keras are standardized on the
            `channels_last` ordering (even when inputs are set
            to `channels_first`).
    # Returns
        A tuple of scalars, `(fan_in, fan_out)`.
    # Raises
        ValueError: in case of invalid `data_format` argument.
    """
    rank = len(shape)
    if rank == 2:
        # Dense kernel: (input_dim, output_dim).
        return shape[0], shape[1]
    if rank in (3, 4, 5):
        # Assuming convolution kernels (1D, 2D or 3D).
        # TH kernel shape: (depth, input_depth, ...)
        # TF kernel shape: (..., input_depth, depth)
        if data_format == 'channels_first':
            field_size = np.prod(shape[2:])
            return shape[1] * field_size, shape[0] * field_size
        if data_format == 'channels_last':
            field_size = np.prod(shape[:-2])
            return shape[-2] * field_size, shape[-1] * field_size
        raise ValueError('Invalid data_format: ' + data_format)
    # No specific assumptions.
    fan = np.sqrt(np.prod(shape))
    return fan, fan
from __future__ import absolute_import
from .advanced_activations.pelu import PELU
from .advanced_activations.srelu import SReLU
from .advanced_activations.swish import Swish
from .advanced_activations.sinerelu import SineReLU
from .convolutional.cosineconvolution2d import CosineConv2D
from .convolutional.cosineconvolution2d import CosineConvolution2D
from .convolutional.subpixelupscaling import SubPixelUpscaling
from .core import CosineDense
from .crf import CRF
from .capsule import Capsule
from .normalization.instancenormalization import InstanceNormalization
from .normalization.groupnormalization import GroupNormalization
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment