# -*- coding: utf-8 -*- from __future__ import absolute_import from keras import backend as K from keras import activations from keras import regularizers from keras import initializers from keras import constraints from keras.layers import Layer from keras_contrib.utils.test_utils import to_tuple class Capsule(Layer): """Capsule Layer implementation in Keras This implementation is based on Dynamic Routing of Capsules, Geoffrey Hinton et. al. The Capsule Layer is a Neural Network Layer which helps modeling relationships in image and sequential data better than just CNNs or RNNs. It achieves this by understanding the spatial relationships between objects (in images) or words (in text) by encoding additional information about the image or text, such as angle of rotation, thickness and brightness, relative proportions etc. This layer can be used instead of pooling layers to lower dimensions and still capture important information about the relationships and structures within the data. A normal pooling layer would lose a lot of this information. This layer can be used on the output of any layer which has a 3-D output (including batch_size). For example, in image classification, it can be used on the output of a Conv2D layer for Computer Vision applications. Also, it can be used on the output of a GRU or LSTM Layer (Bidirectional or Unidirectional) for NLP applications. The default activation function is 'linear'. But, this layer is generally used with the 'squash' activation function (recommended). To use the squash activation function, do : from keras_contrib.activations import squash capsule = Capsule(num_capsule=10, dim_capsule=10, routings=3, share_weights=True, activation=squash) # Example usage : 1). COMPUTER VISION input_image = Input(shape=(None, None, 3)) conv_2d = Conv2D(64, (3, 3), activation='relu')(input_image) capsule = Capsule(num_capsule=10, dim_capsule=16, routings=3, activation='relu', share_weights=True)(conv_2d) 2). NLP maxlen = 72 max_features = 120000 input_text = Input(shape=(maxlen,)) embedding = Embedding(max_features, embed_size, weights=[embedding_matrix], trainable=False)(input_text) bi_gru = Bidirectional(GRU(64, return_seqeunces=True))(embedding) capsule = Capsule(num_capsule=5, dim_capsule=5, routings=4, activation='sigmoid', share_weights=True)(bi_gru) # Arguments num_capsule : Number of Capsules (int) dim_capsules : Dimensions of the vector output of each Capsule (int) routings : Number of dynamic routings in the Capsule Layer (int) share_weights : Whether to share weights between Capsules or not (boolean) activation : Activation function for the Capsules regularizer : Regularizer for the weights of the Capsules initializer : Initializer for the weights of the Caspules constraint : Constraint for the weights of the Capsules # Input shape 3D tensor with shape: (batch_size, input_num_capsule, input_dim_capsule) [any 3-D Tensor with the first dimension as batch_size] # Output shape 3D tensor with shape: (batch_size, num_capsule, dim_capsule) # References - [Dynamic-Routing-Between-Capsules] (https://arxiv.org/pdf/1710.09829.pdf) - [Keras-Examples-CIFAR10-CNN-Capsule]""" def __init__(self, num_capsule, dim_capsule, routings=3, share_weights=True, initializer='glorot_uniform', activation=None, regularizer=None, constraint=None, **kwargs): super(Capsule, self).__init__(**kwargs) self.num_capsule = num_capsule self.dim_capsule = dim_capsule self.routings = routings self.share_weights = share_weights self.activation = activations.get(activation) self.regularizer = regularizers.get(regularizer) self.initializer = initializers.get(initializer) self.constraint = constraints.get(constraint) def build(self, input_shape): input_shape = to_tuple(input_shape) input_dim_capsule = input_shape[-1] if self.share_weights: self.W = self.add_weight(name='capsule_kernel', shape=(1, input_dim_capsule, self.num_capsule * self.dim_capsule), initializer=self.initializer, regularizer=self.regularizer, constraint=self.constraint, trainable=True) else: input_num_capsule = input_shape[-2] self.W = self.add_weight(name='capsule_kernel', shape=(input_num_capsule, input_dim_capsule, self.num_capsule * self.dim_capsule), initializer=self.initializer, regularizer=self.regularizer, constraint=self.constraint, trainable=True) self.build = True def call(self, inputs): if self.share_weights: u_hat_vectors = K.conv1d(inputs, self.W) else: u_hat_vectors = K.local_conv1d(inputs, self.W, [1], [1]) # u_hat_vectors : The spatially transformed input vectors (with local_conv_1d) batch_size = K.shape(inputs)[0] input_num_capsule = K.shape(inputs)[1] u_hat_vectors = K.reshape(u_hat_vectors, (batch_size, input_num_capsule, self.num_capsule, self.dim_capsule)) u_hat_vectors = K.permute_dimensions(u_hat_vectors, (0, 2, 1, 3)) routing_weights = K.zeros_like(u_hat_vectors[:, :, :, 0]) for i in range(self.routings): capsule_weights = K.softmax(routing_weights, 1) outputs = K.batch_dot(capsule_weights, u_hat_vectors, [2, 2]) if K.ndim(outputs) == 4: outputs = K.sum(outputs, axis=1) if i < self.routings - 1: outputs = K.l2_normalize(outputs, -1) routing_weights = K.batch_dot(outputs, u_hat_vectors, [2, 3]) if K.ndim(routing_weights) == 4: routing_weights = K.sum(routing_weights, axis=1) return self.activation(outputs) def compute_output_shape(self, input_shape): return (None, self.num_capsule, self.dim_capsule) def get_config(self): config = {'num_capsule': self.num_capsule, 'dim_capsule': self.dim_capsule, 'routings': self.routings, 'share_weights': self.share_weights, 'activation': activations.serialize(self.activation), 'regularizer': regularizers.serialize(self.regularizer), 'initializer': initializers.serialize(self.initializer), 'constraint': constraints.serialize(self.constraint)} base_config = super(Capsule, self).get_config() return dict(list(base_config.items()) + list(config.items()))