# sentiment_model.py

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf


def _dynamic_pooling(w_embs):
  """Dynamic pooling layer.

  A pooling layer with a fixed kernel size and stride would turn the
  variable-sized output of the convolution layer into a variable-sized
  output, whereas the following fully-connected layer expects a fixed input
  size. Instead, the number of pooling units is fixed (to 2) and the pooling
  region size is derived from the length of axis 1 of each input.

  Args:
    w_embs: A rank-3 tensor whose axis 1 (the pooled axis) has a statically
      known length of at least 2. In this file it is the output of a
      `Conv1D` layer.
  Returns:
    A rank-3 tensor: `w_embs` pooled along axis 1 with a window and a stride
    of half that axis' length; axes 0 and 2 are left unchanged.
  Raises:
    ValueError: If the length of axis 1 is unknown or smaller than 2.
  """
  # a Lambda layer maintain separate context, so that tf should be imported
  # here.
  import tensorflow as tf

  steps = w_embs.shape[1]
  # TF1 returns a `Dimension` (length stored in `.value`), while TF2 returns
  # a plain int or None, so only unwrap when the attribute exists.
  steps = getattr(steps, "value", steps)
  if steps is None or steps < 2:
    raise ValueError(
        "Dynamic pooling needs a statically known axis 1 of length >= 2, "
        "got %r." % (steps,))

  # Floor division: this module enables true division, and the pooling
  # window / stride must be integers.
  # NOTE(review): with pool2d's default "valid" padding a length of 3 gives a
  # window of 1 and therefore 3 pooled positions instead of 2 -- confirm that
  # such short inputs never reach this layer.
  pool_size = steps // 2

  # pool2d works on 4D inputs, so add a dummy axis of size 1, pool along
  # axis 1 only, and drop the dummy axis again.
  t = tf.expand_dims(w_embs, 2)
  pooled = tf.keras.backend.pool2d(
      t, (pool_size, 1), strides=(pool_size, 1), data_format="channels_last")
  return tf.squeeze(pooled, 2)


def _dynamic_pooling_output_shape(input_shape):
  """Output shape for the dynamic pooling layer.

  This function is used for keras Lambda layer to indicate
  the output shape of the dynamic poolic layer.

  Args:
    input_shape: A tuple for the input shape.
  Returns:
    output shape for the dynamic pooling layer.
  """
  shape = list(input_shape)
  assert len(shape) == 2  # only valid for 2D tensors
  shape[1] = 2
  return tuple(shape)


class CNN(tf.keras.models.Model):
  """CNN for sentiment analysis.

  The model embeds a sequence of token ids, runs two parallel Conv1D
  branches (kernel sizes 3 and 2) over the embeddings, reduces each branch
  with the dynamic pooling layer, concatenates the flattened results and
  classifies them with a softmax Dense layer.
  """

  def __init__(self, emb_dim, num_words, sentence_length, hid_dim,
      class_dim, dropout_rate):
    """Initialize CNN model.

    Args:
      emb_dim: The output dimension of the Embedding layer.
      num_words: The number of the most frequent tokens
        to be used from the corpus; used as the input (vocabulary)
        dimension of the Embedding layer.
      sentence_length: The number of words in each sentence, i.e. the
        length of the model's int32 input. Sentences are expected to be
        cut or padded to this length before being fed to the model.
      hid_dim: The number of filters of each Conv1D layer.
      class_dim: The number of units of the final softmax Dense layer,
        i.e. the number of output classes.
      dropout_rate: The rate passed to the Dropout layer, i.e. the
        fraction of its inputs that is dropped.
    """
    tokens = tf.keras.layers.Input(shape=(sentence_length,), dtype=tf.int32)

    embedded = tf.keras.layers.Embedding(num_words, output_dim=emb_dim)(tokens)

    def conv_branch(kernel_size):
      """Conv1D -> dynamic pooling -> Flatten over the embeddings."""
      branch = tf.keras.layers.Conv1D(
          hid_dim, kernel_size, activation="relu")(embedded)
      branch = tf.keras.layers.Lambda(
          _dynamic_pooling,
          output_shape=_dynamic_pooling_output_shape)(branch)
      return tf.keras.layers.Flatten()(branch)

    # The kernel-size-3 branch is built first so that layers are created in
    # the same order as before and keep their auto-generated names.
    branch_conv3 = conv_branch(3)
    branch_conv2 = conv_branch(2)

    merged = tf.keras.layers.concatenate([branch_conv2, branch_conv3], axis=1)
    merged = tf.keras.layers.Dropout(dropout_rate)(merged)
    merged = tf.keras.layers.BatchNormalization()(merged)

    output = tf.keras.layers.Dense(class_dim, activation="softmax")(merged)

    super(CNN, self).__init__(inputs=[tokens], outputs=output)