# progressive.py

# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Code probability model used for entropy coding."""

import json

from six.moves import xrange
import tensorflow as tf

from entropy_coder.lib import blocks
from entropy_coder.model import entropy_coder_model
from entropy_coder.model import model_factory

# pylint: disable=not-callable


class BrnnPredictor(blocks.BlockBase):
  """Estimates the binary codes of one layer from two sources of context.

  The codes of the current layer are seen through a raster-scan convolution,
  while the state coming from the previous layer/iteration goes through a
  regular convolution. Both feature maps are concatenated and fed to a
  line-wise raster-scan convolutional LSTM followed by two 1x1 convolutions.
  """

  def __init__(self, code_depth, name=None):
    """Creates the sub-blocks used by the predictor.

    Args:
      code_depth: Output depth of the last convolution of the predictor. The
        intermediate layers use twice this depth.
      name: Block name, forwarded unchanged to the base class constructor.
    """
    super(BrnnPredictor, self).__init__(name)

    with self._BlockScope():
      hidden_depth = 2 * code_depth

      def tanh_conv(depth, filter_size):
        # Unit-stride 'SAME' convolution with a fresh blocks.Bias(0) and a
        # tanh activation.
        return blocks.Conv2D(
            depth, filter_size, [1, 1], 'SAME',
            bias=blocks.Bias(0), act=tf.tanh)

      # NOTE(review): the sub-blocks below are created in a fixed order; names
      # assigned by the blocks library presumably depend on that order, so
      # confirm before reordering anything here.

      # Binary codes of the current layer/iteration: masked (raster scan)
      # convolution.
      self._adaptation0 = blocks.RasterScanConv2D(
          hidden_depth, [7, 7], [1, 1], 'SAME',
          strict_order=True,
          bias=blocks.Bias(0), act=tf.tanh)

      # State coming from the previous layer/iteration: regular convolution.
      self._adaptation1 = tanh_conv(hidden_depth, [3, 3])

      # Prediction stack applied on the concatenated features.
      line_lstm = blocks.LineOperator(
          blocks.RasterScanConv2DLSTM(
              depth=hidden_depth,
              filter_size=[1, 3],
              hidden_filter_size=[1, 3],
              strides=[1, 1],
              padding='SAME'))
      hidden_projection = tanh_conv(hidden_depth, [1, 1])
      code_projection = tanh_conv(code_depth, [1, 1])
      self._predictor = blocks.CompositionOperator(
          [line_lstm, hidden_projection, code_projection])

  def _Apply(self, x, s):
    """Returns the estimated codes for one layer.

    Args:
      x: Binary codes of the current layer; only the codes that come before
        in raster scan order are meant to contribute to each estimate.
      s: State computed from the previous iteration/layer.

    Returns:
      The output of the prediction stack applied on the concatenation (along
      axis 3) of the adapted codes and the adapted state.
    """
    code_features = self._adaptation0(x)
    state_features = self._adaptation1(s)
    merged_features = tf.concat(
        values=[code_features, state_features], axis=3)
    return self._predictor(merged_features)


class LayerPrediction(blocks.BlockBase):
  """Predicts the binary codes layer after layer.

  The block is stateful: every call handles the next layer, using a
  predictor dedicated to that layer and a state tensor that is refreshed
  after each call by a recurrent block shared between all the layers. It can
  be applied at most `layer_count` times.
  """

  def __init__(self, layer_count, code_depth, name=None):
    """Creates the per-layer predictors and the state update blocks.

    Args:
      layer_count: Maximum number of layers the block can be applied on.
      code_depth: Depth of the codes of one layer.
      name: Block name, forwarded unchanged to the base class constructor.
    """
    super(LayerPrediction, self).__init__(name)

    self._layer_count = layer_count

    # Index of the layer handled by the next call, and state produced by the
    # layers already handled (None until the first call).
    self._current_layer = 0
    self._layer_state = None

    with self._BlockScope():
      # Memory of the RNN is equivalent to the size of 2 layers of binary
      # codes. The same depth is used for the hidden convolutions.
      hidden_depth = 2 * code_depth

      def tanh_conv(depth, filter_size):
        # Unit-stride 'SAME' convolution with a fresh blocks.Bias(0) and a
        # tanh activation.
        return blocks.Conv2D(
            depth, filter_size, [1, 1], 'SAME',
            bias=blocks.Bias(0), act=tf.tanh)

      # NOTE(review): the sub-blocks below are created in a fixed order; names
      # assigned by the blocks library presumably depend on that order, so
      # confirm before reordering anything here.

      # One conditional code predictor per layer.
      self._brnn_predictors = [
          BrnnPredictor(code_depth) for _ in xrange(layer_count)]

      # One block per layer generating the input of the LSTM operating on
      # the iteration/depth domain.
      self._state_blocks = [
          blocks.CompositionOperator([
              tanh_conv(hidden_depth, [3, 3]),
              tanh_conv(code_depth, [3, 3]),
          ])
          for _ in xrange(layer_count)]

      # Recurrent block shared by all the layers, producing the next state.
      state_lstm = blocks.Conv2DLSTM(
          depth=hidden_depth,
          filter_size=[1, 1],
          hidden_filter_size=[1, 1],
          strides=[1, 1],
          padding='SAME')
      hidden_projection = tanh_conv(hidden_depth, [1, 1])
      state_projection = tanh_conv(code_depth, [1, 1])
      self._layer_rnn = blocks.CompositionOperator(
          [state_lstm, hidden_projection, state_projection])

  def _Apply(self, x):
    """Estimates the codes of the next layer and updates the layer state.

    Args:
      x: Binary codes of the layer to predict.

    Returns:
      The codes estimated by the predictor of the current layer, computed
      from `x` and from the state left by the previous calls.
    """
    layer_index = self._current_layer
    assert layer_index < self._layer_count

    # Without any previous iteration, the state is a float32 zero tensor
    # shaped like the codes.
    if self._layer_state is None:
      self._layer_state = tf.zeros_like(x, dtype=tf.float32)

    # Code estimation using both:
    # - the state from the previous iteration/layer,
    # - the binary codes that are before in raster scan order.
    predictor = self._brnn_predictors[layer_index]
    estimated_codes = predictor(x, self._layer_state)

    # The state is refreshed only after the prediction, so the estimate of a
    # layer is built from the state that preceded it.
    state_input = self._state_blocks[layer_index](x)
    self._layer_state = self._layer_rnn(state_input)
    self._current_layer += 1

    return estimated_codes


class ProgressiveModel(entropy_coder_model.EntropyCoderModel):
  """Progressive BRNN entropy coder model.

  The input codes are split along their last axis into layers of
  `layer_depth` channels. Each layer is predicted from the previous ones and
  the loss is the mean of the per-layer code lengths.
  """

  def __init__(self):
    super(ProgressiveModel, self).__init__()

  def Initialize(self, global_step, optimizer, config_string):
    """Parses the JSON configuration and stores the training handles.

    Args:
      global_step: Value passed as `global_step` to `optimizer.minimize` when
        the graph is built.
      optimizer: Object whose `minimize` method creates the optimization op.
        A falsy value means that no training op is created by BuildGraph.
      config_string: JSON encoded configuration. BuildGraph reads the keys
        'layer_depth' and 'layer_count'; 'coded_layer_count' is optional and
        defaults to 0.

    Raises:
      ValueError: If `config_string` is None.
    """
    if config_string is None:
      raise ValueError('The progressive model requires a configuration.')
    config = json.loads(config_string)
    config.setdefault('coded_layer_count', 0)

    self._config = config
    self._optimizer = optimizer
    self._global_step = global_step

  def BuildGraph(self, input_codes):
    """Build the graph corresponding to the progressive BRNN model.

    Sets `self.loss` and `self.average_code_length` (both the mean of the
    per-layer code lengths) and `self.train_op` (None without optimizer).

    Args:
      input_codes: Codes to model, indexed with 4 dimensions; the last
        dimension must be statically known and hold a whole number of layers
        of `layer_depth` channels. When 'coded_layer_count' is positive, only
        the first `coded_layer_count * layer_depth` channels are used.
        NOTE(review): presumably -1/+1 sign codes, given the conversion to
        zero/one codes before the code length computation - confirm.

    Raises:
      KeyError: If the configuration lacks 'layer_depth' or 'layer_count'.
      ValueError: If the layer depth is not positive, if the depth of the
        codes is not statically known, is smaller than the requested prefix,
        is not a multiple of the layer depth, or holds either no layer or
        more than 'layer_count' layers.
    """
    layer_depth = self._config['layer_depth']
    layer_count = self._config['layer_count']

    # Fail early with a clear message instead of a ZeroDivisionError (or a
    # negative layer count) in the arithmetic below.
    if layer_depth <= 0:
      raise ValueError(
          'Layer depth must be positive: {}'.format(layer_depth))

    code_depth = input_codes.get_shape()[-1].value
    # The layer arithmetic is done in Python on the static shape, so an
    # unknown depth cannot be handled.
    if code_depth is None:
      raise ValueError(
          'The depth (last dimension) of the input codes must be statically '
          'known.')

    if self._config['coded_layer_count'] > 0:
      # Only keep the layers that are requested by the configuration.
      prefix_depth = self._config['coded_layer_count'] * layer_depth
      if code_depth < prefix_depth:
        raise ValueError('Invalid prefix depth: {} VS {}'.format(
            prefix_depth, code_depth))
      input_codes = input_codes[:, :, :, :prefix_depth]

    code_depth = input_codes.get_shape()[-1].value
    if code_depth % layer_depth != 0:
      raise ValueError(
          'Code depth must be a multiple of the layer depth: {} vs {}'.format(
              code_depth, layer_depth))
    code_layer_count = code_depth // layer_depth
    if code_layer_count > layer_count:
      raise ValueError('Input codes have too many layers: {}, max={}'.format(
          code_layer_count, layer_count))
    # tf.split cannot produce zero pieces and the first layer is needed below.
    if code_layer_count == 0:
      raise ValueError('Input codes must contain at least one layer.')

    # Block used to estimate binary codes.
    layer_prediction = LayerPrediction(layer_count, layer_depth)

    # Block used to compute code lengths.
    code_length_block = blocks.CodeLength()

    # Saturation margin of the predictions, to avoid infinite code length.
    epsilon = 0.001

    # Loop over all the layers.
    layer_code_lengths = []
    code_layers = tf.split(
        value=input_codes, num_or_size_splits=code_layer_count, axis=3)
    for k, x in enumerate(code_layers):
      predicted_x = layer_prediction(x)
      predicted_x = tf.clip_by_value(
          predicted_x, -1 + epsilon, +1 - epsilon)
      layer_code_lengths.append(code_length_block(
          blocks.ConvertSignCodeToZeroOneCode(x),
          blocks.ConvertSignCodeToZeroOneCode(predicted_x)))
      tf.summary.scalar(
          'code_length_layer_{:02d}'.format(k), layer_code_lengths[-1])
    code_length = tf.stack(layer_code_lengths)
    self.loss = tf.reduce_mean(code_length)
    tf.summary.scalar('loss', self.loss)

    # Loop over all the remaining layers just to make sure they are
    # instantiated. Otherwise, loading model params could fail. The outputs
    # of these calls are not used.
    dummy_x = tf.zeros_like(code_layers[0])
    for _ in xrange(layer_count - code_layer_count):
      layer_prediction(dummy_x)

    # Mean code length over the coded layers (same reduction as the loss).
    self.average_code_length = tf.reduce_mean(code_length)

    if self._optimizer:
      optim_op = self._optimizer.minimize(self.loss,
                                          global_step=self._global_step)
      block_updates = blocks.CreateBlockUpdates()
      if block_updates:
        # The block updates are grouped under a control dependency on the
        # optimization op, so running train_op runs both.
        with tf.get_default_graph().control_dependencies([optim_op]):
          self.train_op = tf.group(*block_updates)
      else:
        self.train_op = optim_op
    else:
      self.train_op = None

  def GetConfigStringForUnitTest(self):
    """Returns a small JSON configuration (layers of depth 1, 8 layers)."""
    return (
        '{\n'
        '"layer_depth": 1,\n'
        '"layer_count": 8\n'
        '}\n')


@model_factory.RegisterEntropyCoderModel('progressive')
def CreateProgressiveModel():
  """Returns a new ProgressiveModel on which Initialize was not called yet.

  The function is handed to model_factory.RegisterEntropyCoderModel with the
  key 'progressive'.
  """
  return ProgressiveModel()