Commit f5fc733a authored by Byzantine's avatar Byzantine
Browse files

Removing research/community models

parent 09bc9f54
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests of the block operators."""
import numpy as np
import tensorflow as tf
import block_base
import blocks_operator
class AddOneBlock(block_base.BlockBase):
  """A trivial test block that shifts every element of its input up by one."""

  def __init__(self, name=None):
    super(AddOneBlock, self).__init__(name)

  def _Apply(self, x):
    # Element-wise increment (broadcasts over tensor inputs).
    return 1.0 + x
class SquareBlock(block_base.BlockBase):
  """A trivial test block that squares its input element-wise."""

  def __init__(self, name=None):
    super(SquareBlock, self).__init__(name)

  def _Apply(self, x):
    # Element-wise square, kept as a plain multiply.
    return x * x
class BlocksOperatorTest(tf.test.TestCase):
  """Checks that CompositionOperator chains blocks left to right."""

  def testComposition(self):
    inputs = np.array([[1.0, 2.0, 3.0],
                       [-1.0, -2.0, -3.0]])
    # Expected: square(x + 1) for every element.
    expected = np.array([[4.0, 9.0, 16.0],
                         [0.0, 1.0, 4.0]])
    x = tf.placeholder(dtype=tf.float32, shape=[2, 3])
    composed = blocks_operator.CompositionOperator(
        [AddOneBlock(), SquareBlock()])
    y = composed(x)
    with self.test_session():
      actual = y.eval(feed_dict={x: inputs})
      self.assertAllClose(expected, actual)
# Run the test suite when executed as a script.
if __name__ == '__main__':
  tf.test.main()
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Basic blocks for building tensorflow models."""
import numpy as np
import tensorflow as tf
import block_base
import block_util
# pylint does not recognize block_base.BlockBase.__call__().
# pylint: disable=not-callable
def HandleConvPaddingModes(x, padding, kernel_shape, strides):
  """Returns an updated tensor and padding type for REFLECT and SYMMETRIC.

  SAME and VALID are passed through unchanged (except for 1x1 kernels, where
  every mode is equivalent to VALID). For REFLECT/SYMMETRIC the input is
  mirror-padded manually with tf.pad() and the mode is switched to VALID.

  Args:
    x: A 4D tensor with shape [batch_size, height, width, depth].
    padding: Padding mode (SAME, VALID, REFLECT, or SYMMETRIC).
    kernel_shape: Shape of convolution kernel that will be applied.
    strides: Convolution stride that will be used.

  Returns:
    x and padding after adjustments for REFLECT and SYMMETRIC.
  """
  # For 1x1 convolution, all padding modes are the same.
  if np.all(kernel_shape[:2] == 1):
    return x, 'VALID'
  if padding == 'REFLECT' or padding == 'SYMMETRIC':
    # We manually compute the number of paddings as if 'SAME'.
    # From Tensorflow kernel, the formulas are as follows.
    #   output_shape = ceil(input_shape / strides)
    #   paddings = (output_shape - 1) * strides + filter_size - input_shape
    # Let x, y, s be a shorthand notations for input_shape, output_shape, and
    # strides, respectively. Let (x - 1) = sn + r where 0 <= r < s. Note that
    #   y - 1 = ceil(x / s) - 1 = floor((x - 1) / s) = n
    # provided that x > 0. Therefore
    #   paddings = n * s + filter_size - (sn + r + 1)
    #            = filter_size - r - 1.
    input_shape = x.get_shape()  # shape at graph construction time
    img_shape = tf.shape(x)[1:3]  # image shape (no batch) at run time
    remainder = tf.mod(img_shape - 1, strides[1:3])
    pad_sizes = kernel_shape[:2] - remainder - 1
    pad_rows = pad_sizes[0]
    pad_cols = pad_sizes[1]
    # Split each padding as evenly as possible between the two sides; the
    # extra unit (if any) goes to the bottom/right.
    pad = tf.stack([[0, 0], tf.stack([pad_rows // 2, (pad_rows + 1) // 2]),
                    tf.stack([pad_cols // 2, (pad_cols + 1) // 2]), [0, 0]])
    # Manually pad the input and switch the padding mode to 'VALID'.
    x = tf.pad(x, pad, mode=padding)
    # tf.pad() loses static shape information; restore the statically known
    # batch and depth dimensions.
    x.set_shape([input_shape[0], x.get_shape()[1],
                 x.get_shape()[2], input_shape[3]])
    padding = 'VALID'
  return x, padding
class PassThrough(block_base.BlockBase):
  """Identity transform block: returns its input unchanged."""

  def __init__(self):
    # An empty name string disables BlockBase's name scoping.
    super(PassThrough, self).__init__(name='')

  def _Apply(self, inp):
    return inp

  @property
  def initialized(self):
    """This block holds no state, so it is always initialized."""
    return True
class Bias(object):
  """Wraps a scalar so that BiasAdd uses a constant initializer with it."""

  def __init__(self, value=0):
    # The constant every bias element is initialized to.
    self.value = value
class BiasAdd(block_base.BlockBase):
  """A tf.nn.bias_add wrapper.

  This wrapper may act as a PassThrough block depending on the initializer
  provided, to make easier optional bias applications in NN blocks, etc.
  See __init__() for the details.
  """

  def __init__(self, initializer=Bias(0), name=None):
    """Initializes Bias block.

    |initializer| parameter have two special cases.
    1. If initializer is None, then this block works as a PassThrough.
    2. If initializer is a Bias class object, then tf.constant_initializer is
       used with the stored value.

    Args:
      initializer: An initializer for the bias variable.
      name: Name of this block.
    """
    super(BiasAdd, self).__init__(name)
    with self._BlockScope():
      if isinstance(initializer, Bias):
        self._initializer = tf.constant_initializer(value=initializer.value)
      else:
        self._initializer = initializer
      # Created lazily on the first _Apply() call, once the input depth
      # is known.
      self._bias = None

  def _Apply(self, x):
    # Explicit None check (not truthiness): after the first call self._bias
    # is a tf variable, which must not be evaluated as a Python bool.
    # This matches the `is None` style used by Conv2DBase._Apply().
    if self._bias is None:
      # The bias vector has the same depth as the input's last dimension.
      init = self._initializer([int(x.get_shape()[-1])], x.dtype)
      self._bias = self.NewVar(init)
    return tf.nn.bias_add(x, self._bias)

  def CreateWeightLoss(self):
    """Biases do not contribute to the weight-decay loss."""
    return []
class LinearBase(block_base.BlockBase):
  """A matmul wrapper.

  Returns input * W, where matrix W can be customized through derivation.
  """

  def __init__(self, depth, name=None):
    super(LinearBase, self).__init__(name)
    with self._BlockScope():
      # Output depth of the matmul.
      self._depth = depth
      # Created lazily on the first _Apply() call, once the input depth
      # is known.
      self._matrix = None

  def _CreateKernel(self, shape, dtype):
    """Creates the [input_depth, output_depth] kernel; must be overridden."""
    raise NotImplementedError('This method must be sub-classed.')

  def _Apply(self, x):
    # Explicit None check (not truthiness): after the first call self._matrix
    # is a tf variable, which must not be evaluated as a Python bool.
    # This matches the `is None` style used by Conv2DBase._Apply().
    if self._matrix is None:
      shape = [int(x.get_shape()[-1]), self._depth]
      self._matrix = self._CreateKernel(shape, x.dtype)
    return tf.matmul(x, self._matrix)
class Linear(LinearBase):
  """A matmul wrapper whose weight matrix W is learned."""

  def __init__(self,
               depth,
               initializer=block_util.RsqrtInitializer(),
               name=None):
    super(Linear, self).__init__(depth, name)
    with self._BlockScope():
      self._initializer = initializer

  def _CreateKernel(self, shape, dtype):
    # Draw the initial weights and wrap them in a fresh variable.
    return self.NewVar(self._initializer(shape, dtype))
class NN(block_base.BlockBase):
  """A neural network layer wrapper.

  Returns act(input * W + b), where matrix W, bias b are learned, and act is
  an optional activation function (i.e., nonlinearity).

  This transform block can handle multiple inputs. If x_1, x_2, ..., x_m are
  the inputs, then returns act(x_1 * W_1 + ... + x_m * W_m + b).

  Attributes:
    nunits: The dimension of the output.
  """

  def __init__(self,
               depth,
               bias=Bias(0),
               act=None,  # e.g., tf.nn.relu
               initializer=block_util.RsqrtInitializer(),
               linear_block_factory=(lambda d, i: Linear(d, initializer=i)),
               name=None):
    """Initializes NN block.

    Args:
      depth: The depth of the output.
      bias: An initializer for the bias, or a Bias class object. If None,
        there will be no bias term for this NN block. See BiasAdd block.
      act: Optional activation function. If None, no activation is applied.
      initializer: The initialization method for the matrix weights.
      linear_block_factory: A function used to create a linear block.
      name: The name of this block.
    """
    super(NN, self).__init__(name)
    with self._BlockScope():
      self._linear_block_factory = linear_block_factory
      self._depth = depth
      self._initializer = initializer
      # One linear block per input; created lazily on the first _Apply()
      # since the number of inputs is not known until then.
      self._matrices = None
      self._bias = BiasAdd(bias) if bias else PassThrough()
      self._act = act if act else PassThrough()

  def _Apply(self, *args):
    # Explicit None check so the linear blocks are created exactly once
    # (truthiness would silently re-create them if the first call had no
    # inputs). Matches the `is None` style used by Conv2DBase._Apply().
    if self._matrices is None:
      self._matrices = [
          self._linear_block_factory(self._depth, self._initializer)
          for _ in args]
    if len(self._matrices) != len(args):
      raise ValueError('{} expected {} inputs, but observed {} inputs'.format(
          self.name, len(self._matrices), len(args)))
    if len(args) > 1:
      # Multiple inputs: sum the per-input linear transforms.
      y = tf.add_n([m(x) for m, x in zip(self._matrices, args)])
    else:
      y = self._matrices[0](args[0])
    return self._act(self._bias(y))
class Conv2DBase(block_base.BlockBase):
  """A tf.nn.conv2d operator."""

  def __init__(self, depth, filter_size, strides, padding,
               bias=None, act=None, atrous_rate=None, conv=tf.nn.conv2d,
               name=None):
    """Initializes a Conv2DBase block.

    Arguments:
      depth: The output depth of the block (i.e. #filters); if negative, the
        output depth will be set to be the same as the input depth.
      filter_size: The size of the 2D filter. If it's specified as an integer,
        it's going to create a square filter. Otherwise, this is a tuple
        specifying the height x width of the filter.
      strides: A tuple specifying the y and x stride.
      padding: One of the valid padding modes allowed by tf.nn.conv2d, or
        'REFLECT'/'SYMMETRIC' for mirror padding.
      bias: An initializer for the bias, or a Bias class object. If None, there
        will be no bias in this block. See BiasAdd block.
      act: Optional activation function applied to the output.
      atrous_rate: optional input rate for ATrous convolution. If not None,
        this will be used and the strides will be ignored.
      conv: The convolution function to use (e.g. tf.nn.conv2d).
      name: The name for this conv2d op.
    """
    super(Conv2DBase, self).__init__(name)
    with self._BlockScope():
      self._act = act if act else PassThrough()
      self._bias = BiasAdd(bias) if bias else PassThrough()
      # Kernel shape is [height, width, input_depth, output_depth]. The
      # input depth (index 2) is left at 0 here and filled in lazily on the
      # first _Apply() call, once the input is seen.
      self._kernel_shape = np.zeros((4,), dtype=np.int32)
      self._kernel_shape[:2] = filter_size
      self._kernel_shape[3] = depth
      # NHWC strides; batch and depth strides stay fixed at 1.
      self._strides = np.ones((4,), dtype=np.int32)
      self._strides[1:3] = strides
      self._strides = list(self._strides)
      self._padding = padding
      self._kernel = None
      self._conv = conv
      self._atrous_rate = atrous_rate

  def _CreateKernel(self, shape, dtype):
    """Creates the convolution kernel variable; must be overridden."""
    raise NotImplementedError('This method must be sub-classed')

  def _Apply(self, x):
    """Apply the self._conv op.

    Arguments:
      x: input tensor. It needs to be a 4D tensor of the form
        [batch, height, width, channels].

    Returns:
      The output of the convolution of x with the current convolutional
      kernel.

    Raises:
      ValueError: if number of channels is not defined at graph construction.
    """
    input_shape = x.get_shape().with_rank(4)
    input_shape[3:].assert_is_fully_defined()  # channels must be defined
    if self._kernel is None:
      # First call: complete the kernel shape with the now-known input depth
      # and create the kernel variable.
      assert self._kernel_shape[2] == 0, self._kernel_shape
      self._kernel_shape[2] = input_shape[3].value
      if self._kernel_shape[3] < 0:
        # Make output depth be the same as input depth.
        self._kernel_shape[3] = self._kernel_shape[2]
      self._kernel = self._CreateKernel(self._kernel_shape, x.dtype)
    # REFLECT/SYMMETRIC are implemented by pre-padding and switching to
    # VALID; SAME/VALID pass through unchanged.
    x, padding = HandleConvPaddingModes(
        x, self._padding, self._kernel_shape, self._strides)
    if self._atrous_rate is None:
      x = self._conv(x, self._kernel, strides=self._strides, padding=padding)
    else:
      # Atrous convolution takes a dilation rate instead of strides.
      x = self._conv(x, self._kernel, rate=self._atrous_rate, padding=padding)
    if self._padding != 'VALID':
      # Manually update shape. Known shape information can be lost by tf.pad().
      height = (1 + (input_shape[1].value - 1) // self._strides[1]
                if input_shape[1].value else None)
      width = (1 + (input_shape[2].value - 1) // self._strides[2]
               if input_shape[2].value else None)
      shape = x.get_shape()
      x.set_shape([shape[0], height, width, shape[3]])
    return self._act(self._bias(x))
class Conv2D(Conv2DBase):
  """A tf.nn.conv2d operator with a learned kernel."""

  def __init__(self, depth, filter_size, strides, padding,
               bias=None, act=None, initializer=None, name=None):
    """Initializes a Conv2D block.

    Arguments:
      depth: The output depth of the block (i.e., #filters)
      filter_size: The size of the 2D filter. If it's specified as an integer,
        it's going to create a square filter. Otherwise, this is a tuple
        specifying the height x width of the filter.
      strides: A tuple specifying the y and x stride.
      padding: One of the valid padding modes allowed by tf.nn.conv2d, or
        'REFLECT'/'SYMMETRIC' for mirror padding.
      bias: An initializer for the bias, or a Bias class object. If None, there
        will be no bias in this block. See BiasAdd block.
      act: Optional activation function applied to the output.
      initializer: Optional initializer for weights.
      name: The name for this conv2d op.
    """
    super(Conv2D, self).__init__(depth, filter_size, strides, padding, bias,
                                 act, conv=tf.nn.conv2d, name=name)
    with self._BlockScope():
      # Default to an rsqrt initializer over the kernel's spatial and
      # input-depth dimensions when none is supplied.
      self._initializer = (initializer if initializer is not None
                           else block_util.RsqrtInitializer(dims=(0, 1, 2)))

  def _CreateKernel(self, shape, dtype):
    initial_value = self._initializer(shape, dtype)
    return self.NewVar(initial_value)
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for basic tensorflow blocks_std."""
from __future__ import division
from __future__ import unicode_literals
import math
import os
import numpy as np
from six.moves import xrange
import tensorflow as tf
import blocks_std
def _NumpyConv2D(x, f, strides, padding, rate=1):
assert strides[0] == 1 and strides[3] == 1, strides
if rate > 1:
f_shape = f.shape
expand_f = np.zeros([f_shape[0], ((f_shape[1] - 1) * rate + 1),
f_shape[2], f_shape[3]])
expand_f[:, [y * rate for y in range(f_shape[1])], :, :] = f
f = np.zeros([((f_shape[0] - 1) * rate + 1), expand_f.shape[1],
f_shape[2], f_shape[3]])
f[[y * rate for y in range(f_shape[0])], :, :, :] = expand_f
if padding != 'VALID':
assert x.shape[1] > 0 and x.shape[2] > 0, x.shape
# Compute the number of padded rows and cols.
# See Conv2D block comments for a math explanation.
remainder = ((x.shape[1] - 1) % strides[1], (x.shape[2] - 1) % strides[2])
pad_rows = f.shape[0] - remainder[0] - 1
pad_cols = f.shape[1] - remainder[1] - 1
pad = ((0, 0),
(pad_rows // 2, (pad_rows + 1) // 2),
(pad_cols // 2, (pad_cols + 1) // 2),
(0, 0))
# Pad the input using numpy.pad().
mode = None
if padding == 'SAME':
mode = str('constant')
if padding == 'REFLECT':
mode = str('reflect')
if padding == 'SYMMETRIC':
mode = str('symmetric')
x = np.pad(x, pad, mode=mode)
# Since x is now properly padded, proceed as if padding mode is VALID.
x_window = np.empty(
(x.shape[0],
int(math.ceil((x.shape[1] - f.shape[0] + 1) / strides[1])),
int(math.ceil((x.shape[2] - f.shape[1] + 1) / strides[2])),
np.prod(f.shape[:3])))
# The output at pixel location (i, j) is the result of linear transformation
# applied to the window whose top-left corner is at
# (i * row_stride, j * col_stride).
for i in xrange(x_window.shape[1]):
k = i * strides[1]
for j in xrange(x_window.shape[2]):
l = j * strides[2]
x_window[:, i, j, :] = x[:,
k:(k + f.shape[0]),
l:(l + f.shape[1]),
:].reshape((x_window.shape[0], -1))
y = np.tensordot(x_window, f.reshape((-1, f.shape[3])), axes=1)
return y
class BlocksStdTest(tf.test.TestCase):
  """Tests for the standard blocks in blocks_std.

  The Check* helpers walk backwards through the generated graph, asserting
  op types and inputs; the test* methods build small graphs and, for the
  convolutions, compare against the _NumpyConv2D reference implementation.
  """

  def CheckUnary(self, y, op_type):
    # Asserts y comes from a unary op of type op_type; returns its input.
    self.assertEqual(op_type, y.op.type)
    self.assertEqual(1, len(y.op.inputs))
    return y.op.inputs[0]

  def CheckBinary(self, y, op_type):
    # Asserts y comes from a binary op of type op_type; returns its inputs.
    self.assertEqual(op_type, y.op.type)
    self.assertEqual(2, len(y.op.inputs))
    return y.op.inputs

  def testPassThrough(self):
    p = blocks_std.PassThrough()
    x = tf.placeholder(dtype=tf.float32, shape=[1])
    # PassThrough must return the very same tensor object.
    self.assertIs(p(x), x)

  def CheckBiasAdd(self, y, b):
    # Asserts y is a BiasAdd using block b's bias variable; returns the
    # non-bias input.
    x, u = self.CheckBinary(y, 'BiasAdd')
    self.assertIs(u, b._bias.value())
    self.assertEqual(x.dtype, u.dtype.base_dtype)
    return x

  def testBiasAdd(self):
    b = blocks_std.BiasAdd()
    x = tf.placeholder(dtype=tf.float32, shape=[4, 8])
    y = b(x)
    # The bias shape must match the innermost (depth) dimension.
    self.assertEqual(b._bias.get_shape(), x.get_shape()[-1:])
    self.assertIs(x, self.CheckBiasAdd(y, b))

  def testBiasRankTest(self):
    b = blocks_std.BiasAdd()
    # A rank-1 input is rejected when the block is applied.
    x = tf.placeholder(dtype=tf.float32, shape=[10])
    with self.assertRaises(ValueError):
      b(x)

  def CheckLinear(self, y, m):
    # Asserts y is a MatMul using block m's matrix; returns the data input.
    x, w = self.CheckBinary(y, 'MatMul')
    self.assertIs(w, m._matrix.value())
    self.assertEqual(x.dtype, w.dtype.base_dtype)
    return x

  def testLinear(self):
    m = blocks_std.Linear(10)
    x = tf.placeholder(dtype=tf.float32, shape=[8, 9])
    y = m(x)
    self.assertEqual(m._matrix.get_shape(), [9, 10])
    self.assertIs(x, self.CheckLinear(y, m))

  def testLinearShared(self):
    # Create a linear map which is applied twice on different inputs
    # (i.e. the weights of the map are shared).
    linear_map = blocks_std.Linear(6)
    x1 = tf.random_normal(shape=[1, 5])
    x2 = tf.random_normal(shape=[1, 5])
    xs = x1 + x2
    # Apply the transform with the same weights.
    y1 = linear_map(x1)
    y2 = linear_map(x2)
    ys = linear_map(xs)
    with self.test_session() as sess:
      # Initialize all the variables of the graph.
      tf.global_variables_initializer().run()
      y1_res, y2_res, ys_res = sess.run([y1, y2, ys])
      # Linearity with shared weights: f(x1) + f(x2) == f(x1 + x2).
      self.assertAllClose(y1_res + y2_res, ys_res)

  def CheckNN(self, y, nn, act=None):
    # Walks backwards through act -> bias -> (sum of) matmuls; returns the
    # original inputs of the NN block.
    if act:
      pre_act = self.CheckUnary(y, act)
    else:
      pre_act = y
    if not isinstance(nn._bias, blocks_std.PassThrough):
      pre_bias = self.CheckBiasAdd(pre_act, nn._bias)
    else:
      pre_bias = pre_act
    if len(nn._matrices) > 1:
      # Multiple inputs are combined with a single AddN.
      self.assertEqual('AddN', pre_bias.op.type)
      pre_bias = pre_bias.op.inputs
    else:
      pre_bias = [pre_bias]
    self.assertEqual(len(pre_bias), len(nn._matrices))
    return [self.CheckLinear(u, m) for u, m in zip(pre_bias, nn._matrices)]

  def testNNWithoutActWithoutBias(self):
    nn = blocks_std.NN(10, act=None, bias=None)
    x = tf.placeholder(dtype=tf.float32, shape=[5, 7])
    y = nn(x)
    self.assertIs(x, self.CheckNN(y, nn)[0])

  def testNNWithoutBiasWithAct(self):
    nn = blocks_std.NN(10, act=tf.nn.relu, bias=None)
    x = tf.placeholder(dtype=tf.float32, shape=[5, 7])
    y = nn(x)
    self.assertIs(x, self.CheckNN(y, nn, 'Relu')[0])

  def testNNWithBiasWithoutAct(self):
    nn = blocks_std.NN(10, bias=blocks_std.Bias(0), act=None)
    x = tf.placeholder(dtype=tf.float32, shape=[5, 7])
    y = nn(x)
    self.assertIs(x, self.CheckNN(y, nn)[0])

  def testNNWithBiasWithAct(self):
    nn = blocks_std.NN(10, bias=blocks_std.Bias(0), act=tf.square)
    x = tf.placeholder(dtype=tf.float32, shape=[5, 7])
    y = nn(x)
    self.assertIs(x, self.CheckNN(y, nn, 'Square')[0])

  def testNNMultipleInputs(self):
    nn = blocks_std.NN(10, bias=blocks_std.Bias(0), act=tf.tanh)
    x = [tf.placeholder(dtype=tf.float32, shape=[5, 7]),
         tf.placeholder(dtype=tf.float32, shape=[5, 3]),
         tf.placeholder(dtype=tf.float32, shape=[5, 5])]
    y = nn(*x)
    xs = self.CheckNN(y, nn, 'Tanh')
    self.assertEqual(len(x), len(xs))
    for u, v in zip(x, xs):
      self.assertIs(u, v)

  def testConv2DSAME(self):
    np.random.seed(142536)
    x_shape = [4, 16, 11, 5]
    f_shape = [4, 3, 5, 6]
    strides = [1, 2, 2, 1]
    padding = 'SAME'
    conv = blocks_std.Conv2D(depth=f_shape[-1],
                             filter_size=f_shape[0:2],
                             strides=strides[1:3],
                             padding=padding,
                             act=None,
                             bias=None)
    x_value = np.random.normal(size=x_shape)
    x = tf.convert_to_tensor(x_value, dtype=tf.float32)
    y = conv(x)
    with self.test_session():
      tf.global_variables_initializer().run()
      f_value = conv._kernel.eval()
      y_value = y.eval()
    # Compare against the pure-numpy reference convolution.
    y_expected = _NumpyConv2D(x_value, f_value,
                              strides=strides, padding=padding)
    self.assertAllClose(y_expected, y_value)

  def testConv2DValid(self):
    np.random.seed(253647)
    x_shape = [4, 11, 12, 5]
    f_shape = [5, 2, 5, 5]
    strides = [1, 2, 2, 1]
    padding = 'VALID'
    conv = blocks_std.Conv2D(depth=f_shape[-1],
                             filter_size=f_shape[0:2],
                             strides=strides[1:3],
                             padding=padding,
                             act=None,
                             bias=None)
    x_value = np.random.normal(size=x_shape)
    x = tf.convert_to_tensor(x_value, dtype=tf.float32)
    y = conv(x)
    with self.test_session():
      tf.global_variables_initializer().run()
      f_value = conv._kernel.eval()
      y_value = y.eval()
    # Compare against the pure-numpy reference convolution.
    y_expected = _NumpyConv2D(x_value, f_value,
                              strides=strides, padding=padding)
    self.assertAllClose(y_expected, y_value)

  def testConv2DSymmetric(self):
    np.random.seed(364758)
    x_shape = [4, 10, 12, 6]
    f_shape = [3, 4, 6, 5]
    strides = [1, 1, 1, 1]
    padding = 'SYMMETRIC'
    conv = blocks_std.Conv2D(depth=f_shape[-1],
                             filter_size=f_shape[0:2],
                             strides=strides[1:3],
                             padding=padding,
                             act=None,
                             bias=None)
    x_value = np.random.normal(size=x_shape)
    x = tf.convert_to_tensor(x_value, dtype=tf.float32)
    y = conv(x)
    with self.test_session():
      tf.global_variables_initializer().run()
      f_value = conv._kernel.eval()
      y_value = y.eval()
    # Compare against the pure-numpy reference convolution.
    y_expected = _NumpyConv2D(x_value, f_value,
                              strides=strides, padding=padding)
    self.assertAllClose(y_expected, y_value)

  def testConv2DReflect(self):
    np.random.seed(768798)
    x_shape = [4, 10, 12, 6]
    f_shape = [3, 4, 6, 5]
    strides = [1, 2, 2, 1]
    padding = 'REFLECT'
    conv = blocks_std.Conv2D(depth=f_shape[-1],
                             filter_size=f_shape[0:2],
                             strides=strides[1:3],
                             padding=padding,
                             act=None,
                             bias=None)
    x_value = np.random.normal(size=x_shape)
    x = tf.convert_to_tensor(x_value, dtype=tf.float32)
    y = conv(x)
    with self.test_session():
      tf.global_variables_initializer().run()
      f_value = conv._kernel.eval()
      y_value = y.eval()
    # Compare against the pure-numpy reference convolution.
    y_expected = _NumpyConv2D(x_value, f_value,
                              strides=strides, padding=padding)
    self.assertAllClose(y_expected, y_value)

  def testConv2DBias(self):
    input_shape = [19, 14, 14, 64]
    filter_shape = [3, 7, 64, 128]
    strides = [1, 2, 2, 1]
    output_shape = [19, 6, 4, 128]
    conv = blocks_std.Conv2D(depth=filter_shape[-1],
                             filter_size=filter_shape[0:2],
                             strides=strides[1:3],
                             padding='VALID',
                             act=None,
                             bias=blocks_std.Bias(1))
    x = tf.placeholder(dtype=tf.float32, shape=input_shape)
    y = conv(x)
    # The output must be a BiasAdd with the expected static shape.
    self.CheckBiasAdd(y, conv._bias)
    self.assertEqual(output_shape, y.get_shape().as_list())
# Run the test suite when executed as a script.
if __name__ == '__main__':
  tf.test.main()
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Entropy coder model."""
class EntropyCoderModel(object):
  """Abstract base class for entropy coder models.

  Concrete models must override Initialize() and BuildGraph(); building the
  graph is expected to populate the attributes set to None below.
  """

  def __init__(self):
    # Loss used for training the model.
    self.loss = None
    # Tensorflow op to run to train the model.
    self.train_op = None
    # Tensor corresponding to the average code length of the input bit field
    # tensor, expressed in output bits per input bit. Effective compression
    # means a value between 0.0 and 1.0 (1.0 corresponds to no compression).
    self.average_code_length = None

  def Initialize(self, global_step, optimizer, config_string):
    """Configures the model; must be implemented by subclasses."""
    raise NotImplementedError()

  def BuildGraph(self, input_codes):
    """Build the Tensorflow graph corresponding to the entropy coder model.

    Args:
      input_codes: Tensor of size: batch_size x height x width x bit_depth
        corresponding to the codes to compress.
        The input codes are {-1, +1} codes.
    """
    # TODO:
    # - consider switching to {0, 1} codes.
    # - consider passing an extra tensor which gives for each (b, y, x)
    #   what is the actual depth (which would allow to use more or less bits
    #   for each (y, x) location.
    raise NotImplementedError()

  def GetConfigStringForUnitTest(self):
    """Returns a default model configuration to be used for unit tests."""
    return None
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Entropy coder model registrar."""
class ModelFactory(object):
  """Factory of encoder/decoder models.

  Maps model names to zero-argument factory callables.
  """

  def __init__(self):
    self._model_dictionary = {}

  def RegisterModel(self,
                    entropy_coder_model_name,
                    entropy_coder_model_factory):
    """Associates a model name with its factory callable."""
    self._model_dictionary[entropy_coder_model_name] = (
        entropy_coder_model_factory)

  def CreateModel(self, model_name):
    """Instantiates the model registered under |model_name|.

    Raises:
      KeyError: if no factory was registered under that name.
    """
    return self._model_dictionary[model_name]()

  def GetAvailableModels(self):
    """Returns the names of all registered models."""
    return self._model_dictionary.keys()
# Module-level singleton registry shared by all model registrations.
_model_registry = ModelFactory()


def GetModelRegistry():
  """Returns the singleton ModelFactory instance."""
  return _model_registry
class RegisterEntropyCoderModel(object):
  """Decorator that registers a model factory under a given name.

  Usage:
    @RegisterEntropyCoderModel('my_model')
    def CreateMyModel(): ...
  """

  def __init__(self, model_name):
    self._model_name = model_name

  def __call__(self, f):
    # Register the factory and return it unchanged, so the decorated name
    # remains directly callable.
    _model_registry.RegisterModel(self._model_name, f)
    return f
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Code probability model used for entropy coding."""
import json
from six.moves import xrange
import tensorflow as tf
from entropy_coder.lib import blocks
from entropy_coder.model import entropy_coder_model
from entropy_coder.model import model_factory
# pylint: disable=not-callable
class BrnnPredictor(blocks.BlockBase):
  """BRNN prediction applied on one layer."""

  def __init__(self, code_depth, name=None):
    super(BrnnPredictor, self).__init__(name)
    with self._BlockScope():
      hidden_depth = 2 * code_depth
      # What is coming from the previous layer/iteration
      # is going through a regular Conv2D layer as opposed to the binary codes
      # of the current layer/iteration which are going through a masked
      # convolution.
      self._adaptation0 = blocks.RasterScanConv2D(
          hidden_depth, [7, 7], [1, 1], 'SAME',
          strict_order=True,
          bias=blocks.Bias(0), act=tf.tanh)
      self._adaptation1 = blocks.Conv2D(
          hidden_depth, [3, 3], [1, 1], 'SAME',
          bias=blocks.Bias(0), act=tf.tanh)
      # Line-by-line LSTM followed by two 1x1 convolutions projecting back
      # to the code depth.
      self._predictor = blocks.CompositionOperator([
          blocks.LineOperator(
              blocks.RasterScanConv2DLSTM(
                  depth=hidden_depth,
                  filter_size=[1, 3],
                  hidden_filter_size=[1, 3],
                  strides=[1, 1],
                  padding='SAME')),
          blocks.Conv2D(hidden_depth, [1, 1], [1, 1], 'SAME',
                        bias=blocks.Bias(0), act=tf.tanh),
          blocks.Conv2D(code_depth, [1, 1], [1, 1], 'SAME',
                        bias=blocks.Bias(0), act=tf.tanh)
      ])

  def _Apply(self, x, s):
    """Estimates the codes x from masked conv on x and previous state s."""
    # Code estimation using both:
    # - the state from the previous iteration/layer,
    # - the binary codes that are before in raster scan order.
    h = tf.concat(values=[self._adaptation0(x), self._adaptation1(s)], axis=3)
    estimated_codes = self._predictor(h)
    return estimated_codes
class LayerPrediction(blocks.BlockBase):
  """Binary code prediction for one layer."""

  def __init__(self, layer_count, code_depth, name=None):
    super(LayerPrediction, self).__init__(name)
    self._layer_count = layer_count
    # No previous layer.
    self._layer_state = None
    self._current_layer = 0
    with self._BlockScope():
      # Layers used to do the conditional code prediction.
      self._brnn_predictors = []
      for _ in xrange(layer_count):
        self._brnn_predictors.append(BrnnPredictor(code_depth))
      # Layers used to generate the input of the LSTM operating on the
      # iteration/depth domain.
      hidden_depth = 2 * code_depth
      self._state_blocks = []
      for _ in xrange(layer_count):
        self._state_blocks.append(blocks.CompositionOperator([
            blocks.Conv2D(
                hidden_depth, [3, 3], [1, 1], 'SAME',
                bias=blocks.Bias(0), act=tf.tanh),
            blocks.Conv2D(
                code_depth, [3, 3], [1, 1], 'SAME',
                bias=blocks.Bias(0), act=tf.tanh)
        ]))
      # Memory of the RNN is equivalent to the size of 2 layers of binary
      # codes.
      hidden_depth = 2 * code_depth
      self._layer_rnn = blocks.CompositionOperator([
          blocks.Conv2DLSTM(
              depth=hidden_depth,
              filter_size=[1, 1],
              hidden_filter_size=[1, 1],
              strides=[1, 1],
              padding='SAME'),
          blocks.Conv2D(hidden_depth, [1, 1], [1, 1], 'SAME',
                        bias=blocks.Bias(0), act=tf.tanh),
          blocks.Conv2D(code_depth, [1, 1], [1, 1], 'SAME',
                        bias=blocks.Bias(0), act=tf.tanh)
      ])

  def _Apply(self, x):
    """Predicts codes for one layer; each call advances to the next layer."""
    # The block must not be applied more times than the declared layer count.
    assert self._current_layer < self._layer_count
    # Layer state is set to 0 when there is no previous iteration.
    if self._layer_state is None:
      self._layer_state = tf.zeros_like(x, dtype=tf.float32)
    # Code estimation using both:
    # - the state from the previous iteration/layer,
    # - the binary codes that are before in raster scan order.
    estimated_codes = self._brnn_predictors[self._current_layer](
        x, self._layer_state)
    # Compute the updated layer state.
    h = self._state_blocks[self._current_layer](x)
    self._layer_state = self._layer_rnn(h)
    self._current_layer += 1
    return estimated_codes
class ProgressiveModel(entropy_coder_model.EntropyCoderModel):
  """Progressive BRNN entropy coder model."""

  def __init__(self):
    super(ProgressiveModel, self).__init__()

  def Initialize(self, global_step, optimizer, config_string):
    """Parses the JSON |config_string| and stores the training handles."""
    if config_string is None:
      raise ValueError('The progressive model requires a configuration.')
    config = json.loads(config_string)
    # A coded_layer_count of 0 means: use all the available code layers
    # (BuildGraph only restricts the input when the value is > 0).
    if 'coded_layer_count' not in config:
      config['coded_layer_count'] = 0
    self._config = config
    self._optimizer = optimizer
    self._global_step = global_step

  def BuildGraph(self, input_codes):
    """Build the graph corresponding to the progressive BRNN model."""
    layer_depth = self._config['layer_depth']
    layer_count = self._config['layer_count']
    code_shape = input_codes.get_shape()
    code_depth = code_shape[-1].value
    if self._config['coded_layer_count'] > 0:
      # Keep only the first coded_layer_count layers of the input codes.
      prefix_depth = self._config['coded_layer_count'] * layer_depth
      if code_depth < prefix_depth:
        raise ValueError('Invalid prefix depth: {} VS {}'.format(
            prefix_depth, code_depth))
      input_codes = input_codes[:, :, :, :prefix_depth]
      code_shape = input_codes.get_shape()
      code_depth = code_shape[-1].value
    if code_depth % layer_depth != 0:
      raise ValueError(
          'Code depth must be a multiple of the layer depth: {} vs {}'.format(
              code_depth, layer_depth))
    code_layer_count = code_depth // layer_depth
    if code_layer_count > layer_count:
      raise ValueError('Input codes have too many layers: {}, max={}'.format(
          code_layer_count, layer_count))
    # Block used to estimate binary codes.
    layer_prediction = LayerPrediction(layer_count, layer_depth)
    # Block used to compute code lengths.
    code_length_block = blocks.CodeLength()
    # Loop over all the layers.
    code_length = []
    code_layers = tf.split(
        value=input_codes, num_or_size_splits=code_layer_count, axis=3)
    for k in xrange(code_layer_count):
      x = code_layers[k]
      predicted_x = layer_prediction(x)
      # Saturate the prediction to avoid infinite code length.
      epsilon = 0.001
      predicted_x = tf.clip_by_value(
          predicted_x, -1 + epsilon, +1 - epsilon)
      # Code length of the {-1, +1} codes converted to {0, 1}.
      code_length.append(code_length_block(
          blocks.ConvertSignCodeToZeroOneCode(x),
          blocks.ConvertSignCodeToZeroOneCode(predicted_x)))
      tf.summary.scalar('code_length_layer_{:02d}'.format(k), code_length[-1])
    code_length = tf.stack(code_length)
    self.loss = tf.reduce_mean(code_length)
    tf.summary.scalar('loss', self.loss)
    # Loop over all the remaining layers just to make sure they are
    # instantiated. Otherwise, loading model params could fail.
    dummy_x = tf.zeros_like(code_layers[0])
    for _ in xrange(layer_count - code_layer_count):
      dummy_predicted_x = layer_prediction(dummy_x)
    # Average bitrate over total_line_count.
    self.average_code_length = tf.reduce_mean(code_length)
    if self._optimizer:
      optim_op = self._optimizer.minimize(self.loss,
                                          global_step=self._global_step)
      # If the blocks registered any update ops, run them together with the
      # optimizer step; otherwise the optimizer step alone is the train op.
      block_updates = blocks.CreateBlockUpdates()
      if block_updates:
        with tf.get_default_graph().control_dependencies([optim_op]):
          self.train_op = tf.group(*block_updates)
      else:
        self.train_op = optim_op
    else:
      # No optimizer given: the model can only be used for evaluation.
      self.train_op = None

  def GetConfigStringForUnitTest(self):
    """Returns a minimal JSON configuration used by unit tests."""
    s = '{\n'
    s += '"layer_depth": 1,\n'
    s += '"layer_count": 8\n'
    s += '}\n'
    return s
@model_factory.RegisterEntropyCoderModel('progressive')
def CreateProgressiveModel():
  """Factory for the progressive entropy-coder model, registered as 'progressive'."""
  return ProgressiveModel()
# Image Compression with Neural Networks
This is a [TensorFlow](http://www.tensorflow.org/) model for compressing and
decompressing images using an already trained Residual GRU model as described
in [Full Resolution Image Compression with Recurrent Neural Networks](https://arxiv.org/abs/1608.05148). Please consult the paper for more details
on the architecture and compression results.
This code allows you to perform lossy compression using a model already
trained for compression. This code does not currently contain the
Entropy Coding portions of our paper.
## Prerequisites
The only software requirements for running the encoder and decoder is having
Tensorflow installed. You will also need to [download](http://download.tensorflow.org/models/compression_residual_gru-2016-08-23.tar.gz)
and extract the model residual_gru.pb.
If you want to generate the perceptual similarity under MS-SSIM, you will also
need to [Install SciPy](https://www.scipy.org/install.html).
## Encoding
The Residual GRU network is fully convolutional, but requires the image's
height and width in pixels to be a multiple of 32. There is an image in this folder
called example.png that is 768x1024 if one is needed for testing. We also
rely on TensorFlow's built in decoding ops, which support only PNG and JPEG at
time of release.
To encode an image, simply run the following command:
`python encoder.py --input_image=/your/image/here.png
--output_codes=output_codes.npz --iteration=15
--model=/path/to/model/residual_gru.pb
`
The iteration parameter specifies the lossy-quality to target for compression.
The quality can be [0-15], where 0 corresponds to a target of 1/8 (bits per
pixel) bpp and every increment results in an additional 1/8 bpp.
| Iteration | BPP | Compression Ratio |
|---: |---: |---: |
|0 | 0.125 | 192:1|
|1 | 0.250 | 96:1|
|2 | 0.375 | 64:1|
|3 | 0.500 | 48:1|
|4 | 0.625 | 38.4:1|
|5 | 0.750 | 32:1|
|6 | 0.875 | 27.4:1|
|7 | 1.000 | 24:1|
|8 | 1.125 | 21.3:1|
|9 | 1.250 | 19.2:1|
|10 | 1.375 | 17.4:1|
|11 | 1.500 | 16:1|
|12 | 1.625 | 14.7:1|
|13 | 1.750 | 13.7:1|
|14 | 1.875 | 12.8:1|
|15 | 2.000 | 12:1|
The output_codes file contains the numpy shape and a flattened, bit-packed
array of the codes. These can be inspected in python by using numpy.load().
## Decoding
After generating codes for an image, the lossy reconstructions for that image
can be done as follows:
`python decoder.py --input_codes=codes.npz --output_directory=/tmp/decoded/
--model=residual_gru.pb`
The output_directory will contain images decoded at each quality level.
## Comparing Similarity
One of our primary metrics for comparing how similar two images are
is MS-SSIM.
To generate these metrics on your images you can run:
`python msssim.py --original_image=/path/to/your/image.png
--compared_image=/tmp/decoded/image_15.png`
## Results
CSV results containing the post-entropy bitrates and MS-SSIM over the Kodak
dataset are available for reference. Each row of the CSV represents each of the Kodak
images in their dataset number (1-24). Each column of the CSV represents each
iteration of the model (1-16).
[Post Entropy Bitrates](https://storage.googleapis.com/compression-ml/residual_gru_results/bitrate.csv)
[MS-SSIM](https://storage.googleapis.com/compression-ml/residual_gru_results/msssim.csv)
## FAQ
#### How do I train my own compression network?
We currently don't provide the code to build and train a compression
graph from scratch.
#### I get an InvalidArgumentError: Incompatible shapes.
This is usually due to the fact that our network only supports images that are
both height and width divisible by 32 pixels. Try padding your images to 32
pixel boundaries.
## Contact Info
Model repository maintained by Nick Johnston ([nmjohn](https://github.com/nmjohn)).
#!/usr/bin/python
#
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Neural Network Image Compression Decoder.
Decompress an image from the numpy's npz format generated by the encoder.
Example usage:
python decoder.py --input_codes=output_codes.pkl --iteration=15 \
--output_directory=/tmp/compression_output/ --model=residual_gru.pb
"""
import io
import os
import numpy as np
import tensorflow as tf
# Command-line flags for the decoder script.
tf.flags.DEFINE_string('input_codes', None, 'Location of binary code file.')
tf.flags.DEFINE_integer('iteration', -1, 'The max quality level of '
                        'the images to output. Use -1 to infer from loaded '
                        ' codes.')
tf.flags.DEFINE_string('output_directory', None, 'Directory to save decoded '
                       'images.')
tf.flags.DEFINE_string('model', None, 'Location of compression model.')
# Parsed flag values, accessible module-wide after tf.app.run().
FLAGS = tf.flags.FLAGS
def get_input_tensor_names():
  """Names of the 16 binary-code input tensors in the frozen decoder graph."""
  return ['GruBinarizer/SignBinarizer/Sign:0'] + [
      'GruBinarizer/SignBinarizer/Sign_{}:0'.format(idx)
      for idx in range(1, 16)]
def get_output_tensor_names():
  """Names of the 16 decoded-image output tensors, one per quality level."""
  names = []
  for level in range(16):
    names.append('loop_{0:02d}/add:0'.format(level))
  return names
def main(_):
  """Decodes a .npz code file produced by encoder.py into PNG images.

  Writes one PNG per decoded quality level into FLAGS.output_directory as
  image_00.png ... image_NN.png.
  """
  if (FLAGS.input_codes is None or FLAGS.output_directory is None or
      FLAGS.model is None):
    print('\nUsage: python decoder.py --input_codes=output_codes.pkl '
          '--iteration=15 --output_directory=/tmp/compression_output/ '
          '--model=residual_gru.pb\n\n')
    return
  if FLAGS.iteration < -1 or FLAGS.iteration > 15:
    print('\n--iteration must be between 0 and 15 inclusive, or -1 to infer '
          'from file.\n')
    return
  iteration = FLAGS.iteration

  if not tf.gfile.Exists(FLAGS.output_directory):
    tf.gfile.MkDir(FLAGS.output_directory)

  if not tf.gfile.Exists(FLAGS.input_codes):
    print('\nInput codes not found.\n')
    return

  with tf.gfile.FastGFile(FLAGS.input_codes, 'rb') as code_file:
    contents = code_file.read()

  loaded_codes = np.load(io.BytesIO(contents))
  # Bug fix: the original `assert ['codes', 'shape'] not in loaded_codes.files`
  # was vacuously true (a list is never an element of a list of strings).
  # Explicitly require both arrays to be present.
  if not set(['codes', 'shape']).issubset(loaded_codes.files):
    raise ValueError('Input codes file must contain "codes" and "shape".')
  loaded_shape = loaded_codes['shape']
  loaded_array = loaded_codes['codes']

  # Unpack the bit-packed codes and restore the original code-tensor shape.
  unpacked_codes = np.reshape(np.unpackbits(loaded_array)
                              [:np.prod(loaded_shape)],
                              loaded_shape)

  numpy_int_codes = np.split(unpacked_codes, len(unpacked_codes))
  if iteration == -1:
    # Infer the quality level from the number of stored code layers.
    iteration = len(unpacked_codes) - 1
  # Convert {0, 1} bits back to {-1, +1} float sign codes.
  numpy_codes = [np.squeeze(x.astype(np.float32), 0) * 2 - 1 for x in
                 numpy_int_codes]

  with tf.Graph().as_default() as graph:
    # Load the frozen inference model for decoding.
    with tf.gfile.FastGFile(FLAGS.model, 'rb') as model_file:
      graph_def = tf.GraphDef()
      graph_def.ParseFromString(model_file.read())
      _ = tf.import_graph_def(graph_def, name='')

    # Small side graph used to encode the decoded tensors into PNGs.
    input_image = tf.placeholder(tf.uint8)
    encoded_image = tf.image.encode_png(input_image)

    input_tensors = [graph.get_tensor_by_name(name) for name in
                     get_input_tensor_names()][0:iteration+1]
    outputs = [graph.get_tensor_by_name(name) for name in
               get_output_tensor_names()][0:iteration+1]

    feed_dict = {key: value for (key, value) in zip(input_tensors,
                                                    numpy_codes)}

    with tf.Session(graph=graph) as sess:
      results = sess.run(outputs, feed_dict=feed_dict)

      for index, result in enumerate(results):
        # NOTE(review): assumes decoder outputs are roughly centered pixel
        # values needing a +0.5 rounding offset — confirm against the model.
        img = np.uint8(np.clip(result + 0.5, 0, 255))
        img = img.squeeze()
        png_img = sess.run(encoded_image, feed_dict={input_image: img})
        # Bug fix: PNG data is binary, so the file must be opened in 'wb'
        # (the original used text mode 'w').
        with tf.gfile.FastGFile(os.path.join(FLAGS.output_directory,
                                             'image_{0:02d}.png'.format(index)),
                                'wb') as output_image:
          output_image.write(png_img)
# Script entry point: parse flags and dispatch to main().
if __name__ == '__main__':
  tf.app.run()
#!/usr/bin/python
#
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Neural Network Image Compression Encoder.
Compresses an image to a binarized numpy array. The image must be padded to a
multiple of 32 pixels in height and width.
Example usage:
python encoder.py --input_image=/your/image/here.png \
--output_codes=output_codes.pkl --iteration=15 --model=residual_gru.pb
"""
import io
import os
import numpy as np
import tensorflow as tf
# Command-line flags for the encoder script.
tf.flags.DEFINE_string('input_image', None, 'Location of input image. We rely '
                       'on tf.image to decode the image, so only PNG and JPEG '
                       'formats are currently supported.')
tf.flags.DEFINE_integer('iteration', 15, 'Quality level for encoding image. '
                        'Must be between 0 and 15 inclusive.')
tf.flags.DEFINE_string('output_codes', None, 'File to save output encoding.')
tf.flags.DEFINE_string('model', None, 'Location of compression model.')
# Parsed flag values, accessible module-wide after tf.app.run().
FLAGS = tf.flags.FLAGS
def get_output_tensor_names():
  """Names of the 16 binarizer output tensors in the frozen encoder graph."""
  return ['GruBinarizer/SignBinarizer/Sign:0'] + [
      'GruBinarizer/SignBinarizer/Sign_{}:0'.format(idx)
      for idx in range(1, 16)]
def main(_):
  """Encodes --input_image into a bit-packed .npz file of binary codes."""
  if (FLAGS.input_image is None or FLAGS.output_codes is None or
      FLAGS.model is None):
    print('\nUsage: python encoder.py --input_image=/your/image/here.png '
          '--output_codes=output_codes.pkl --iteration=15 '
          '--model=residual_gru.pb\n\n')
    return
  if FLAGS.iteration < 0 or FLAGS.iteration > 15:
    print('\n--iteration must be between 0 and 15 inclusive.\n')
    return

  with tf.gfile.FastGFile(FLAGS.input_image, 'rb') as input_image:
    input_image_str = input_image.read()

  with tf.Graph().as_default() as graph:
    # Load the frozen inference model for encoding.
    with tf.gfile.FastGFile(FLAGS.model, 'rb') as model_file:
      graph_def = tf.GraphDef()
      graph_def.ParseFromString(model_file.read())
      _ = tf.import_graph_def(graph_def, name='')

    input_tensor = graph.get_tensor_by_name('Placeholder:0')
    outputs = [graph.get_tensor_by_name(name) for name in
               get_output_tensor_names()]

    input_image = tf.placeholder(tf.string)
    # Robustness fix: compare the extension case-insensitively so '.PNG'
    # and '.JPG' inputs are accepted as well.
    ext = os.path.splitext(FLAGS.input_image)[1].lower()
    if ext == '.png':
      decoded_image = tf.image.decode_png(input_image, channels=3)
    elif ext in ('.jpeg', '.jpg'):
      decoded_image = tf.image.decode_jpeg(input_image, channels=3)
    else:
      # Bug fix: was `assert False, ...`, which is silently stripped when
      # Python runs with -O; raise a real exception instead.
      raise ValueError('Unsupported file format {}'.format(ext))
    decoded_image = tf.expand_dims(decoded_image, 0)

    with tf.Session(graph=graph) as sess:
      img_array = sess.run(decoded_image, feed_dict={input_image:
                                                     input_image_str})
      results = sess.run(outputs, feed_dict={input_tensor: img_array})

    results = results[0:FLAGS.iteration + 1]
    int_codes = np.asarray([x.astype(np.int8) for x in results])

    # Convert {-1, +1} sign codes to {0, 1} bits and pack 8 bits per byte.
    int_codes = (int_codes + 1)//2
    export = np.packbits(int_codes.reshape(-1))

    output = io.BytesIO()
    np.savez_compressed(output, shape=int_codes.shape, codes=export)
    # Bug fix: npz data is binary, so write with mode 'wb' (original: 'w').
    with tf.gfile.FastGFile(FLAGS.output_codes, 'wb') as code_file:
      code_file.write(output.getvalue())
# Script entry point: parse flags and dispatch to main().
if __name__ == '__main__':
  tf.app.run()
#!/usr/bin/python
#
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Python implementation of MS-SSIM.
Usage:
python msssim.py --original_image=original.png --compared_image=distorted.png
"""
import numpy as np
from scipy import signal
from scipy.ndimage.filters import convolve
import tensorflow as tf
# Command-line flags: paths of the two images to compare.
tf.flags.DEFINE_string('original_image', None, 'Path to PNG image.')
tf.flags.DEFINE_string('compared_image', None, 'Path to PNG image.')
# Parsed flag values, accessible module-wide after tf.app.run().
FLAGS = tf.flags.FLAGS
def _FSpecialGauss(size, sigma):
"""Function to mimic the 'fspecial' gaussian MATLAB function."""
radius = size // 2
offset = 0.0
start, stop = -radius, radius + 1
if size % 2 == 0:
offset = 0.5
stop -= 1
x, y = np.mgrid[offset + start:stop, offset + start:stop]
assert len(x) == size
g = np.exp(-((x**2 + y**2)/(2.0 * sigma**2)))
return g / g.sum()
def _SSIMForMultiScale(img1, img2, max_val=255, filter_size=11,
filter_sigma=1.5, k1=0.01, k2=0.03):
"""Return the Structural Similarity Map between `img1` and `img2`.
This function attempts to match the functionality of ssim_index_new.m by
Zhou Wang: http://www.cns.nyu.edu/~lcv/ssim/msssim.zip
Arguments:
img1: Numpy array holding the first RGB image batch.
img2: Numpy array holding the second RGB image batch.
max_val: the dynamic range of the images (i.e., the difference between the
maximum the and minimum allowed values).
filter_size: Size of blur kernel to use (will be reduced for small images).
filter_sigma: Standard deviation for Gaussian blur kernel (will be reduced
for small images).
k1: Constant used to maintain stability in the SSIM calculation (0.01 in
the original paper).
k2: Constant used to maintain stability in the SSIM calculation (0.03 in
the original paper).
Returns:
Pair containing the mean SSIM and contrast sensitivity between `img1` and
`img2`.
Raises:
RuntimeError: If input images don't have the same shape or don't have four
dimensions: [batch_size, height, width, depth].
"""
if img1.shape != img2.shape:
raise RuntimeError('Input images must have the same shape (%s vs. %s).',
img1.shape, img2.shape)
if img1.ndim != 4:
raise RuntimeError('Input images must have four dimensions, not %d',
img1.ndim)
img1 = img1.astype(np.float64)
img2 = img2.astype(np.float64)
_, height, width, _ = img1.shape
# Filter size can't be larger than height or width of images.
size = min(filter_size, height, width)
# Scale down sigma if a smaller filter size is used.
sigma = size * filter_sigma / filter_size if filter_size else 0
if filter_size:
window = np.reshape(_FSpecialGauss(size, sigma), (1, size, size, 1))
mu1 = signal.fftconvolve(img1, window, mode='valid')
mu2 = signal.fftconvolve(img2, window, mode='valid')
sigma11 = signal.fftconvolve(img1 * img1, window, mode='valid')
sigma22 = signal.fftconvolve(img2 * img2, window, mode='valid')
sigma12 = signal.fftconvolve(img1 * img2, window, mode='valid')
else:
# Empty blur kernel so no need to convolve.
mu1, mu2 = img1, img2
sigma11 = img1 * img1
sigma22 = img2 * img2
sigma12 = img1 * img2
mu11 = mu1 * mu1
mu22 = mu2 * mu2
mu12 = mu1 * mu2
sigma11 -= mu11
sigma22 -= mu22
sigma12 -= mu12
# Calculate intermediate values used by both ssim and cs_map.
c1 = (k1 * max_val) ** 2
c2 = (k2 * max_val) ** 2
v1 = 2.0 * sigma12 + c2
v2 = sigma11 + sigma22 + c2
ssim = np.mean((((2.0 * mu12 + c1) * v1) / ((mu11 + mu22 + c1) * v2)))
cs = np.mean(v1 / v2)
return ssim, cs
def MultiScaleSSIM(img1, img2, max_val=255, filter_size=11, filter_sigma=1.5,
                   k1=0.01, k2=0.03, weights=None):
  """Return the MS-SSIM score between `img1` and `img2`.

  This function implements Multi-Scale Structural Similarity (MS-SSIM) Image
  Quality Assessment according to Zhou Wang's paper, "Multi-scale structural
  similarity for image quality assessment" (2003).
  Link: https://ece.uwaterloo.ca/~z70wang/publications/msssim.pdf

  Author's MATLAB implementation:
  http://www.cns.nyu.edu/~lcv/ssim/msssim.zip

  Arguments:
    img1: Numpy array holding the first RGB image batch.
    img2: Numpy array holding the second RGB image batch.
    max_val: the dynamic range of the images (i.e., the difference between the
      maximum and the minimum allowed values).
    filter_size: Size of blur kernel to use (will be reduced for small images).
    filter_sigma: Standard deviation for Gaussian blur kernel (will be reduced
      for small images).
    k1: Constant used to maintain stability in the SSIM calculation (0.01 in
      the original paper).
    k2: Constant used to maintain stability in the SSIM calculation (0.03 in
      the original paper).
    weights: List of weights for each level; if none, use five levels and the
      weights from the original paper.

  Returns:
    MS-SSIM score between `img1` and `img2`.

  Raises:
    RuntimeError: If input images don't have the same shape or don't have four
      dimensions: [batch_size, height, width, depth].
  """
  # Bug fix: the original passed the %-style args to RuntimeError as extra
  # positional arguments, so the message was never formatted.
  if img1.shape != img2.shape:
    raise RuntimeError(
        'Input images must have the same shape (%s vs. %s).' %
        (img1.shape, img2.shape))
  if img1.ndim != 4:
    raise RuntimeError(
        'Input images must have four dimensions, not %d' % img1.ndim)

  # Note: default weights don't sum to 1.0 but do match the paper / matlab code.
  weights = np.array(weights if weights else
                     [0.0448, 0.2856, 0.3001, 0.2363, 0.1333])
  levels = weights.size
  downsample_filter = np.ones((1, 2, 2, 1)) / 4.0
  im1, im2 = [x.astype(np.float64) for x in [img1, img2]]
  mssim = np.array([])
  mcs = np.array([])
  for _ in range(levels):
    ssim, cs = _SSIMForMultiScale(
        im1, im2, max_val=max_val, filter_size=filter_size,
        filter_sigma=filter_sigma, k1=k1, k2=k2)
    mssim = np.append(mssim, ssim)
    mcs = np.append(mcs, cs)
    # 2x2 mean-pool, then decimate by 2 in each spatial dimension.
    filtered = [convolve(im, downsample_filter, mode='reflect')
                for im in [im1, im2]]
    im1, im2 = [x[:, ::2, ::2, :] for x in filtered]
  return (np.prod(mcs[0:levels-1] ** weights[0:levels-1]) *
          (mssim[levels-1] ** weights[levels-1]))
def main(_):
  """Computes MS-SSIM between --original_image and --compared_image."""
  if FLAGS.original_image is None or FLAGS.compared_image is None:
    print('\nUsage: python msssim.py --original_image=original.png '
          '--compared_image=distorted.png\n\n')
    return

  if not tf.gfile.Exists(FLAGS.original_image):
    print('\nCannot find --original_image.\n')
    return
  if not tf.gfile.Exists(FLAGS.compared_image):
    print('\nCannot find --compared_image.\n')
    return

  # Bug fix: the original opened the files in text mode and then called
  # read('rb') — read() takes a byte count, not a mode, so this raised a
  # TypeError. Open in binary mode and call read() with no arguments.
  with tf.gfile.FastGFile(FLAGS.original_image, 'rb') as image_file:
    img1_str = image_file.read()
  with tf.gfile.FastGFile(FLAGS.compared_image, 'rb') as image_file:
    img2_str = image_file.read()

  input_img = tf.placeholder(tf.string)
  decoded_image = tf.expand_dims(tf.image.decode_png(input_img, channels=3), 0)
  with tf.Session() as sess:
    img1 = sess.run(decoded_image, feed_dict={input_img: img1_str})
    img2 = sess.run(decoded_image, feed_dict={input_img: img2_str})

  print((MultiScaleSSIM(img1, img2, max_val=255)))
# Script entry point: parse flags and dispatch to main().
if __name__ == '__main__':
  tf.app.run()
# Cross-View Training
This repository contains code for *Semi-Supervised Sequence Modeling with Cross-View Training*. Currently sequence tagging and dependency parsing tasks are supported.
## Requirements
* [Tensorflow](https://www.tensorflow.org/)
* [Numpy](http://www.numpy.org/)
This code has been run with TensorFlow 1.10.1 and Numpy 1.14.5; other versions may work, but have not been tested.
## Fetching and Preprocessing Data
Run `fetch_data.sh` to download and extract pretrained [GloVe](https://nlp.stanford.edu/projects/glove/) vectors, the [1 Billion Word Language Model Benchmark](http://www.statmt.org/lm-benchmark/) corpus of unlabeled data, and the CoNLL-2000 [text chunking](https://www.clips.uantwerpen.be/conll2000/chunking/) dataset. Unfortunately the other datasets from our paper are not freely available and so can't be included in this repository.
To apply CVT to other datasets, the data should be placed in `data/raw_data/<task_name>/(train|dev|test).txt`. For sequence tagging data, each line should contain a word followed by a space followed by that word's tag. Sentences should be separated by empty lines. For dependency parsing, each tag should be of the form ``<index_of_head>-<relation>`` (e.g., `0-root`).
After all of the data has been downloaded, run `preprocessing.py`.
## Training a Model
Run `python cvt.py --mode=train --model_name=chunking_model`. By default this trains a model on the chunking data downloaded with `fetch_data.sh`. To change which task(s) are trained on or model hyperparameters, modify [base/configure.py](base/configure.py). Models are automatically checkpointed every 1000 steps; training will continue from the latest checkpoint if training is interrupted and restarted. Model checkpoints and other data such as dev set accuracy over time are stored in `data/models/<model_name>`.
## Evaluating a Model
Run `python cvt.py --mode=eval --model_name=chunking_model`. A CVT model trained on the chunking data for 200k steps should get at least 97.1 F1 on the dev set and 96.6 F1 on the test set.
## Citation
If you use this code for your publication, please cite the original paper:
```
@inproceedings{clark2018semi,
title = {Semi-Supervised Sequence Modeling with Cross-View Training},
author = {Kevin Clark and Minh-Thang Luong and Christopher D. Manning and Quoc V. Le},
booktitle = {EMNLP},
year = {2018}
}
```
## Contact
* [Kevin Clark](https://cs.stanford.edu/~kevclark/) (@clarkkev).
* [Thang Luong](https://nlp.stanford.edu/~lmthang/) (@lmthang).
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Classes for storing hyperparameters, data locations, etc."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import json
from os.path import join
import tensorflow as tf
class Config(object):
  """Stores everything needed to train a model.

  Holds all hyperparameters plus derived file-system locations for raw data,
  preprocessed data, checkpoints, and summaries. Any default can be
  overridden via keyword arguments; unknown keywords raise ValueError.
  """

  def __init__(self, **kwargs):
    # general
    self.data_dir = './data'  # top directory for data (corpora, models, etc.)
    self.model_name = 'default_model'  # name identifying the current model

    # mode
    self.mode = 'train'  # either "train" or "eval"
    self.task_names = ['chunk']  # list of tasks this model will learn
                                 # more than one trains a multi-task model
    self.is_semisup = True  # whether to use CVT or train purely supervised
    self.for_preprocessing = False  # is this for the preprocessing script

    # embeddings
    self.pretrained_embeddings = 'glove.6B.300d.txt'  # which pretrained
                                                      # embeddings to use
    self.word_embedding_size = 300  # size of each word embedding

    # encoder
    self.use_chars = True  # whether to include a character-level cnn
    self.char_embedding_size = 50  # size of character embeddings
    self.char_cnn_filter_widths = [2, 3, 4]  # filter widths for the char cnn
    self.char_cnn_n_filters = 100  # number of filters for each filter width
    self.unidirectional_sizes = [1024]  # size of first Bi-LSTM
    self.bidirectional_sizes = [512]  # size of second Bi-LSTM
    self.projection_size = 512  # projections size for LSTMs and hidden layers

    # dependency parsing
    self.depparse_projection_size = 128  # size of the representations used in
                                         # the bilinear classifier for parsing

    # tagging
    self.label_encoding = 'BIOES'  # label encoding scheme for entity-level
                                   # tagging tasks
    self.label_smoothing = 0.1  # label smoothing rate for tagging tasks

    # optimization
    self.lr = 0.5  # base learning rate
    self.momentum = 0.9  # momentum
    self.grad_clip = 1.0  # maximum gradient norm during optimization
    self.warm_up_steps = 5000.0  # linearly ramp up the lr for this many steps
    self.lr_decay = 0.005  # factor for gradually decaying the lr

    # EMA
    self.ema_decay = 0.998  # EMA coefficient for averaged model weights
    self.ema_test = True  # whether to use EMA weights at test time
    self.ema_teacher = False  # whether to use EMA weights for the teacher model

    # regularization
    self.labeled_keep_prob = 0.5  # 1 - dropout on labeled examples
    self.unlabeled_keep_prob = 0.8  # 1 - dropout on unlabeled examples

    # sizing
    self.max_sentence_length = 100  # maximum length of unlabeled sentences
    self.max_word_length = 20  # maximum length of words for char cnn
    self.train_batch_size = 64  # train batch size
    self.test_batch_size = 64  # test batch size
    self.buckets = [(0, 15), (15, 40), (40, 1000)]  # buckets for binning
                                                    # sentences by length

    # training
    self.print_every = 25  # how often to print out training progress
    self.eval_dev_every = 500  # how often to evaluate on the dev set
    self.eval_train_every = 2000  # how often to evaluate on the train set
    self.save_model_every = 1000  # how often to checkpoint the model

    # data set
    self.train_set_percent = 100  # how much of the train set to use

    # Apply keyword overrides, rejecting unknown option names.
    # Portability fix: kwargs.iteritems() is Python 2 only; items() behaves
    # identically for this purpose and also works under Python 3.
    for k, v in kwargs.items():
      if k not in self.__dict__:
        raise ValueError("Unknown argument", k)
      self.__dict__[k] = v

    self.dev_set = self.mode == "train"  # whether to evaluate on the dev or
                                         # test set

    # locations of various data files
    self.raw_data_topdir = join(self.data_dir, 'raw_data')
    self.unsupervised_data = join(
        self.raw_data_topdir,
        'unlabeled_data',
        '1-billion-word-language-modeling-benchmark-r13output',
        'training-monolingual.tokenized.shuffled')
    self.pretrained_embeddings_file = join(
        self.raw_data_topdir, 'pretrained_embeddings',
        self.pretrained_embeddings)

    self.preprocessed_data_topdir = join(self.data_dir, 'preprocessed_data')
    self.embeddings_dir = join(self.preprocessed_data_topdir,
                               self.pretrained_embeddings.rsplit('.', 1)[0])
    self.word_vocabulary = join(self.embeddings_dir, 'word_vocabulary.pkl')
    self.word_embeddings = join(self.embeddings_dir, 'word_embeddings.pkl')

    self.model_dir = join(self.data_dir, "models", self.model_name)
    self.checkpoints_dir = join(self.model_dir, 'checkpoints')
    self.checkpoint = join(self.checkpoints_dir, 'checkpoint.ckpt')
    self.best_model_checkpoints_dir = join(
        self.model_dir, 'best_model_checkpoints')
    self.best_model_checkpoint = join(
        self.best_model_checkpoints_dir, 'checkpoint.ckpt')
    self.progress = join(self.checkpoints_dir, 'progress.pkl')
    self.summaries_dir = join(self.model_dir, 'summaries')
    self.history_file = join(self.model_dir, 'history.pkl')

  def write(self):
    """Serialize all config attributes to <model_dir>/config.json."""
    tf.gfile.MakeDirs(self.model_dir)
    with open(join(self.model_dir, 'config.json'), 'w') as f:
      f.write(json.dumps(self.__dict__, sort_keys=True, indent=4,
                         separators=(',', ': ')))
# coding=utf-8
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Utilities for handling word embeddings."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import re
import numpy as np
import tensorflow as tf
from base import utils
_CHARS = [
# punctuation
'!', '\'', '#', '$', '%', '&', '"', '(', ')', '*', '+', ',', '-', '.',
'/', '\\', '_', '`', '{', '}', '[', ']', '<', '>', ':', ';', '?', '@',
# digits
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
# letters
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N',
'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
# special characters
'£', '€', '®', '™', '�', '½', '»', '•', '—', '“', '”', '°', '‘', '’'
]
# words not in GloVe that still should have embeddings
_EXTRA_WORDS = [
# common digit patterns
'0/0', '0/00', '00/00', '0/000',
'00/00/00', '0/00/00', '00/00/0000', '0/00/0000',
'00-00', '00-00-00', '0-00-00', '00-00-0000', '0-00-0000', '0000-00-00',
'00-0-00-0', '00000000', '0:00.000', '00:00.000',
'0%', '00%', '00.' '0000.', '0.0bn', '0.0m', '0-', '00-',
# ontonotes uses **f to represent formulas and -amp- instead of amperstands
'**f', '-amp-'
]
SPECIAL_TOKENS = ['<pad>', '<unk>', '<start>', '<end>', '<missing>']
NUM_CHARS = len(_CHARS) + len(SPECIAL_TOKENS)
PAD, UNK, START, END, MISSING = 0, 1, 2, 3, 4
class Vocabulary(collections.OrderedDict):
  """Token -> id mapping; lookups of unseen tokens fall back to the UNK id."""

  def __getitem__(self, key):
    # dict.get never raises, so missing tokens silently map to UNK.
    return self.get(key, UNK)
@utils.Memoize
def get_char_vocab():
  """Return the char -> id Vocabulary (special tokens first, then _CHARS)."""
  # Bug fix: the original did `characters = _CHARS` and then insert()ed into
  # it, mutating the module-level _CHARS list as a side effect. Copy first.
  characters = list(_CHARS)
  for i, special in enumerate(SPECIAL_TOKENS):
    characters.insert(i, special)
  return Vocabulary({c: i for i, c in enumerate(characters)})
@utils.Memoize
def get_inv_char_vocab():
  """Return the inverse (id -> char) mapping of get_char_vocab()."""
  return dict((idx, ch) for ch, idx in get_char_vocab().items())
def get_word_vocab(config):
  """Load the pickled word vocabulary, wrapped so unknown words map to UNK."""
  return Vocabulary(utils.load_cpickle(config.word_vocabulary))
def get_word_embeddings(config):
  """Load the pretrained word-embedding matrix pickled during preprocessing."""
  return utils.load_cpickle(config.word_embeddings)
@utils.Memoize
def _punctuation_ids(vocab_path):
  """Return the set of vocabulary ids corresponding to punctuation tokens."""
  vocab = Vocabulary(utils.load_cpickle(vocab_path))
  # Portability fix: dict.iteritems() is Python 2 only; items() behaves the
  # same for iteration and also works under Python 3.
  return set(i for w, i in vocab.items() if w in [
      '!', '...', '``', '{', '}', '(', ')', '[', ']', '--', '-', ',', '.',
      "''", '`', ';', ':', '?'])
def get_punctuation_ids(config):
  """Return the (memoized) set of vocab ids for punctuation tokens."""
  return _punctuation_ids(config.word_vocabulary)
class PretrainedEmbeddingLoader(object):
  """Parses a GloVe-style text embedding file and pickles vocab + vectors."""

  def __init__(self, config):
    self.config = config
    self.vocabulary = {}  # word -> row index into self.vectors
    self.vectors = []  # list of float32 vectors, stacked on write
    self.vector_size = config.word_embedding_size

  def _add_vector(self, w):
    # Reserve a zero vector for words (special/extra tokens) that may not
    # appear in the embeddings file.
    if w not in self.vocabulary:
      self.vocabulary[w] = len(self.vectors)
      self.vectors.append(np.zeros(self.vector_size, dtype='float32'))

  def build(self):
    """Read the embeddings file and write the pickled vocabulary/vectors."""
    utils.log('loading pretrained embeddings from',
              self.config.pretrained_embeddings_file)
    for special in SPECIAL_TOKENS:
      self._add_vector(special)
    for extra in _EXTRA_WORDS:
      self._add_vector(extra)
    with tf.gfile.GFile(
        self.config.pretrained_embeddings_file, 'r') as f:
      for i, line in enumerate(f):
        if i % 10000 == 0:
          utils.log('on line', i)

        # NOTE(review): decode assumes GFile yields bytes here (Python 2 /
        # older TF); under Python 3 text mode this would need to change.
        split = line.decode('utf8').split()
        w = normalize_word(split[0])
        try:
          # List comprehension instead of np.array(map(...)) so the size
          # check also works under Python 3, where map() is lazy.
          vec = np.array([float(v) for v in split[1:]], dtype='float32')
          if vec.size != self.vector_size:
            utils.log('vector for line', i, 'has size', vec.size, 'so skipping')
            utils.log(line[:100] + '...')
            continue
        except ValueError:
          # Bug fix: was a bare `except:`, which also swallows
          # KeyboardInterrupt/SystemExit; only parse failures should be
          # skipped here.
          utils.log('can\'t parse line', i, 'so skipping')
          utils.log(line[:100] + '...')
          continue
        if w not in self.vocabulary:
          self.vocabulary[w] = len(self.vectors)
          self.vectors.append(vec)
    utils.log('writing vectors!')
    self._write()

  def _write(self):
    # Stack the per-word vectors into one (vocab_size, dim) matrix.
    utils.write_cpickle(np.vstack(self.vectors), self.config.word_embeddings)
    utils.write_cpickle(self.vocabulary, self.config.word_vocabulary)
def normalize_chars(w):
  """Map PTB-style bracket tokens to literal brackets and unescape / and *."""
  bracket_tokens = {
      '-LRB-': '(', '-RRB-': ')',
      '-LCB-': '{', '-RCB-': '}',
      '-LSB-': '[', '-RSB-': ']',
  }
  if w in bracket_tokens:
    return bracket_tokens[w]
  return w.replace(r'\/', '/').replace(r'\*', '*')
def normalize_word(w):
  """Lowercase, normalize bracket/escape tokens, and map every digit to '0'."""
  return re.sub(r'\d', '0', normalize_chars(w).lower())
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Various utilities."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import cPickle
import sys
import tensorflow as tf
class Memoize(object):
  """Decorator caching a function's results, keyed by its positional args."""

  def __init__(self, f):
    self.f = f
    self.cache = {}

  def __call__(self, *args):
    # EAFP: hit the cache first, compute and store only on a miss.
    try:
      return self.cache[args]
    except KeyError:
      result = self.f(*args)
      self.cache[args] = result
      return result
def load_cpickle(path, memoized=True):
  """Unpickle the object at path, caching by path when memoized is True."""
  return _load_cpickle_memoize(path) if memoized else _load_cpickle(path)
def _load_cpickle(path):
  """Unpickle the object stored at path (no caching)."""
  # Bug fix: pickle data is binary; mode 'r' only works by accident on
  # Python 2 / POSIX and breaks where newline translation or text decoding
  # applies. Open in binary mode.
  with tf.gfile.GFile(path, 'rb') as f:
    return cPickle.load(f)
@Memoize
def _load_cpickle_memoize(path):
  """Memoized variant of _load_cpickle: each path is unpickled only once."""
  return _load_cpickle(path)
def write_cpickle(o, path):
  """Pickle o to path with the highest protocol, creating parent dirs."""
  tf.gfile.MakeDirs(path.rsplit('/', 1)[0])
  # Bug fix: pickle output is binary, so open in 'wb' rather than text 'w'.
  with tf.gfile.GFile(path, 'wb') as f:
    cPickle.dump(o, f, -1)
def log(*args):
  """Write args to stdout, space-separated, then flush immediately."""
  sys.stdout.write(' '.join(str(arg) for arg in args))
  sys.stdout.write('\n')
  sys.stdout.flush()
def heading(*args):
  """Log args framed by 80-character '=' separator lines for emphasis."""
  log()
  log(80 * '=')
  log(*args)
  log(80 * '=')
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment