ModelZoo / ResNet50_tensorflow · Commits

Commit f5fc733a, authored Feb 03, 2022 by Byzantine
Removing research/community models
Parent: 09bc9f54

Showing 20 changed files with 0 additions and 2907 deletions (326 files changed in total).
research/attention_ocr/python/data_provider.py                             +0 -199
research/attention_ocr/python/data_provider_test.py                        +0 -72
research/attention_ocr/python/datasets/__init__.py                         +0 -19
research/attention_ocr/python/datasets/fsns.py                             +0 -183
research/attention_ocr/python/datasets/fsns_test.py                        +0 -103
research/attention_ocr/python/datasets/testdata/fsns/charset_size=134.txt  +0 -139
research/attention_ocr/python/datasets/testdata/fsns/download_data.py      +0 -16
research/attention_ocr/python/datasets/testdata/fsns/fsns-00000-of-00001   +0 -0
research/attention_ocr/python/datasets/testdata/fsns/links.txt             +0 -1
research/attention_ocr/python/datasets/unittest_utils.py                   +0 -63
research/attention_ocr/python/datasets/unittest_utils_test.py              +0 -64
research/attention_ocr/python/demo_inference.py                            +0 -96
research/attention_ocr/python/demo_inference_test.py                       +0 -89
research/attention_ocr/python/eval.py                                      +0 -78
research/attention_ocr/python/inception_preprocessing.py                   +0 -315
research/attention_ocr/python/metrics.py                                   +0 -90
research/attention_ocr/python/metrics_test.py                              +0 -97
research/attention_ocr/python/model.py                                     +0 -583
research/attention_ocr/python/model_test.py                                +0 -278
research/attention_ocr/python/sequence_layers.py                           +0 -422
Too many changes to show. To preserve performance only 326 of 326+ files are displayed.
research/attention_ocr/python/data_provider.py  deleted 100644 → 0
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Functions to read, decode and pre-process input data for the Model.
"""
import collections
import functools
import tensorflow as tf
from tensorflow.contrib import slim

import inception_preprocessing

# Tuple to store input data endpoints for the Model.
# It has following fields (tensors):
#   images: input images,
#     shape [batch_size x H x W x 3];
#   labels: ground truth label ids,
#     shape=[batch_size x seq_length];
#   labels_one_hot: labels in one-hot encoding,
#     shape [batch_size x seq_length x num_char_classes];
InputEndpoints = collections.namedtuple(
    'InputEndpoints', ['images', 'images_orig', 'labels', 'labels_one_hot'])

# A namedtuple to define a configuration for shuffled batch fetching.
#   num_batching_threads: A number of parallel threads to fetch data.
#   queue_capacity: a max number of elements in the batch shuffling queue.
#   min_after_dequeue: a min number elements in the queue after a dequeue, used
#     to ensure a level of mixing of elements.
ShuffleBatchConfig = collections.namedtuple('ShuffleBatchConfig', [
    'num_batching_threads', 'queue_capacity', 'min_after_dequeue'
])

DEFAULT_SHUFFLE_CONFIG = ShuffleBatchConfig(
    num_batching_threads=8, queue_capacity=3000, min_after_dequeue=1000)


def augment_image(image):
  """Augments the image with a random modification.

  Args:
    image: input Tensor image of rank 3, with the last dimension
           of size 3.

  Returns:
    Distorted Tensor image of the same shape.
  """
  with tf.variable_scope('AugmentImage'):
    height = image.get_shape().dims[0].value
    width = image.get_shape().dims[1].value

    # Random crop cut from the street sign image, resized to the same size.
    # Ensures that the crop covers at least 0.8 of the input image's area.
    bbox_begin, bbox_size, _ = tf.image.sample_distorted_bounding_box(
        tf.shape(image),
        bounding_boxes=tf.zeros([0, 0, 4]),
        min_object_covered=0.8,
        aspect_ratio_range=[0.8, 1.2],
        area_range=[0.8, 1.0],
        use_image_if_no_bounding_boxes=True)
    distorted_image = tf.slice(image, bbox_begin, bbox_size)

    # Randomly chooses one of the 4 interpolation methods.
    distorted_image = inception_preprocessing.apply_with_random_selector(
        distorted_image,
        lambda x, method: tf.image.resize_images(x, [height, width], method),
        num_cases=4)
    distorted_image.set_shape([height, width, 3])

    # Color distortion.
    distorted_image = inception_preprocessing.apply_with_random_selector(
        distorted_image,
        functools.partial(
            inception_preprocessing.distort_color, fast_mode=False),
        num_cases=4)
    distorted_image = tf.clip_by_value(distorted_image, -1.5, 1.5)

  return distorted_image


def central_crop(image, crop_size):
  """Returns a central crop for the specified size of an image.

  Args:
    image: A tensor with shape [height, width, channels]
    crop_size: A tuple (crop_width, crop_height)

  Returns:
    A tensor of shape [crop_height, crop_width, channels].
  """
  with tf.variable_scope('CentralCrop'):
    target_width, target_height = crop_size
    image_height, image_width = tf.shape(image)[0], tf.shape(image)[1]
    assert_op1 = tf.Assert(
        tf.greater_equal(image_height, target_height),
        ['image_height < target_height', image_height, target_height])
    assert_op2 = tf.Assert(
        tf.greater_equal(image_width, target_width),
        ['image_width < target_width', image_width, target_width])
    with tf.control_dependencies([assert_op1, assert_op2]):
      offset_width = tf.cast((image_width - target_width) / 2, tf.int32)
      offset_height = tf.cast((image_height - target_height) / 2, tf.int32)
      return tf.image.crop_to_bounding_box(image, offset_height, offset_width,
                                           target_height, target_width)


def preprocess_image(image, augment=False, central_crop_size=None,
                     num_towers=4):
  """Normalizes image to have values in a narrow range around zero.

  Args:
    image: a [H x W x 3] uint8 tensor.
    augment: optional, if True do random image distortion.
    central_crop_size: A tuple (crop_width, crop_height).
    num_towers: optional, number of shots of the same image in the input image.

  Returns:
    A float32 tensor of shape [H x W x 3] with RGB values in the required
    range.
  """
  with tf.variable_scope('PreprocessImage'):
    image = tf.image.convert_image_dtype(image, dtype=tf.float32)
    if augment or central_crop_size:
      if num_towers == 1:
        images = [image]
      else:
        images = tf.split(value=image, num_or_size_splits=num_towers, axis=1)
      if central_crop_size:
        view_crop_size = (int(central_crop_size[0] / num_towers),
                          central_crop_size[1])
        images = [central_crop(img, view_crop_size) for img in images]
      if augment:
        images = [augment_image(img) for img in images]
      image = tf.concat(images, 1)

    image = tf.subtract(image, 0.5)
    image = tf.multiply(image, 2.5)

  return image


def get_data(dataset,
             batch_size,
             augment=False,
             central_crop_size=None,
             shuffle_config=None,
             shuffle=True):
  """Wraps calls to DatasetDataProviders and shuffle_batch.

  For more details about supported Dataset objects refer to datasets/fsns.py.

  Args:
    dataset: a slim.data.dataset.Dataset object.
    batch_size: number of samples per batch.
    augment: optional, if True does random image distortion.
    central_crop_size: A tuple (crop_width, crop_height).
    shuffle_config: A namedtuple ShuffleBatchConfig.
    shuffle: if True use data shuffling.

  Returns:
    An InputEndpoints namedtuple with batched images, images_orig, labels and
    labels_one_hot tensors.
  """
  if not shuffle_config:
    shuffle_config = DEFAULT_SHUFFLE_CONFIG

  provider = slim.dataset_data_provider.DatasetDataProvider(
      dataset,
      shuffle=shuffle,
      common_queue_capacity=2 * batch_size,
      common_queue_min=batch_size)
  image_orig, label = provider.get(['image', 'label'])

  image = preprocess_image(
      image_orig, augment, central_crop_size, num_towers=dataset.num_of_views)
  label_one_hot = slim.one_hot_encoding(label, dataset.num_char_classes)

  images, images_orig, labels, labels_one_hot = (tf.train.shuffle_batch(
      [image, image_orig, label, label_one_hot],
      batch_size=batch_size,
      num_threads=shuffle_config.num_batching_threads,
      capacity=shuffle_config.queue_capacity,
      min_after_dequeue=shuffle_config.min_after_dequeue))

  return InputEndpoints(
      images=images,
      images_orig=images_orig,
      labels=labels,
      labels_one_hot=labels_one_hot)
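For orientation, a minimal usage sketch for the deleted get_data entry point (TF 1.x queue-runner era; the batch size and the FSNS test split are illustrative, not part of the commit):

import tensorflow as tf
from tensorflow.contrib.slim import queues

import data_provider
from datasets import fsns_test

# Build a shuffled input pipeline from the 5-record FSNS test split.
dataset = fsns_test.get_test_split()
data = data_provider.get_data(
    dataset, batch_size=4, augment=False, central_crop_size=None)

# data.images is float32 [4, 150, 600, 3]; data.labels_one_hot is [4, 37, 134].
with tf.Session() as sess:
  with queues.QueueRunners(sess):  # queue runners must be running first
    images_np = sess.run(data.images)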
research/attention_ocr/python/data_provider_test.py  deleted 100644 → 0
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for data_provider."""
import numpy as np
import tensorflow as tf
from tensorflow.contrib.slim import queues

import datasets
import data_provider


class DataProviderTest(tf.test.TestCase):
  def setUp(self):
    tf.test.TestCase.setUp(self)

  def test_preprocessed_image_values_are_in_range(self):
    image_shape = (5, 4, 3)
    fake_image = np.random.randint(low=0, high=255, size=image_shape)
    image_tf = data_provider.preprocess_image(fake_image)

    with self.test_session() as sess:
      image_np = sess.run(image_tf)

    self.assertEqual(image_np.shape, image_shape)
    min_value, max_value = np.min(image_np), np.max(image_np)
    self.assertTrue((-1.28 < min_value) and (min_value < 1.27))
    self.assertTrue((-1.28 < max_value) and (max_value < 1.27))

  def test_provided_data_has_correct_shape(self):
    batch_size = 4
    data = data_provider.get_data(
        dataset=datasets.fsns_test.get_test_split(),
        batch_size=batch_size,
        augment=True,
        central_crop_size=None)

    with self.test_session() as sess, queues.QueueRunners(sess):
      images_np, labels_np = sess.run([data.images, data.labels_one_hot])

    self.assertEqual(images_np.shape, (batch_size, 150, 600, 3))
    self.assertEqual(labels_np.shape, (batch_size, 37, 134))

  def test_optionally_applies_central_crop(self):
    batch_size = 4
    data = data_provider.get_data(
        dataset=datasets.fsns_test.get_test_split(),
        batch_size=batch_size,
        augment=True,
        central_crop_size=(500, 100))

    with self.test_session() as sess, queues.QueueRunners(sess):
      images_np = sess.run(data.images)

    self.assertEqual(images_np.shape, (batch_size, 100, 500, 3))


if __name__ == '__main__':
  tf.test.main()
research/attention_ocr/python/datasets/__init__.py  deleted 100644 → 0
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
from datasets import fsns
from datasets import fsns_test

# __all__ must contain strings; the original listed the module objects
# themselves, which breaks star-imports.
__all__ = ['fsns', 'fsns_test']
research/attention_ocr/python/datasets/fsns.py  deleted 100644 → 0
# -*- coding: utf-8 -*-
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Configuration to read FSNS dataset https://goo.gl/3Ldm8v."""
import os
import re
import tensorflow as tf
from tensorflow.contrib import slim
import logging

DEFAULT_DATASET_DIR = os.path.join(os.path.dirname(__file__), 'data', 'fsns')

# The dataset configuration, should be used only as a default value.
DEFAULT_CONFIG = {
    'name': 'FSNS',
    'splits': {
        'train': {
            'size': 1044868,
            'pattern': 'train/train*'
        },
        'test': {
            'size': 20404,
            'pattern': 'test/test*'
        },
        'validation': {
            'size': 16150,
            'pattern': 'validation/validation*'
        }
    },
    'charset_filename': 'charset_size=134.txt',
    'image_shape': (150, 600, 3),
    'num_of_views': 4,
    'max_sequence_length': 37,
    'null_code': 133,
    'items_to_descriptions': {
        'image': 'A [150 x 600 x 3] color image.',
        'label': 'Characters codes.',
        'text': 'A unicode string.',
        'length': 'A length of the encoded text.',
        'num_of_views': 'A number of different views stored within the image.'
    }
}


def read_charset(filename, null_character=u'\u2591'):
  """Reads a charset definition from a tab separated text file.

  The charset file has to have a format compatible with the FSNS dataset.

  Args:
    filename: a path to the charset file.
    null_character: a unicode character used to replace '<null>' character.
      The default value is a light shade block '░'.

  Returns:
    a dictionary with keys equal to character codes and values - unicode
    characters.
  """
  pattern = re.compile(r'(\d+)\t(.+)')
  charset = {}
  with tf.gfile.GFile(filename) as f:
    for i, line in enumerate(f):
      m = pattern.match(line)
      if m is None:
        logging.warning('incorrect charset file. line #%d: %s', i, line)
        continue
      code = int(m.group(1))
      char = m.group(2)
      if char == '<nul>':
        char = null_character
      charset[code] = char
  return charset


class _NumOfViewsHandler(slim.tfexample_decoder.ItemHandler):
  """Convenience handler to determine number of views stored in an image."""

  def __init__(self, width_key, original_width_key, num_of_views):
    super(_NumOfViewsHandler, self).__init__([width_key, original_width_key])
    self._width_key = width_key
    self._original_width_key = original_width_key
    self._num_of_views = num_of_views

  def tensors_to_item(self, keys_to_tensors):
    return tf.to_int64(
        self._num_of_views * keys_to_tensors[self._original_width_key] /
        keys_to_tensors[self._width_key])


def get_split(split_name, dataset_dir=None, config=None):
  """Returns a dataset tuple for FSNS dataset.

  Args:
    split_name: A train/test split name.
    dataset_dir: The base directory of the dataset sources, by default it uses
      a predefined CNS path (see DEFAULT_DATASET_DIR).
    config: A dictionary with dataset configuration. If None - will use the
      DEFAULT_CONFIG.

  Returns:
    A `Dataset` namedtuple.

  Raises:
    ValueError: if `split_name` is not a valid train/test split.
  """
  if not dataset_dir:
    dataset_dir = DEFAULT_DATASET_DIR

  if not config:
    config = DEFAULT_CONFIG

  if split_name not in config['splits']:
    raise ValueError('split name %s was not recognized.' % split_name)

  logging.info('Using %s dataset split_name=%s dataset_dir=%s', config['name'],
               split_name, dataset_dir)

  # Ignores the 'image/height' feature.
  zero = tf.zeros([1], dtype=tf.int64)
  keys_to_features = {
      'image/encoded':
          tf.FixedLenFeature((), tf.string, default_value=''),
      'image/format':
          tf.FixedLenFeature((), tf.string, default_value='png'),
      'image/width':
          tf.FixedLenFeature([1], tf.int64, default_value=zero),
      'image/orig_width':
          tf.FixedLenFeature([1], tf.int64, default_value=zero),
      'image/class':
          tf.FixedLenFeature([config['max_sequence_length']], tf.int64),
      'image/unpadded_class':
          tf.VarLenFeature(tf.int64),
      'image/text':
          tf.FixedLenFeature([1], tf.string, default_value=''),
  }
  items_to_handlers = {
      'image':
          slim.tfexample_decoder.Image(
              shape=config['image_shape'],
              image_key='image/encoded',
              format_key='image/format'),
      'label':
          slim.tfexample_decoder.Tensor(tensor_key='image/class'),
      'text':
          slim.tfexample_decoder.Tensor(tensor_key='image/text'),
      'num_of_views':
          _NumOfViewsHandler(
              width_key='image/width',
              original_width_key='image/orig_width',
              num_of_views=config['num_of_views'])
  }
  decoder = slim.tfexample_decoder.TFExampleDecoder(keys_to_features,
                                                    items_to_handlers)
  charset_file = os.path.join(dataset_dir, config['charset_filename'])
  charset = read_charset(charset_file)
  file_pattern = os.path.join(dataset_dir,
                              config['splits'][split_name]['pattern'])
  return slim.dataset.Dataset(
      data_sources=file_pattern,
      reader=tf.TFRecordReader,
      decoder=decoder,
      num_samples=config['splits'][split_name]['size'],
      items_to_descriptions=config['items_to_descriptions'],
      # additional parameters for convenience.
      charset=charset,
      num_char_classes=len(charset),
      num_of_views=config['num_of_views'],
      max_sequence_length=config['max_sequence_length'],
      null_code=config['null_code'])
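To make the charset format concrete, a short sketch of read_charset against the (also deleted) test charset below; the relative path assumes the working directory is the datasets package:

from datasets import fsns

# Each charset line is '<code>\t<char>'; '<nul>' is replaced by '░'.
charset = fsns.read_charset('testdata/fsns/charset_size=134.txt')
print(len(charset))   # 134 character classes
print(charset[133])   # '░', the null/padding character (null_code=133)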
research/attention_ocr/python/datasets/fsns_test.py  deleted 100644 → 0
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for FSNS datasets module."""
import collections
import os
import tensorflow as tf
from tensorflow.contrib import slim

from datasets import fsns
from datasets import unittest_utils

FLAGS = tf.flags.FLAGS


def get_test_split():
  config = fsns.DEFAULT_CONFIG.copy()
  config['splits'] = {'test': {'size': 5, 'pattern': 'fsns-00000-of-00001'}}
  return fsns.get_split('test', dataset_dir(), config)


def dataset_dir():
  return os.path.join(os.path.dirname(__file__), 'testdata/fsns')


class FsnsTest(tf.test.TestCase):
  def test_decodes_example_proto(self):
    expected_label = range(37)
    expected_image, encoded = unittest_utils.create_random_image(
        'PNG', shape=(150, 600, 3))
    serialized = unittest_utils.create_serialized_example({
        'image/encoded': [encoded],
        'image/format': [b'PNG'],
        'image/class': expected_label,
        'image/unpadded_class': range(10),
        'image/text': [b'Raw text'],
        'image/orig_width': [150],
        'image/width': [600]
    })

    decoder = fsns.get_split('train', dataset_dir()).decoder
    with self.test_session() as sess:
      data_tuple = collections.namedtuple('DecodedData', decoder.list_items())
      data = sess.run(data_tuple(*decoder.decode(serialized)))

    self.assertAllEqual(expected_image, data.image)
    self.assertAllEqual(expected_label, data.label)
    self.assertEqual([b'Raw text'], data.text)
    self.assertEqual([1], data.num_of_views)

  def test_label_has_shape_defined(self):
    serialized = 'fake'
    decoder = fsns.get_split('train', dataset_dir()).decoder

    [label_tf] = decoder.decode(serialized, ['label'])

    self.assertEqual(label_tf.get_shape().dims[0], 37)

  def test_dataset_tuple_has_all_extra_attributes(self):
    dataset = fsns.get_split('train', dataset_dir())
    self.assertTrue(dataset.charset)
    self.assertTrue(dataset.num_char_classes)
    self.assertTrue(dataset.num_of_views)
    self.assertTrue(dataset.max_sequence_length)
    self.assertTrue(dataset.null_code)

  def test_can_use_the_test_data(self):
    batch_size = 1
    dataset = get_test_split()
    provider = slim.dataset_data_provider.DatasetDataProvider(
        dataset,
        shuffle=True,
        common_queue_capacity=2 * batch_size,
        common_queue_min=batch_size)
    image_tf, label_tf = provider.get(['image', 'label'])

    with self.test_session() as sess:
      sess.run(tf.global_variables_initializer())
      with slim.queues.QueueRunners(sess):
        image_np, label_np = sess.run([image_tf, label_tf])

    self.assertEqual((150, 600, 3), image_np.shape)
    self.assertEqual((37,), label_np.shape)


if __name__ == '__main__':
  tf.test.main()
research/attention_ocr/python/datasets/testdata/fsns/charset_size=134.txt  deleted 100644 → 0
0
133 <nul>
1 l
2 ’
3 é
4 t
5 e
6 i
7 n
8 s
9 x
10 g
11 u
12 o
13 1
14 8
15 7
16 0
17 -
18 .
19 p
20 a
21 r
22 è
23 d
24 c
25 V
26 v
27 b
28 m
29 )
30 C
31 z
32 S
33 y
34 ,
35 k
36 É
37 A
38 h
39 E
40 »
41 D
42 /
43 H
44 M
45 (
46 G
47 P
48 ç
2 '
49 R
50 f
51 "
52 2
53 j
54 |
55 N
56 6
57 °
58 5
59 T
60 O
61 U
62 3
63 %
64 9
65 q
66 Z
67 B
68 K
69 w
70 W
71 :
72 4
73 L
74 F
75 ]
76 ï
2 ‘
77 I
78 J
79 ä
80 î
81 ;
82 à
83 ê
84 X
85 ü
86 Y
87 ô
88 =
89 +
90 \
91 {
92 }
93 _
94 Q
95 œ
96 ñ
97 *
98 !
99 Ü
51 “
100 â
101 Ç
102 Œ
103 û
104 ?
105 $
106 ë
107 «
108 €
109 &
110 <
51 ”
111 æ
112 #
113 ®
114 Â
115 È
116 >
117 [
17 —
118 Æ
119 ù
120 Î
121 Ô
122 ÿ
123 À
124 Ê
125 @
126 Ï
127 ©
128 Ë
129 Ù
130 £
131 Ÿ
132 Û
research/attention_ocr/python/datasets/testdata/fsns/download_data.py  deleted 100644 → 0
import urllib.request
import tensorflow as tf
import itertools

URL = 'http://download.tensorflow.org/data/fsns-20160927/testdata/fsns-00000-of-00001'
DST_ORIG = 'fsns-00000-of-00001.orig'
DST = 'fsns-00000-of-00001'
KEEP_NUM_RECORDS = 5

print('Downloading %s ...' % URL)
urllib.request.urlretrieve(URL, DST_ORIG)

print('Writing %d records from %s to %s ...' %
      (KEEP_NUM_RECORDS, DST_ORIG, DST))
with tf.io.TFRecordWriter(DST) as writer:
  for raw_record in itertools.islice(
      tf.python_io.tf_record_iterator(DST_ORIG), KEEP_NUM_RECORDS):
    writer.write(raw_record)
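A quick sanity check for the truncated file this script writes (a sketch using the same TF 1.x record iterator the script itself relies on):

import tensorflow as tf

# Should print 5, the value of KEEP_NUM_RECORDS above.
count = sum(1 for _ in tf.python_io.tf_record_iterator('fsns-00000-of-00001'))
print(count)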
research/attention_ocr/python/datasets/testdata/fsns/fsns-00000-of-00001  deleted 100644 → 0
File deleted
research/attention_ocr/python/datasets/testdata/fsns/links.txt  deleted 100644 → 0
http://download.tensorflow.org/data/fsns-20160927/testdata/fsns-00000-of-00001
research/attention_ocr/python/datasets/unittest_utils.py  deleted 100644 → 0
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Functions to make unit testing easier."""
import numpy as np
import io
from PIL import Image as PILImage
import tensorflow as tf


def create_random_image(image_format, shape):
  """Creates an image with random values.

  Args:
    image_format: An image format (PNG or JPEG).
    shape: A tuple with image shape (including channels).

  Returns:
    A tuple (<numpy ndarray>, <a string with encoded image>)
  """
  image = np.random.randint(low=0, high=255, size=shape, dtype='uint8')
  fd = io.BytesIO()
  image_pil = PILImage.fromarray(image)
  image_pil.save(fd, image_format, subsampling=0, quality=100)
  return image, fd.getvalue()


def create_serialized_example(name_to_values):
  """Creates a tf.Example proto using a dictionary.

  It automatically detects the type of values and defines a corresponding
  feature.

  Args:
    name_to_values: A dictionary.

  Returns:
    tf.Example proto.
  """
  example = tf.train.Example()
  for name, values in name_to_values.items():
    feature = example.features.feature[name]
    if isinstance(values[0], str) or isinstance(values[0], bytes):
      add = feature.bytes_list.value.extend
    elif isinstance(values[0], float):
      # The tf.train.Feature proto field is float_list (the original code
      # referenced a nonexistent float32_list field).
      add = feature.float_list.value.extend
    elif isinstance(values[0], int):
      add = feature.int64_list.value.extend
    else:
      raise AssertionError('Unsupported type: %s' % type(values[0]))
    add(values)
  return example.SerializeToString()
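A minimal sketch of how the two helpers compose (feature names and shapes are illustrative):

import tensorflow as tf

from datasets import unittest_utils

# Random 150x600 PNG wrapped into a serialized tf.Example.
image, encoded = unittest_utils.create_random_image('PNG', shape=(150, 600, 3))
serialized = unittest_utils.create_serialized_example({
    'image/encoded': [encoded],      # bytes -> bytes_list
    'image/class': list(range(37)),  # ints  -> int64_list
})
example = tf.train.Example()
example.ParseFromString(serialized)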
research/attention_ocr/python/datasets/unittest_utils_test.py  deleted 100644 → 0
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for unittest_utils."""
import numpy as np
import io
from PIL import Image as PILImage
import tensorflow as tf

from datasets import unittest_utils


class UnittestUtilsTest(tf.test.TestCase):
  def test_creates_an_image_of_specified_shape(self):
    image, _ = unittest_utils.create_random_image('PNG', (10, 20, 3))
    self.assertEqual(image.shape, (10, 20, 3))

  def test_encoded_image_corresponds_to_numpy_array(self):
    image, encoded = unittest_utils.create_random_image('PNG', (20, 10, 3))
    pil_image = PILImage.open(io.BytesIO(encoded))
    self.assertAllEqual(image, np.array(pil_image))

  def test_created_example_has_correct_values(self):
    example_serialized = unittest_utils.create_serialized_example({
        'labels': [1, 2, 3],
        'data': [b'FAKE']
    })
    example = tf.train.Example()
    example.ParseFromString(example_serialized)
    self.assertProtoEquals("""
      features {
        feature {
          key: "labels"
          value { int64_list {
            value: 1
            value: 2
            value: 3
          }}
        }
        feature {
          key: "data"
          value { bytes_list {
            value: "FAKE"
          }}
        }
      }
    """, example)


if __name__ == '__main__':
  tf.test.main()
research/attention_ocr/python/demo_inference.py  deleted 100644 → 0
"""A script to run inference on a set of image files.
NOTE #1: The Attention OCR model was trained only using FSNS train dataset and
it will work only for images which look more or less similar to french street
names. In order to apply it to images from a different distribution you need
to retrain (or at least fine-tune) it using images from that distribution.
NOTE #2: This script exists for demo purposes only. It is highly recommended
to use tools and mechanisms provided by the TensorFlow Serving system to run
inference on TensorFlow models in production:
https://www.tensorflow.org/serving/serving_basic
Usage:
python demo_inference.py --batch_size=32
\
--checkpoint=model.ckpt-399731
\
--image_path_pattern=./datasets/data/fsns/temp/fsns_train_%02d.png
"""
import
numpy
as
np
import
PIL.Image
import
tensorflow
as
tf
from
tensorflow.python.platform
import
flags
from
tensorflow.python.training
import
monitored_session
import
common_flags
import
datasets
import
data_provider
FLAGS
=
flags
.
FLAGS
common_flags
.
define
()
# e.g. ./datasets/data/fsns/temp/fsns_train_%02d.png
flags
.
DEFINE_string
(
'image_path_pattern'
,
''
,
'A file pattern with a placeholder for the image index.'
)
def
get_dataset_image_size
(
dataset_name
):
# Ideally this info should be exposed through the dataset interface itself.
# But currently it is not available by other means.
ds_module
=
getattr
(
datasets
,
dataset_name
)
height
,
width
,
_
=
ds_module
.
DEFAULT_CONFIG
[
'image_shape'
]
return
width
,
height
def
load_images
(
file_pattern
,
batch_size
,
dataset_name
):
width
,
height
=
get_dataset_image_size
(
dataset_name
)
images_actual_data
=
np
.
ndarray
(
shape
=
(
batch_size
,
height
,
width
,
3
),
dtype
=
'uint8'
)
for
i
in
range
(
batch_size
):
path
=
file_pattern
%
i
print
(
"Reading %s"
%
path
)
pil_image
=
PIL
.
Image
.
open
(
tf
.
gfile
.
GFile
(
path
,
'rb'
))
images_actual_data
[
i
,
...]
=
np
.
asarray
(
pil_image
)
return
images_actual_data
def
create_model
(
batch_size
,
dataset_name
):
width
,
height
=
get_dataset_image_size
(
dataset_name
)
dataset
=
common_flags
.
create_dataset
(
split_name
=
FLAGS
.
split_name
)
model
=
common_flags
.
create_model
(
num_char_classes
=
dataset
.
num_char_classes
,
seq_length
=
dataset
.
max_sequence_length
,
num_views
=
dataset
.
num_of_views
,
null_code
=
dataset
.
null_code
,
charset
=
dataset
.
charset
)
raw_images
=
tf
.
placeholder
(
tf
.
uint8
,
shape
=
[
batch_size
,
height
,
width
,
3
])
images
=
tf
.
map_fn
(
data_provider
.
preprocess_image
,
raw_images
,
dtype
=
tf
.
float32
)
endpoints
=
model
.
create_base
(
images
,
labels_one_hot
=
None
)
return
raw_images
,
endpoints
def
run
(
checkpoint
,
batch_size
,
dataset_name
,
image_path_pattern
):
images_placeholder
,
endpoints
=
create_model
(
batch_size
,
dataset_name
)
images_data
=
load_images
(
image_path_pattern
,
batch_size
,
dataset_name
)
session_creator
=
monitored_session
.
ChiefSessionCreator
(
checkpoint_filename_with_path
=
checkpoint
)
with
monitored_session
.
MonitoredSession
(
session_creator
=
session_creator
)
as
sess
:
predictions
=
sess
.
run
(
endpoints
.
predicted_text
,
feed_dict
=
{
images_placeholder
:
images_data
})
return
[
pr_bytes
.
decode
(
'utf-8'
)
for
pr_bytes
in
predictions
.
tolist
()]
def
main
(
_
):
print
(
"Predicted strings:"
)
predictions
=
run
(
FLAGS
.
checkpoint
,
FLAGS
.
batch_size
,
FLAGS
.
dataset_name
,
FLAGS
.
image_path_pattern
)
for
line
in
predictions
:
print
(
line
)
if
__name__
==
'__main__'
:
tf
.
app
.
run
()
research/attention_ocr/python/demo_inference_test.py  deleted 100644 → 0
#!/usr/bin/python
# -*- coding: UTF-8 -*-
import os

import demo_inference
import tensorflow as tf
from tensorflow.python.training import monitored_session

_CHECKPOINT = 'model.ckpt-399731'
_CHECKPOINT_URL = 'http://download.tensorflow.org/models/attention_ocr_2017_08_09.tar.gz'


class DemoInferenceTest(tf.test.TestCase):
  def setUp(self):
    super(DemoInferenceTest, self).setUp()
    for suffix in ['.meta', '.index', '.data-00000-of-00001']:
      filename = _CHECKPOINT + suffix
      self.assertTrue(tf.gfile.Exists(filename),
                      msg='Missing checkpoint file %s. '
                          'Please download and extract it from %s' %
                          (filename, _CHECKPOINT_URL))
    self._batch_size = 32
    tf.flags.FLAGS.dataset_dir = os.path.join(
        os.path.dirname(__file__), 'datasets/testdata/fsns')

  def test_moving_variables_properly_loaded_from_a_checkpoint(self):
    batch_size = 32
    dataset_name = 'fsns'
    images_placeholder, endpoints = demo_inference.create_model(
        batch_size, dataset_name)
    image_path_pattern = 'testdata/fsns_train_%02d.png'
    images_data = demo_inference.load_images(image_path_pattern, batch_size,
                                             dataset_name)
    tensor_name = 'AttentionOcr_v1/conv_tower_fn/INCE/InceptionV3/Conv2d_2a_3x3/BatchNorm/moving_mean'
    moving_mean_tf = tf.get_default_graph().get_tensor_by_name(
        tensor_name + ':0')
    reader = tf.train.NewCheckpointReader(_CHECKPOINT)
    moving_mean_expected = reader.get_tensor(tensor_name)

    session_creator = monitored_session.ChiefSessionCreator(
        checkpoint_filename_with_path=_CHECKPOINT)
    with monitored_session.MonitoredSession(
        session_creator=session_creator) as sess:
      moving_mean_np = sess.run(moving_mean_tf,
                                feed_dict={images_placeholder: images_data})

    self.assertAllEqual(moving_mean_expected, moving_mean_np)

  def test_correct_results_on_test_data(self):
    image_path_pattern = 'testdata/fsns_train_%02d.png'
    predictions = demo_inference.run(_CHECKPOINT, self._batch_size, 'fsns',
                                     image_path_pattern)
    self.assertEqual([
        u'Boulevard de Lunel░░░░░░░░░░░░░░░░░░░',
        'Rue de Provence░░░░░░░░░░░░░░░░░░░░░░',
        'Rue de Port Maria░░░░░░░░░░░░░░░░░░░░',
        'Avenue Charles Gounod░░░░░░░░░░░░░░░░',
        'Rue de l‘Aurore░░░░░░░░░░░░░░░░░░░░░░',
        'Rue de Beuzeville░░░░░░░░░░░░░░░░░░░░',
        'Rue d‘Orbey░░░░░░░░░░░░░░░░░░░░░░░░░░',
        'Rue Victor Schoulcher░░░░░░░░░░░░░░░░',
        'Rue de la Gare░░░░░░░░░░░░░░░░░░░░░░░',
        'Rue des Tulipes░░░░░░░░░░░░░░░░░░░░░░',
        'Rue André Maginot░░░░░░░░░░░░░░░░░░░░',
        'Route de Pringy░░░░░░░░░░░░░░░░░░░░░░',
        'Rue des Landelles░░░░░░░░░░░░░░░░░░░░',
        'Rue des Ilettes░░░░░░░░░░░░░░░░░░░░░░',
        'Avenue de Maurin░░░░░░░░░░░░░░░░░░░░░',
        'Rue Théresa░░░░░░░░░░░░░░░░░░░░░░░░░░',  # GT='Rue Thérésa'
        'Route de la Balme░░░░░░░░░░░░░░░░░░░░',
        'Rue Hélène Roederer░░░░░░░░░░░░░░░░░░',
        'Rue Emile Bernard░░░░░░░░░░░░░░░░░░░░',
        'Place de la Mairie░░░░░░░░░░░░░░░░░░░',
        'Rue des Perrots░░░░░░░░░░░░░░░░░░░░░░',
        'Rue de la Libération░░░░░░░░░░░░░░░░░',
        'Impasse du Capcir░░░░░░░░░░░░░░░░░░░░',
        'Avenue de la Grand Mare░░░░░░░░░░░░░░',
        'Rue Pierre Brossolette░░░░░░░░░░░░░░░',
        'Rue de Provence░░░░░░░░░░░░░░░░░░░░░░',
        'Rue du Docteur Mourre░░░░░░░░░░░░░░░░',
        'Rue d‘Ortheuil░░░░░░░░░░░░░░░░░░░░░░░',
        'Rue des Sarments░░░░░░░░░░░░░░░░░░░░░',
        'Rue du Centre░░░░░░░░░░░░░░░░░░░░░░░░',
        'Impasse Pierre Mourgues░░░░░░░░░░░░░░',
        'Rue Marcel Dassault░░░░░░░░░░░░░░░░░░'
    ], predictions)


if __name__ == '__main__':
  tf.test.main()
research/attention_ocr/python/eval.py  deleted 100644 → 0
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Script to evaluate a trained Attention OCR model.
A simple usage example:
python eval.py
"""
import tensorflow as tf
from tensorflow.contrib import slim
from tensorflow import app
from tensorflow.python.platform import flags

import data_provider
import common_flags

FLAGS = flags.FLAGS
common_flags.define()

# yapf: disable
flags.DEFINE_integer('num_batches', 100,
                     'Number of batches to run eval for.')

flags.DEFINE_string('eval_log_dir', '/tmp/attention_ocr/eval',
                    'Directory where the evaluation results are saved to.')

flags.DEFINE_integer('eval_interval_secs', 60,
                     'Frequency in seconds to run evaluations.')

flags.DEFINE_integer('number_of_steps', None,
                     'Number of times to run evaluation.')
# yapf: enable


def main(_):
  if not tf.gfile.Exists(FLAGS.eval_log_dir):
    tf.gfile.MakeDirs(FLAGS.eval_log_dir)

  dataset = common_flags.create_dataset(split_name=FLAGS.split_name)
  model = common_flags.create_model(dataset.num_char_classes,
                                    dataset.max_sequence_length,
                                    dataset.num_of_views, dataset.null_code)
  data = data_provider.get_data(
      dataset,
      FLAGS.batch_size,
      augment=False,
      central_crop_size=common_flags.get_crop_size())
  endpoints = model.create_base(data.images, labels_one_hot=None)
  model.create_loss(data, endpoints)
  eval_ops = model.create_summaries(
      data, endpoints, dataset.charset, is_training=False)
  slim.get_or_create_global_step()
  session_config = tf.ConfigProto(device_count={"GPU": 0})
  slim.evaluation.evaluation_loop(
      master=FLAGS.master,
      checkpoint_dir=FLAGS.train_log_dir,
      logdir=FLAGS.eval_log_dir,
      eval_op=eval_ops,
      num_evals=FLAGS.num_batches,
      eval_interval_secs=FLAGS.eval_interval_secs,
      max_number_of_evaluations=FLAGS.number_of_steps,
      session_config=session_config)


if __name__ == '__main__':
  app.run()
research/attention_ocr/python/inception_preprocessing.py  deleted 100644 → 0
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Provides utilities to preprocess images for the Inception networks."""
# TODO(gorban): add as a dependency, when slim or tensorflow/models are pipfied
# Source:
# https://raw.githubusercontent.com/tensorflow/models/a9d0e6e8923a4/slim/preprocessing/inception_preprocessing.py
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

from tensorflow.python.ops import control_flow_ops


def apply_with_random_selector(x, func, num_cases):
  """Computes func(x, sel), with sel sampled from [0...num_cases-1].

  Args:
    x: input Tensor.
    func: Python function to apply.
    num_cases: Python int32, number of cases to sample sel from.

  Returns:
    The result of func(x, sel), where func receives the value of the
    selector as a python integer, but sel is sampled dynamically.
  """
  sel = tf.random_uniform([], maxval=num_cases, dtype=tf.int32)
  # Pass the real x only to one of the func calls.
  return control_flow_ops.merge([
      func(control_flow_ops.switch(x, tf.equal(sel, case))[1], case)
      for case in range(num_cases)
  ])[0]


def distort_color(image, color_ordering=0, fast_mode=True, scope=None):
  """Distort the color of a Tensor image.

  Each color distortion is non-commutative and thus ordering of the color ops
  matters. Ideally we would randomly permute the ordering of the color ops.
  Rather than adding that level of complication, we select a distinct ordering
  of color ops for each preprocessing thread.

  Args:
    image: 3-D Tensor containing single image in [0, 1].
    color_ordering: Python int, a type of distortion (valid values: 0-3).
    fast_mode: Avoids slower ops (random_hue and random_contrast)
    scope: Optional scope for name_scope.

  Returns:
    3-D Tensor color-distorted image on range [0, 1]

  Raises:
    ValueError: if color_ordering not in [0, 3]
  """
  with tf.name_scope(scope, 'distort_color', [image]):
    if fast_mode:
      if color_ordering == 0:
        image = tf.image.random_brightness(image, max_delta=32. / 255.)
        image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
      else:
        image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
        image = tf.image.random_brightness(image, max_delta=32. / 255.)
    else:
      if color_ordering == 0:
        image = tf.image.random_brightness(image, max_delta=32. / 255.)
        image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
        image = tf.image.random_hue(image, max_delta=0.2)
        image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
      elif color_ordering == 1:
        image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
        image = tf.image.random_brightness(image, max_delta=32. / 255.)
        image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
        image = tf.image.random_hue(image, max_delta=0.2)
      elif color_ordering == 2:
        image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
        image = tf.image.random_hue(image, max_delta=0.2)
        image = tf.image.random_brightness(image, max_delta=32. / 255.)
        image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
      elif color_ordering == 3:
        image = tf.image.random_hue(image, max_delta=0.2)
        image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
        image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
        image = tf.image.random_brightness(image, max_delta=32. / 255.)
      else:
        raise ValueError('color_ordering must be in [0, 3]')

    # The random_* ops do not necessarily clamp.
    return tf.clip_by_value(image, 0.0, 1.0)


def distorted_bounding_box_crop(image,
                                bbox,
                                min_object_covered=0.1,
                                aspect_ratio_range=(0.75, 1.33),
                                area_range=(0.05, 1.0),
                                max_attempts=100,
                                scope=None):
  """Generates cropped_image using one of the bboxes randomly distorted.

  See `tf.image.sample_distorted_bounding_box` for more documentation.

  Args:
    image: 3-D Tensor of image (it will be converted to floats in [0, 1]).
    bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords]
      where each coordinate is [0, 1) and the coordinates are arranged
      as [ymin, xmin, ymax, xmax]. If num_boxes is 0 then it would use the
      whole image.
    min_object_covered: An optional `float`. Defaults to `0.1`. The cropped
      area of the image must contain at least this fraction of any bounding
      box supplied.
    aspect_ratio_range: An optional list of `floats`. The cropped area of the
      image must have an aspect ratio = width / height within this range.
    area_range: An optional list of `floats`. The cropped area of the image
      must contain a fraction of the supplied image within this range.
    max_attempts: An optional `int`. Number of attempts at generating a
      cropped region of the image of the specified constraints. After
      `max_attempts` failures, return the entire image.
    scope: Optional scope for name_scope.

  Returns:
    A tuple, a 3-D Tensor cropped_image and the distorted bbox
  """
  with tf.name_scope(scope, 'distorted_bounding_box_crop', [image, bbox]):
    # Each bounding box has shape [1, num_boxes, box coords] and
    # the coordinates are ordered [ymin, xmin, ymax, xmax].

    # A large fraction of image datasets contain a human-annotated bounding
    # box delineating the region of the image containing the object of
    # interest. We choose to create a new bounding box for the object which
    # is a randomly distorted version of the human-annotated bounding box
    # that obeys an allowed range of aspect ratios, sizes and overlap with
    # the human-annotated bounding box. If no box is supplied, then we
    # assume the bounding box is the entire image.
    sample_distorted_bounding_box = tf.image.sample_distorted_bounding_box(
        tf.shape(image),
        bounding_boxes=bbox,
        min_object_covered=min_object_covered,
        aspect_ratio_range=aspect_ratio_range,
        area_range=area_range,
        max_attempts=max_attempts,
        use_image_if_no_bounding_boxes=True)
    bbox_begin, bbox_size, distort_bbox = sample_distorted_bounding_box

    # Crop the image to the specified bounding box.
    cropped_image = tf.slice(image, bbox_begin, bbox_size)
    return cropped_image, distort_bbox


def preprocess_for_train(image, height, width, bbox, fast_mode=True,
                         scope=None):
  """Distort one image for training a network.

  Distorting images provides a useful technique for augmenting the data
  set during training in order to make the network invariant to aspects
  of the image that do not affect the label.

  Additionally it would create image_summaries to display the different
  transformations applied to the image.

  Args:
    image: 3-D Tensor of image. If dtype is tf.float32 then the range should
      be [0, 1], otherwise it would converted to tf.float32 assuming that the
      range is [0, MAX], where MAX is largest positive representable number
      for int(8/16/32) data type (see `tf.image.convert_image_dtype` for
      details).
    height: integer
    width: integer
    bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords]
      where each coordinate is [0, 1) and the coordinates are arranged
      as [ymin, xmin, ymax, xmax].
    fast_mode: Optional boolean, if True avoids slower transformations (i.e.
      bi-cubic resizing, random_hue or random_contrast).
    scope: Optional scope for name_scope.

  Returns:
    3-D float Tensor of distorted image used for training with range [-1, 1].
  """
  with tf.name_scope(scope, 'distort_image', [image, height, width, bbox]):
    if bbox is None:
      bbox = tf.constant(
          [0.0, 0.0, 1.0, 1.0], dtype=tf.float32, shape=[1, 1, 4])
    if image.dtype != tf.float32:
      image = tf.image.convert_image_dtype(image, dtype=tf.float32)
    # Each bounding box has shape [1, num_boxes, box coords] and
    # the coordinates are ordered [ymin, xmin, ymax, xmax].
    image_with_box = tf.image.draw_bounding_boxes(
        tf.expand_dims(image, 0), bbox)
    tf.summary.image('image_with_bounding_boxes', image_with_box)

    distorted_image, distorted_bbox = distorted_bounding_box_crop(image, bbox)
    # Restore the shape since the dynamic slice based upon the bbox_size loses
    # the third dimension.
    distorted_image.set_shape([None, None, 3])
    image_with_distorted_box = tf.image.draw_bounding_boxes(
        tf.expand_dims(image, 0), distorted_bbox)
    tf.summary.image('images_with_distorted_bounding_box',
                     image_with_distorted_box)

    # This resizing operation may distort the images because the aspect
    # ratio is not respected. We select a resize method in a round robin
    # fashion based on the thread number.
    # Note that ResizeMethod contains 4 enumerated resizing methods.
    # We select only 1 case for fast_mode bilinear.
    num_resize_cases = 1 if fast_mode else 4
    distorted_image = apply_with_random_selector(
        distorted_image,
        lambda x, method: tf.image.resize_images(x, [height, width],
                                                 method=method),
        num_cases=num_resize_cases)

    tf.summary.image('cropped_resized_image',
                     tf.expand_dims(distorted_image, 0))

    # Randomly flip the image horizontally.
    distorted_image = tf.image.random_flip_left_right(distorted_image)

    # Randomly distort the colors. There are 4 ways to do it.
    distorted_image = apply_with_random_selector(
        distorted_image,
        lambda x, ordering: distort_color(x, ordering, fast_mode),
        num_cases=4)

    tf.summary.image('final_distorted_image',
                     tf.expand_dims(distorted_image, 0))
    distorted_image = tf.subtract(distorted_image, 0.5)
    distorted_image = tf.multiply(distorted_image, 2.0)
    return distorted_image


def preprocess_for_eval(image, height, width, central_fraction=0.875,
                        scope=None):
  """Prepare one image for evaluation.

  If height and width are specified it would output an image with that size by
  applying resize_bilinear.

  If central_fraction is specified it would crop the central fraction of the
  input image.

  Args:
    image: 3-D Tensor of image. If dtype is tf.float32 then the range should
      be [0, 1], otherwise it would converted to tf.float32 assuming that the
      range is [0, MAX], where MAX is largest positive representable number
      for int(8/16/32) data type (see `tf.image.convert_image_dtype` for
      details)
    height: integer
    width: integer
    central_fraction: Optional Float, fraction of the image to crop.
    scope: Optional scope for name_scope.

  Returns:
    3-D float Tensor of prepared image.
  """
  with tf.name_scope(scope, 'eval_image', [image, height, width]):
    if image.dtype != tf.float32:
      image = tf.image.convert_image_dtype(image, dtype=tf.float32)
    # Crop the central region of the image with an area containing 87.5% of
    # the original image.
    if central_fraction:
      image = tf.image.central_crop(image, central_fraction=central_fraction)

    if height and width:
      # Resize the image to the specified height and width.
      image = tf.expand_dims(image, 0)
      image = tf.image.resize_bilinear(
          image, [height, width], align_corners=False)
      image = tf.squeeze(image, [0])
    image = tf.subtract(image, 0.5)
    image = tf.multiply(image, 2.0)
    return image


def preprocess_image(image,
                     height,
                     width,
                     is_training=False,
                     bbox=None,
                     fast_mode=True):
  """Pre-process one image for training or evaluation.

  Args:
    image: 3-D Tensor [height, width, channels] with the image.
    height: integer, image expected height.
    width: integer, image expected width.
    is_training: Boolean. If true it would transform an image for train,
      otherwise it would transform it for evaluation.
    bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords]
      where each coordinate is [0, 1) and the coordinates are arranged as
      [ymin, xmin, ymax, xmax].
    fast_mode: Optional boolean, if True avoids slower transformations.

  Returns:
    3-D float Tensor containing an appropriately scaled image

  Raises:
    ValueError: if user does not provide bounding box
  """
  if is_training:
    return preprocess_for_train(image, height, width, bbox, fast_mode)
  else:
    return preprocess_for_eval(image, height, width)
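The switch/merge construction in apply_with_random_selector is the least obvious piece of this file: it builds all num_cases branches, but only the branch matching the sampled selector receives the live tensor. A toy sketch under the same TF 1.x API (the multiply-by-case function is purely illustrative):

import tensorflow as tf

import inception_preprocessing

x = tf.ones([2, 2])
# Multiplies x by a randomly sampled case index in [0, 4).
y = inception_preprocessing.apply_with_random_selector(
    x, lambda t, case: t * case, num_cases=4)
with tf.Session() as sess:
  print(sess.run(y))  # every entry equals the sampled case index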
research/attention_ocr/python/metrics.py  deleted 100644 → 0
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Quality metrics for the model."""
import tensorflow as tf


def char_accuracy(predictions, targets, rej_char, streaming=False):
  """Computes character level accuracy.

  Both predictions and targets should have the same shape
  [batch_size x seq_length].

  Args:
    predictions: predicted characters ids.
    targets: ground truth character ids.
    rej_char: the character id used to mark an empty element (end of
      sequence).
    streaming: if True, uses the streaming mean from the slim.metric module.

  Returns:
    an update op for execution and a value tensor whose value on evaluation
    returns the total character accuracy.
  """
  with tf.variable_scope('CharAccuracy'):
    predictions.get_shape().assert_is_compatible_with(targets.get_shape())

    targets = tf.to_int32(targets)
    const_rej_char = tf.constant(rej_char, shape=targets.get_shape())
    weights = tf.to_float(tf.not_equal(targets, const_rej_char))
    correct_chars = tf.to_float(tf.equal(predictions, targets))
    accuracy_per_example = tf.div(
        tf.reduce_sum(tf.multiply(correct_chars, weights), 1),
        tf.reduce_sum(weights, 1))
    if streaming:
      return tf.contrib.metrics.streaming_mean(accuracy_per_example)
    else:
      return tf.reduce_mean(accuracy_per_example)


def sequence_accuracy(predictions, targets, rej_char, streaming=False):
  """Computes sequence level accuracy.

  Both input tensors should have the same shape: [batch_size x seq_length].

  Args:
    predictions: predicted character classes.
    targets: ground truth character classes.
    rej_char: the character id used to mark empty element (end of sequence).
    streaming: if True, uses the streaming mean from the slim.metric module.

  Returns:
    an update op for execution and a value tensor whose value on evaluation
    returns the total sequence accuracy.
  """
  with tf.variable_scope('SequenceAccuracy'):
    predictions.get_shape().assert_is_compatible_with(targets.get_shape())

    targets = tf.to_int32(targets)
    const_rej_char = tf.constant(
        rej_char, shape=targets.get_shape(), dtype=tf.int32)
    include_mask = tf.not_equal(targets, const_rej_char)
    include_predictions = tf.to_int32(
        tf.where(include_mask, predictions,
                 tf.zeros_like(predictions) + rej_char))
    correct_chars = tf.to_float(tf.equal(include_predictions, targets))
    correct_chars_counts = tf.cast(
        tf.reduce_sum(correct_chars, reduction_indices=[1]), dtype=tf.int32)
    target_length = targets.get_shape().dims[1].value
    target_chars_counts = tf.constant(
        target_length, shape=correct_chars_counts.get_shape())
    accuracy_per_example = tf.to_float(
        tf.equal(correct_chars_counts, target_chars_counts))
    if streaming:
      return tf.contrib.metrics.streaming_mean(accuracy_per_example)
    else:
      return tf.reduce_mean(accuracy_per_example)
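A worked toy example for char_accuracy (values illustrative): positions whose target equals rej_char are masked out of the per-example average, so a wrong prediction in a padded slot does not count.

import tensorflow as tf

import metrics

predictions = tf.constant([[1, 2, 3, 42]])
targets = tf.constant([[1, 2, 9, 42]])  # last position is rej_char padding
acc = metrics.char_accuracy(predictions, targets, rej_char=42)
with tf.Session() as sess:
  print(sess.run(acc))  # 2 of 3 unpadded characters match -> ~0.6667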
research/attention_ocr/python/metrics_test.py  deleted 100644 → 0
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for the metrics module."""
import contextlib
import numpy as np
import tensorflow as tf

import metrics


class AccuracyTest(tf.test.TestCase):
  def setUp(self):
    tf.test.TestCase.setUp(self)
    self.rng = np.random.RandomState([11, 23, 50])
    self.num_char_classes = 3
    self.batch_size = 4
    self.seq_length = 5
    self.rej_char = 42

  @contextlib.contextmanager
  def initialized_session(self):
    """Wrapper for test session context manager with required initialization.

    Yields:
      A session object that should be used as a context manager.
    """
    with self.cached_session() as sess:
      sess.run(tf.global_variables_initializer())
      sess.run(tf.local_variables_initializer())
      yield sess

  def _fake_labels(self):
    return self.rng.randint(
        low=0,
        high=self.num_char_classes,
        size=(self.batch_size, self.seq_length),
        dtype='int32')

  def _incorrect_copy(self, values, bad_indexes):
    incorrect = np.copy(values)
    incorrect[bad_indexes] = values[bad_indexes] + 1
    return incorrect

  def test_sequence_accuracy_identical_samples(self):
    labels_tf = tf.convert_to_tensor(self._fake_labels())

    accuracy_tf = metrics.sequence_accuracy(labels_tf, labels_tf,
                                            self.rej_char)
    with self.initialized_session() as sess:
      accuracy_np = sess.run(accuracy_tf)

    self.assertAlmostEqual(accuracy_np, 1.0)

  def test_sequence_accuracy_one_char_difference(self):
    ground_truth_np = self._fake_labels()
    ground_truth_tf = tf.convert_to_tensor(ground_truth_np)
    prediction_tf = tf.convert_to_tensor(
        self._incorrect_copy(ground_truth_np, bad_indexes=((0, 0))))

    accuracy_tf = metrics.sequence_accuracy(prediction_tf, ground_truth_tf,
                                            self.rej_char)
    with self.initialized_session() as sess:
      accuracy_np = sess.run(accuracy_tf)

    # 1 of 4 sequences is incorrect.
    self.assertAlmostEqual(accuracy_np, 1.0 - 1.0 / self.batch_size)

  def test_char_accuracy_one_char_difference_with_padding(self):
    ground_truth_np = self._fake_labels()
    ground_truth_tf = tf.convert_to_tensor(ground_truth_np)
    prediction_tf = tf.convert_to_tensor(
        self._incorrect_copy(ground_truth_np, bad_indexes=((0, 0))))

    accuracy_tf = metrics.char_accuracy(prediction_tf, ground_truth_tf,
                                        self.rej_char)
    with self.initialized_session() as sess:
      accuracy_np = sess.run(accuracy_tf)

    chars_count = self.seq_length * self.batch_size
    self.assertAlmostEqual(accuracy_np, 1.0 - 1.0 / chars_count)


if __name__ == '__main__':
  tf.test.main()
research/attention_ocr/python/model.py deleted 100644 → 0
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Functions to build the Attention OCR model.
Usage example:
ocr_model = model.Model(num_char_classes, seq_length, num_of_views)
data = ... # create namedtuple InputEndpoints
endpoints = model.create_base(data.images, data.labels_one_hot)
# endpoints.predicted_chars is a tensor with predicted character codes.
total_loss = model.create_loss(data, endpoints)
"""
import
sys
import
collections
import
logging
import
tensorflow
as
tf
from
tensorflow.contrib
import
slim
from
tensorflow.contrib.slim.nets
import
inception
import
metrics
import
sequence_layers
import
utils
OutputEndpoints
=
collections
.
namedtuple
(
'OutputEndpoints'
,
[
'chars_logit'
,
'chars_log_prob'
,
'predicted_chars'
,
'predicted_scores'
,
'predicted_text'
])
# TODO(gorban): replace with tf.HParams when it is released.
ModelParams
=
collections
.
namedtuple
(
'ModelParams'
,
[
'num_char_classes'
,
'seq_length'
,
'num_views'
,
'null_code'
])
ConvTowerParams
=
collections
.
namedtuple
(
'ConvTowerParams'
,
[
'final_endpoint'
])
SequenceLogitsParams
=
collections
.
namedtuple
(
'SequenceLogitsParams'
,
[
'use_attention'
,
'use_autoregression'
,
'num_lstm_units'
,
'weight_decay'
,
'lstm_state_clip_value'
])
SequenceLossParams
=
collections
.
namedtuple
(
'SequenceLossParams'
,
[
'label_smoothing'
,
'ignore_nulls'
,
'average_across_timesteps'
])
EncodeCoordinatesParams
=
collections
.
namedtuple
(
'EncodeCoordinatesParams'
,
[
'enabled'
])
def
_dict_to_array
(
id_to_char
,
default_character
):
num_char_classes
=
max
(
id_to_char
.
keys
())
+
1
array
=
[
default_character
]
*
num_char_classes
for
k
,
v
in
id_to_char
.
items
():
array
[
k
]
=
v
return
array
class
CharsetMapper
(
object
):
"""A simple class to map tensor ids into strings.
It works only when the character set is 1:1 mapping between individual
characters and individual ids.
Make sure you call tf.tables_initializer().run() as part of the init op.
"""
def
__init__
(
self
,
charset
,
default_character
=
'?'
):
"""Creates a lookup table.
Args:
charset: a dictionary with id-to-character mapping.
"""
mapping_strings
=
tf
.
constant
(
_dict_to_array
(
charset
,
default_character
))
self
.
table
=
tf
.
contrib
.
lookup
.
index_to_string_table_from_tensor
(
mapping
=
mapping_strings
,
default_value
=
default_character
)
def
get_text
(
self
,
ids
):
"""Returns a string corresponding to a sequence of character ids.
Args:
ids: a tensor with shape [batch_size, max_sequence_length]
"""
return
tf
.
reduce_join
(
self
.
table
.
lookup
(
tf
.
to_int64
(
ids
)),
reduction_indices
=
1
)
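A short usage sketch for CharsetMapper (not in the original file; the toy charset is an assumption, while the pattern mirrors CharsetMapperTest in model_test.py further down):

import tensorflow as tf
import model

charset = {0: 'a', 1: 'b', 2: 'c'}
mapper = model.CharsetMapper(charset)
ids = tf.constant([[0, 1, 2], [2, 1, 0]], dtype=tf.int64)
text = mapper.get_text(ids)  # joins looked-up characters along axis 1
with tf.Session() as sess:
  tf.tables_initializer().run()  # required before using the lookup table
  print(sess.run(text))  # [b'abc', b'cba']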
def get_softmax_loss_fn(label_smoothing):
  """Returns sparse or dense loss function depending on the label_smoothing.

  Args:
    label_smoothing: weight for label smoothing

  Returns:
    a function which takes labels and predictions as arguments and returns
    a softmax loss for the selected type of labels (sparse or dense).
  """
  if label_smoothing > 0:
    def loss_fn(labels, logits):
      return (tf.nn.softmax_cross_entropy_with_logits(
          logits=logits, labels=labels))
  else:
    def loss_fn(labels, logits):
      return tf.nn.sparse_softmax_cross_entropy_with_logits(
          logits=logits, labels=labels)
  return loss_fn
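A quick illustration (my sketch, not from the original file) of the contract: with label_smoothing > 0 the returned function expects dense one-hot label distributions, otherwise sparse integer ids:

import tensorflow as tf

logits = tf.random_normal([4, 10])
dense_labels = tf.one_hot(tf.zeros([4], dtype=tf.int32), depth=10)
loss_dense = get_softmax_loss_fn(label_smoothing=0.1)(dense_labels, logits)
loss_sparse = get_softmax_loss_fn(label_smoothing=0.0)(
    tf.zeros([4], dtype=tf.int32), logits)
# Both are per-example loss tensors of shape [4].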
class Model(object):
  """Class to create the Attention OCR Model."""

  def __init__(self,
               num_char_classes,
               seq_length,
               num_views,
               null_code,
               mparams=None,
               charset=None):
    """Initializes model parameters.

    Args:
      num_char_classes: size of character set.
      seq_length: number of characters in a sequence.
      num_views: Number of views (conv towers) to use.
      null_code: A character code corresponding to a character which
        indicates the end of a sequence.
      mparams: a dictionary with hyper parameters for methods, keys -
        function names, values - corresponding namedtuples.
      charset: an optional dictionary with a mapping between character ids and
        utf8 strings. If specified, OutputEndpoints.predicted_text will contain
        utf8-encoded strings corresponding to the character ids returned by
        OutputEndpoints.predicted_chars (by default predicted_text contains
        an empty vector).
        NOTE: Make sure you call tf.tables_initializer().run() if the charset
        is specified.
    """
    super(Model, self).__init__()
    self._params = ModelParams(
        num_char_classes=num_char_classes,
        seq_length=seq_length,
        num_views=num_views,
        null_code=null_code)
    self._mparams = self.default_mparams()
    if mparams:
      self._mparams.update(mparams)
    self._charset = charset

  def default_mparams(self):
    return {
        'conv_tower_fn':
            ConvTowerParams(final_endpoint='Mixed_5d'),
        'sequence_logit_fn':
            SequenceLogitsParams(
                use_attention=True,
                use_autoregression=True,
                num_lstm_units=256,
                weight_decay=0.00004,
                lstm_state_clip_value=10.0),
        'sequence_loss_fn':
            SequenceLossParams(
                label_smoothing=0.1,
                ignore_nulls=True,
                average_across_timesteps=False),
        'encode_coordinates_fn':
            EncodeCoordinatesParams(enabled=False)
    }

  def set_mparam(self, function, **kwargs):
    self._mparams[function] = self._mparams[function]._replace(**kwargs)
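Because each hyper-parameter bundle is a namedtuple, set_mparam swaps individual fields via _replace. A usage sketch (mirroring calls that appear in model_test.py below):

ocr_model = Model(num_char_classes=72, seq_length=40, num_views=4,
                  null_code=62)
ocr_model.set_mparam('sequence_loss_fn', label_smoothing=0)
ocr_model.set_mparam('encode_coordinates_fn', enabled=True)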
  def conv_tower_fn(self, images, is_training=True, reuse=None):
    """Computes convolutional features using the InceptionV3 model.

    Args:
      images: A tensor of shape [batch_size, height, width, channels].
      is_training: whether the model is being trained or not.
      reuse: whether or not the network and its variables should be reused. To
        be able to reuse 'scope' must be given.

    Returns:
      A tensor of shape [batch_size, OH, OW, N], where OWxOH is the resolution
      of the output feature map and N is the number of output features (it
      depends on the network architecture).
    """
    mparams = self._mparams['conv_tower_fn']
    logging.debug('Using final_endpoint=%s', mparams.final_endpoint)
    with tf.variable_scope('conv_tower_fn/INCE'):
      if reuse:
        tf.get_variable_scope().reuse_variables()
      with slim.arg_scope(inception.inception_v3_arg_scope()):
        with slim.arg_scope([slim.batch_norm, slim.dropout],
                            is_training=is_training):
          net, _ = inception.inception_v3_base(
              images, final_endpoint=mparams.final_endpoint)
      return net

  def _create_lstm_inputs(self, net):
    """Splits an input tensor into a list of tensors (features).

    Args:
      net: A feature map of shape [batch_size, num_features, feature_size].

    Raises:
      AssertionError: if num_features is less than seq_length.

    Returns:
      A list with seq_length tensors of shape [batch_size, feature_size]
    """
    num_features = net.get_shape().dims[1].value
    if num_features < self._params.seq_length:
      raise AssertionError('Incorrect dimension #1 of input tensor'
                           ' %d should be bigger than %d (shape=%s)' %
                           (num_features, self._params.seq_length,
                            net.get_shape()))
    elif num_features > self._params.seq_length:
      logging.warning('Ignoring some features: use %d of %d (shape=%s)',
                      self._params.seq_length, num_features, net.get_shape())
      net = tf.slice(net, [0, 0, 0], [-1, self._params.seq_length, -1])

    return tf.unstack(net, axis=1)

  def sequence_logit_fn(self, net, labels_one_hot):
    mparams = self._mparams['sequence_logit_fn']
    # TODO(gorban): remove /alias suffixes from the scopes.
    with tf.variable_scope('sequence_logit_fn/SQLR'):
      layer_class = sequence_layers.get_layer_class(mparams.use_attention,
                                                    mparams.use_autoregression)
      layer = layer_class(net, labels_one_hot, self._params, mparams)
      return layer.create_logits()

  def max_pool_views(self, nets_list):
    """Max pool across all nets in spatial dimensions.

    Args:
      nets_list: A list of 4D tensors with identical size.

    Returns:
      A tensor with the same size as any input tensor.
    """
    batch_size, height, width, num_features = [
        d.value for d in nets_list[0].get_shape().dims
    ]
    xy_flat_shape = (batch_size, 1, height * width, num_features)
    nets_for_merge = []
    with tf.variable_scope('max_pool_views', values=nets_list):
      for net in nets_list:
        nets_for_merge.append(tf.reshape(net, xy_flat_shape))
      merged_net = tf.concat(nets_for_merge, 1)
      net = slim.max_pool2d(
          merged_net, kernel_size=[len(nets_list), 1], stride=1)
      net = tf.reshape(net, (batch_size, height, width, num_features))
    return net

  def pool_views_fn(self, nets):
    """Combines the outputs of multiple convolutional towers into one tensor.

    It stacks towers one on top of another (in the height dim) in a 4x1 grid.
    The order is an arbitrary design choice and shouldn't matter much.

    Args:
      nets: list of tensors of shape=[batch_size, height, width, num_features].

    Returns:
      A tensor of shape [batch_size, seq_length, features_size].
    """
    with tf.variable_scope('pool_views_fn/STCK'):
      net = tf.concat(nets, 1)
      batch_size = net.get_shape().dims[0].value
      feature_size = net.get_shape().dims[3].value
      return tf.reshape(net, [batch_size, -1, feature_size])
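For intuition about pool_views_fn (shapes taken from model_test.py; the arithmetic is mine): four towers are concatenated along the height axis and flattened into candidate feature vectors, of which _create_lstm_inputs later keeps only the first seq_length.

import tensorflow as tf

nets = [tf.zeros([4, 1, 72, 288]) for _ in range(4)]
net = tf.concat(nets, 1)                # [4, 4, 72, 288]
pooled = tf.reshape(net, [4, -1, 288])  # [4, 288, 288]
# 4 * 72 = 288 candidate feature vectors of size 288 per image.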
  def char_predictions(self, chars_logit):
    """Returns confidence scores (softmax values) for predicted characters.

    Args:
      chars_logit: chars logits, a tensor with shape
        [batch_size x seq_length x num_char_classes]

    Returns:
      A tuple (ids, log_prob, scores), where:
        ids - predicted characters, an int32 tensor with shape
          [batch_size x seq_length];
        log_prob - a log probability of all characters, a float tensor with
          shape [batch_size, seq_length, num_char_classes];
        scores - corresponding confidence scores for characters, a float
          tensor with shape [batch_size x seq_length].
    """
    log_prob = utils.logits_to_log_prob(chars_logit)
    ids = tf.to_int32(tf.argmax(log_prob, axis=2), name='predicted_chars')
    mask = tf.cast(
        slim.one_hot_encoding(ids, self._params.num_char_classes), tf.bool)
    all_scores = tf.nn.softmax(chars_logit)
    selected_scores = tf.boolean_mask(all_scores, mask, name='char_scores')
    scores = tf.reshape(selected_scores, shape=(-1, self._params.seq_length))
    return ids, log_prob, scores
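The score extraction above is just "the softmax probability of the argmax class". A numpy sketch of the same computation (illustrative only, not from the original file):

import numpy as np

logits = np.random.randn(2, 5, 7)  # [batch, seq_length, num_char_classes]
probs = np.exp(logits) / np.exp(logits).sum(-1, keepdims=True)
ids = probs.argmax(-1)             # [2, 5]
scores = np.take_along_axis(probs, ids[..., None], axis=-1)[..., 0]
# scores[b, t] is the probability of the predicted character at step t.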
  def encode_coordinates_fn(self, net):
    """Adds a one-hot encoding of coordinates to different views in the network.

    For each "pixel" of a feature map it adds one-hot encoded x and y
    coordinates.

    Args:
      net: a tensor of shape=[batch_size, height, width, num_features]

    Returns:
      a tensor with the same height and width, but altered feature_size.
    """
    mparams = self._mparams['encode_coordinates_fn']
    if mparams.enabled:
      batch_size, h, w, _ = net.shape.as_list()
      x, y = tf.meshgrid(tf.range(w), tf.range(h))
      w_loc = slim.one_hot_encoding(x, num_classes=w)
      h_loc = slim.one_hot_encoding(y, num_classes=h)
      loc = tf.concat([h_loc, w_loc], 2)
      loc = tf.tile(tf.expand_dims(loc, 0), [batch_size, 1, 1, 1])
      return tf.concat([net, loc], 3)
    else:
      return net
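As the tests in model_test.py below verify, the only shape change is in depth: every pixel gains h + w extra one-hot coordinate channels. A sketch (shapes from the tests; the wiring is mine):

import tensorflow as tf

ocr_model = Model(num_char_classes=72, seq_length=40, num_views=4,
                  null_code=62)
ocr_model.set_mparam('encode_coordinates_fn', enabled=True)
net = tf.zeros([4, 1, 72, 288])
out = ocr_model.encode_coordinates_fn(net)
# out has shape [4, 1, 72, 288 + 1 + 72]; height and width are unchanged.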
  def create_base(self,
                  images,
                  labels_one_hot,
                  scope='AttentionOcr_v1',
                  reuse=None):
    """Creates a base part of the Model (no gradients, losses or summaries).

    Args:
      images: A tensor of shape [batch_size, height, width, channels].
      labels_one_hot: Optional (can be None) one-hot encoding for ground truth
        labels. If provided the function will create a model for training.
      scope: Optional variable_scope.
      reuse: whether or not the network and its variables should be reused. To
        be able to reuse 'scope' must be given.

    Returns:
      A named tuple OutputEndpoints.
    """
    logging.debug('images: %s', images)
    is_training = labels_one_hot is not None
    with tf.variable_scope(scope, reuse=reuse):
      views = tf.split(
          value=images, num_or_size_splits=self._params.num_views, axis=2)
      logging.debug('Views=%d single view: %s', len(views), views[0])

      nets = [
          self.conv_tower_fn(v, is_training, reuse=(i != 0))
          for i, v in enumerate(views)
      ]
      logging.debug('Conv tower: %s', nets[0])

      nets = [self.encode_coordinates_fn(net) for net in nets]
      logging.debug('Conv tower w/ encoded coordinates: %s', nets[0])

      net = self.pool_views_fn(nets)
      logging.debug('Pooled views: %s', net)

      chars_logit = self.sequence_logit_fn(net, labels_one_hot)
      logging.debug('chars_logit: %s', chars_logit)

      predicted_chars, chars_log_prob, predicted_scores = (
          self.char_predictions(chars_logit))
      if self._charset:
        character_mapper = CharsetMapper(self._charset)
        predicted_text = character_mapper.get_text(predicted_chars)
      else:
        predicted_text = tf.constant([])
    return OutputEndpoints(
        chars_logit=chars_logit,
        chars_log_prob=chars_log_prob,
        predicted_chars=predicted_chars,
        predicted_scores=predicted_scores,
        predicted_text=predicted_text)

  def create_loss(self, data, endpoints):
    """Creates all losses required to train the model.

    Args:
      data: InputEndpoints namedtuple.
      endpoints: Model namedtuple.

    Returns:
      Total loss.
    """
    # NOTE: the return value of ModelLoss is not used directly for the
    # gradient computation because under the hood it calls slim.losses.AddLoss,
    # which registers the loss in an internal collection and later returns it
    # as part of GetTotalLoss. We need to use the total loss because the model
    # may have multiple losses including regularization losses.
    self.sequence_loss_fn(endpoints.chars_logit, data.labels)
    total_loss = slim.losses.get_total_loss()
    tf.summary.scalar('TotalLoss', total_loss)
    return total_loss
  def label_smoothing_regularization(self, chars_labels, weight=0.1):
    """Applies a label smoothing regularization.

    Uses the same method as in https://arxiv.org/abs/1512.00567.

    Args:
      chars_labels: ground truth ids of characters,
        shape=[batch_size, seq_length];
      weight: label-smoothing regularization weight.

    Returns:
      A tensor with shape [batch_size, seq_length, num_char_classes].
    """
    one_hot_labels = tf.one_hot(
        chars_labels, depth=self._params.num_char_classes, axis=-1)
    pos_weight = 1.0 - weight
    neg_weight = weight / self._params.num_char_classes
    return one_hot_labels * pos_weight + neg_weight
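A worked instance of the formula above (my arithmetic): with weight=0.1 and five classes, the correct-class entry becomes 1.0 * 0.9 + 0.1 / 5 = 0.92 and every other entry 0.02, so each row still sums to 1.

import numpy as np

weight, num_classes = 0.1, 5
one_hot = np.eye(num_classes)[2]
smoothed = one_hot * (1.0 - weight) + weight / num_classes
# array([0.02, 0.02, 0.92, 0.02, 0.02])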
  def sequence_loss_fn(self, chars_logits, chars_labels):
    """Loss function for char sequence.

    Depending on values of hyper parameters it applies label smoothing and can
    also ignore all null chars after the first one.

    Args:
      chars_logits: logits for predicted characters,
        shape=[batch_size, seq_length, num_char_classes];
      chars_labels: ground truth ids of characters,
        shape=[batch_size, seq_length];

    Returns:
      A Tensor with shape [batch_size] - the log-perplexity for each sequence.
    """
    mparams = self._mparams['sequence_loss_fn']
    with tf.variable_scope('sequence_loss_fn/SLF'):
      if mparams.label_smoothing > 0:
        smoothed_one_hot_labels = self.label_smoothing_regularization(
            chars_labels, mparams.label_smoothing)
        labels_list = tf.unstack(smoothed_one_hot_labels, axis=1)
      else:
        # NOTE: in case of sparse softmax we are not using one-hot
        # encoding.
        labels_list = tf.unstack(chars_labels, axis=1)

      batch_size, seq_length, _ = chars_logits.shape.as_list()
      if mparams.ignore_nulls:
        weights = tf.ones((batch_size, seq_length), dtype=tf.float32)
      else:
        # Suppose that the reject character is the last in the charset.
        reject_char = tf.constant(
            self._params.num_char_classes - 1,
            shape=(batch_size, seq_length),
            dtype=tf.int64)
        known_char = tf.not_equal(chars_labels, reject_char)
        weights = tf.to_float(known_char)

      logits_list = tf.unstack(chars_logits, axis=1)
      weights_list = tf.unstack(weights, axis=1)
      loss = tf.contrib.legacy_seq2seq.sequence_loss(
          logits_list,
          labels_list,
          weights_list,
          softmax_loss_function=get_softmax_loss_fn(mparams.label_smoothing),
          average_across_timesteps=mparams.average_across_timesteps)
      tf.losses.add_loss(loss)
      return loss

  def create_summaries(self, data, endpoints, charset, is_training):
    """Creates all summaries for the model.

    Args:
      data: InputEndpoints namedtuple.
      endpoints: OutputEndpoints namedtuple.
      charset: A dictionary with a mapping between character codes and
        unicode characters. Use the one provided by dataset.charset.
      is_training: If True will create summary prefixes for the training job,
        otherwise - for evaluation.

    Returns:
      A list of evaluation ops
    """

    def sname(label):
      prefix = 'train' if is_training else 'eval'
      return '%s/%s' % (prefix, label)

    max_outputs = 4
    # TODO(gorban): uncomment, when tf.summary.text is released.
    # charset_mapper = CharsetMapper(charset)
    # pr_text = charset_mapper.get_text(
    #     endpoints.predicted_chars[:max_outputs,:])
    # tf.summary.text(sname('text/pr'), pr_text)
    # gt_text = charset_mapper.get_text(data.labels[:max_outputs,:])
    # tf.summary.text(sname('text/gt'), gt_text)
    tf.summary.image(sname('image'), data.images, max_outputs=max_outputs)

    if is_training:
      tf.summary.image(
          sname('image/orig'), data.images_orig, max_outputs=max_outputs)
      for var in tf.trainable_variables():
        tf.summary.histogram(var.op.name, var)
      return None

    else:
      names_to_values = {}
      names_to_updates = {}

      def use_metric(name, value_update_tuple):
        names_to_values[name] = value_update_tuple[0]
        names_to_updates[name] = value_update_tuple[1]

      use_metric('CharacterAccuracy',
                 metrics.char_accuracy(
                     endpoints.predicted_chars,
                     data.labels,
                     streaming=True,
                     rej_char=self._params.null_code))
      # Sequence accuracy computed by cutting the sequence at the first null
      # char.
      use_metric('SequenceAccuracy',
                 metrics.sequence_accuracy(
                     endpoints.predicted_chars,
                     data.labels,
                     streaming=True,
                     rej_char=self._params.null_code))

      for name, value in names_to_values.items():
        summary_name = 'eval/' + name
        tf.summary.scalar(summary_name,
                          tf.Print(value, [value], summary_name))
      return list(names_to_updates.values())

  def create_init_fn_to_restore(self, master_checkpoint,
                                inception_checkpoint=None):
    """Creates an init operation to restore weights from various checkpoints.

    Args:
      master_checkpoint: path to a checkpoint which contains all weights for
        the whole model.
      inception_checkpoint: path to a checkpoint which contains weights for
        the inception part only.

    Returns:
      a function to run initialization ops.
    """
    all_assign_ops = []
    all_feed_dict = {}

    def assign_from_checkpoint(variables, checkpoint):
      logging.info('Request to re-store %d weights from %s', len(variables),
                   checkpoint)
      if not variables:
        logging.error('Can\'t find any variables to restore.')
        sys.exit(1)
      assign_op, feed_dict = slim.assign_from_checkpoint(checkpoint, variables)
      all_assign_ops.append(assign_op)
      all_feed_dict.update(feed_dict)

    logging.info('variables_to_restore:\n%s' %
                 utils.variables_to_restore().keys())
    logging.info('moving_average_variables:\n%s' %
                 [v.op.name for v in tf.moving_average_variables()])
    logging.info('trainable_variables:\n%s' %
                 [v.op.name for v in tf.trainable_variables()])
    if master_checkpoint:
      assign_from_checkpoint(utils.variables_to_restore(), master_checkpoint)

    if inception_checkpoint:
      variables = utils.variables_to_restore(
          'AttentionOcr_v1/conv_tower_fn/INCE', strip_scope=True)
      assign_from_checkpoint(variables, inception_checkpoint)

    def init_assign_fn(sess):
      logging.info('Restoring checkpoint(s)')
      sess.run(all_assign_ops, all_feed_dict)

    return init_assign_fn
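A usage sketch for create_init_fn_to_restore (the checkpoint path is a placeholder, not a real file): the returned function is meant to run once at session creation, e.g. as the init_fn argument of slim.learning.train:

init_fn = ocr_model.create_init_fn_to_restore(
    master_checkpoint=None,
    inception_checkpoint='/path/to/inception_v3.ckpt')
# slim.learning.train(train_op, logdir, init_fn=init_fn, ...)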
research/attention_ocr/python/model_test.py deleted 100644 → 0
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for the model."""
import
numpy
as
np
import
string
import
tensorflow
as
tf
from
tensorflow.contrib
import
slim
import
model
import
data_provider
def
create_fake_charset
(
num_char_classes
):
charset
=
{}
for
i
in
range
(
num_char_classes
):
charset
[
i
]
=
string
.
printable
[
i
%
len
(
string
.
printable
)]
return
charset
class
ModelTest
(
tf
.
test
.
TestCase
):
def
setUp
(
self
):
tf
.
test
.
TestCase
.
setUp
(
self
)
self
.
rng
=
np
.
random
.
RandomState
([
11
,
23
,
50
])
self
.
batch_size
=
4
self
.
image_width
=
600
self
.
image_height
=
30
self
.
seq_length
=
40
self
.
num_char_classes
=
72
self
.
null_code
=
62
self
.
num_views
=
4
feature_size
=
288
self
.
conv_tower_shape
=
(
self
.
batch_size
,
1
,
72
,
feature_size
)
self
.
features_shape
=
(
self
.
batch_size
,
self
.
seq_length
,
feature_size
)
self
.
chars_logit_shape
=
(
self
.
batch_size
,
self
.
seq_length
,
self
.
num_char_classes
)
self
.
length_logit_shape
=
(
self
.
batch_size
,
self
.
seq_length
+
1
)
self
.
initialize_fakes
()
def
initialize_fakes
(
self
):
self
.
images_shape
=
(
self
.
batch_size
,
self
.
image_height
,
self
.
image_width
,
3
)
self
.
fake_images
=
tf
.
constant
(
self
.
rng
.
randint
(
low
=
0
,
high
=
255
,
size
=
self
.
images_shape
).
astype
(
'float32'
),
name
=
'input_node'
)
self
.
fake_conv_tower_np
=
self
.
rng
.
randn
(
*
self
.
conv_tower_shape
).
astype
(
'float32'
)
self
.
fake_conv_tower
=
tf
.
constant
(
self
.
fake_conv_tower_np
)
self
.
fake_logits
=
tf
.
constant
(
self
.
rng
.
randn
(
*
self
.
chars_logit_shape
).
astype
(
'float32'
))
self
.
fake_labels
=
tf
.
constant
(
self
.
rng
.
randint
(
low
=
0
,
high
=
self
.
num_char_classes
,
size
=
(
self
.
batch_size
,
self
.
seq_length
)).
astype
(
'int64'
))
def
create_model
(
self
,
charset
=
None
):
return
model
.
Model
(
self
.
num_char_classes
,
self
.
seq_length
,
num_views
=
4
,
null_code
=
62
,
charset
=
charset
)
def
test_char_related_shapes
(
self
):
ocr_model
=
self
.
create_model
()
with
self
.
test_session
()
as
sess
:
endpoints_tf
=
ocr_model
.
create_base
(
images
=
self
.
fake_images
,
labels_one_hot
=
None
)
sess
.
run
(
tf
.
global_variables_initializer
())
endpoints
=
sess
.
run
(
endpoints_tf
)
self
.
assertEqual
((
self
.
batch_size
,
self
.
seq_length
,
self
.
num_char_classes
),
endpoints
.
chars_logit
.
shape
)
self
.
assertEqual
((
self
.
batch_size
,
self
.
seq_length
,
self
.
num_char_classes
),
endpoints
.
chars_log_prob
.
shape
)
self
.
assertEqual
((
self
.
batch_size
,
self
.
seq_length
),
endpoints
.
predicted_chars
.
shape
)
self
.
assertEqual
((
self
.
batch_size
,
self
.
seq_length
),
endpoints
.
predicted_scores
.
shape
)
def
test_predicted_scores_are_within_range
(
self
):
ocr_model
=
self
.
create_model
()
_
,
_
,
scores
=
ocr_model
.
char_predictions
(
self
.
fake_logits
)
with
self
.
test_session
()
as
sess
:
scores_np
=
sess
.
run
(
scores
)
values_in_range
=
(
scores_np
>=
0.0
)
&
(
scores_np
<=
1.0
)
self
.
assertTrue
(
np
.
all
(
values_in_range
),
msg
=
(
'Scores contains out of the range values %s'
%
scores_np
[
np
.
logical_not
(
values_in_range
)]))
def
test_conv_tower_shape
(
self
):
with
self
.
test_session
()
as
sess
:
ocr_model
=
self
.
create_model
()
conv_tower
=
ocr_model
.
conv_tower_fn
(
self
.
fake_images
)
sess
.
run
(
tf
.
global_variables_initializer
())
conv_tower_np
=
sess
.
run
(
conv_tower
)
self
.
assertEqual
(
self
.
conv_tower_shape
,
conv_tower_np
.
shape
)
def
test_model_size_less_then1_gb
(
self
):
# NOTE: Actual amount of memory occupied my TF during training will be at
# least 4X times bigger because of space need to store original weights,
# updates, gradients and variances. It also depends on the type of used
# optimizer.
ocr_model
=
self
.
create_model
()
ocr_model
.
create_base
(
images
=
self
.
fake_images
,
labels_one_hot
=
None
)
with
self
.
test_session
()
as
sess
:
tfprof_root
=
tf
.
profiler
.
profile
(
sess
.
graph
,
options
=
tf
.
profiler
.
ProfileOptionBuilder
.
trainable_variables_parameter
())
model_size_bytes
=
4
*
tfprof_root
.
total_parameters
self
.
assertLess
(
model_size_bytes
,
1
*
2
**
30
)
def
test_create_summaries_is_runnable
(
self
):
ocr_model
=
self
.
create_model
()
data
=
data_provider
.
InputEndpoints
(
images
=
self
.
fake_images
,
images_orig
=
self
.
fake_images
,
labels
=
self
.
fake_labels
,
labels_one_hot
=
slim
.
one_hot_encoding
(
self
.
fake_labels
,
self
.
num_char_classes
))
endpoints
=
ocr_model
.
create_base
(
images
=
self
.
fake_images
,
labels_one_hot
=
None
)
charset
=
create_fake_charset
(
self
.
num_char_classes
)
summaries
=
ocr_model
.
create_summaries
(
data
,
endpoints
,
charset
,
is_training
=
False
)
with
self
.
test_session
()
as
sess
:
sess
.
run
(
tf
.
global_variables_initializer
())
sess
.
run
(
tf
.
local_variables_initializer
())
tf
.
tables_initializer
().
run
()
sess
.
run
(
summaries
)
# just check it is runnable
def
test_sequence_loss_function_without_label_smoothing
(
self
):
model
=
self
.
create_model
()
model
.
set_mparam
(
'sequence_loss_fn'
,
label_smoothing
=
0
)
loss
=
model
.
sequence_loss_fn
(
self
.
fake_logits
,
self
.
fake_labels
)
with
self
.
test_session
()
as
sess
:
loss_np
=
sess
.
run
(
loss
)
# This test checks that the loss function is 'runnable'.
self
.
assertEqual
(
loss_np
.
shape
,
tuple
())
def
encode_coordinates_alt
(
self
,
net
):
"""An alternative implemenation for the encoding coordinates.
Args:
net: a tensor of shape=[batch_size, height, width, num_features]
Returns:
a list of tensors with encoded image coordinates in them.
"""
batch_size
,
h
,
w
,
_
=
net
.
shape
.
as_list
()
h_loc
=
[
tf
.
tile
(
tf
.
reshape
(
tf
.
contrib
.
layers
.
one_hot_encoding
(
tf
.
constant
([
i
]),
num_classes
=
h
),
[
h
,
1
]),
[
1
,
w
])
for
i
in
range
(
h
)
]
h_loc
=
tf
.
concat
([
tf
.
expand_dims
(
t
,
2
)
for
t
in
h_loc
],
2
)
w_loc
=
[
tf
.
tile
(
tf
.
contrib
.
layers
.
one_hot_encoding
(
tf
.
constant
([
i
]),
num_classes
=
w
),
[
h
,
1
])
for
i
in
range
(
w
)
]
w_loc
=
tf
.
concat
([
tf
.
expand_dims
(
t
,
2
)
for
t
in
w_loc
],
2
)
loc
=
tf
.
concat
([
h_loc
,
w_loc
],
2
)
loc
=
tf
.
tile
(
tf
.
expand_dims
(
loc
,
0
),
[
batch_size
,
1
,
1
,
1
])
return
tf
.
concat
([
net
,
loc
],
3
)
def
test_encoded_coordinates_have_correct_shape
(
self
):
model
=
self
.
create_model
()
model
.
set_mparam
(
'encode_coordinates_fn'
,
enabled
=
True
)
conv_w_coords_tf
=
model
.
encode_coordinates_fn
(
self
.
fake_conv_tower
)
with
self
.
test_session
()
as
sess
:
conv_w_coords
=
sess
.
run
(
conv_w_coords_tf
)
batch_size
,
height
,
width
,
feature_size
=
self
.
conv_tower_shape
self
.
assertEqual
(
conv_w_coords
.
shape
,
(
batch_size
,
height
,
width
,
feature_size
+
height
+
width
))
def
test_disabled_coordinate_encoding_returns_features_unchanged
(
self
):
model
=
self
.
create_model
()
model
.
set_mparam
(
'encode_coordinates_fn'
,
enabled
=
False
)
conv_w_coords_tf
=
model
.
encode_coordinates_fn
(
self
.
fake_conv_tower
)
with
self
.
test_session
()
as
sess
:
conv_w_coords
=
sess
.
run
(
conv_w_coords_tf
)
self
.
assertAllEqual
(
conv_w_coords
,
self
.
fake_conv_tower_np
)
def
test_coordinate_encoding_is_correct_for_simple_example
(
self
):
shape
=
(
1
,
2
,
3
,
4
)
# batch_size, height, width, feature_size
fake_conv_tower
=
tf
.
constant
(
2
*
np
.
ones
(
shape
),
dtype
=
tf
.
float32
)
model
=
self
.
create_model
()
model
.
set_mparam
(
'encode_coordinates_fn'
,
enabled
=
True
)
conv_w_coords_tf
=
model
.
encode_coordinates_fn
(
fake_conv_tower
)
with
self
.
test_session
()
as
sess
:
conv_w_coords
=
sess
.
run
(
conv_w_coords_tf
)
# Original features
self
.
assertAllEqual
(
conv_w_coords
[
0
,
:,
:,
:
4
],
[[[
2
,
2
,
2
,
2
],
[
2
,
2
,
2
,
2
],
[
2
,
2
,
2
,
2
]],
[[
2
,
2
,
2
,
2
],
[
2
,
2
,
2
,
2
],
[
2
,
2
,
2
,
2
]]])
# Encoded coordinates
self
.
assertAllEqual
(
conv_w_coords
[
0
,
:,
:,
4
:],
[[[
1
,
0
,
1
,
0
,
0
],
[
1
,
0
,
0
,
1
,
0
],
[
1
,
0
,
0
,
0
,
1
]],
[[
0
,
1
,
1
,
0
,
0
],
[
0
,
1
,
0
,
1
,
0
],
[
0
,
1
,
0
,
0
,
1
]]])
def
test_alt_implementation_of_coordinate_encoding_returns_same_values
(
self
):
model
=
self
.
create_model
()
model
.
set_mparam
(
'encode_coordinates_fn'
,
enabled
=
True
)
conv_w_coords_tf
=
model
.
encode_coordinates_fn
(
self
.
fake_conv_tower
)
conv_w_coords_alt_tf
=
self
.
encode_coordinates_alt
(
self
.
fake_conv_tower
)
with
self
.
test_session
()
as
sess
:
conv_w_coords_tf
,
conv_w_coords_alt_tf
=
sess
.
run
(
[
conv_w_coords_tf
,
conv_w_coords_alt_tf
])
self
.
assertAllEqual
(
conv_w_coords_tf
,
conv_w_coords_alt_tf
)
def
test_predicted_text_has_correct_shape_w_charset
(
self
):
charset
=
create_fake_charset
(
self
.
num_char_classes
)
ocr_model
=
self
.
create_model
(
charset
=
charset
)
with
self
.
test_session
()
as
sess
:
endpoints_tf
=
ocr_model
.
create_base
(
images
=
self
.
fake_images
,
labels_one_hot
=
None
)
sess
.
run
(
tf
.
global_variables_initializer
())
tf
.
tables_initializer
().
run
()
endpoints
=
sess
.
run
(
endpoints_tf
)
self
.
assertEqual
(
endpoints
.
predicted_text
.
shape
,
(
self
.
batch_size
,))
self
.
assertEqual
(
len
(
endpoints
.
predicted_text
[
0
]),
self
.
seq_length
)
class
CharsetMapperTest
(
tf
.
test
.
TestCase
):
def
test_text_corresponds_to_ids
(
self
):
charset
=
create_fake_charset
(
36
)
ids
=
tf
.
constant
(
[[
17
,
14
,
21
,
21
,
24
],
[
32
,
24
,
27
,
21
,
13
]],
dtype
=
tf
.
int64
)
charset_mapper
=
model
.
CharsetMapper
(
charset
)
with
self
.
test_session
()
as
sess
:
tf
.
tables_initializer
().
run
()
text
=
sess
.
run
(
charset_mapper
.
get_text
(
ids
))
self
.
assertAllEqual
(
text
,
[
b
'hello'
,
b
'world'
])
if
__name__
==
'__main__'
:
tf
.
test
.
main
()
research/attention_ocr/python/sequence_layers.py deleted 100644 → 0
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Various implementations of sequence layers for character prediction.
A 'sequence layer' is a part of a computation graph which is responsible of
producing a sequence of characters using extracted image features. There are
many reasonable ways to implement such layers. All of them are using RNNs.
This module provides implementations which uses 'attention' mechanism to
spatially 'pool' image features and also can use a previously predicted
character to predict the next (aka auto regression).
Usage:
Select one of available classes, e.g. Attention or use a wrapper function to
pick one based on your requirements:
layer_class = sequence_layers.get_layer_class(use_attention=True,
use_autoregression=True)
layer = layer_class(net, labels_one_hot, model_params, method_params)
char_logits = layer.create_logits()
"""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
collections
import
abc
import
logging
import
numpy
as
np
import
tensorflow
as
tf
from
tensorflow.contrib
import
slim
def
orthogonal_initializer
(
shape
,
dtype
=
tf
.
float32
,
*
args
,
**
kwargs
):
"""Generates orthonormal matrices with random values.
Orthonormal initialization is important for RNNs:
http://arxiv.org/abs/1312.6120
http://smerity.com/articles/2016/orthogonal_init.html
For non-square shapes the returned matrix will be semi-orthonormal: if the
number of columns exceeds the number of rows, then the rows are orthonormal
vectors; but if the number of rows exceeds the number of columns, then the
columns are orthonormal vectors.
We use SVD decomposition to generate an orthonormal matrix with random
values. The same way as it is done in the Lasagne library for Theano. Note
that both u and v returned by the svd are orthogonal and random. We just need
to pick one with the right shape.
Args:
shape: a shape of the tensor matrix to initialize.
dtype: a dtype of the initialized tensor.
*args: not used.
**kwargs: not used.
Returns:
An initialized tensor.
"""
del
args
del
kwargs
flat_shape
=
(
shape
[
0
],
np
.
prod
(
shape
[
1
:]))
w
=
np
.
random
.
randn
(
*
flat_shape
)
u
,
_
,
v
=
np
.
linalg
.
svd
(
w
,
full_matrices
=
False
)
w
=
u
if
u
.
shape
==
flat_shape
else
v
return
tf
.
constant
(
w
.
reshape
(
shape
),
dtype
=
dtype
)
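A quick check of the property claimed in the docstring (my snippet, not from the original file): for a tall matrix the columns come out orthonormal.

import numpy as np
import tensorflow as tf

w = orthogonal_initializer((6, 3))
with tf.Session() as sess:
  w_np = sess.run(w)
print(np.allclose(w_np.T.dot(w_np), np.eye(3), atol=1e-5))  # True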
SequenceLayerParams = collections.namedtuple('SequenceLogitsParams', [
    'num_lstm_units', 'weight_decay', 'lstm_state_clip_value'
])


class SequenceLayerBase(object):
  """A base abstract class for all sequence layers.

  A child class has to define the following methods:
    get_train_input
    get_eval_input
    unroll_cell
  """
  __metaclass__ = abc.ABCMeta

  def __init__(self, net, labels_one_hot, model_params, method_params):
    """Stores arguments in member variables for further use.

    Args:
      net: A tensor with shape [batch_size, num_features, feature_size] which
        contains some extracted image features.
      labels_one_hot: An optional (can be None) ground truth labels for the
        input features. Is a tensor with shape
        [batch_size, seq_length, num_char_classes]
      model_params: A namedtuple with model parameters (model.ModelParams).
      method_params: A SequenceLayerParams instance.
    """
    self._params = model_params
    self._mparams = method_params
    self._net = net
    self._labels_one_hot = labels_one_hot
    self._batch_size = net.get_shape().dims[0].value

    # Initialize parameters for char logits which will be computed on the fly
    # inside an LSTM decoder.
    self._char_logits = {}
    regularizer = slim.l2_regularizer(self._mparams.weight_decay)
    self._softmax_w = slim.model_variable(
        'softmax_w',
        [self._mparams.num_lstm_units, self._params.num_char_classes],
        initializer=orthogonal_initializer,
        regularizer=regularizer)
    self._softmax_b = slim.model_variable(
        'softmax_b', [self._params.num_char_classes],
        initializer=tf.zeros_initializer(),
        regularizer=regularizer)

  @abc.abstractmethod
  def get_train_input(self, prev, i):
    """Returns a sample to be used to predict a character during training.

    This function is used as a loop_function for an RNN decoder.

    Args:
      prev: output tensor from the previous step of the RNN. A tensor with
        shape: [batch_size, num_char_classes].
      i: index of a character in the output sequence.

    Returns:
      A tensor with shape [batch_size, ?] - depth depends on implementation
      details.
    """
    pass

  @abc.abstractmethod
  def get_eval_input(self, prev, i):
    """Returns a sample to be used to predict a character during inference.

    This function is used as a loop_function for an RNN decoder.

    Args:
      prev: output tensor from the previous step of the RNN. A tensor with
        shape: [batch_size, num_char_classes].
      i: index of a character in the output sequence.

    Returns:
      A tensor with shape [batch_size, ?] - depth depends on implementation
      details.
    """
    raise AssertionError('Not implemented')

  @abc.abstractmethod
  def unroll_cell(self, decoder_inputs, initial_state, loop_function, cell):
    """Unrolls an RNN cell for all inputs.

    This is a placeholder to call some RNN decoder. It has an interface
    similar to tf.seq2seq.rnn_decoder.

    Args:
      decoder_inputs: A list of 2D Tensors [batch_size x input_size]. In
        fact, most existing decoders, in presence of a loop_function, use
        only the first element to determine batch_size and the length of the
        list to determine the number of steps.
      initial_state: 2D Tensor with shape [batch_size x cell.state_size].
      loop_function: function that will be applied to the i-th output in
        order to generate the (i+1)-st input (see self.get_input).
      cell: rnn_cell.RNNCell defining the cell function and size.

    Returns:
      A tuple of the form (outputs, state), where:
        outputs: A list of character logits of the same length as
          decoder_inputs of 2D Tensors with shape
          [batch_size x num_characters].
        state: The state of each cell at the final time-step.
          It is a 2D Tensor of shape [batch_size x cell.state_size].
    """
    pass

  def is_training(self):
    """Returns True if the layer is created for the training stage."""
    return self._labels_one_hot is not None

  def char_logit(self, inputs, char_index):
    """Creates logits for a character if required.

    Args:
      inputs: A tensor with shape [batch_size, ?] (depth is implementation
        dependent).
      char_index: An integer index of a character in the output sequence.

    Returns:
      A tensor with shape [batch_size, num_char_classes]
    """
    if char_index not in self._char_logits:
      self._char_logits[char_index] = tf.nn.xw_plus_b(
          inputs, self._softmax_w, self._softmax_b)
    return self._char_logits[char_index]

  def char_one_hot(self, logit):
    """Creates a one-hot encoding for a logit of a character.

    Args:
      logit: A tensor with shape [batch_size, num_char_classes].

    Returns:
      A tensor with shape [batch_size, num_char_classes]
    """
    prediction = tf.argmax(logit, axis=1)
    return slim.one_hot_encoding(prediction, self._params.num_char_classes)

  def get_input(self, prev, i):
    """A wrapper for get_train_input and get_eval_input.

    Args:
      prev: output tensor from the previous step of the RNN. A tensor with
        shape: [batch_size, num_char_classes].
      i: index of a character in the output sequence.

    Returns:
      A tensor with shape [batch_size, ?] - depth depends on implementation
      details.
    """
    if self.is_training():
      return self.get_train_input(prev, i)
    else:
      return self.get_eval_input(prev, i)

  def create_logits(self):
    """Creates character sequence logits for a net specified in the constructor.

    A "main" method for the sequence layer which glues together all pieces.

    Returns:
      A tensor with shape [batch_size, seq_length, num_char_classes].
    """
    with tf.variable_scope('LSTM'):
      first_label = self.get_input(prev=None, i=0)
      decoder_inputs = [first_label] + [None] * (self._params.seq_length - 1)
      lstm_cell = tf.contrib.rnn.LSTMCell(
          self._mparams.num_lstm_units,
          use_peepholes=False,
          cell_clip=self._mparams.lstm_state_clip_value,
          state_is_tuple=True,
          initializer=orthogonal_initializer)
      lstm_outputs, _ = self.unroll_cell(
          decoder_inputs=decoder_inputs,
          initial_state=lstm_cell.zero_state(self._batch_size, tf.float32),
          loop_function=self.get_input,
          cell=lstm_cell)

    with tf.variable_scope('logits'):
      logits_list = [
          tf.expand_dims(self.char_logit(logit, i), dim=1)
          for i, logit in enumerate(lstm_outputs)
      ]

    return tf.concat(logits_list, 1)


class NetSlice(SequenceLayerBase):
  """A layer which uses a subset of image features to predict each character.
  """

  def __init__(self, *args, **kwargs):
    super(NetSlice, self).__init__(*args, **kwargs)
    self._zero_label = tf.zeros(
        [self._batch_size, self._params.num_char_classes])

  def get_image_feature(self, char_index):
    """Returns a subset of image features for a character.

    Args:
      char_index: an index of a character.

    Returns:
      A tensor with shape [batch_size, ?]. The output depth depends on the
      depth of the input net.
    """
    batch_size, features_num, _ = [d.value for d in self._net.get_shape()]
    slice_len = int(features_num / self._params.seq_length)
    # In case features_num != seq_length, we just pick a subset of image
    # features; this choice is arbitrary and there is no intuitive
    # geometrical interpretation. If features_num is not divisible by
    # seq_length there will be unused image features.
    net_slice = self._net[:, char_index:char_index + slice_len, :]
    feature = tf.reshape(net_slice, [batch_size, -1])
    logging.debug('Image feature: %s', feature)
    return feature

  def get_eval_input(self, prev, i):
    """See SequenceLayerBase.get_eval_input for details."""
    del prev
    return self.get_image_feature(i)

  def get_train_input(self, prev, i):
    """See SequenceLayerBase.get_train_input for details."""
    return self.get_eval_input(prev, i)

  def unroll_cell(self, decoder_inputs, initial_state, loop_function, cell):
    """See SequenceLayerBase.unroll_cell for details."""
    return tf.contrib.legacy_seq2seq.rnn_decoder(
        decoder_inputs=decoder_inputs,
        initial_state=initial_state,
        cell=cell,
        loop_function=self.get_input)


class NetSliceWithAutoregression(NetSlice):
  """A layer similar to NetSlice, but it also uses auto regression.

  The "auto regression" means that we use the network output for the previous
  character as a part of the input for the current character.
  """

  def __init__(self, *args, **kwargs):
    super(NetSliceWithAutoregression, self).__init__(*args, **kwargs)

  def get_eval_input(self, prev, i):
    """See SequenceLayerBase.get_eval_input for details."""
    if i == 0:
      prev = self._zero_label
    else:
      logit = self.char_logit(prev, char_index=i - 1)
      prev = self.char_one_hot(logit)
    image_feature = self.get_image_feature(char_index=i)
    return tf.concat([image_feature, prev], 1)

  def get_train_input(self, prev, i):
    """See SequenceLayerBase.get_train_input for details."""
    if i == 0:
      prev = self._zero_label
    else:
      prev = self._labels_one_hot[:, i - 1, :]
    image_feature = self.get_image_feature(i)
    return tf.concat([image_feature, prev], 1)


class Attention(SequenceLayerBase):
  """A layer which uses an attention mechanism to select image features."""

  def __init__(self, *args, **kwargs):
    super(Attention, self).__init__(*args, **kwargs)
    self._zero_label = tf.zeros(
        [self._batch_size, self._params.num_char_classes])

  def get_eval_input(self, prev, i):
    """See SequenceLayerBase.get_eval_input for details."""
    del prev, i
    # The attention_decoder will fetch image features from the net, no need
    # for extra inputs.
    return self._zero_label

  def get_train_input(self, prev, i):
    """See SequenceLayerBase.get_train_input for details."""
    return self.get_eval_input(prev, i)

  def unroll_cell(self, decoder_inputs, initial_state, loop_function, cell):
    return tf.contrib.legacy_seq2seq.attention_decoder(
        decoder_inputs=decoder_inputs,
        initial_state=initial_state,
        attention_states=self._net,
        cell=cell,
        loop_function=self.get_input)


class AttentionWithAutoregression(Attention):
  """A layer which uses both attention and auto regression."""

  def __init__(self, *args, **kwargs):
    super(AttentionWithAutoregression, self).__init__(*args, **kwargs)

  def get_train_input(self, prev, i):
    """See SequenceLayerBase.get_train_input for details."""
    if i == 0:
      return self._zero_label
    else:
      # TODO(gorban): update to gradually introduce gt labels.
      return self._labels_one_hot[:, i - 1, :]

  def get_eval_input(self, prev, i):
    """See SequenceLayerBase.get_eval_input for details."""
    if i == 0:
      return self._zero_label
    else:
      logit = self.char_logit(prev, char_index=i - 1)
      return self.char_one_hot(logit)


def get_layer_class(use_attention, use_autoregression):
  """A convenience function to get a layer class based on requirements.

  Args:
    use_attention: if True the returned class will use attention.
    use_autoregression: if True the returned class will use auto regression.

  Returns:
    One of the available sequence layers (child classes of
    SequenceLayerBase).
  """
  if use_attention and use_autoregression:
    layer_class = AttentionWithAutoregression
  elif use_attention and not use_autoregression:
    layer_class = Attention
  elif not use_attention and not use_autoregression:
    layer_class = NetSlice
  elif not use_attention and use_autoregression:
    layer_class = NetSliceWithAutoregression
  else:
    raise AssertionError('Unsupported sequence layer class')

  logging.debug('Use %s as a layer class', layer_class.__name__)
  return layer_class
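For completeness, the dispatch above pairs with the usage example from the module docstring; a minimal sketch:

layer_class = get_layer_class(use_attention=True, use_autoregression=True)
assert layer_class is AttentionWithAutoregression
# layer = layer_class(net, labels_one_hot, model_params, method_params)
# char_logits = layer.create_logits()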