Unverified commit 58856e2b authored by Menglong Zhu, committed by GitHub

Merged commit includes the following changes: (#6726)

246873701  by menglong:

    Missing __init__.py under meta_architectures/

--
246857392  by menglong:

    Standardize proto namespace: lstm_object_detection.protos

--
246625127  by menglong:

    Internal changes.

--
246596481  by menglong:

    Add License

--
246580605  by menglong:

    Internal changes

--
246344626  by menglong:

    Open source interleaved mobilenet v2 model.

--
244893883  by menglong:

    Introduce multi_input_decoder for interleaved model.

--
244461016  by menglong:

    Add pre-bottleneck operation to lstm cells to support interleaved model.

--
244052176  by menglong:

    Update README

--
244020495  by menglong:

    Add test to rnn_decoder.

--
243704250  by menglong:

    Duplicate assignment.

--
243091836  by menglong:

    Move LSTMSSD meta arch into separate folder

--
242900337  by menglong:

    Modified mobilenet definition for LSTM-SSD

--
242773195  by menglong:

    Release GroupedConvLSTMCell implementation: https://arxiv.org/abs/1903.10172

--
242574736  by menglong:

    Introduce module for quantized training.

--
242544306  by menglong:

    lstm_ssd_meta_arch updates, added test
    rename:
    - LSTMMetaArch to LSTMSSDMetaArch
    - LSTMFeatureExtractor to LSTMSSDFeatureExtractor

--
241986236  by menglong:

    Move lstm quantization utils to 3rd party.

--
225922488  by yinxiao:

    Training pipeline fixes.

--
224839137  by yinxiao:

    Issue fix for lstm object detection sample config.

--
224246947  by menglong:

    Fix logging module import

--

PiperOrigin-RevId: 246873701
parent f5073f49
Tensorflow mobile video object detection implementation proposed in the following paper:
Mobile Video Object Detection with Temporally-Aware Feature Maps (CVPR 2018).
http://openaccess.thecvf.com/content_cvpr_2018/papers/Liu_Mobile_Video_Object_CVPR_2018_paper.pdf
@article{liu2017mobile,
title={Mobile Video Object Detection with Temporally-Aware Feature Maps},
author={Liu, Mason and Zhu, Menglong},
journal={CVPR},
year={2018}
}
If you have any questions regarding this codebase, please contact us:
masonliuw@gmail.com
yinxiao@google.com
menglong@google.com
# Tensorflow Mobile Video Object Detection
TensorFlow implementation of mobile video object detection, as proposed in the
following papers:
<p align="center">
<img src="g3doc/lstm_ssd_intro.png" width=640 height=360>
</p>
```
"Mobile Video Object Detection with Temporally-Aware Feature Maps",
Liu, Mason and Zhu, Menglong, CVPR 2018.
```
\[[link](http://openaccess.thecvf.com/content_cvpr_2018/papers/Liu_Mobile_Video_Object_CVPR_2018_paper.pdf)\]\[[bibtex](
https://scholar.googleusercontent.com/scholar.bib?q=info:hq5rcMUUXysJ:scholar.google.com/&output=citation&scisig=AAGBfm0AAAAAXLdwXcU5g_wiMQ40EvbHQ9kTyvfUxffh&scisf=4&ct=citation&cd=-1&hl=en)\]
<p align="center">
<img src="g3doc/Interleaved_Intro.png" width=480 height=360>
</p>
```
"Looking Fast and Slow: Memory-Guided Mobile Video Object Detection",
Liu, Mason and Zhu, Menglong and White, Marie and Li, Yinxiao and Kalenichenko, Dmitry
```
\[[link](https://arxiv.org/abs/1903.10172)\]\[[bibtex](
https://scholar.googleusercontent.com/scholar.bib?q=info:rLqvkztmWYgJ:scholar.google.com/&output=citation&scisig=AAGBfm0AAAAAXLdwNf-LJlm2M1ymQHbq2wYA995MHpJu&scisf=4&ct=citation&cd=-1&hl=en)\]
## Maintainers
* masonliuw@gmail.com
* yinxiao@google.com
* menglong@google.com
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Custom version for quantized training and evaluation functions.
The main difference between this and the third_party graph_rewriter_builder.py
is that this version uses experimental_create_training_graph which allows the
customization of freeze_bn_delay.
"""
import re
import tensorflow as tf
from tensorflow.contrib.quantize.python import common
from tensorflow.contrib.quantize.python import input_to_ops
from tensorflow.contrib.quantize.python import quant_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import math_ops
def build(graph_rewriter_config,
quant_overrides_config=None,
is_training=True,
is_export=False):
"""Returns a function that modifies default graph based on options.
Args:
graph_rewriter_config: graph_rewriter_pb2.GraphRewriter proto.
quant_overrides_config: quant_overrides_pb2.QuantOverrides proto.
is_training: whether in training or eval mode.
is_export: whether exporting the graph.
"""
def graph_rewrite_fn():
"""Function to quantize weights and activation of the default graph."""
if (graph_rewriter_config.quantization.weight_bits != 8 or
graph_rewriter_config.quantization.activation_bits != 8):
raise ValueError('Only 8bit quantization is supported')
graph = tf.get_default_graph()
# Insert custom quant ops.
if quant_overrides_config is not None:
input_to_ops_map = input_to_ops.InputToOps(graph)
for q in quant_overrides_config.quant_configs:
producer = graph.get_operation_by_name(q.op_name)
if producer is None:
raise ValueError('Op name does not exist in graph.')
context = _get_context_from_op(producer)
consumers = input_to_ops_map.ConsumerOperations(producer)
if q.fixed_range:
_insert_fixed_quant_op(
context,
q.quant_op_name,
producer,
consumers,
init_min=q.min,
init_max=q.max,
quant_delay=q.delay if is_training else 0)
else:
raise ValueError('Learned ranges are not yet supported.')
# Quantize the graph by inserting quantize ops for weights and activations
if is_training:
tf.contrib.quantize.experimental_create_training_graph(
input_graph=graph,
quant_delay=graph_rewriter_config.quantization.delay,
freeze_bn_delay=graph_rewriter_config.quantization.delay)
else:
tf.contrib.quantize.experimental_create_eval_graph(
input_graph=graph,
quant_delay=graph_rewriter_config.quantization.delay
if not is_export else 0)
tf.contrib.layers.summarize_collection('quant_vars')
return graph_rewrite_fn
def _get_context_from_op(op):
"""Gets the root context name from the op name."""
context_re = re.search(r'^(.*)/([^/]+)', op.name)
if context_re:
return context_re.group(1)
return ''
def _insert_fixed_quant_op(context,
name,
producer,
consumers,
init_min=-6.0,
init_max=6.0,
quant_delay=None):
"""Adds a fake quant op with fixed ranges.
Args:
context: The parent scope of the op to be quantized.
name: The name of the fake quant op.
producer: The producer op to be quantized.
consumers: The consumer ops to the producer op.
init_min: The minimum range for the fake quant op.
init_max: The maximum range for the fake quant op.
quant_delay: Number of steps to wait before activating the fake quant op.
Raises:
ValueError: When producer operation is not directly connected to the
consumer operation.
"""
name_prefix = name if not context else context + '/' + name
inputs = producer.outputs[0]
quant = quant_ops.FixedQuantize(
inputs, init_min=init_min, init_max=init_max, scope=name_prefix)
if quant_delay and quant_delay > 0:
activate_quant = math_ops.greater_equal(
common.CreateOrGetQuantizationStep(),
quant_delay,
name=name_prefix + '/activate_quant')
quant = control_flow_ops.cond(
activate_quant,
lambda: quant,
lambda: inputs,
name=name_prefix + '/delayed_quant')
if consumers:
tensors_modified_count = common.RerouteTensor(
quant, inputs, can_modify=consumers)
# Some operations can have multiple output tensors going to the same
# consumer. Since consumers is a set, we need to ensure that
# tensors_modified_count is greater than or equal to the length of the set
# of consumers.
if tensors_modified_count < len(consumers):
raise ValueError('No inputs quantized for ops: [%s]' % ', '.join(
[consumer.name for consumer in consumers]))
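
For context, a minimal usage sketch of the builder above. The proto fields mirror those exercised in the test that follows; the delay value is an arbitrary placeholder.

```python
from lstm_object_detection.builders import graph_rewriter_builder
from object_detection.protos import graph_rewriter_pb2

# Configure 8-bit quantization (the only bit width supported above).
graph_rewriter_proto = graph_rewriter_pb2.GraphRewriter()
graph_rewriter_proto.quantization.delay = 500000  # placeholder step count
graph_rewriter_proto.quantization.weight_bits = 8
graph_rewriter_proto.quantization.activation_bits = 8

# build() returns a closure; calling it rewrites the current default graph
# with fake-quant ops via experimental_create_training_graph.
graph_rewrite_fn = graph_rewriter_builder.build(
    graph_rewriter_proto, is_training=True)
graph_rewrite_fn()
```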
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for graph_rewriter_builder."""
import mock
import tensorflow as tf
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from lstm_object_detection.builders import graph_rewriter_builder
from lstm_object_detection.protos import quant_overrides_pb2
from object_detection.protos import graph_rewriter_pb2
class QuantizationBuilderTest(tf.test.TestCase):
def testQuantizationBuilderSetsUpCorrectTrainArguments(self):
with mock.patch.object(
tf.contrib.quantize,
'experimental_create_training_graph') as mock_quant_fn:
with mock.patch.object(tf.contrib.layers,
'summarize_collection') as mock_summarize_col:
graph_rewriter_proto = graph_rewriter_pb2.GraphRewriter()
graph_rewriter_proto.quantization.delay = 10
graph_rewriter_proto.quantization.weight_bits = 8
graph_rewriter_proto.quantization.activation_bits = 8
graph_rewrite_fn = graph_rewriter_builder.build(
graph_rewriter_proto, is_training=True)
graph_rewrite_fn()
_, kwargs = mock_quant_fn.call_args
self.assertEqual(kwargs['input_graph'], tf.get_default_graph())
self.assertEqual(kwargs['quant_delay'], 10)
mock_summarize_col.assert_called_with('quant_vars')
def testQuantizationBuilderSetsUpCorrectEvalArguments(self):
with mock.patch.object(tf.contrib.quantize,
'experimental_create_eval_graph') as mock_quant_fn:
with mock.patch.object(tf.contrib.layers,
'summarize_collection') as mock_summarize_col:
graph_rewriter_proto = graph_rewriter_pb2.GraphRewriter()
graph_rewriter_proto.quantization.delay = 10
graph_rewrite_fn = graph_rewriter_builder.build(
graph_rewriter_proto, is_training=False)
graph_rewrite_fn()
_, kwargs = mock_quant_fn.call_args
self.assertEqual(kwargs['input_graph'], tf.get_default_graph())
mock_summarize_col.assert_called_with('quant_vars')
def testQuantizationBuilderAddsQuantOverride(self):
graph = ops.Graph()
with graph.as_default():
self._buildGraph()
quant_overrides_proto = quant_overrides_pb2.QuantOverrides()
quant_config = quant_overrides_proto.quant_configs.add()
quant_config.op_name = 'test_graph/add_ab'
quant_config.quant_op_name = 'act_quant'
quant_config.fixed_range = True
quant_config.min = 0
quant_config.max = 6
quant_config.delay = 100
graph_rewriter_proto = graph_rewriter_pb2.GraphRewriter()
graph_rewriter_proto.quantization.delay = 10
graph_rewriter_proto.quantization.weight_bits = 8
graph_rewriter_proto.quantization.activation_bits = 8
graph_rewrite_fn = graph_rewriter_builder.build(
graph_rewriter_proto,
quant_overrides_config=quant_overrides_proto,
is_training=True)
graph_rewrite_fn()
act_quant_found = False
quant_delay_found = False
for op in graph.get_operations():
if (quant_config.quant_op_name in op.name and
op.type == 'FakeQuantWithMinMaxArgs'):
act_quant_found = True
min_val = op.get_attr('min')
max_val = op.get_attr('max')
self.assertEqual(min_val, quant_config.min)
self.assertEqual(max_val, quant_config.max)
if ('activate_quant' in op.name and
quant_config.quant_op_name in op.name and op.type == 'Const'):
tensor = op.get_attr('value')
if tensor.int64_val[0] == quant_config.delay:
quant_delay_found = True
self.assertTrue(act_quant_found)
self.assertTrue(quant_delay_found)
def _buildGraph(self, scope='test_graph'):
with ops.name_scope(scope):
a = tf.constant(10, dtype=dtypes.float32, name='input_a')
b = tf.constant(20, dtype=dtypes.float32, name='input_b')
ab = tf.add(a, b, name='add_ab')
c = tf.constant(30, dtype=dtypes.float32, name='input_c')
abc = tf.multiply(ab, c, name='mul_ab_c')
return abc
if __name__ == '__main__':
tf.test.main()
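
The override test above builds its QuantOverrides proto field by field; the same configuration can also be expressed in protobuf text format, sketched below using the op name from the test's helper graph. Only fixed_range overrides are supported by the builder.

```python
from google.protobuf import text_format
from lstm_object_detection.protos import quant_overrides_pb2

quant_overrides_proto = text_format.Parse(
    """
    quant_configs {
      op_name: "test_graph/add_ab"   # op whose output gets a fixed-range fake quant
      quant_op_name: "act_quant"     # name of the inserted quant op
      fixed_range: true
      min: 0
      max: 6
      delay: 100                     # steps before the fake quant activates
    }
    """, quant_overrides_pb2.QuantOverrides())
```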
@@ -22,7 +22,7 @@
 model {
   ssd {
-    num_classes: 30
+    num_classes: 30 # Num of class for imagenet vid dataset.
     box_coder {
       faster_rcnn_box_coder {
         y_scale: 10.0
@@ -197,9 +197,9 @@ train_input_reader: {
   min_after_dequeue: 4
   label_map_path: "path/to/label_map"
   external_input_reader {
-    [lstm_object_detection.input_readers.GoogleInputReader.google_input_reader] {
+    [lstm_object_detection.protos.GoogleInputReader.google_input_reader] {
       tf_record_video_input_reader: {
-        input_path: "your/cns/path"
+        input_path: "path/to/sequence_example/data"
         data_type: TF_SEQUENCE_EXAMPLE
         video_length: 4
       }
@@ -208,7 +208,7 @@ train_input_reader: {
 }
 eval_config: {
-  metrics_set: "coco_evaluation_last_frame"
+  metrics_set: "coco_evaluation_all_frames"
   use_moving_averages: true
   min_score_threshold: 0.5
   max_num_boxes_to_visualize: 300
@@ -219,9 +219,9 @@ eval_config: {
 eval_input_reader: {
   label_map_path: "path/to/label_map"
   external_input_reader {
-    [lstm_object_detection.input_readers.GoogleInputReader.google_input_reader] {
+    [lstm_object_detection.protos.GoogleInputReader.google_input_reader] {
       tf_record_video_input_reader: {
-        input_path: "your/cns/path"
+        input_path: "path/to/sequence_example/data"
         data_type: TF_SEQUENCE_EXAMPLE
         video_length: 4
       }
...
@@ -20,7 +20,6 @@ DetectionModel.
 """
-import logging
 import tensorflow as tf
 from lstm_object_detection.metrics import coco_evaluation_all_frames
 from object_detection import eval_util
@@ -215,7 +214,7 @@ def evaluate(create_input_dict_fn,
   model = create_model_fn()
   if eval_config.ignore_groundtruth and not eval_config.export_path:
-    logging.fatal('If ignore_groundtruth=True then an export_path is '
-                  'required. Aborting!!!')
+    tf.logging.fatal('If ignore_groundtruth=True then an export_path is '
+                     'required. Aborting!!!')
   tensor_dicts = _extract_prediction_tensors(
@@ -252,14 +251,14 @@ def evaluate(create_input_dict_fn,
     third_party eval_util.py.
     """
     if batch_index % 10 == 0:
-      logging.info('Running eval ops batch %d', batch_index)
+      tf.logging.info('Running eval ops batch %d', batch_index)
     if not losses_dict:
       losses_dict = {}
     try:
       result_dicts, result_losses_dict = sess.run([tensor_dicts, losses_dict])
       counters['success'] += 1
     except tf.errors.InvalidArgumentError:
-      logging.info('Skipping image')
+      tf.logging.info('Skipping image')
       counters['skipped'] += 1
       return {}
     num_images = len(tensor_dicts)
...
@@ -23,7 +23,6 @@ Detection configuration framework, they should define their own builder function
 that wraps the build function.
 """
 import tensorflow as tf
-import tensorflow.google as google_tf
 from tensorflow.contrib.training.python.training import sequence_queueing_state_saver as sqss
 from lstm_object_detection.inputs import tf_sequence_example_decoder
 from lstm_object_detection.protos import input_reader_google_pb2
@@ -116,12 +115,12 @@ def build(input_reader_config,
                      'input_reader_pb2.InputReader.')
   external_reader_config = input_reader_config.external_input_reader
-  google_input_reader_config = external_reader_config.Extensions[
+  external_input_reader_config = external_reader_config.Extensions[
       input_reader_google_pb2.GoogleInputReader.google_input_reader]
-  input_reader_type = google_input_reader_config.WhichOneof('input_reader')
+  input_reader_type = external_input_reader_config.WhichOneof('input_reader')
   if input_reader_type == 'tf_record_video_input_reader':
-    config = google_input_reader_config.tf_record_video_input_reader
+    config = external_input_reader_config.tf_record_video_input_reader
     reader_type_class = tf.TFRecordReader
   else:
     raise ValueError(
...
@@ -20,7 +20,6 @@ import numpy as np
 import tensorflow as tf
 from google.protobuf import text_format
-from google3.testing.pybase import parameterized
 from tensorflow.core.example import example_pb2
 from tensorflow.core.example import feature_pb2
 from lstm_object_detection.inputs import seq_dataset_builder
@@ -32,7 +31,7 @@ from object_detection.protos import pipeline_pb2
 from object_detection.protos import preprocessor_pb2
-class DatasetBuilderTest(parameterized.TestCase):
+class DatasetBuilderTest(tf.test.TestCase):
   def _create_tf_record(self):
     path = os.path.join(self.get_temp_dir(), 'tfrecord')
@@ -104,7 +103,7 @@ class DatasetBuilderTest(parameterized.TestCase):
     """
     model_text_proto = """
-    [object_detection.protos.lstm_model] {
+    [lstm_object_detection.protos.lstm_model] {
       train_unroll_length: 4
       eval_unroll_length: 4
     }
@@ -211,7 +210,7 @@ class DatasetBuilderTest(parameterized.TestCase):
   def _get_input_proto(self, input_reader):
     return """
       external_input_reader {
-        [lstm_object_detection.input_readers.GoogleInputReader.google_input_reader] {
+        [lstm_object_detection.protos.GoogleInputReader.google_input_reader] {
          %s: {
            input_path: '{0}'
            data_type: TF_SEQUENCE_EXAMPLE
@@ -221,11 +220,11 @@ class DatasetBuilderTest(tf.test.TestCase):
        }
    """ % input_reader
-  @parameterized.named_parameters(('tf_record', 'tf_record_video_input_reader'))
-  def test_video_input_reader(self, video_input_type):
+  def test_video_input_reader(self):
     input_reader_proto = input_reader_pb2.InputReader()
     text_format.Merge(
-        self._get_input_proto(video_input_type), input_reader_proto)
+        self._get_input_proto('tf_record_video_input_reader'),
+        input_reader_proto)
     configs = self._get_model_configs_from_proto()
     tensor_dict = seq_dataset_builder.build(
...
@@ -17,8 +17,6 @@
 A decoder to decode string tensors containing serialized
 tensorflow.SequenceExample protos.
-TODO(yinxiao): When TensorFlow object detection API officially supports
-tensorflow.SequenceExample, merge this decoder.
 """
 import tensorflow as tf
 from object_detection.core import data_decoder
...
@@ -62,7 +62,7 @@ class BottleneckConvLstmCellsTest(tf.test.TestCase):
         filter_size=filter_size,
         output_size=output_size,
         num_units=num_units,
-        flattened_state=True)
+        flatten_state=True)
     init_state = cell.init_state(
         state_name, batch_size, dtype, learned_state)
     output, state_tuple = cell(inputs, init_state)
@@ -138,6 +138,275 @@ class BottleneckConvLstmCellsTest(tf.test.TestCase):
     self.assertAllEqual([4, 10, 10, 15], init_c.shape.as_list())
     self.assertAllEqual([4, 10, 10, 15], init_h.shape.as_list())
def test_unroll(self):
filter_size = [3, 3]
output_size = [10, 10]
num_units = 15
state_name = 'lstm_state'
batch_size = 4
dtype = tf.float32
unroll = 10
learned_state = False
inputs = tf.zeros([4, 10, 10, 3], dtype=tf.float32)
cell = lstm_cells.BottleneckConvLSTMCell(
filter_size=filter_size,
output_size=output_size,
num_units=num_units)
state = cell.init_state(
state_name, batch_size, dtype, learned_state)
for step in range(unroll):
output, state = cell(inputs, state)
self.assertAllEqual([4, 10, 10, 15], output.shape.as_list())
self.assertAllEqual([4, 10, 10, 15], state[0].shape.as_list())
self.assertAllEqual([4, 10, 10, 15], state[1].shape.as_list())
def test_prebottleneck(self):
filter_size = [3, 3]
output_size = [10, 10]
num_units = 15
state_name = 'lstm_state'
batch_size = 4
dtype = tf.float32
unroll = 10
learned_state = False
inputs_large = tf.zeros([4, 10, 10, 5], dtype=tf.float32)
inputs_small = tf.zeros([4, 10, 10, 3], dtype=tf.float32)
cell = lstm_cells.BottleneckConvLSTMCell(
filter_size=filter_size,
output_size=output_size,
num_units=num_units,
pre_bottleneck=True)
state = cell.init_state(
state_name, batch_size, dtype, learned_state)
for step in range(unroll):
if step % 2 == 0:
inputs = cell.pre_bottleneck(inputs_large, state[1], 0)
else:
inputs = cell.pre_bottleneck(inputs_small, state[1], 1)
output, state = cell(inputs, state)
self.assertAllEqual([4, 10, 10, 15], output.shape.as_list())
self.assertAllEqual([4, 10, 10, 15], state[0].shape.as_list())
self.assertAllEqual([4, 10, 10, 15], state[1].shape.as_list())
def test_flatten_state(self):
filter_size = [3, 3]
output_size = [10, 10]
num_units = 15
state_name = 'lstm_state'
batch_size = 4
dtype = tf.float32
unroll = 10
learned_state = False
inputs_large = tf.zeros([4, 10, 10, 5], dtype=tf.float32)
inputs_small = tf.zeros([4, 10, 10, 3], dtype=tf.float32)
cell = lstm_cells.BottleneckConvLSTMCell(
filter_size=filter_size,
output_size=output_size,
num_units=num_units,
pre_bottleneck=True,
flatten_state=True)
state = cell.init_state(
state_name, batch_size, dtype, learned_state)
for step in range(unroll):
if step % 2 == 0:
inputs = cell.pre_bottleneck(inputs_large, state[1], 0)
else:
inputs = cell.pre_bottleneck(inputs_small, state[1], 1)
output, state = cell(inputs, state)
with self.test_session() as sess:
sess.run(tf.global_variables_initializer())
output_result, state_result = sess.run([output, state])
self.assertAllEqual((4, 10, 10, 15), output_result.shape)
self.assertAllEqual((4, 10*10*15), state_result[0].shape)
self.assertAllEqual((4, 10*10*15), state_result[1].shape)
class GroupedConvLstmCellsTest(tf.test.TestCase):
def test_run_lstm_cell(self):
filter_size = [3, 3]
output_size = [10, 10]
num_units = 16
state_name = 'lstm_state'
batch_size = 4
dtype = tf.float32
learned_state = False
inputs = tf.zeros([4, 10, 10, 3], dtype=tf.float32)
cell = lstm_cells.GroupedConvLSTMCell(
filter_size=filter_size,
output_size=output_size,
num_units=num_units,
is_training=True)
init_state = cell.init_state(
state_name, batch_size, dtype, learned_state)
output, state_tuple = cell(inputs, init_state)
self.assertAllEqual([4, 10, 10, 16], output.shape.as_list())
self.assertAllEqual([4, 10, 10, 16], state_tuple[0].shape.as_list())
self.assertAllEqual([4, 10, 10, 16], state_tuple[1].shape.as_list())
def test_run_lstm_cell_with_output_bottleneck(self):
filter_size = [3, 3]
output_dim = 10
output_size = [output_dim] * 2
num_units = 16
state_name = 'lstm_state'
batch_size = 4
dtype = tf.float32
learned_state = False
inputs = tf.zeros([batch_size, output_dim, output_dim, 3], dtype=tf.float32)
cell = lstm_cells.GroupedConvLSTMCell(
filter_size=filter_size,
output_size=output_size,
num_units=num_units,
is_training=True,
output_bottleneck=True)
init_state = cell.init_state(
state_name, batch_size, dtype, learned_state)
output, state_tuple = cell(inputs, init_state)
self.assertAllEqual([4, 10, 10, 32], output.shape.as_list())
self.assertAllEqual([4, 10, 10, 16], state_tuple[0].shape.as_list())
self.assertAllEqual([4, 10, 10, 16], state_tuple[1].shape.as_list())
def test_get_init_state(self):
filter_size = [3, 3]
output_dim = 10
output_size = [output_dim] * 2
num_units = 16
state_name = 'lstm_state'
batch_size = 4
dtype = tf.float32
learned_state = False
cell = lstm_cells.GroupedConvLSTMCell(
filter_size=filter_size,
output_size=output_size,
num_units=num_units,
is_training=True)
init_c, init_h = cell.init_state(
state_name, batch_size, dtype, learned_state)
self.assertEqual(tf.float32, init_c.dtype)
self.assertEqual(tf.float32, init_h.dtype)
with self.test_session() as sess:
init_c_res, init_h_res = sess.run([init_c, init_h])
self.assertAllClose(np.zeros((4, 10, 10, 16)), init_c_res)
self.assertAllClose(np.zeros((4, 10, 10, 16)), init_h_res)
def test_get_init_learned_state(self):
filter_size = [3, 3]
output_size = [10, 10]
num_units = 16
state_name = 'lstm_state'
batch_size = 4
dtype = tf.float32
learned_state = True
cell = lstm_cells.GroupedConvLSTMCell(
filter_size=filter_size,
output_size=output_size,
num_units=num_units,
is_training=True)
init_c, init_h = cell.init_state(
state_name, batch_size, dtype, learned_state)
self.assertEqual(tf.float32, init_c.dtype)
self.assertEqual(tf.float32, init_h.dtype)
self.assertAllEqual([4, 10, 10, 16], init_c.shape.as_list())
self.assertAllEqual([4, 10, 10, 16], init_h.shape.as_list())
def test_unroll(self):
filter_size = [3, 3]
output_size = [10, 10]
num_units = 16
state_name = 'lstm_state'
batch_size = 4
dtype = tf.float32
unroll = 10
learned_state = False
inputs = tf.zeros([4, 10, 10, 3], dtype=tf.float32)
cell = lstm_cells.GroupedConvLSTMCell(
filter_size=filter_size,
output_size=output_size,
num_units=num_units,
is_training=True)
state = cell.init_state(
state_name, batch_size, dtype, learned_state)
for step in range(unroll):
output, state = cell(inputs, state)
self.assertAllEqual([4, 10, 10, 16], output.shape.as_list())
self.assertAllEqual([4, 10, 10, 16], state[0].shape.as_list())
self.assertAllEqual([4, 10, 10, 16], state[1].shape.as_list())
def test_prebottleneck(self):
filter_size = [3, 3]
output_size = [10, 10]
num_units = 16
state_name = 'lstm_state'
batch_size = 4
dtype = tf.float32
unroll = 10
learned_state = False
inputs_large = tf.zeros([4, 10, 10, 5], dtype=tf.float32)
inputs_small = tf.zeros([4, 10, 10, 3], dtype=tf.float32)
cell = lstm_cells.GroupedConvLSTMCell(
filter_size=filter_size,
output_size=output_size,
num_units=num_units,
is_training=True,
pre_bottleneck=True)
state = cell.init_state(
state_name, batch_size, dtype, learned_state)
for step in range(unroll):
if step % 2 == 0:
inputs = cell.pre_bottleneck(inputs_large, state[1], 0)
else:
inputs = cell.pre_bottleneck(inputs_small, state[1], 1)
output, state = cell(inputs, state)
self.assertAllEqual([4, 10, 10, 16], output.shape.as_list())
self.assertAllEqual([4, 10, 10, 16], state[0].shape.as_list())
self.assertAllEqual([4, 10, 10, 16], state[1].shape.as_list())
def test_flatten_state(self):
filter_size = [3, 3]
output_size = [10, 10]
num_units = 16
state_name = 'lstm_state'
batch_size = 4
dtype = tf.float32
unroll = 10
learned_state = False
inputs_large = tf.zeros([4, 10, 10, 5], dtype=tf.float32)
inputs_small = tf.zeros([4, 10, 10, 3], dtype=tf.float32)
cell = lstm_cells.GroupedConvLSTMCell(
filter_size=filter_size,
output_size=output_size,
num_units=num_units,
is_training=True,
pre_bottleneck=True,
flatten_state=True)
state = cell.init_state(
state_name, batch_size, dtype, learned_state)
for step in range(unroll):
if step % 2 == 0:
inputs = cell.pre_bottleneck(inputs_large, state[1], 0)
else:
inputs = cell.pre_bottleneck(inputs_small, state[1], 1)
output, state = cell(inputs, state)
with self.test_session() as sess:
sess.run(tf.global_variables_initializer())
output_result, state_result = sess.run([output, state])
self.assertAllEqual((4, 10, 10, 16), output_result.shape)
self.assertAllEqual((4, 10*10*16), state_result[0].shape)
self.assertAllEqual((4, 10*10*16), state_result[1].shape)
 if __name__ == '__main__':
   tf.test.main()
@@ -15,7 +15,7 @@
 """Custom RNN decoder."""
-from tensorflow.python.ops import variable_scope
+import tensorflow as tf
 def rnn_decoder(decoder_inputs,
@@ -23,7 +23,7 @@ def rnn_decoder(decoder_inputs,
                 cell,
                 loop_function=None,
                 scope=None):
-  """RNN decoder for the sequence-to-sequence model.
+  """RNN decoder for the LSTM-SSD model.
   This decoder returns a list of all states, rather than only the final state.
   Args:
@@ -43,24 +43,205 @@ def rnn_decoder(decoder_inputs,
     A tuple of the form (outputs, state), where:
       outputs: A list of the same length as decoder_inputs of 4D Tensors with
         shape [batch_size x output_size] containing generated outputs.
-      state: A list of the same length as decoder_inputs of the state of each
+      states: A list of the same length as decoder_inputs of the state of each
         cell at each time-step. It is a 2D Tensor of shape
         [batch_size x cell.state_size].
   """
-  with variable_scope.variable_scope(scope or 'rnn_decoder'):
-    state = initial_state
+  with tf.variable_scope(scope or 'rnn_decoder'):
+    state_tuple = initial_state
     outputs = []
     states = []
     prev = None
-    for i, decoder_input in enumerate(decoder_inputs):
+    for local_step, decoder_input in enumerate(decoder_inputs):
       if loop_function is not None and prev is not None:
-        with variable_scope.variable_scope('loop_function', reuse=True):
-          decoder_input = loop_function(prev, i)
-      if i > 0:
-        variable_scope.get_variable_scope().reuse_variables()
-      output, state = cell(decoder_input, state)
+        with tf.variable_scope('loop_function', reuse=True):
+          decoder_input = loop_function(prev, local_step)
+      output, state_tuple = cell(decoder_input, state_tuple)
       outputs.append(output)
-      states.append(state)
+      states.append(state_tuple)
       if loop_function is not None:
         prev = output
     return outputs, states
def multi_input_rnn_decoder(decoder_inputs,
initial_state,
cell,
sequence_step,
selection_strategy='RANDOM',
is_training=None,
is_quantized=False,
preprocess_fn_list=None,
pre_bottleneck=False,
flatten_state=False,
scope=None):
"""RNN decoder for the Interleaved LSTM-SSD model.
This decoder takes multiple sequences of inputs and selects the input to feed
to the rnn at each timestep using its selection_strategy, which can be random,
learned, or deterministic.
This decoder returns a list of all states, rather than only the final state.
Args:
decoder_inputs: A list of lists of 2D Tensors [batch_size x input_size].
initial_state: 2D Tensor with shape [batch_size x cell.state_size].
cell: rnn_cell.RNNCell defining the cell function and size.
sequence_step: Tensor [batch_size] of the step number of the first elements
in the sequence.
selection_strategy: Method for picking the decoder_input to use at each
timestep. Must be 'RANDOM', 'SKIPX' for integer X, where X is the number
of times to use the second input before using the first.
is_training: boolean, whether the network is training. When using learned
selection, attempts exploration if training.
is_quantized: flag to enable/disable quantization mode.
preprocess_fn_list: List of functions accepting two tensor arguments: one
timestep of decoder_inputs and the lstm state. If not None,
decoder_inputs[i] will be updated with preprocess_fn[i] at the start of
each timestep.
pre_bottleneck: if True, use separate bottleneck weights for each sequence.
Useful when input sequences have differing numbers of channels. Final
bottlenecks will have the same dimension.
flatten_state: Whether the LSTM state is flattened.
scope: VariableScope for the created subgraph; defaults to "rnn_decoder".
Returns:
A tuple of the form (outputs, state), where:
outputs: A list of the same length as decoder_inputs of 2D Tensors with
shape [batch_size x output_size] containing generated outputs.
states: A list of the same length as decoder_inputs of the state of each
cell at each time-step. It is a 2D Tensor of shape
[batch_size x cell.state_size].
Raises:
ValueError: If selection_strategy is not recognized or unexpected unroll
length.
"""
if flatten_state and len(decoder_inputs[0]) > 1:
raise ValueError('In export mode, unroll length should not be more than 1')
with tf.variable_scope(scope or 'rnn_decoder'):
state_tuple = initial_state
outputs = []
states = []
batch_size = decoder_inputs[0][0].shape[0].value
num_sequences = len(decoder_inputs)
sequence_length = len(decoder_inputs[0])
for local_step in range(sequence_length):
for sequence_index in range(num_sequences):
if preprocess_fn_list is not None:
decoder_inputs[sequence_index][local_step] = (
preprocess_fn_list[sequence_index](
decoder_inputs[sequence_index][local_step], state_tuple[0]))
if pre_bottleneck:
decoder_inputs[sequence_index][local_step] = cell.pre_bottleneck(
inputs=decoder_inputs[sequence_index][local_step],
state=state_tuple[1],
input_index=sequence_index)
action = generate_action(selection_strategy, local_step, sequence_step,
[batch_size, 1, 1, 1])
inputs, _ = select_inputs(decoder_inputs, action, local_step)
# Mark base network endpoints under raw_inputs/
with tf.name_scope(None):
inputs = tf.identity(inputs, 'raw_inputs/base_endpoint')
output, state_tuple_out = cell(inputs, state_tuple)
state_tuple = select_state(state_tuple, state_tuple_out, action)
outputs.append(output)
states.append(state_tuple)
return outputs, states
def generate_action(selection_strategy, local_step, sequence_step,
action_shape):
"""Generate current (binary) action based on selection strategy.
Args:
selection_strategy: Method for picking the decoder_input to use at each
timestep. Must be 'RANDOM', 'SKIPX' for integer X, where X is the number
of times to use the second input before using the first.
local_step: Tensor [batch_size] of the step number within the current
unrolled batch.
sequence_step: Tensor [batch_size] of the step number of the first elements
in the sequence.
action_shape: The shape of action tensor to be generated.
Returns:
A tensor of shape action_shape, each element is an individual action.
Raises:
ValueError: if selection_strategy is not supported or if 'SKIP' is not
followed by numerics.
"""
if selection_strategy.startswith('RANDOM'):
action = tf.random.uniform(action_shape, maxval=2, dtype=tf.int32)
action = tf.minimum(action, 1)
# First step always runs large network.
if local_step == 0 and sequence_step is not None:
action *= tf.minimum(
tf.reshape(tf.cast(sequence_step, tf.int32), action_shape), 1)
elif selection_strategy.startswith('SKIP'):
inter_count = int(selection_strategy[4:])
if local_step % (inter_count + 1) == 0:
action = tf.zeros(action_shape)
else:
action = tf.ones(action_shape)
else:
raise ValueError('Selection strategy %s not recognized' %
selection_strategy)
return tf.cast(action, tf.int32)
def select_inputs(decoder_inputs, action, local_step, get_alt_inputs=False):
"""Selects sequence from decoder_inputs based on 1D actions.
Given multiple input batches, creates a single output batch by
selecting from the action[i]-ith input for the i-th batch element.
Args:
decoder_inputs: A 2-D list of tensor inputs.
action: A tensor of shape [batch_size]. Each element corresponds to an index
of decoder_inputs to choose.
local_step: The current timestep.
get_alt_inputs: Whether the non-chosen inputs should also be returned.
Returns:
The constructed output. Also outputs the elements that were not chosen
if get_alt_inputs is True, otherwise None.
Raises:
ValueError: if the decoder inputs contains other than two sequences.
"""
num_seqs = len(decoder_inputs)
if not num_seqs == 2:
raise ValueError('Currently only supports two sets of inputs.')
stacked_inputs = tf.stack(
[decoder_inputs[seq_index][local_step] for seq_index in range(num_seqs)],
axis=-1)
action_index = tf.one_hot(action, num_seqs)
inputs = tf.reduce_sum(stacked_inputs * action_index, axis=-1)
inputs_alt = None
# Only works for 2 models.
if get_alt_inputs:
# Reverse of action_index.
action_index_alt = tf.one_hot(action, num_seqs, on_value=0.0, off_value=1.0)
inputs_alt = tf.reduce_sum(stacked_inputs * action_index_alt, axis=-1)
return inputs, inputs_alt
def select_state(previous_state, new_state, action):
"""Select state given action.
Currently only a binary action is supported. If the action is 0, the state was
generated by the large model and we update the state. If the action is 1, the
state was generated by the small model and, in the interleaved model, we skip
this state update.
Args:
previous_state: A state tuple representing state from previous step.
new_state: A state tuple representing newly computed state.
action: A tensor the same shape as state.
Returns:
A state tuple selected based on the given action.
"""
action = tf.cast(action, tf.float32)
state_c = previous_state[0] * action + new_state[0] * (1 - action)
state_h = previous_state[1] * action + new_state[1] * (1 - action)
return (state_c, state_h)
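
A minimal sketch of driving the interleaved decoder with a fixed skip policy, mirroring the tests further below. `large_features`, `small_features`, `lstm_cell`, and `batch_size` are assumed to be defined elsewhere (e.g. lists of per-timestep feature maps and a BottleneckConvLSTMCell built with pre_bottleneck=True).

```python
import tensorflow as tf
from lstm_object_detection.lstm import rnn_decoder

# Two input sequences of equal unroll length; the cell must expose
# pre_bottleneck() so differing channel counts are projected to a common size.
outputs, states = rnn_decoder.multi_input_rnn_decoder(
    decoder_inputs=[large_features, small_features],
    initial_state=lstm_cell.init_state(
        'lstm_state', batch_size, tf.float32, False),
    cell=lstm_cell,
    sequence_step=tf.zeros([batch_size]),
    pre_bottleneck=True,
    selection_strategy='SKIP2')  # large model on steps 0, 3, 6, ...; small otherwise
```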
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for lstm_object_detection.lstm.rnn_decoder."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
import numpy as np
from lstm_object_detection.lstm import rnn_decoder
class MockRnnCell(tf.contrib.rnn.RNNCell):
def __init__(self, input_size, num_units):
self._input_size = input_size
self._num_units = num_units
self._filter_size = [3, 3]
def __call__(self, inputs, state_tuple):
outputs = tf.concat([inputs, state_tuple[0]], axis=3)
new_state_tuple = (tf.multiply(state_tuple[0], 2), state_tuple[1])
return outputs, new_state_tuple
def state_size(self):
return self._num_units
def output_size(self):
return self._input_size + self._num_units
def pre_bottleneck(self, inputs, state, input_index):
with tf.variable_scope('bottleneck_%d' % input_index, reuse=tf.AUTO_REUSE):
inputs = tf.contrib.layers.separable_conv2d(
tf.concat([inputs, state], 3),
self._input_size,
self._filter_size,
depth_multiplier=1,
activation_fn=tf.nn.relu6,
normalizer_fn=None)
return inputs
class RnnDecoderTest(tf.test.TestCase):
def test_rnn_decoder_single_unroll(self):
batch_size = 2
num_unroll = 1
num_units = 64
width = 8
height = 10
input_channels = 128
initial_state = tf.random_normal((batch_size, width, height, num_units))
inputs = tf.random_normal([batch_size, width, height, input_channels])
rnn_cell = MockRnnCell(input_channels, num_units)
outputs, states = rnn_decoder.rnn_decoder(
decoder_inputs=[inputs] * num_unroll,
initial_state=(initial_state, initial_state),
cell=rnn_cell)
self.assertEqual(len(outputs), num_unroll)
self.assertEqual(len(states), num_unroll)
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
results = sess.run((outputs, states, inputs, initial_state))
outputs_results = results[0]
states_results = results[1]
inputs_results = results[2]
initial_states_results = results[3]
self.assertEqual(outputs_results[0].shape,
(batch_size, width, height, input_channels + num_units))
self.assertAllEqual(
outputs_results[0],
np.concatenate((inputs_results, initial_states_results), axis=3))
self.assertEqual(states_results[0][0].shape,
(batch_size, width, height, num_units))
self.assertEqual(states_results[0][1].shape,
(batch_size, width, height, num_units))
self.assertAllEqual(states_results[0][0],
np.multiply(initial_states_results, 2.0))
self.assertAllEqual(states_results[0][1], initial_states_results)
def test_rnn_decoder_multiple_unroll(self):
batch_size = 2
num_unroll = 3
num_units = 64
width = 8
height = 10
input_channels = 128
initial_state = tf.random_normal((batch_size, width, height, num_units))
inputs = tf.random_normal([batch_size, width, height, input_channels])
rnn_cell = MockRnnCell(input_channels, num_units)
outputs, states = rnn_decoder.rnn_decoder(
decoder_inputs=[inputs] * num_unroll,
initial_state=(initial_state, initial_state),
cell=rnn_cell)
self.assertEqual(len(outputs), num_unroll)
self.assertEqual(len(states), num_unroll)
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
results = sess.run((outputs, states, inputs, initial_state))
outputs_results = results[0]
states_results = results[1]
inputs_results = results[2]
initial_states_results = results[3]
for i in range(num_unroll):
previous_state = ([initial_states_results, initial_states_results]
if i == 0 else states_results[i - 1])
self.assertEqual(
outputs_results[i].shape,
(batch_size, width, height, input_channels + num_units))
self.assertAllEqual(
outputs_results[i],
np.concatenate((inputs_results, previous_state[0]), axis=3))
self.assertEqual(states_results[i][0].shape,
(batch_size, width, height, num_units))
self.assertEqual(states_results[i][1].shape,
(batch_size, width, height, num_units))
self.assertAllEqual(states_results[i][0],
np.multiply(previous_state[0], 2.0))
self.assertAllEqual(states_results[i][1], previous_state[1])
class MultiInputRnnDecoderTest(tf.test.TestCase):
def test_rnn_decoder_single_unroll(self):
batch_size = 2
num_unroll = 1
num_units = 12
width = 8
height = 10
input_channels_large = 24
input_channels_small = 12
bottleneck_channels = 20
initial_state_c = tf.random_normal((batch_size, width, height, num_units))
initial_state_h = tf.random_normal((batch_size, width, height, num_units))
initial_state = (initial_state_c, initial_state_h)
inputs_large = tf.random_normal(
[batch_size, width, height, input_channels_large])
inputs_small = tf.random_normal(
[batch_size, width, height, input_channels_small])
rnn_cell = MockRnnCell(bottleneck_channels, num_units)
outputs, states = rnn_decoder.multi_input_rnn_decoder(
decoder_inputs=[[inputs_large] * num_unroll,
[inputs_small] * num_unroll],
initial_state=initial_state,
cell=rnn_cell,
sequence_step=tf.zeros([batch_size]),
pre_bottleneck=True)
self.assertEqual(len(outputs), num_unroll)
self.assertEqual(len(states), num_unroll)
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
results = sess.run(
(outputs, states, inputs_large, inputs_small, initial_state))
outputs_results = results[0]
states_results = results[1]
inputs_large_results = results[2]
inputs_small_results = results[3]
initial_states_results = results[4]
self.assertEqual(
outputs_results[0].shape,
(batch_size, width, height, bottleneck_channels + num_units))
self.assertEqual(states_results[0][0].shape,
(batch_size, width, height, num_units))
self.assertEqual(states_results[0][1].shape,
(batch_size, width, height, num_units))
# The first step should always update state.
self.assertAllEqual(states_results[0][0],
np.multiply(initial_states_results[0], 2))
self.assertAllEqual(states_results[0][1], initial_states_results[1])
def test_rnn_decoder_multiple_unroll(self):
batch_size = 2
num_unroll = 3
num_units = 12
width = 8
height = 10
input_channels_large = 24
input_channels_small = 12
bottleneck_channels = 20
initial_state_c = tf.random_normal((batch_size, width, height, num_units))
initial_state_h = tf.random_normal((batch_size, width, height, num_units))
initial_state = (initial_state_c, initial_state_h)
inputs_large = tf.random_normal(
[batch_size, width, height, input_channels_large])
inputs_small = tf.random_normal(
[batch_size, width, height, input_channels_small])
rnn_cell = MockRnnCell(bottleneck_channels, num_units)
outputs, states = rnn_decoder.multi_input_rnn_decoder(
decoder_inputs=[[inputs_large] * num_unroll,
[inputs_small] * num_unroll],
initial_state=initial_state,
cell=rnn_cell,
sequence_step=tf.zeros([batch_size]),
pre_bottleneck=True)
self.assertEqual(len(outputs), num_unroll)
self.assertEqual(len(states), num_unroll)
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
results = sess.run(
(outputs, states, inputs_large, inputs_small, initial_state))
outputs_results = results[0]
states_results = results[1]
inputs_large_results = results[2]
inputs_small_results = results[3]
initial_states_results = results[4]
# The first step should always update state.
self.assertAllEqual(states_results[0][0],
np.multiply(initial_states_results[0], 2))
self.assertAllEqual(states_results[0][1], initial_states_results[1])
for i in range(num_unroll):
self.assertEqual(
outputs_results[i].shape,
(batch_size, width, height, bottleneck_channels + num_units))
self.assertEqual(states_results[i][0].shape,
(batch_size, width, height, num_units))
self.assertEqual(states_results[i][1].shape,
(batch_size, width, height, num_units))
def test_rnn_decoder_multiple_unroll_with_skip(self):
batch_size = 2
num_unroll = 5
num_units = 12
width = 8
height = 10
input_channels_large = 24
input_channels_small = 12
bottleneck_channels = 20
skip = 2
initial_state_c = tf.random_normal((batch_size, width, height, num_units))
initial_state_h = tf.random_normal((batch_size, width, height, num_units))
initial_state = (initial_state_c, initial_state_h)
inputs_large = tf.random_normal(
[batch_size, width, height, input_channels_large])
inputs_small = tf.random_normal(
[batch_size, width, height, input_channels_small])
rnn_cell = MockRnnCell(bottleneck_channels, num_units)
outputs, states = rnn_decoder.multi_input_rnn_decoder(
decoder_inputs=[[inputs_large] * num_unroll,
[inputs_small] * num_unroll],
initial_state=initial_state,
cell=rnn_cell,
sequence_step=tf.zeros([batch_size]),
pre_bottleneck=True,
selection_strategy='SKIP%d' % skip)
self.assertEqual(len(outputs), num_unroll)
self.assertEqual(len(states), num_unroll)
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
results = sess.run(
(outputs, states, inputs_large, inputs_small, initial_state))
outputs_results = results[0]
states_results = results[1]
inputs_large_results = results[2]
inputs_small_results = results[3]
initial_states_results = results[4]
for i in range(num_unroll):
self.assertEqual(
outputs_results[i].shape,
(batch_size, width, height, bottleneck_channels + num_units))
self.assertEqual(states_results[i][0].shape,
(batch_size, width, height, num_units))
self.assertEqual(states_results[i][1].shape,
(batch_size, width, height, num_units))
previous_state = (
initial_states_results if i == 0 else states_results[i - 1])
# State only updates during key frames
if i % (skip + 1) == 0:
self.assertAllEqual(states_results[i][0],
np.multiply(previous_state[0], 2))
self.assertAllEqual(states_results[i][1], previous_state[1])
else:
self.assertAllEqual(states_results[i][0], previous_state[0])
self.assertAllEqual(states_results[i][1], previous_state[1])
if __name__ == '__main__':
tf.test.main()
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Quantization related ops for LSTM."""
from __future__ import absolute_import
from __future__ import division
import tensorflow as tf
from tensorflow.python.training import moving_averages
def _quant_var(
name,
initializer_val,
vars_collection=tf.GraphKeys.MOVING_AVERAGE_VARIABLES,
):
"""Create an var for storing the min/max quantization range."""
return tf.contrib.framework.model_variable(
name,
shape=[],
initializer=tf.constant_initializer(initializer_val),
collections=[vars_collection],
trainable=False)
def quantizable_concat(inputs,
axis,
is_training,
is_quantized=True,
default_min=0,
default_max=6,
ema_decay=0.999,
scope='quantized_concat'):
"""Concat replacement with quantization option.
Allows concat inputs to share the same min max ranges,
from experimental/gazelle/synthetic/model/tpu/utils.py.
Args:
inputs: list of tensors to concatenate.
axis: dimension along which to concatenate.
is_training: true if the graph is a training graph.
is_quantized: flag to enable/disable quantization.
default_min: default min value for fake quant op.
default_max: default max value for fake quant op.
ema_decay: the moving average decay for the quantization variables.
scope: Optional scope for variable_scope.
Returns:
Tensor resulting from concatenation of input tensors
"""
if is_quantized:
with tf.variable_scope(scope):
tf.logging.info('inputs: {}'.format(inputs))
for t in inputs:
tf.logging.info(t)
min_var = _quant_var('min', default_min)
max_var = _quant_var('max', default_max)
if not is_training:
# If we are building an eval graph just use the values in the variables.
quant_inputs = [
tf.fake_quant_with_min_max_vars(t, min_var, max_var) for t in inputs
]
tf.logging.info('min_val: {}'.format(min_var))
tf.logging.info('max_val: {}'.format(max_var))
else:
concat_tensors = tf.concat(inputs, axis=axis)
tf.logging.info('concat_tensors: {}'.format(concat_tensors))
# Otherwise we need to keep track of the moving averages of the min and
# of the elements of the input tensor max.
min_val = moving_averages.assign_moving_average(
min_var,
tf.reduce_min(concat_tensors),
ema_decay,
name='AssignMinEma')
max_val = moving_averages.assign_moving_average(
max_var,
tf.reduce_max(concat_tensors),
ema_decay,
name='AssignMaxEma')
tf.logging.info('min_val: {}'.format(min_val))
tf.logging.info('max_val: {}'.format(max_val))
quant_inputs = [
tf.fake_quant_with_min_max_vars(t, min_val, max_val) for t in inputs
]
tf.logging.info('quant_inputs: {}'.format(quant_inputs))
outputs = tf.concat(quant_inputs, axis=axis)
tf.logging.info('outputs: {}'.format(outputs))
else:
outputs = tf.concat(inputs, axis=axis)
return outputs
def quantizable_separable_conv2d(inputs,
num_outputs,
kernel_size,
is_quantized=True,
depth_multiplier=1,
stride=1,
activation_fn=tf.nn.relu6,
normalizer_fn=None,
scope=None):
"""Quantization friendly backward compatible separable conv2d.
This op has the same API is separable_conv2d. The main difference is that an
additional BiasAdd is manually inserted after the depthwise conv, such that
the depthwise bias will not have name conflict with pointwise bias. The
motivation of this op is that quantization script need BiasAdd in order to
recognize the op, in which a native call to separable_conv2d do not create
for the depthwise conv.
Args:
inputs: A tensor of size [batch_size, height, width, channels].
num_outputs: The number of pointwise convolution output filters. If is
None, then we skip the pointwise convolution stage.
kernel_size: A list of length 2: [kernel_height, kernel_width] of the
filters. Can be an int if both values are the same.
is_quantized: flag to enable/disable quantization.
depth_multiplier: The number of depthwise convolution output channels for
each input channel. The total number of depthwise convolution output
channels will be equal to num_filters_in * depth_multiplier.
stride: A list of length 2: [stride_height, stride_width], specifying the
depthwise convolution stride. Can be an int if both strides are the same.
activation_fn: Activation function. The default value is a ReLU function.
Explicitly set it to None to skip it and maintain a linear activation.
normalizer_fn: Normalization function to use instead of biases.
scope: Optional scope for variable_scope.
Returns:
Tensor resulting from the quantization-friendly separable convolution.
"""
if is_quantized:
outputs = tf.contrib.layers.separable_conv2d(
inputs,
None,
kernel_size,
depth_multiplier=depth_multiplier,
stride=1,
activation_fn=None,
normalizer_fn=None,
biases_initializer=None,
scope=scope)
outputs = tf.contrib.layers.bias_add(
outputs, trainable=True, scope='%s_bias' % scope)
outputs = tf.contrib.layers.conv2d(
outputs,
num_outputs, [1, 1],
activation_fn=activation_fn,
stride=stride,
normalizer_fn=normalizer_fn,
scope=scope)
else:
outputs = tf.contrib.layers.separable_conv2d(
inputs,
num_outputs,
kernel_size,
depth_multiplier=depth_multiplier,
stride=stride,
activation_fn=activation_fn,
normalizer_fn=normalizer_fn,
scope=scope)
return outputs
def quantize_op(inputs,
is_training=True,
is_quantized=True,
default_min=0,
default_max=6,
ema_decay=0.999,
scope='quant'):
"""Inserts a fake quantization op after inputs.
Args:
inputs: A tensor of size [batch_size, height, width, channels].
is_training: true if the graph is a training graph.
is_quantized: flag to enable/disable quantization.
default_min: default min value for fake quant op.
default_max: default max value for fake quant op.
ema_decay: the moving average decay for the quantization variables.
scope: Optional scope for variable_scope.
Returns:
Tensor resulting from quantizing the input tensors.
"""
if is_quantized:
with tf.variable_scope(scope):
min_var = _quant_var('min', default_min)
max_var = _quant_var('max', default_max)
if is_training:
min_val = moving_averages.assign_moving_average(
min_var, tf.reduce_min(inputs), ema_decay, name='AssignMinEma')
max_val = moving_averages.assign_moving_average(
max_var, tf.reduce_max(inputs), ema_decay, name='AssignMaxEma')
inputs = tf.fake_quant_with_min_max_vars(inputs, min_val, max_val)
else:
inputs = tf.fake_quant_with_min_max_vars(inputs, min_var, max_var)
return inputs
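
A brief usage sketch of the two helpers above, mirroring the shapes used in the tests that follow.

```python
import tensorflow as tf
from lstm_object_detection.lstm import utils

branch_a = tf.zeros([4, 10, 10, 1], dtype=tf.float32)
branch_b = tf.ones([4, 10, 10, 2], dtype=tf.float32)

# Concatenate two branches while sharing a single fake-quant min/max range;
# moving-average range updates are only created when is_training=True.
merged = utils.quantizable_concat([branch_a, branch_b], axis=3, is_training=True)

# Insert a standalone fake-quant op after an arbitrary activation tensor.
quantized = utils.quantize_op(merged, is_training=True)
```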
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for lstm_object_detection.lstm.utils."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from lstm_object_detection.lstm import utils
class QuantizableUtilsTest(tf.test.TestCase):
def test_quantizable_concat_is_training(self):
inputs_1 = tf.zeros([4, 10, 10, 1], dtype=tf.float32)
inputs_2 = tf.ones([4, 10, 10, 2], dtype=tf.float32)
concat_in_train = utils.quantizable_concat([inputs_1, inputs_2],
axis=3,
is_training=True)
self.assertAllEqual([4, 10, 10, 3], concat_in_train.shape.as_list())
self._check_min_max_ema(tf.get_default_graph())
self._check_min_max_vars(tf.get_default_graph())
def test_quantizable_concat_inference(self):
inputs_1 = tf.zeros([4, 10, 10, 1], dtype=tf.float32)
inputs_2 = tf.ones([4, 10, 10, 2], dtype=tf.float32)
concat_in_train = utils.quantizable_concat([inputs_1, inputs_2],
axis=3,
is_training=False)
self.assertAllEqual([4, 10, 10, 3], concat_in_train.shape.as_list())
self._check_no_min_max_ema(tf.get_default_graph())
self._check_min_max_vars(tf.get_default_graph())
def test_quantizable_concat_not_quantized_is_training(self):
inputs_1 = tf.zeros([4, 10, 10, 1], dtype=tf.float32)
inputs_2 = tf.ones([4, 10, 10, 2], dtype=tf.float32)
concat_in_train = utils.quantizable_concat([inputs_1, inputs_2],
axis=3,
is_training=True,
is_quantized=False)
self.assertAllEqual([4, 10, 10, 3], concat_in_train.shape.as_list())
self._check_no_min_max_ema(tf.get_default_graph())
self._check_no_min_max_vars(tf.get_default_graph())
def test_quantizable_concat_not_quantized_inference(self):
inputs_1 = tf.zeros([4, 10, 10, 1], dtype=tf.float32)
inputs_2 = tf.ones([4, 10, 10, 2], dtype=tf.float32)
concat_in_train = utils.quantizable_concat([inputs_1, inputs_2],
axis=3,
is_training=False,
is_quantized=False)
self.assertAllEqual([4, 10, 10, 3], concat_in_train.shape.as_list())
self._check_no_min_max_ema(tf.get_default_graph())
self._check_no_min_max_vars(tf.get_default_graph())
def test_quantize_op_is_training(self):
inputs = tf.zeros([4, 10, 10, 128], dtype=tf.float32)
outputs = utils.quantize_op(inputs)
self.assertAllEqual(inputs.shape.as_list(), outputs.shape.as_list())
self._check_min_max_ema(tf.get_default_graph())
self._check_min_max_vars(tf.get_default_graph())
def test_quantize_op_inference(self):
inputs = tf.zeros([4, 10, 10, 128], dtype=tf.float32)
outputs = utils.quantize_op(inputs, is_training=False)
self.assertAllEqual(inputs.shape.as_list(), outputs.shape.as_list())
self._check_no_min_max_ema(tf.get_default_graph())
self._check_min_max_vars(tf.get_default_graph())
def _check_min_max_vars(self, graph):
op_types = [op.type for op in graph.get_operations()]
self.assertTrue(
any('FakeQuantWithMinMaxVars' in op_type for op_type in op_types))
def _check_min_max_ema(self, graph):
op_names = [op.name for op in graph.get_operations()]
self.assertTrue(any('AssignMinEma' in name for name in op_names))
self.assertTrue(any('AssignMaxEma' in name for name in op_names))
def _check_no_min_max_vars(self, graph):
op_types = [op.type for op in graph.get_operations()]
self.assertFalse(
any('FakeQuantWithMinMaxVars' in op_type for op_type in op_types))
def _check_no_min_max_ema(self, graph):
op_names = [op.name for op in graph.get_operations()]
self.assertFalse(any('AssignMinEma' in name for name in op_names))
self.assertFalse(any('AssignMaxEma' in name for name in op_names))
class QuantizableSeparableConv2dTest(tf.test.TestCase):
def test_quantizable_separable_conv2d(self):
inputs = tf.zeros([4, 10, 10, 128], dtype=tf.float32)
num_outputs = 64
kernel_size = [3, 3]
scope = 'QuantSeparable'
outputs = utils.quantizable_separable_conv2d(
inputs, num_outputs, kernel_size, scope=scope)
self.assertAllEqual([4, 10, 10, num_outputs], outputs.shape.as_list())
self._check_depthwise_bias_add(tf.get_default_graph(), scope)
def test_quantizable_separable_conv2d_not_quantized(self):
inputs = tf.zeros([4, 10, 10, 128], dtype=tf.float32)
num_outputs = 64
kernel_size = [3, 3]
scope = 'QuantSeparable'
outputs = utils.quantizable_separable_conv2d(
inputs, num_outputs, kernel_size, is_quantized=False, scope=scope)
self.assertAllEqual([4, 10, 10, num_outputs], outputs.shape.as_list())
self._check_no_depthwise_bias_add(tf.get_default_graph(), scope)
def _check_depthwise_bias_add(self, graph, scope):
op_names = [op.name for op in graph.get_operations()]
self.assertTrue(
any('%s_bias/BiasAdd' % scope in name for name in op_names))
def _check_no_depthwise_bias_add(self, graph, scope):
op_names = [op.name for op in graph.get_operations()]
self.assertFalse(
any('%s_bias/BiasAdd' % scope in name for name in op_names))
if __name__ == '__main__':
tf.test.main()