Commit 22e20f84 authored by Skirmantas Kligys, committed by Yongzhe Wang

Push CLs 283653640, 284011539 (#7921)

* Manually quantize selected inputs before summing them up.

PiperOrigin-RevId: 283653640

* Fix messy variable paths in LSTD head.

With a VariableScope object passed as a parameter to tf.variable_scope(),
TF gets confused and starts placing variables in scopes different
from operations (variable_scope != name_scope). That leads to very messy
TF graphs.

Before CL:
  FeatureExtractor/LSTM/conv_lstm_cell/bottleneck_0/depthwise_weights
  FeatureExtractor/LSTM/LSTM/conv_lstm_cell/bottleneck_0/separable_conv2d/depthwise

After CL:
  FeatureExtractor/LSTM/conv_lstm_cell/bottleneck_0/depthwise_weights
  FeatureExtractor/LSTM/conv_lstm_cell/bottleneck_0/separable_conv2d/depthwise
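For illustration, a minimal TF 1.x sketch (not part of the CL) that reproduces the mismatch: re-entering a scope via its captured VariableScope object keeps the variable path under LSTM/ but opens a fresh, uniquified name scope for ops.

import tensorflow as tf  # TF 1.x graph mode

with tf.variable_scope('LSTM') as lstm_scope:
  # Re-enter via the VariableScope object, as the old head code did.
  with tf.variable_scope(lstm_scope):
    w = tf.get_variable('w', [1])  # variable path stays under LSTM/
    y = tf.identity(w, name='y')   # op path gets a second LSTM/ component

print(w.name)  # expected: LSTM/w:0
print(y.name)  # expected: LSTM/LSTM/y:0 -- the "Before CL" divergence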

PiperOrigin-RevId: 284011539
parent 8a8baded
@@ -16,6 +16,16 @@
 """Custom RNN decoder."""
 
 import tensorflow as tf
+import lstm_object_detection.lstm.utils as lstm_utils
+
+
+class _NoVariableScope(object):
+
+  def __enter__(self):
+    return
+
+  def __exit__(self, exc_type, exc_value, traceback):
+    return False
 
 
 def rnn_decoder(decoder_inputs,
@@ -38,7 +48,7 @@ def rnn_decoder(decoder_inputs,
         * prev is a 2D Tensor of shape [batch_size x output_size],
         * i is an integer, the step number (when advanced control is needed),
         * next is a 2D Tensor of shape [batch_size x input_size].
-    scope: VariableScope for the created subgraph; defaults to "rnn_decoder".
+    scope: optional VariableScope for the created subgraph.
   Returns:
     A tuple of the form (outputs, state), where:
       outputs: A list of the same length as decoder_inputs of 4D Tensors with
@@ -47,7 +57,7 @@ def rnn_decoder(decoder_inputs,
       cell at each time-step. It is a 2D Tensor of shape
        [batch_size x cell.state_size].
   """
-  with tf.variable_scope(scope or 'rnn_decoder'):
+  with tf.variable_scope(scope) if scope else _NoVariableScope():
     state_tuple = initial_state
     outputs = []
     states = []
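Note on the new helper: _NoVariableScope is a plain no-op context manager, so when scope is None the decoder now adds no scope at all (the old code forced a 'rnn_decoder' default). A tiny sketch reusing the names from the hunks above; on Python 3.7+ contextlib.nullcontext() would behave the same:

scope = None
with tf.variable_scope(scope) if scope else _NoVariableScope():
  v = tf.get_variable('v', [1])  # created at the current scope, unprefixed

print(v.name)  # v:0 -- no 'rnn_decoder/' prefix when scope is None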
@@ -100,7 +110,7 @@ def multi_input_rnn_decoder(decoder_inputs,
       Useful when input sequences have differing numbers of channels. Final
       bottlenecks will have the same dimension.
     flatten_state: Whether the LSTM state is flattened.
-    scope: VariableScope for the created subgraph; defaults to "rnn_decoder".
+    scope: optional VariableScope for the created subgraph.
   Returns:
     A tuple of the form (outputs, state), where:
       outputs: A list of the same length as decoder_inputs of 2D Tensors with
@@ -114,7 +124,7 @@ def multi_input_rnn_decoder(decoder_inputs,
   """
   if flatten_state and len(decoder_inputs[0]) > 1:
     raise ValueError('In export mode, unroll length should not be more than 1')
-  with tf.variable_scope(scope or 'rnn_decoder'):
+  with tf.variable_scope(scope) if scope else _NoVariableScope():
     state_tuple = initial_state
     outputs = []
     states = []
@@ -136,7 +146,9 @@ def multi_input_rnn_decoder(decoder_inputs,
       action = generate_action(selection_strategy, local_step, sequence_step,
                                [batch_size, 1, 1, 1])
-      inputs, _ = select_inputs(decoder_inputs, action, local_step)
+      inputs, _ = (
+          select_inputs(decoder_inputs, action, local_step, is_training,
+                        is_quantized))
       # Mark base network endpoints under raw_inputs/
       with tf.name_scope(None):
         inputs = tf.identity(inputs, 'raw_inputs/base_endpoint')
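The raw_inputs/ marking works because tf.name_scope(None) resets to the graph root, so the endpoint name is stable however deeply the decoder is nested. A quick illustrative sketch (the scope names here are hypothetical):

with tf.name_scope('FeatureExtractor'):
  with tf.name_scope('LSTM'):
    with tf.name_scope(None):  # passing None jumps back to the graph root
      t = tf.identity(tf.constant(0.0), name='raw_inputs/base_endpoint')

print(t.name)  # raw_inputs/base_endpoint:0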
@@ -189,7 +201,8 @@ def generate_action(selection_strategy, local_step, sequence_step,
   return tf.cast(action, tf.int32)
 
 
-def select_inputs(decoder_inputs, action, local_step, get_alt_inputs=False):
+def select_inputs(decoder_inputs, action, local_step, is_training, is_quantized,
+                  get_alt_inputs=False):
   """Selects sequence from decoder_inputs based on 1D actions.
 
   Given multiple input batches, creates a single output batch by
@@ -199,7 +212,10 @@ def select_inputs(decoder_inputs, action, local_step, get_alt_inputs=False):
     decoder_inputs: A 2-D list of tensor inputs.
     action: A tensor of shape [batch_size]. Each element corresponds to an index
       of decoder_inputs to choose.
-    step: The current timestep.
+    local_step: The current timestep.
+    is_training: boolean, whether the network is training. When using learned
+      selection, attempts exploration if training.
+    is_quantized: flag to enable/disable quantization mode.
     get_alt_inputs: Whether the non-chosen inputs should also be returned.
 
   Returns:
@@ -216,13 +232,19 @@ def select_inputs(decoder_inputs, action, local_step, get_alt_inputs=False):
       [decoder_inputs[seq_index][local_step] for seq_index in range(num_seqs)],
       axis=-1)
   action_index = tf.one_hot(action, num_seqs)
-  inputs = tf.reduce_sum(stacked_inputs * action_index, axis=-1)
+  selected_inputs = (
+      lstm_utils.quantize_op(stacked_inputs * action_index, is_training,
+                             is_quantized, scope='quant_selected_inputs'))
+  inputs = tf.reduce_sum(selected_inputs, axis=-1)
   inputs_alt = None
   # Only works for 2 models.
   if get_alt_inputs:
     # Reverse of action_index.
     action_index_alt = tf.one_hot(action, num_seqs, on_value=0.0, off_value=1.0)
-    inputs_alt = tf.reduce_sum(stacked_inputs * action_index_alt, axis=-1)
+    selected_inputs = (
+        lstm_utils.quantize_op(stacked_inputs * action_index_alt, is_training,
+                               is_quantized, scope='quant_selected_inputs_alt'))
+    inputs_alt = tf.reduce_sum(selected_inputs, axis=-1)
   return inputs, inputs_alt
 
 
 def select_state(previous_state, new_state, action):
...
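The selection math itself is unchanged: inputs are stacked on a trailing axis, masked with a one-hot of the action, and summed; the CL only inserts lstm_utils.quantize_op on the masked product before the reduce_sum. A pure-NumPy toy of the mask-and-sum step (values made up):

import numpy as np

a = np.array([10.0, 20.0])           # sequence 0, batch of 2
b = np.array([1.0, 2.0])             # sequence 1
stacked = np.stack([a, b], axis=-1)  # shape [2, 2], like stacked_inputs
action = np.array([0, 1])            # item 0 takes seq 0, item 1 takes seq 1
one_hot = np.eye(2)[action]          # like tf.one_hot(action, num_seqs)
print((stacked * one_hot).sum(axis=-1))  # [10.  2.]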
@@ -241,7 +241,7 @@ class LSTMSSDInterleavedMobilenetV2FeatureExtractor(
                        'not equal!')
     with slim.arg_scope(self._conv_hyperparams_fn()):
-      with tf.variable_scope('LSTM', reuse=self._reuse_weights) as lstm_scope:
+      with tf.variable_scope('LSTM', reuse=self._reuse_weights):
         output_size = (large_base_feature_shape[1], large_base_feature_shape[2])
         lstm_cell, init_state, step = self.create_lstm_cell(
             batch_size, output_size, state_saver, state_name)
@@ -257,9 +257,10 @@ class LSTMSSDInterleavedMobilenetV2FeatureExtractor(
             step,
             selection_strategy=self._interleave_method,
             is_training=self._is_training,
+            is_quantized=self._is_quantized,
             pre_bottleneck=self._pre_bottleneck,
             flatten_state=self._flatten_state,
-            scope=lstm_scope)
+            scope=None)
         self._states_out = states_out
         batcher_ops = None
...
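With the plain string scope kept here and scope=None passed down to the decoder, variable and name scopes advance together, which yields the "After CL" paths. A complementary sketch (illustrative only):

with tf.variable_scope('LSTM'):
  w = tf.get_variable('w', [1])
  y = tf.identity(w, name='y')

print(w.name)  # LSTM/w:0
print(y.name)  # LSTM/y:0 -- ops and variables share one prefix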