Commit b1025b3b authored by syiming

Merge remote-tracking branch 'upstream/master' into fasterrcnn_fpn_keras_feature_extractor

parents 69ce1c45 e9df75ab
......@@ -41,12 +41,13 @@ class BatchTimestamp(object):
class TimeHistory(tf.keras.callbacks.Callback):
"""Callback for Keras models."""
def __init__(self, batch_size, log_steps, logdir=None):
def __init__(self, batch_size, log_steps, initial_step=0, logdir=None):
"""Callback for logging performance.
Args:
batch_size: Total batch size.
log_steps: Interval of steps between logging of batch level stats.
initial_step: Optional initial step from which to resume step counting,
e.g. when restarting training from a checkpoint.
logdir: Optional directory to write TensorBoard summaries.
"""
# TODO(wcromar): remove this parameter and rely on `logs` parameter of
......@@ -54,8 +55,8 @@ class TimeHistory(tf.keras.callbacks.Callback):
self.batch_size = batch_size
super(TimeHistory, self).__init__()
self.log_steps = log_steps
self.last_log_step = 0
self.steps_before_epoch = 0
self.last_log_step = initial_step
self.steps_before_epoch = initial_step
self.steps_in_epoch = 0
self.start_time = None
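A minimal usage sketch for the new initial_step argument: when training resumes from a checkpoint, passing the restored step keeps throughput logging aligned with the global step (the model, dataset, and values here are illustrative):

# Assumes training resumes at step 5000 after restoring a checkpoint.
time_callback = TimeHistory(batch_size=256, log_steps=100, initial_step=5000,
                            logdir='/tmp/logs')
model.fit(train_dataset, epochs=10, callbacks=[time_callback])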
......
......@@ -100,6 +100,9 @@ class DatasetConfig(base_config.Config):
skip_decoding: Whether to skip image decoding when loading from TFDS.
cache: whether to cache dataset examples. Can be used to avoid re-reading
from disk on the second epoch. Requires significant memory overhead.
tf_data_service: The URI of a tf.data service to offload preprocessing onto
during training. The URI should be in the format "protocol://address",
e.g. "grpc://tf-data-service:5050".
mean_subtract: whether or not to apply mean subtraction to the dataset.
standardize: whether or not to apply standardization to the dataset.
"""
......@@ -123,6 +126,7 @@ class DatasetConfig(base_config.Config):
file_shuffle_buffer_size: int = 1024
skip_decoding: bool = True
cache: bool = False
tf_data_service: Optional[str] = None
mean_subtract: bool = False
standardize: bool = False
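For example, preprocessing can be offloaded by pointing the config at a service address (a sketch; the address is illustrative):

config = DatasetConfig(tf_data_service='grpc://tf-data-service:5050')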
......@@ -449,6 +453,18 @@ class DatasetBuilder:
# Prefetch overlaps in-feed with training
dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)
if self.config.tf_data_service:
if not hasattr(tf.data.experimental, 'service'):
raise ValueError('The tf_data_service flag requires TensorFlow version '
'>= 2.3.0, but the version is {}'.format(
tf.__version__))
dataset = dataset.apply(
tf.data.experimental.service.distribute(
processing_mode='parallel_epochs',
service=self.config.tf_data_service,
job_name='resnet_train'))
dataset = dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
return dataset
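The same distribute transformation can be exercised standalone; a minimal sketch, assuming a tf.data service dispatcher and worker are already running at the given address:

import tensorflow as tf  # requires TF >= 2.3

dataset = tf.data.Dataset.range(10)
dataset = dataset.apply(
    tf.data.experimental.service.distribute(
        processing_mode='parallel_epochs',
        service='grpc://localhost:5050',  # assumed address of a running service
        job_name='toy_job'))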
def parse_record(self, record: tf.Tensor) -> Tuple[tf.Tensor, tf.Tensor]:
......
......@@ -25,7 +25,6 @@ from delf.protos import delf_config_pb2
from delf.protos import feature_pb2
from delf.python import box_io
from delf.python import datum_io
from delf.python import delf_v1
from delf.python import feature_aggregation_extractor
from delf.python import feature_aggregation_similarity
from delf.python import feature_extractor
......
......@@ -20,11 +20,14 @@ from __future__ import print_function
import os
from absl import flags
import numpy as np
import tensorflow as tf
from delf import box_io
FLAGS = flags.FLAGS
class BoxesIoTest(tf.test.TestCase):
......@@ -57,8 +60,7 @@ class BoxesIoTest(tf.test.TestCase):
def testWriteAndReadToFile(self):
boxes, scores, class_indices = self._create_data()
tmpdir = tf.compat.v1.test.get_temp_dir()
filename = os.path.join(tmpdir, 'test.boxes')
filename = os.path.join(FLAGS.test_tmpdir, 'test.boxes')
box_io.WriteToFile(filename, boxes, scores, class_indices)
data_read = box_io.ReadFromFile(filename)
......@@ -67,8 +69,7 @@ class BoxesIoTest(tf.test.TestCase):
self.assertAllEqual(class_indices, data_read[2])
def testWriteAndReadToFileEmptyFile(self):
tmpdir = tf.compat.v1.test.get_temp_dir()
filename = os.path.join(tmpdir, 'test.box')
filename = os.path.join(FLAGS.test_tmpdir, 'test.box')
box_io.WriteToFile(filename, np.array([]), np.array([]), np.array([]))
data_read = box_io.ReadFromFile(filename)
......
......@@ -20,11 +20,14 @@ from __future__ import print_function
import os
from absl import flags
import numpy as np
import tensorflow as tf
from delf import datum_io
FLAGS = flags.FLAGS
class DatumIoTest(tf.test.TestCase):
......@@ -69,8 +72,7 @@ class DatumIoTest(tf.test.TestCase):
def testWriteAndReadToFile(self):
data = np.array([[[-1.0, 125.0, -2.5], [14.5, 3.5, 0.0]],
[[20.0, 0.0, 30.0], [25.5, 36.0, 42.0]]])
tmpdir = tf.compat.v1.test.get_temp_dir()
filename = os.path.join(tmpdir, 'test.datum')
filename = os.path.join(FLAGS.test_tmpdir, 'test.datum')
datum_io.WriteToFile(data, filename)
data_read = datum_io.ReadFromFile(filename)
self.assertAllEqual(data_read, data)
......@@ -84,8 +86,7 @@ class DatumIoTest(tf.test.TestCase):
data_2 = np.array(
[[[255, 0, 5], [10, 300, 0]], [[20, 1, 100], [255, 360, 420]]],
dtype='uint32')
tmpdir = tf.compat.v1.test.get_temp_dir()
filename = os.path.join(tmpdir, 'test.datum_pair')
filename = os.path.join(FLAGS.test_tmpdir, 'test.datum_pair')
datum_io.WritePairToFile(data_1, data_2, filename)
data_read_1, data_read_2 = datum_io.ReadPairFromFile(filename)
self.assertAllEqual(data_read_1, data_1)
......
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""DELF model implementation based on the following paper.
Large-Scale Image Retrieval with Attentive Deep Local Features
https://arxiv.org/abs/1612.06321
Please refer to the README.md file for detailed explanations on using the DELF
model.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from tf_slim import layers
from tf_slim.nets import resnet_v1
from tf_slim.ops.arg_scope import arg_scope
_SUPPORTED_TARGET_LAYER = ['resnet_v1_50/block3', 'resnet_v1_50/block4']
# The variable scope for the attention portion of the model.
_ATTENTION_VARIABLE_SCOPE = 'attention_block'
# The attention_type determines whether the attention based feature aggregation
# is performed on the L2-normalized feature map or on the default feature map
# where L2-normalization is not applied. Note that in both cases, attention
# functions are built on the un-normalized feature map. This is only relevant
# for the training stage.
# Currently supported options are as follows:
# * use_l2_normalized_feature:
# The option use_l2_normalized_feature first applies L2-normalization on the
# feature map and then applies attention based feature aggregation. This
# option is used for the DELF+FT+Att model in the paper.
# * use_default_input_feature:
# The option use_default_input_feature aggregates unnormalized feature map
# directly.
_SUPPORTED_ATTENTION_TYPES = [
'use_l2_normalized_feature', 'use_default_input_feature'
]
# Supported types of non-linearity for the attention score function.
_SUPPORTED_ATTENTION_NONLINEARITY = ['softplus']
class DelfV1(object):
"""Creates a DELF model.
Args:
target_layer_type: The name of target CNN architecture and its layer.
Raises:
ValueError: If an unknown target_layer_type is provided.
"""
def __init__(self, target_layer_type=_SUPPORTED_TARGET_LAYER[0]):
print('Creating model %s ' % target_layer_type)
self._target_layer_type = target_layer_type
if self._target_layer_type not in _SUPPORTED_TARGET_LAYER:
raise ValueError('Unknown model type.')
@property
def target_layer_type(self):
return self._target_layer_type
def _PerformAttention(self,
attention_feature_map,
feature_map,
attention_nonlinear,
kernel=1):
"""Helper function to construct the attention part of the model.
Computes attention score map and aggregates the input feature map based on
the attention score map.
Args:
attention_feature_map: Potentially normalized feature map that will be
aggregated with attention score map.
feature_map: Unnormalized feature map that will be used to compute
attention score map.
attention_nonlinear: Type of non-linearity that will be applied to
attention value.
kernel: Convolutional kernel to use in attention layers (eg: 1, [3, 3]).
Returns:
attention_feat: Aggregated feature vector.
attention_prob: Attention score map after the non-linearity.
attention_score: Attention score map before the non-linearity.
Raises:
ValueError: If unknown attention non-linearity type is provided.
"""
with tf.compat.v1.variable_scope(
'attention', values=[attention_feature_map, feature_map]):
with tf.compat.v1.variable_scope('compute', values=[feature_map]):
activation_fn_conv1 = tf.nn.relu
feature_map_conv1 = layers.conv2d(
feature_map,
512,
kernel,
rate=1,
activation_fn=activation_fn_conv1,
scope='conv1')
attention_score = layers.conv2d(
feature_map_conv1,
1,
kernel,
rate=1,
activation_fn=None,
normalizer_fn=None,
scope='conv2')
# Set activation of conv2 layer of attention model.
with tf.compat.v1.variable_scope(
'merge', values=[attention_feature_map, attention_score]):
if attention_nonlinear not in _SUPPORTED_ATTENTION_NONLINEARITY:
raise ValueError('Unknown attention non-linearity.')
if attention_nonlinear == 'softplus':
with tf.compat.v1.variable_scope(
'softplus_attention',
values=[attention_feature_map, attention_score]):
attention_prob = tf.nn.softplus(attention_score)
attention_feat = tf.reduce_mean(
tf.multiply(attention_feature_map, attention_prob), [1, 2])
attention_feat = tf.expand_dims(tf.expand_dims(attention_feat, 1), 2)
return attention_feat, attention_prob, attention_score
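The merge step above reduces to a few tensor ops; a self-contained sketch of softplus attention pooling on a toy feature map (shapes are illustrative):

import tensorflow as tf

feature_map = tf.random.normal([1, 7, 7, 1024])    # [batch, h, w, depth]
attention_score = tf.random.normal([1, 7, 7, 1])   # pre-nonlinearity scores
attention_prob = tf.nn.softplus(attention_score)
# Attention-weighted spatial average, as in _PerformAttention above.
attention_feat = tf.reduce_mean(feature_map * attention_prob, axis=[1, 2])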
def _GetAttentionSubnetwork(
self,
feature_map,
end_points,
attention_nonlinear=_SUPPORTED_ATTENTION_NONLINEARITY[0],
attention_type=_SUPPORTED_ATTENTION_TYPES[0],
kernel=1,
reuse=False):
"""Constructs the part of the model performing attention.
Args:
feature_map: A tensor of size [batch, height, width, channels]. Usually it
corresponds to the output feature map of a fully-convolutional network.
end_points: Set of activations of the network constructed so far.
attention_nonlinear: Type of non-linearity on top of the attention
function.
attention_type: Type of the attention structure.
kernel: Convolutional kernel to use in attention layers (eg, [3, 3]).
reuse: Whether or not the layer and its variables should be reused.
Returns:
prelogits: A tensor of size [batch, 1, 1, channels].
attention_prob: Attention score after the non-linearity.
attention_score: Attention score before the non-linearity.
end_points: Updated set of activations, for external use.
Raises:
ValueError: If unknown attention_type is provided.
"""
with tf.compat.v1.variable_scope(
_ATTENTION_VARIABLE_SCOPE,
values=[feature_map, end_points],
reuse=reuse):
if attention_type not in _SUPPORTED_ATTENTION_TYPES:
raise ValueError('Unknown attention_type.')
if attention_type == 'use_l2_normalized_feature':
attention_feature_map = tf.nn.l2_normalize(
feature_map, 3, name='l2_normalize')
elif attention_type == 'use_default_input_feature':
attention_feature_map = feature_map
end_points['attention_feature_map'] = attention_feature_map
attention_outputs = self._PerformAttention(attention_feature_map,
feature_map,
attention_nonlinear, kernel)
prelogits, attention_prob, attention_score = attention_outputs
end_points['prelogits'] = prelogits
end_points['attention_prob'] = attention_prob
end_points['attention_score'] = attention_score
return prelogits, attention_prob, attention_score, end_points
def GetResnet50Subnetwork(self,
images,
is_training=False,
global_pool=False,
reuse=None):
"""Constructs resnet_v1_50 part of the DELF model.
Args:
images: A tensor of size [batch, height, width, channels].
is_training: Whether or not the model is in training mode.
global_pool: If True, perform global average pooling after feature
extraction. This may be useful for DELF's descriptor fine-tuning stage.
reuse: Whether or not the layer and its variables should be reused.
Returns:
net: A rank-4 tensor of size [batch, height_out, width_out, channels_out].
If global_pool is True, height_out = width_out = 1.
end_points: A set of activations for external use.
"""
block = resnet_v1.resnet_v1_block
blocks = [
block('block1', base_depth=64, num_units=3, stride=2),
block('block2', base_depth=128, num_units=4, stride=2),
block('block3', base_depth=256, num_units=6, stride=2),
]
if self._target_layer_type == 'resnet_v1_50/block4':
blocks.append(block('block4', base_depth=512, num_units=3, stride=1))
net, end_points = resnet_v1.resnet_v1(
images,
blocks,
is_training=is_training,
global_pool=global_pool,
reuse=reuse,
scope='resnet_v1_50')
return net, end_points
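A graph-mode usage sketch (tf_slim models require disabling eager execution; the input shape is illustrative):

import tensorflow as tf

tf.compat.v1.disable_eager_execution()
model = DelfV1('resnet_v1_50/block3')
images = tf.compat.v1.placeholder(tf.float32, [None, 224, 224, 3])
net, end_points = model.GetResnet50Subnetwork(images, is_training=False)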
def GetAttentionPrelogit(
self,
images,
weight_decay=0.0001,
attention_nonlinear=_SUPPORTED_ATTENTION_NONLINEARITY[0],
attention_type=_SUPPORTED_ATTENTION_TYPES[0],
kernel=1,
training_resnet=False,
training_attention=False,
reuse=False,
use_batch_norm=True):
"""Constructs attention model on resnet_v1_50.
Args:
images: A tensor of size [batch, height, width, channels].
weight_decay: The parameters for weight_decay regularizer.
attention_nonlinear: Type of non-linearity on top of the attention
function.
attention_type: Type of the attention structure.
kernel: Convolutional kernel to use in attention layers (eg, [3, 3]).
training_resnet: Whether or not the Resnet blocks from the model are in
training mode.
training_attention: Whether or not the attention part of the model is in
training mode.
reuse: Whether or not the layer and its variables should be reused.
use_batch_norm: Whether or not to use batch normalization.
Returns:
prelogits: A tensor of size [batch, 1, 1, channels].
attention_prob: Attention score after the non-linearity.
attention_score: Attention score before the non-linearity.
feature_map: Features extracted from the model, which are not
l2-normalized.
end_points: Set of activations for external use.
"""
# Construct Resnet50 features.
with arg_scope(resnet_v1.resnet_arg_scope(use_batch_norm=use_batch_norm)):
_, end_points = self.GetResnet50Subnetwork(
images, is_training=training_resnet, reuse=reuse)
feature_map = end_points[self._target_layer_type]
# Construct attention subnetwork on top of features.
with arg_scope(
resnet_v1.resnet_arg_scope(
weight_decay=weight_decay, use_batch_norm=use_batch_norm)):
with arg_scope([layers.batch_norm], is_training=training_attention):
(prelogits, attention_prob, attention_score,
end_points) = self._GetAttentionSubnetwork(
feature_map,
end_points,
attention_nonlinear=attention_nonlinear,
attention_type=attention_type,
kernel=kernel,
reuse=reuse)
return prelogits, attention_prob, attention_score, feature_map, end_points
def _GetAttentionModel(
self,
images,
num_classes,
weight_decay=0.0001,
attention_nonlinear=_SUPPORTED_ATTENTION_NONLINEARITY[0],
attention_type=_SUPPORTED_ATTENTION_TYPES[0],
kernel=1,
training_resnet=False,
training_attention=False,
reuse=False):
"""Constructs attention model on resnet_v1_50.
Args:
images: A tensor of size [batch, height, width, channels]
num_classes: The number of output classes.
weight_decay: The parameters for weight_decay regularizer.
attention_nonlinear: Type of non-linearity on top of the attention
function.
attention_type: Type of the attention structure.
kernel: Convolutional kernel to use in attention layers (eg, [3, 3]).
training_resnet: Whether or not the Resnet blocks from the model are in
training mode.
training_attention: Whether or not the attention part of the model is in
training mode.
reuse: Whether or not the layer and its variables should be reused.
Returns:
logits: A tensor of size [batch, num_classes].
attention_prob: Attention score after the non-linearity.
attention_score: Attention score before the non-linearity.
feature_map: Features extracted from the model, which are not
l2-normalized.
"""
attention_feat, attention_prob, attention_score, feature_map, _ = (
self.GetAttentionPrelogit(
images,
weight_decay,
attention_nonlinear=attention_nonlinear,
attention_type=attention_type,
kernel=kernel,
training_resnet=training_resnet,
training_attention=training_attention,
reuse=reuse))
with arg_scope(
resnet_v1.resnet_arg_scope(
weight_decay=weight_decay, batch_norm_scale=True)):
with arg_scope([layers.batch_norm], is_training=training_attention):
with tf.compat.v1.variable_scope(
_ATTENTION_VARIABLE_SCOPE, values=[attention_feat], reuse=reuse):
logits = layers.conv2d(
attention_feat,
num_classes, [1, 1],
activation_fn=None,
normalizer_fn=None,
scope='logits')
logits = tf.squeeze(logits, [1, 2], name='spatial_squeeze')
return logits, attention_prob, attention_score, feature_map
def AttentionModel(self,
images,
num_classes,
weight_decay=0.0001,
attention_nonlinear=_SUPPORTED_ATTENTION_NONLINEARITY[0],
attention_type=_SUPPORTED_ATTENTION_TYPES[0],
kernel=1,
training_resnet=False,
training_attention=False,
reuse=False):
"""Constructs attention based classification model for training.
Args:
images: A tensor of size [batch, height, width, channels]
num_classes: The number of output classes.
weight_decay: The parameters for weight_decay regularizer.
attention_nonlinear: Type of non-linearity on top of the attention
function.
attention_type: Type of the attention structure.
kernel: Convolutional kernel to use in attention layers (eg, [3, 3]).
training_resnet: Whether or not the Resnet blocks from the model are in
training mode.
training_attention: Whether or not the model is in training mode. Note
that this function only supports training the attention part of the
model, ie, the feature extraction layers are not trained.
reuse: Whether or not the layer and its variables should be reused.
Returns:
logit: A tensor of size [batch, num_classes]
attention: Attention score after the non-linearity.
feature_map: Features extracted from the model, which are not
l2-normalized.
Raises:
ValueError: If unknown target_layer_type is provided.
"""
if 'resnet_v1_50' in self._target_layer_type:
net_outputs = self._GetAttentionModel(
images,
num_classes,
weight_decay,
attention_nonlinear=attention_nonlinear,
attention_type=attention_type,
kernel=kernel,
training_resnet=training_resnet,
training_attention=training_attention,
reuse=reuse)
logits, attention, _, feature_map = net_outputs
else:
raise ValueError('Unknown target_layer_type.')
return logits, attention, feature_map
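Continuing the graph-mode sketch above, the training-time classification head can then be built on the same images (num_classes is illustrative):

logits, attention, feature_map = model.AttentionModel(
    images, num_classes=1000, training_attention=True)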
......@@ -124,71 +124,70 @@ def ExtractAggregatedRepresentationsToFiles(image_names, features_dir,
if not tf.io.gfile.exists(output_aggregation_dir):
tf.io.gfile.makedirs(output_aggregation_dir)
with tf.compat.v1.Session() as sess:
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
sess, config)
start = time.clock()
for i in range(num_images):
if i == 0:
print('Starting to extract aggregation from images...')
elif i % _STATUS_CHECK_ITERATIONS == 0:
elapsed = (time.clock() - start)
print('Processing image %d out of %d, last %d '
'images took %f seconds' %
(i, num_images, _STATUS_CHECK_ITERATIONS, elapsed))
start = time.clock()
image_name = image_names[i]
# Compose output file name, skip extraction for this image if it already
# exists.
output_aggregation_filename = os.path.join(output_aggregation_dir,
image_name + output_extension)
if tf.io.gfile.exists(output_aggregation_filename):
print('Skipping %s' % image_name)
continue
# Load DELF features.
if config.use_regional_aggregation:
if not mapping_path:
raise ValueError(
'Requested regional aggregation, but mapping_path was not '
'provided')
descriptors_list = []
num_features_per_box = []
for box_feature_file in images_to_box_feature_files[image_name]:
delf_filename = os.path.join(features_dir,
box_feature_file + _DELF_EXTENSION)
_, _, box_descriptors, _, _ = feature_io.ReadFromFile(delf_filename)
# If `box_descriptors` is empty, reshape it such that it can be
# concatenated with other descriptors.
if not box_descriptors.shape[0]:
box_descriptors = np.reshape(box_descriptors,
[0, config.feature_dimensionality])
descriptors_list.append(box_descriptors)
num_features_per_box.append(box_descriptors.shape[0])
descriptors = np.concatenate(descriptors_list)
else:
input_delf_filename = os.path.join(features_dir,
image_name + _DELF_EXTENSION)
_, _, descriptors, _, _ = feature_io.ReadFromFile(input_delf_filename)
# If `descriptors` is empty, reshape it to avoid extraction failure.
if not descriptors.shape[0]:
descriptors = np.reshape(descriptors,
[0, config.feature_dimensionality])
num_features_per_box = None
# Extract and save aggregation. If using VLAD, only
# `aggregated_descriptors` needs to be saved.
(aggregated_descriptors,
feature_visual_words) = extractor.Extract(descriptors,
num_features_per_box)
if config.aggregation_type == _VLAD:
datum_io.WriteToFile(aggregated_descriptors,
output_aggregation_filename)
else:
datum_io.WritePairToFile(aggregated_descriptors,
feature_visual_words.astype('uint32'),
output_aggregation_filename)
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
config)
start = time.time()
for i in range(num_images):
if i == 0:
print('Starting to extract aggregation from images...')
elif i % _STATUS_CHECK_ITERATIONS == 0:
elapsed = (time.time() - start)
print('Processing image %d out of %d, last %d '
'images took %f seconds' %
(i, num_images, _STATUS_CHECK_ITERATIONS, elapsed))
start = time.time()
image_name = image_names[i]
# Compose output file name, skip extraction for this image if it already
# exists.
output_aggregation_filename = os.path.join(output_aggregation_dir,
image_name + output_extension)
if tf.io.gfile.exists(output_aggregation_filename):
print('Skipping %s' % image_name)
continue
# Load DELF features.
if config.use_regional_aggregation:
if not mapping_path:
raise ValueError(
'Requested regional aggregation, but mapping_path was not '
'provided')
descriptors_list = []
num_features_per_box = []
for box_feature_file in images_to_box_feature_files[image_name]:
delf_filename = os.path.join(features_dir,
box_feature_file + _DELF_EXTENSION)
_, _, box_descriptors, _, _ = feature_io.ReadFromFile(delf_filename)
# If `box_descriptors` is empty, reshape it such that it can be
# concatenated with other descriptors.
if not box_descriptors.shape[0]:
box_descriptors = np.reshape(box_descriptors,
[0, config.feature_dimensionality])
descriptors_list.append(box_descriptors)
num_features_per_box.append(box_descriptors.shape[0])
descriptors = np.concatenate(descriptors_list)
else:
input_delf_filename = os.path.join(features_dir,
image_name + _DELF_EXTENSION)
_, _, descriptors, _, _ = feature_io.ReadFromFile(input_delf_filename)
# If `descriptors` is empty, reshape it to avoid extraction failure.
if not descriptors.shape[0]:
descriptors = np.reshape(descriptors,
[0, config.feature_dimensionality])
num_features_per_box = None
# Extract and save aggregation. If using VLAD, only
# `aggregated_descriptors` needs to be saved.
(aggregated_descriptors,
feature_visual_words) = extractor.Extract(descriptors,
num_features_per_box)
if config.aggregation_type == _VLAD:
datum_io.WriteToFile(aggregated_descriptors,
output_aggregation_filename)
else:
datum_io.WritePairToFile(aggregated_descriptors,
feature_visual_words.astype('uint32'),
output_aggregation_filename)
......@@ -40,7 +40,6 @@ class ExtractAggregatedRepresentation(object):
"""Class for extraction of aggregated local feature representation.
Args:
sess: TensorFlow session to use.
aggregation_config: AggregationConfig object defining type of aggregation to
use.
......@@ -48,65 +47,28 @@ class ExtractAggregatedRepresentation(object):
ValueError: If aggregation type is invalid.
"""
def __init__(self, sess, aggregation_config):
self._sess = sess
def __init__(self, aggregation_config):
self._codebook_size = aggregation_config.codebook_size
self._feature_dimensionality = aggregation_config.feature_dimensionality
self._aggregation_type = aggregation_config.aggregation_type
self._feature_batch_size = aggregation_config.feature_batch_size
self._codebook_path = aggregation_config.codebook_path
self._use_regional_aggregation = aggregation_config.use_regional_aggregation
self._use_l2_normalization = aggregation_config.use_l2_normalization
self._num_assignments = aggregation_config.num_assignments
# Inputs to extraction function.
self._features = tf.compat.v1.placeholder(tf.float32, [None, None])
self._num_features_per_region = tf.compat.v1.placeholder(tf.int32, [None])
# Load codebook into graph.
codebook = tf.compat.v1.get_variable(
"codebook",
shape=[
aggregation_config.codebook_size,
aggregation_config.feature_dimensionality
])
tf.compat.v1.train.init_from_checkpoint(
aggregation_config.codebook_path, {_CLUSTER_CENTERS_VAR_NAME: codebook})
# Construct extraction graph based on desired options.
if self._aggregation_type == _VLAD:
# Feature visual words are unused in the case of VLAD, so just return
# dummy constant.
self._feature_visual_words = tf.constant(-1, dtype=tf.int32)
if aggregation_config.use_regional_aggregation:
self._aggregated_descriptors = self._ComputeRvlad(
self._features,
self._num_features_per_region,
codebook,
use_l2_normalization=aggregation_config.use_l2_normalization,
num_assignments=aggregation_config.num_assignments)
else:
self._aggregated_descriptors = self._ComputeVlad(
self._features,
codebook,
use_l2_normalization=aggregation_config.use_l2_normalization,
num_assignments=aggregation_config.num_assignments)
elif (self._aggregation_type == _ASMK or
self._aggregation_type == _ASMK_STAR):
if aggregation_config.use_regional_aggregation:
(self._aggregated_descriptors,
self._feature_visual_words) = self._ComputeRasmk(
self._features,
self._num_features_per_region,
codebook,
num_assignments=aggregation_config.num_assignments)
else:
(self._aggregated_descriptors,
self._feature_visual_words) = self._ComputeAsmk(
self._features,
codebook,
num_assignments=aggregation_config.num_assignments)
else:
if self._aggregation_type not in [_VLAD, _ASMK, _ASMK_STAR]:
raise ValueError("Invalid aggregation type: %d" % self._aggregation_type)
# Initialize variables in the TF graph.
sess.run(tf.compat.v1.global_variables_initializer())
# Load codebook
codebook = tf.Variable(
tf.zeros([self._codebook_size, self._feature_dimensionality],
dtype=tf.float32),
name=_CLUSTER_CENTERS_VAR_NAME)
ckpt = tf.train.Checkpoint(codebook=codebook)
ckpt.restore(self._codebook_path)
self._codebook = codebook
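A compatible codebook checkpoint can be produced the same way the updated tests do; a sketch with illustrative sizes and path:

import tensorflow as tf

codebook = tf.Variable(tf.random.normal([1024, 128]), name='clusters')
ckpt = tf.train.Checkpoint(codebook=codebook)
ckpt.write('/tmp/codebook')  # pass this path as codebook_path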
def Extract(self, features, num_features_per_region=None):
"""Extracts aggregated representation.
......@@ -127,10 +89,13 @@ class ExtractAggregatedRepresentation(object):
Raises:
ValueError: If inputs are misconfigured.
"""
features = tf.cast(features, dtype=tf.float32)
if num_features_per_region is None:
# Use dummy value since it is unused.
num_features_per_region = []
else:
num_features_per_region = tf.cast(num_features_per_region, dtype=tf.int32)
if len(num_features_per_region
) and sum(num_features_per_region) != features.shape[0]:
raise ValueError(
......@@ -138,12 +103,41 @@ class ExtractAggregatedRepresentation(object):
"features.shape[0] are different: %d vs %d" %
(sum(num_features_per_region), features.shape[0]))
aggregated_descriptors, feature_visual_words = self._sess.run(
[self._aggregated_descriptors, self._feature_visual_words],
feed_dict={
self._features: features,
self._num_features_per_region: num_features_per_region
})
# Extract features based on desired options.
if self._aggregation_type == _VLAD:
# Feature visual words are unused in the case of VLAD, so just return
# dummy constant.
feature_visual_words = tf.constant(-1, dtype=tf.int32)
if self._use_regional_aggregation:
aggregated_descriptors = self._ComputeRvlad(
features,
num_features_per_region,
self._codebook,
use_l2_normalization=self._use_l2_normalization,
num_assignments=self._num_assignments)
else:
aggregated_descriptors = self._ComputeVlad(
features,
self._codebook,
use_l2_normalization=self._use_l2_normalization,
num_assignments=self._num_assignments)
elif (self._aggregation_type == _ASMK or
self._aggregation_type == _ASMK_STAR):
if self._use_regional_aggregation:
(aggregated_descriptors,
feature_visual_words) = self._ComputeRasmk(
features,
num_features_per_region,
self._codebook,
num_assignments=self._num_assignments)
else:
(aggregated_descriptors,
feature_visual_words) = self._ComputeAsmk(
features,
self._codebook,
num_assignments=self._num_assignments)
feature_visual_words_output = feature_visual_words.numpy()
# If using ASMK*/RASMK*, binarize the aggregated descriptors.
if self._aggregation_type == _ASMK_STAR:
......@@ -151,9 +145,11 @@ class ExtractAggregatedRepresentation(object):
aggregated_descriptors, [-1, self._feature_dimensionality])
packed_descriptors = np.packbits(
reshaped_aggregated_descriptors > 0, axis=1)
aggregated_descriptors = np.reshape(packed_descriptors, [-1])
aggregated_descriptors_output = np.reshape(packed_descriptors, [-1])
else:
aggregated_descriptors_output = aggregated_descriptors.numpy()
return aggregated_descriptors, feature_visual_words
return aggregated_descriptors_output, feature_visual_words_output
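With the session removed, extraction now runs eagerly end to end; a usage sketch mirroring the updated tests (the codebook path is assumed, and the enum value assumes the AggregationConfig proto's VLAD type):

import numpy as np
from delf import aggregation_config_pb2
from delf import feature_aggregation_extractor

config = aggregation_config_pb2.AggregationConfig()
config.codebook_size = 5
config.feature_dimensionality = 2
config.aggregation_type = aggregation_config_pb2.AggregationConfig.VLAD
config.codebook_path = '/tmp/codebook'  # assumed codebook checkpoint path
config.num_assignments = 1

extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
    config)
vlad, _ = extractor.Extract(np.array([[1.0, 0.0], [0.0, 1.0]]))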
def _ComputeVlad(self,
features,
......@@ -268,11 +264,13 @@ class ExtractAggregatedRepresentation(object):
output_vlad: VLAD descriptor updated to take into account contribution
from ind-th feature.
"""
diff = tf.tile(
tf.expand_dims(features[ind],
axis=0), [num_assignments, 1]) - tf.gather(
codebook, selected_visual_words[ind])
return ind + 1, tf.tensor_scatter_nd_add(
vlad, tf.expand_dims(selected_visual_words[ind], axis=1),
tf.tile(
tf.expand_dims(features[ind], axis=0), [num_assignments, 1]) -
tf.gather(codebook, selected_visual_words[ind]))
tf.cast(diff, dtype=tf.float32))
ind_vlad = tf.constant(0, dtype=tf.int32)
keep_going = lambda j, vlad: tf.less(j, num_features)
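The scatter-add in the loop body is plain VLAD residual accumulation; a numpy sketch of a single iteration (toy values):

import numpy as np

feature = np.array([1.0, 0.0])
codebook = np.array([[0.5, 0.5], [0.0, 0.0]])
selected_visual_words = np.array([0])   # nearest center indices for feature
vlad = np.zeros_like(codebook)
# Accumulate the residual of the feature w.r.t. each selected center.
vlad[selected_visual_words] += feature - codebook[selected_visual_words]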
......@@ -398,7 +396,9 @@ class ExtractAggregatedRepresentation(object):
visual_words = tf.reshape(
tf.where(
tf.greater(per_centroid_norms, tf.sqrt(_NORM_SQUARED_TOLERANCE))),
tf.greater(
per_centroid_norms,
tf.cast(tf.sqrt(_NORM_SQUARED_TOLERANCE), dtype=tf.float32))),
[-1])
per_centroid_normalized_vector = tf.math.l2_normalize(
......
......@@ -20,12 +20,15 @@ from __future__ import print_function
import os
from absl import flags
import numpy as np
import tensorflow as tf
from delf import aggregation_config_pb2
from delf import feature_aggregation_extractor
FLAGS = flags.FLAGS
class FeatureAggregationTest(tf.test.TestCase):
......@@ -35,17 +38,15 @@ class FeatureAggregationTest(tf.test.TestCase):
Args:
checkpoint_path: Directory where codebook is saved to.
"""
with tf.Graph().as_default() as g, self.session(graph=g) as sess:
codebook = tf.Variable(
[[0.5, 0.5], [0.0, 0.0], [1.0, 0.0], [-0.5, -0.5], [0.0, 1.0]],
name='clusters')
saver = tf.compat.v1.train.Saver([codebook])
sess.run(tf.compat.v1.global_variables_initializer())
saver.save(sess, checkpoint_path)
codebook = tf.Variable(
[[0.5, 0.5], [0.0, 0.0], [1.0, 0.0], [-0.5, -0.5], [0.0, 1.0]],
name='clusters',
dtype=tf.float32)
ckpt = tf.train.Checkpoint(codebook=codebook)
ckpt.write(checkpoint_path)
def setUp(self):
self._codebook_path = os.path.join(tf.compat.v1.test.get_temp_dir(),
'test_codebook')
self._codebook_path = os.path.join(FLAGS.test_tmpdir, 'test_codebook')
self._CreateCodebook(self._codebook_path)
def testComputeNormalizedVladWorks(self):
......@@ -61,10 +62,9 @@ class FeatureAggregationTest(tf.test.TestCase):
config.num_assignments = 1
# Run tested function.
with tf.Graph().as_default() as g, self.session(graph=g) as sess:
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
sess, config)
vlad, extra_output = extractor.Extract(features)
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
config)
vlad, extra_output = extractor.Extract(features)
# Define expected results.
exp_vlad = [
......@@ -90,10 +90,9 @@ class FeatureAggregationTest(tf.test.TestCase):
config.feature_batch_size = 2
# Run tested function.
with tf.Graph().as_default() as g, self.session(graph=g) as sess:
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
sess, config)
vlad, extra_output = extractor.Extract(features)
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
config)
vlad, extra_output = extractor.Extract(features)
# Define expected results.
exp_vlad = [
......@@ -118,10 +117,9 @@ class FeatureAggregationTest(tf.test.TestCase):
config.num_assignments = 1
# Run tested function.
with tf.Graph().as_default() as g, self.session(graph=g) as sess:
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
sess, config)
vlad, extra_output = extractor.Extract(features)
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
config)
vlad, extra_output = extractor.Extract(features)
# Define expected results.
exp_vlad = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.5, 0.5, 1.0, 1.0]
......@@ -144,10 +142,9 @@ class FeatureAggregationTest(tf.test.TestCase):
config.num_assignments = 3
# Run tested function.
with tf.Graph().as_default() as g, self.session(graph=g) as sess:
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
sess, config)
vlad, extra_output = extractor.Extract(features)
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
config)
vlad, extra_output = extractor.Extract(features)
# Define expected results.
exp_vlad = [1.0, 1.0, 0.0, 0.0, 0.0, 2.0, -0.5, 0.5, 0.0, 0.0]
......@@ -168,10 +165,9 @@ class FeatureAggregationTest(tf.test.TestCase):
config.codebook_path = self._codebook_path
# Run tested function.
with tf.Graph().as_default() as g, self.session(graph=g) as sess:
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
sess, config)
vlad, extra_output = extractor.Extract(features)
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
config)
vlad, extra_output = extractor.Extract(features)
# Define expected results.
exp_vlad = np.zeros([10], dtype=float)
......@@ -197,10 +193,9 @@ class FeatureAggregationTest(tf.test.TestCase):
config.use_regional_aggregation = True
# Run tested function.
with tf.Graph().as_default() as g, self.session(graph=g) as sess:
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
sess, config)
rvlad, extra_output = extractor.Extract(features, num_features_per_region)
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
config)
rvlad, extra_output = extractor.Extract(features, num_features_per_region)
# Define expected results.
exp_rvlad = [
......@@ -228,10 +223,9 @@ class FeatureAggregationTest(tf.test.TestCase):
config.use_regional_aggregation = True
# Run tested function.
with tf.Graph().as_default() as g, self.session(graph=g) as sess:
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
sess, config)
rvlad, extra_output = extractor.Extract(features, num_features_per_region)
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
config)
rvlad, extra_output = extractor.Extract(features, num_features_per_region)
# Define expected results.
exp_rvlad = [
......@@ -256,10 +250,9 @@ class FeatureAggregationTest(tf.test.TestCase):
config.use_regional_aggregation = True
# Run tested function.
with tf.Graph().as_default() as g, self.session(graph=g) as sess:
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
sess, config)
rvlad, extra_output = extractor.Extract(features, num_features_per_region)
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
config)
rvlad, extra_output = extractor.Extract(features, num_features_per_region)
# Define expected results.
exp_rvlad = np.zeros([10], dtype=float)
......@@ -286,10 +279,9 @@ class FeatureAggregationTest(tf.test.TestCase):
config.use_regional_aggregation = True
# Run tested function.
with tf.Graph().as_default() as g, self.session(graph=g) as sess:
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
sess, config)
rvlad, extra_output = extractor.Extract(features, num_features_per_region)
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
config)
rvlad, extra_output = extractor.Extract(features, num_features_per_region)
# Define expected results.
exp_rvlad = [
......@@ -318,10 +310,9 @@ class FeatureAggregationTest(tf.test.TestCase):
config.use_regional_aggregation = True
# Run tested function.
with tf.Graph().as_default() as g, self.session(graph=g) as sess:
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
sess, config)
rvlad, extra_output = extractor.Extract(features, num_features_per_region)
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
config)
rvlad, extra_output = extractor.Extract(features, num_features_per_region)
# Define expected results.
exp_rvlad = [
......@@ -349,14 +340,13 @@ class FeatureAggregationTest(tf.test.TestCase):
config.use_regional_aggregation = True
# Run tested function.
with tf.Graph().as_default() as g, self.session(graph=g) as sess:
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
sess, config)
with self.assertRaisesRegex(
ValueError,
r'Incorrect arguments: sum\(num_features_per_region\) and '
r'features.shape\[0\] are different'):
extractor.Extract(features, num_features_per_region)
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
config)
with self.assertRaisesRegex(
ValueError,
r'Incorrect arguments: sum\(num_features_per_region\) and '
r'features.shape\[0\] are different'):
extractor.Extract(features, num_features_per_region)
def testComputeAsmkWorks(self):
# Construct inputs.
......@@ -370,10 +360,9 @@ class FeatureAggregationTest(tf.test.TestCase):
config.num_assignments = 1
# Run tested function.
with tf.Graph().as_default() as g, self.session(graph=g) as sess:
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
sess, config)
asmk, visual_words = extractor.Extract(features)
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
config)
asmk, visual_words = extractor.Extract(features)
# Define expected results.
exp_asmk = [-0.707107, 0.707107, 0.707107, 0.707107]
......@@ -395,10 +384,9 @@ class FeatureAggregationTest(tf.test.TestCase):
config.num_assignments = 1
# Run tested function.
with tf.Graph().as_default() as g, self.session(graph=g) as sess:
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
sess, config)
asmk_star, visual_words = extractor.Extract(features)
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
config)
asmk_star, visual_words = extractor.Extract(features)
# Define expected results.
exp_asmk_star = [64, 192]
......@@ -420,10 +408,9 @@ class FeatureAggregationTest(tf.test.TestCase):
config.num_assignments = 3
# Run tested function.
with tf.Graph().as_default() as g, self.session(graph=g) as sess:
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
sess, config)
asmk, visual_words = extractor.Extract(features)
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
config)
asmk, visual_words = extractor.Extract(features)
# Define expected results.
exp_asmk = [0.707107, 0.707107, 0.0, 1.0, -0.707107, 0.707107]
......@@ -448,10 +435,9 @@ class FeatureAggregationTest(tf.test.TestCase):
config.use_regional_aggregation = True
# Run tested function.
with tf.Graph().as_default() as g, self.session(graph=g) as sess:
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
sess, config)
rasmk, visual_words = extractor.Extract(features, num_features_per_region)
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
config)
rasmk, visual_words = extractor.Extract(features, num_features_per_region)
# Define expected results.
exp_rasmk = [-0.707107, 0.707107, 0.361261, 0.932465]
......@@ -476,11 +462,10 @@ class FeatureAggregationTest(tf.test.TestCase):
config.use_regional_aggregation = True
# Run tested function.
with tf.Graph().as_default() as g, self.session(graph=g) as sess:
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
sess, config)
rasmk_star, visual_words = extractor.Extract(features,
num_features_per_region)
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
config)
rasmk_star, visual_words = extractor.Extract(features,
num_features_per_region)
# Define expected results.
exp_rasmk_star = [64, 192]
......@@ -500,10 +485,9 @@ class FeatureAggregationTest(tf.test.TestCase):
config.use_regional_aggregation = True
# Run tested function.
with tf.Graph().as_default() as g, self.session(graph=g) as sess:
with self.assertRaisesRegex(ValueError, 'Invalid aggregation type'):
feature_aggregation_extractor.ExtractAggregatedRepresentation(
sess, config)
with self.assertRaisesRegex(ValueError, 'Invalid aggregation type'):
feature_aggregation_extractor.ExtractAggregatedRepresentation(
config)
if __name__ == '__main__':
......
......@@ -19,10 +19,6 @@ from __future__ import print_function
import tensorflow as tf
from delf import delf_v1
from object_detection.core import box_list
from object_detection.core import box_list_ops
def NormalizePixelValues(image,
pixel_value_offset=128.0,
......@@ -81,219 +77,6 @@ def CalculateKeypointCenters(boxes):
2.0)
def ExtractKeypointDescriptor(image, layer_name, image_scales, iou,
max_feature_num, abs_thres, model_fn):
"""Extract keypoint descriptor for input image.
Args:
image: A image tensor with shape [h, w, channels].
layer_name: The endpoint of feature extraction layer.
image_scales: A 1D float tensor which contains the scales.
iou: A float scalar denoting the IOU threshold for NMS.
max_feature_num: An int tensor denoting the maximum selected feature points.
abs_thres: A float tensor denoting the score threshold for feature
selection.
model_fn: Model function. Follows the signature:
* Args:
* `images`: Image tensor which is re-scaled.
* `normalized_image`: Whether or not the images are normalized.
* `reuse`: Whether or not the layer and its variables should be reused.
* Returns:
* `attention`: Attention score after the non-linearity.
* `feature_map`: Feature map obtained from the ResNet model.
Returns:
boxes: [N, 4] float tensor which denotes the selected receptive box. N is
the number of final feature points which pass through keypoint selection
and NMS steps.
feature_scales: [N] float tensor. It is the inverse of the input image
scales such that larger image scales correspond to larger image regions,
which is compatible with scale-space keypoint detection convention.
features: [N, depth] float tensor with feature descriptors.
scores: [N, 1] float tensor denoting the attention score.
Raises:
ValueError: If the layer_name is unsupported.
"""
original_image_shape_float = tf.gather(
tf.cast(tf.shape(image), dtype=tf.float32), [0, 1])
image_tensor = NormalizePixelValues(image)
image_tensor = tf.expand_dims(image_tensor, 0, name='image/expand_dims')
# Feature depth and receptive field parameters for each network version.
if layer_name == 'resnet_v1_50/block3':
feature_depth = 1024
rf, stride, padding = [291.0, 32.0, 145.0]
elif layer_name == 'resnet_v1_50/block4':
feature_depth = 2048
rf, stride, padding = [483.0, 32.0, 241.0]
else:
raise ValueError('Unsupported layer_name.')
def _ProcessSingleScale(scale_index,
boxes,
features,
scales,
scores,
reuse=True):
"""Resize the image and run feature extraction and keypoint selection.
This function will be passed into tf.while_loop() and be called
repeatedly. The input boxes are collected from the previous iteration
[0: scale_index -1]. We get the current scale by
image_scales[scale_index], and run image resizing, feature extraction and
keypoint selection. Then we will get a new set of selected_boxes for
current scale. In the end, we concat the previous boxes with current
selected_boxes as the output.
Args:
scale_index: A valid index in the image_scales.
boxes: Box tensor with the shape of [N, 4].
features: Feature tensor with the shape of [N, depth].
scales: Scale tensor with the shape of [N].
scores: Attention score tensor with the shape of [N].
reuse: Whether or not the layer and its variables should be reused.
Returns:
scale_index: The next scale index for processing.
boxes: Concatenated box tensor with the shape of [K, 4]. K >= N.
features: Concatenated feature tensor with the shape of [K, depth].
scales: Concatenated scale tensor with the shape of [K].
scores: Concatenated attention score tensor with the shape of [K].
"""
scale = tf.gather(image_scales, scale_index)
new_image_size = tf.cast(
tf.round(original_image_shape_float * scale), dtype=tf.int32)
resized_image = tf.compat.v1.image.resize_bilinear(image_tensor,
new_image_size)
attention, feature_map = model_fn(
resized_image, normalized_image=True, reuse=reuse)
rf_boxes = CalculateReceptiveBoxes(
tf.shape(feature_map)[1],
tf.shape(feature_map)[2], rf, stride, padding)
# Re-project back to the original image space.
rf_boxes = tf.divide(rf_boxes, scale)
attention = tf.reshape(attention, [-1])
feature_map = tf.reshape(feature_map, [-1, feature_depth])
# Use attention score to select feature vectors.
indices = tf.reshape(tf.where(attention >= abs_thres), [-1])
selected_boxes = tf.gather(rf_boxes, indices)
selected_features = tf.gather(feature_map, indices)
selected_scores = tf.gather(attention, indices)
selected_scales = tf.ones_like(selected_scores, tf.float32) / scale
# Concat with the previous result from different scales.
boxes = tf.concat([boxes, selected_boxes], 0)
features = tf.concat([features, selected_features], 0)
scales = tf.concat([scales, selected_scales], 0)
scores = tf.concat([scores, selected_scores], 0)
return scale_index + 1, boxes, features, scales, scores
output_boxes = tf.zeros([0, 4], dtype=tf.float32)
output_features = tf.zeros([0, feature_depth], dtype=tf.float32)
output_scales = tf.zeros([0], dtype=tf.float32)
output_scores = tf.zeros([0], dtype=tf.float32)
# Process the first scale separately, the following scales will reuse the
# graph variables.
(_, output_boxes, output_features, output_scales,
output_scores) = _ProcessSingleScale(
0,
output_boxes,
output_features,
output_scales,
output_scores,
reuse=False)
i = tf.constant(1, dtype=tf.int32)
num_scales = tf.shape(image_scales)[0]
keep_going = lambda j, boxes, features, scales, scores: tf.less(j, num_scales)
(_, output_boxes, output_features, output_scales,
output_scores) = tf.while_loop(
cond=keep_going,
body=_ProcessSingleScale,
loop_vars=[
i, output_boxes, output_features, output_scales, output_scores
],
shape_invariants=[
i.get_shape(),
tf.TensorShape([None, 4]),
tf.TensorShape([None, feature_depth]),
tf.TensorShape([None]),
tf.TensorShape([None])
],
back_prop=False)
feature_boxes = box_list.BoxList(output_boxes)
feature_boxes.add_field('features', output_features)
feature_boxes.add_field('scales', output_scales)
feature_boxes.add_field('scores', output_scores)
nms_max_boxes = tf.minimum(max_feature_num, feature_boxes.num_boxes())
final_boxes = box_list_ops.non_max_suppression(feature_boxes, iou,
nms_max_boxes)
return (final_boxes.get(), final_boxes.get_field('scales'),
final_boxes.get_field('features'),
tf.expand_dims(final_boxes.get_field('scores'), 1))
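For reference, the receptive-field boxes used above follow a simple affine rule per feature-map cell; a numpy sketch consistent with the rf/stride/padding constants above and the values checked in feature_extractor_test.py (the helper name is illustrative):

import numpy as np

def receptive_boxes(height, width, rf, stride, padding):
  # One [ymin, xmin, ymax, xmax] box in image coordinates per cell.
  y, x = np.meshgrid(np.arange(height), np.arange(width), indexing='ij')
  ymin, xmin = y * stride - padding, x * stride - padding
  return np.stack([ymin, xmin, ymin + rf - 1, xmin + rf - 1],
                  axis=-1).reshape(-1, 4)

receptive_boxes(1, 2, rf=291, stride=32, padding=145)
# -> [[-145, -145, 145, 145], [-145, -113, 145, 177]]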
def BuildModel(layer_name, attention_nonlinear, attention_type,
attention_kernel_size):
"""Build the DELF model.
This function is helpful for constructing the model function which will be fed
to ExtractKeypointDescriptor().
Args:
layer_name: the endpoint of feature extraction layer.
attention_nonlinear: Type of the non-linearity for the attention function.
Currently, only 'softplus' is supported.
attention_type: Type of the attention used. Options are:
'use_l2_normalized_feature' and 'use_default_input_feature'. Note that
this is irrelevant during inference time.
attention_kernel_size: Size of attention kernel (kernel is square).
Returns:
Attention model function.
"""
def _ModelFn(images, normalized_image, reuse):
"""Attention model to get feature map and attention score map.
Args:
images: Image tensor.
normalized_image: Whether or not the images are normalized.
reuse: Whether or not the layer and its variables should be reused.
Returns:
attention: Attention score after the non-linearity.
feature_map: Feature map after ResNet convolution.
"""
if normalized_image:
image_tensor = images
else:
image_tensor = NormalizePixelValues(images)
# Extract features and attention scores.
model = delf_v1.DelfV1(layer_name)
_, attention, _, feature_map, _ = model.GetAttentionPrelogit(
image_tensor,
attention_nonlinear=attention_nonlinear,
attention_type=attention_type,
kernel=[attention_kernel_size, attention_kernel_size],
training_resnet=False,
training_attention=False,
reuse=reuse)
return attention, feature_map
return _ModelFn
def ApplyPcaAndWhitening(data,
pca_matrix,
pca_mean,
......@@ -345,22 +128,21 @@ def PostProcessDescriptors(descriptors, use_pca, pca_parameters=None):
normalization and (possibly) PCA/whitening.
"""
# L2-normalize, and if desired apply PCA (followed by L2-normalization).
with tf.compat.v1.variable_scope('postprocess'):
final_descriptors = tf.nn.l2_normalize(
descriptors, axis=1, name='l2_normalization')
if use_pca:
# Apply PCA, and whitening if desired.
final_descriptors = ApplyPcaAndWhitening(final_descriptors,
pca_parameters['matrix'],
pca_parameters['mean'],
pca_parameters['dim'],
pca_parameters['use_whitening'],
pca_parameters['variances'])
# Re-normalize.
final_descriptors = tf.nn.l2_normalize(
descriptors, axis=1, name='l2_normalization')
if use_pca:
# Apply PCA, and whitening if desired.
final_descriptors = ApplyPcaAndWhitening(final_descriptors,
pca_parameters['matrix'],
pca_parameters['mean'],
pca_parameters['dim'],
pca_parameters['use_whitening'],
pca_parameters['variances'])
# Re-normalize.
final_descriptors = tf.nn.l2_normalize(
final_descriptors, axis=1, name='pca_l2_normalization')
final_descriptors, axis=1, name='pca_l2_normalization')
return final_descriptors
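The PCA/whitening step itself is a mean-shift and projection; a numpy sketch of the assumed math (helper name and argument order are illustrative):

import numpy as np

def pca_whiten(data, matrix, mean, dim, variances=None):
  # Project mean-centered data onto the PCA basis, keep `dim` dimensions,
  # and optionally whiten by the per-dimension variances.
  projected = (data - mean) @ matrix.T
  projected = projected[:, :dim]
  if variances is not None:
    projected = projected / np.sqrt(variances[:dim])
  return projected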
......
......@@ -18,7 +18,6 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import tensorflow as tf
from delf import feature_extractor
......@@ -34,78 +33,24 @@ class FeatureExtractorTest(tf.test.TestCase):
image, pixel_value_offset=5.0, pixel_value_scale=2.0)
exp_normalized_image = [[[-1.0, 125.0, -2.5], [14.5, 3.5, 0.0]],
[[20.0, 0.0, 30.0], [25.5, 36.0, 42.0]]]
with self.session() as sess:
normalized_image_out = sess.run(normalized_image)
self.assertAllEqual(normalized_image_out, exp_normalized_image)
self.assertAllEqual(normalized_image, exp_normalized_image)
def testCalculateReceptiveBoxes(self):
boxes = feature_extractor.CalculateReceptiveBoxes(
height=1, width=2, rf=291, stride=32, padding=145)
exp_boxes = [[-145., -145., 145., 145.], [-145., -113., 145., 177.]]
with self.session() as sess:
boxes_out = sess.run(boxes)
self.assertAllEqual(exp_boxes, boxes_out)
self.assertAllEqual(exp_boxes, boxes)
def testCalculateKeypointCenters(self):
boxes = [[-10.0, 0.0, 11.0, 21.0], [-2.5, 5.0, 18.5, 26.0],
[45.0, -2.5, 66.0, 18.5]]
centers = feature_extractor.CalculateKeypointCenters(boxes)
with self.session() as sess:
centers_out = sess.run(centers)
exp_centers = [[0.5, 10.5], [8.0, 15.5], [55.5, 8.0]]
self.assertAllEqual(exp_centers, centers_out)
def testExtractKeypointDescriptor(self):
image = tf.constant(
[[[0, 255, 255], [128, 64, 196]], [[0, 0, 32], [32, 128, 16]]],
dtype=tf.uint8)
# Arbitrary model function used to test ExtractKeypointDescriptor. The
# generated feature_map is a replicated version of the image, concatenated
# with zeros to achieve the required dimensionality. The attention is simply
# the norm of the input image pixels.
def _test_model_fn(image, normalized_image, reuse):
del normalized_image, reuse # Unused variables in the test.
image_shape = tf.shape(image)
attention = tf.squeeze(tf.norm(image, axis=3))
feature_map = tf.concat([
tf.tile(image, [1, 1, 1, 341]),
tf.zeros([1, image_shape[1], image_shape[2], 1])
],
axis=3)
return attention, feature_map
boxes, feature_scales, features, scores = (
feature_extractor.ExtractKeypointDescriptor(
image,
layer_name='resnet_v1_50/block3',
image_scales=tf.constant([1.0]),
iou=1.0,
max_feature_num=10,
abs_thres=1.5,
model_fn=_test_model_fn))
exp_boxes = [[-145.0, -145.0, 145.0, 145.0], [-113.0, -145.0, 177.0, 145.0]]
exp_feature_scales = [1.0, 1.0]
exp_features = np.array(
np.concatenate(
(np.tile([[-1.0, 127.0 / 128.0, 127.0 / 128.0], [-1.0, -1.0, -0.75]
], [1, 341]), np.zeros([2, 1])),
axis=1))
exp_scores = [[1.723042], [1.600781]]
with self.session() as sess:
boxes_out, feature_scales_out, features_out, scores_out = sess.run(
[boxes, feature_scales, features, scores])
self.assertAllEqual(exp_boxes, boxes_out)
self.assertAllEqual(exp_feature_scales, feature_scales_out)
self.assertAllClose(exp_features, features_out)
self.assertAllClose(exp_scores, scores_out)
self.assertAllEqual(exp_centers, centers)
def testPcaWhitening(self):
data = tf.constant([[1.0, 2.0, -2.0], [-5.0, 0.0, 3.0], [-1.0, 2.0, 0.0],
......@@ -123,12 +68,8 @@ class FeatureExtractorTest(tf.test.TestCase):
exp_output = [[2.5, -5.0], [-6.0, -2.0], [-0.5, -3.0], [1.0, -2.0]]
with self.session() as sess:
output_out = sess.run(output)
self.assertAllEqual(exp_output, output_out)
self.assertAllEqual(exp_output, output)
if __name__ == '__main__':
tf.compat.v1.disable_eager_execution()
tf.test.main()
......@@ -20,11 +20,14 @@ from __future__ import print_function
import os
from absl import flags
import numpy as np
import tensorflow as tf
from delf import feature_io
FLAGS = flags.FLAGS
def create_data():
"""Creates data to be used in tests.
......@@ -81,8 +84,7 @@ class DelfFeaturesIoTest(tf.test.TestCase):
def testWriteAndReadToFile(self):
locations, scales, descriptors, attention, orientations = create_data()
tmpdir = tf.compat.v1.test.get_temp_dir()
filename = os.path.join(tmpdir, 'test.delf')
filename = os.path.join(FLAGS.test_tmpdir, 'test.delf')
feature_io.WriteToFile(filename, locations, scales, descriptors, attention,
orientations)
data_read = feature_io.ReadFromFile(filename)
......@@ -94,8 +96,7 @@ class DelfFeaturesIoTest(tf.test.TestCase):
self.assertAllEqual(orientations, data_read[4])
def testWriteAndReadToFileEmptyFile(self):
tmpdir = tf.compat.v1.test.get_temp_dir()
filename = os.path.join(tmpdir, 'test.delf')
filename = os.path.join(FLAGS.test_tmpdir, 'test.delf')
feature_io.WriteToFile(filename, np.array([]), np.array([]), np.array([]),
np.array([]), np.array([]))
data_read = feature_io.ReadFromFile(filename)
......
......@@ -27,6 +27,15 @@ import functools
import tensorflow as tf
class _GoogleLandmarksInfo(object):
"""Metadata about the Google Landmarks dataset."""
num_classes = {
'gld_v1': 14951,
'gld_v2': 203094,
'gld_v2_clean': 81313
}
class _DataAugmentationParams(object):
"""Default parameters for augmentation."""
# The following are used for training.
......@@ -167,3 +176,12 @@ def CreateDataset(file_pattern,
dataset = dataset.batch(batch_size)
return dataset
def GoogleLandmarksInfo():
"""Returns metadata information on the Google Landmarks dataset.
Returns:
object _GoogleLandmarksInfo containing metadata about the GLD dataset.
"""
return _GoogleLandmarksInfo()
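Callers can then look up class counts by dataset version; for example:

num_classes = GoogleLandmarksInfo().num_classes['gld_v2_clean']  # 81313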
......@@ -43,6 +43,10 @@ flags.DEFINE_string('train_file_pattern', '/tmp/data/train*',
'File pattern of training dataset files.')
flags.DEFINE_string('validation_file_pattern', '/tmp/data/validation*',
'File pattern of validation dataset files.')
flags.DEFINE_enum('dataset_version', 'gld_v1',
['gld_v1', 'gld_v2', 'gld_v2_clean'],
'Google Landmarks dataset version, used to determine the '
'number of classes.')
flags.DEFINE_integer('seed', 0, 'Seed to training dataset.')
flags.DEFINE_float('initial_lr', 0.001, 'Initial learning rate.')
flags.DEFINE_integer('batch_size', 32, 'Global batch size.')
......@@ -136,9 +140,9 @@ def main(argv):
save_interval = 1
report_interval = 1
# TODO(andrearaujo): Using placeholder, replace with actual value using
# GoogleLandmarksInfo() from datasets/googlelandmarks.py.
num_classes = 14951
# Determine the number of classes based on the version of the dataset.
gld_info = gld.GoogleLandmarksInfo()
num_classes = gld_info.num_classes[FLAGS.dataset_version]
# ------------------------------------------------------------
# Create the distributed train/validation sets.
......
......@@ -4,20 +4,23 @@
# Neural Programmer
Implementation of the Neural Programmer model described in [paper](https://openreview.net/pdf?id=ry2YOrcge)
Implementation of the Neural Programmer model as described in this [paper](https://openreview.net/pdf?id=ry2YOrcge).
Download and extract the data from [dropbox](https://www.dropbox.com/s/9tvtcv6lmy51zfw/data.zip?dl=0). Change the ``data_dir FLAG`` to the location of the data.
Download and extract the data from the [WikiTableQuestions](https://ppasupat.github.io/WikiTableQuestions/) site. The dataset contains
11321, 2831, and 4344 examples for training, development, and testing, respectively. We use their tokenization and their number and date pre-processing. Note that the paper above used the [initial release](https://github.com/ppasupat/WikiTableQuestions/releases/tag/v0.2) for training, development, and testing.
Change the `data_dir` flag to the location of the data.
### Training
``python neural_programmer.py``
Run `python neural_programmer.py`
The models are written to FLAGS.output_dir
The models are written to `FLAGS.output_dir`.
### Testing
``python neural_programmer.py --evaluator_job=True``
Run `python neural_programmer.py --evaluator_job=True`
The models are loaded from ``FLAGS.output_dir``. The evaluation is done on development data.
The models are loaded from `FLAGS.output_dir`. The evaluation is done on development data.
In case of errors because of encoding, add ``"# -*- coding: utf-8 -*-"`` as the first line in ``wiki_data.py``
If you run into encoding errors, add `# -*- coding: utf-8 -*-` as the first line of `wiki_data.py`.
Maintained by Arvind Neelakantan (arvind2505)
......@@ -16,6 +16,7 @@
"""Tests for box_predictor_builder."""
import unittest
import mock
import tensorflow.compat.v1 as tf
......@@ -25,8 +26,10 @@ from object_detection.builders import hyperparams_builder
from object_detection.predictors import mask_rcnn_box_predictor
from object_detection.protos import box_predictor_pb2
from object_detection.protos import hyperparams_pb2
from object_detection.utils import tf_version
@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only Tests.')
class ConvolutionalBoxPredictorBuilderTest(tf.test.TestCase):
def test_box_predictor_calls_conv_argscope_fn(self):
......@@ -161,6 +164,7 @@ class ConvolutionalBoxPredictorBuilderTest(tf.test.TestCase):
self.assertFalse(class_head._use_depthwise)
@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only Tests.')
class WeightSharedConvolutionalBoxPredictorBuilderTest(tf.test.TestCase):
def test_box_predictor_calls_conv_argscope_fn(self):
......@@ -357,6 +361,7 @@ class WeightSharedConvolutionalBoxPredictorBuilderTest(tf.test.TestCase):
@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only Tests.')
class MaskRCNNBoxPredictorBuilderTest(tf.test.TestCase):
def test_box_predictor_builder_calls_fc_argscope_fn(self):
......@@ -537,6 +542,7 @@ class MaskRCNNBoxPredictorBuilderTest(tf.test.TestCase):
._convolve_then_upsample)
@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only Tests.')
class RfcnBoxPredictorBuilderTest(tf.test.TestCase):
def test_box_predictor_calls_fc_argscope_fn(self):
......
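The `@unittest.skipIf(tf_version.is_tf2(), ...)` decorators added above gate these TF1-only builder tests. A sketch of the gating pattern, with a hypothetical stand-in for `tf_version.is_tf2()`:

```python
import unittest

import tensorflow.compat.v1 as tf


def _is_tf2():
  # Illustrative stand-in for object_detection.utils.tf_version.is_tf2().
  return tf.__version__.startswith('2')


@unittest.skipIf(_is_tf2(), 'Skipping TF1.X only Tests.')
class Tf1OnlyBuilderTest(tf.test.TestCase):

  def test_runs_only_under_tf1(self):
    # Anything graph-mode-only would live here; kept trivial for the sketch.
    self.assertEqual(tf.constant(1).dtype, tf.int32)


if __name__ == '__main__':
  tf.test.main()
```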
......@@ -25,31 +25,34 @@ from six.moves import zip
import tensorflow.compat.v1 as tf
from object_detection.builders import calibration_builder
from object_detection.protos import calibration_pb2
from object_detection.utils import test_case
class CalibrationBuilderTest(tf.test.TestCase):
class CalibrationBuilderTest(test_case.TestCase):
def test_tf_linear_interp1d_map(self):
"""Tests TF linear interpolation mapping to a single number."""
with self.test_session() as sess:
def graph_fn():
tf_x = tf.constant([0., 0.5, 1.])
tf_y = tf.constant([0.5, 0.5, 0.5])
new_x = tf.constant([0., 0.25, 0.5, 0.75, 1.])
tf_map_outputs = calibration_builder._tf_linear_interp1d(
new_x, tf_x, tf_y)
tf_map_outputs_np = sess.run([tf_map_outputs])
self.assertAllClose(tf_map_outputs_np, [[0.5, 0.5, 0.5, 0.5, 0.5]])
return tf_map_outputs
tf_map_outputs_np = self.execute(graph_fn, [])
self.assertAllClose(tf_map_outputs_np, [0.5, 0.5, 0.5, 0.5, 0.5])
def test_tf_linear_interp1d_interpolate(self):
"""Tests TF 1d linear interpolation not mapping to a single number."""
with self.test_session() as sess:
def graph_fn():
tf_x = tf.constant([0., 0.5, 1.])
tf_y = tf.constant([0.6, 0.7, 1.0])
new_x = tf.constant([0., 0.25, 0.5, 0.75, 1.])
tf_interpolate_outputs = calibration_builder._tf_linear_interp1d(
new_x, tf_x, tf_y)
tf_interpolate_outputs_np = sess.run([tf_interpolate_outputs])
self.assertAllClose(tf_interpolate_outputs_np, [[0.6, 0.65, 0.7, 0.85, 1.]])
return tf_interpolate_outputs
tf_interpolate_outputs_np = self.execute(graph_fn, [])
self.assertAllClose(tf_interpolate_outputs_np, [0.6, 0.65, 0.7, 0.85, 1.])
@staticmethod
def _get_scipy_interp1d(new_x, x, y):
......@@ -59,12 +62,13 @@ class CalibrationBuilderTest(tf.test.TestCase):
def _get_tf_interp1d(self, new_x, x, y):
"""Helper performing 1d linear interpolation using Tensorflow."""
with self.test_session() as sess:
def graph_fn():
tf_interp_outputs = calibration_builder._tf_linear_interp1d(
tf.convert_to_tensor(new_x, dtype=tf.float32),
tf.convert_to_tensor(x, dtype=tf.float32),
tf.convert_to_tensor(y, dtype=tf.float32))
np_tf_interp_outputs = sess.run(tf_interp_outputs)
return tf_interp_outputs
np_tf_interp_outputs = self.execute(graph_fn, [])
return np_tf_interp_outputs
def test_tf_linear_interp1d_against_scipy_map(self):
......@@ -128,8 +132,7 @@ class CalibrationBuilderTest(tf.test.TestCase):
self._add_function_approximation_to_calibration_proto(
calibration_config, class_agnostic_x, class_agnostic_y, class_id=None)
od_graph = tf.Graph()
with self.test_session(graph=od_graph) as sess:
def graph_fn():
calibration_fn = calibration_builder.build(calibration_config)
# batch_size = 2, num_classes = 2, num_anchors = 2.
class_predictions_with_background = tf.constant(
......@@ -140,7 +143,8 @@ class CalibrationBuilderTest(tf.test.TestCase):
# Everything should map to 0.5 if classes are ignored.
calibrated_scores = calibration_fn(class_predictions_with_background)
calibrated_scores_np = sess.run(calibrated_scores)
return calibrated_scores
calibrated_scores_np = self.execute(graph_fn, [])
self.assertAllClose(calibrated_scores_np, [[[0.05, 0.1, 0.15],
[0.2, 0.25, 0.0]],
[[0.35, 0.45, 0.55],
......@@ -161,8 +165,7 @@ class CalibrationBuilderTest(tf.test.TestCase):
self._add_function_approximation_to_calibration_proto(
calibration_config, class_1_x, class_1_y, class_id=1)
od_graph = tf.Graph()
with self.test_session(graph=od_graph) as sess:
def graph_fn():
calibration_fn = calibration_builder.build(calibration_config)
# batch_size = 2, num_classes = 2, num_anchors = 2.
class_predictions_with_background = tf.constant(
......@@ -170,7 +173,8 @@ class CalibrationBuilderTest(tf.test.TestCase):
[[0.6, 0.4], [0.08, 0.92]]],
dtype=tf.float32)
calibrated_scores = calibration_fn(class_predictions_with_background)
calibrated_scores_np = sess.run(calibrated_scores)
return calibrated_scores
calibrated_scores_np = self.execute(graph_fn, [])
self.assertAllClose(calibrated_scores_np, [[[0.5, 0.6], [0.5, 0.3]],
[[0.5, 0.7], [0.5, 0.96]]])
......@@ -179,8 +183,7 @@ class CalibrationBuilderTest(tf.test.TestCase):
calibration_config = calibration_pb2.CalibrationConfig()
calibration_config.temperature_scaling_calibration.scaler = 2.0
od_graph = tf.Graph()
with self.test_session(graph=od_graph) as sess:
def graph_fn():
calibration_fn = calibration_builder.build(calibration_config)
# batch_size = 2, num_classes = 2, num_anchors = 2.
class_predictions_with_background = tf.constant(
......@@ -188,7 +191,8 @@ class CalibrationBuilderTest(tf.test.TestCase):
[[0.6, 0.7, 0.8], [0.9, 1.0, 1.0]]],
dtype=tf.float32)
calibrated_scores = calibration_fn(class_predictions_with_background)
calibrated_scores_np = sess.run(calibrated_scores)
return calibrated_scores
calibrated_scores_np = self.execute(graph_fn, [])
self.assertAllClose(calibrated_scores_np,
[[[0.05, 0.1, 0.15], [0.2, 0.25, 0.0]],
[[0.3, 0.35, 0.4], [0.45, 0.5, 0.5]]])
......@@ -212,8 +216,7 @@ class CalibrationBuilderTest(tf.test.TestCase):
calibration_config = calibration_pb2.CalibrationConfig()
self._add_function_approximation_to_calibration_proto(
calibration_config, class_0_x, class_0_y, class_id=0)
od_graph = tf.Graph()
with self.test_session(graph=od_graph) as sess:
def graph_fn():
calibration_fn = calibration_builder.build(calibration_config)
# batch_size = 2, num_classes = 2, num_anchors = 2.
class_predictions_with_background = tf.constant(
......@@ -221,7 +224,8 @@ class CalibrationBuilderTest(tf.test.TestCase):
[[0.6, 0.4], [0.08, 0.92]]],
dtype=tf.float32)
calibrated_scores = calibration_fn(class_predictions_with_background)
calibrated_scores_np = sess.run(calibrated_scores)
return calibrated_scores
calibrated_scores_np = self.execute(graph_fn, [])
self.assertAllClose(calibrated_scores_np, [[[0.5, 0.2], [0.5, 0.1]],
[[0.5, 0.4], [0.5, 0.92]]])
......
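The calibration tests above migrate from TF1 `with self.test_session()` blocks to the `test_case.TestCase` pattern: graph construction moves into a `graph_fn` that returns tensors, and `self.execute(graph_fn, inputs)` runs it under either graph or eager mode and returns numpy values (extracting single outputs, as the comments in the diff note). A minimal sketch of the pattern; the test body is illustrative:

```python
import tensorflow.compat.v1 as tf

from object_detection.utils import test_case


class ExecutePatternTest(test_case.TestCase):

  def test_scale_by_two(self):
    def graph_fn():
      # Build tensors and return them; no session handling in the test body.
      scores = tf.constant([0.2, 0.4, 0.6])
      return scores * 2.0

    # execute() evaluates graph_fn and hands back numpy arrays.
    out = self.execute(graph_fn, [])
    self.assertAllClose(out, [0.4, 0.8, 1.2])


if __name__ == '__main__':
  tf.test.main()
```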
......@@ -29,7 +29,6 @@ from __future__ import print_function
import functools
import tensorflow.compat.v1 as tf
from tensorflow.contrib import data as tf_data
from object_detection.builders import decoder_builder
from object_detection.protos import input_reader_pb2
......@@ -94,7 +93,7 @@ def read_dataset(file_read_func, input_files, config,
filename_dataset = filename_dataset.repeat(config.num_epochs or None)
records_dataset = filename_dataset.apply(
tf_data.parallel_interleave(
tf.data.experimental.parallel_interleave(
file_read_func,
cycle_length=num_readers,
block_length=config.read_block_length,
......@@ -153,6 +152,30 @@ def build(input_reader_config, batch_size=None, transform_input_data_fn=None,
if not config.input_path:
raise ValueError('At least one input path must be specified in '
'`input_reader_config`.')
def dataset_map_fn(dataset, fn_to_map, batch_size=None,
input_reader_config=None):
"""Handles whether or not to use the legacy map function.
Args:
dataset: A tf.data.Dataset.
fn_to_map: The function to map over the dataset.
batch_size: Batch size. If batch size is None, no batching is performed.
input_reader_config: An input_reader_pb2.InputReader object.
Returns:
A tf.data.Dataset mapped with fn_to_map.
"""
if hasattr(dataset, 'map_with_legacy_function'):
if batch_size:
num_parallel_calls = batch_size * (
input_reader_config.num_parallel_batches)
else:
num_parallel_calls = input_reader_config.num_parallel_map_calls
dataset = dataset.map_with_legacy_function(
fn_to_map, num_parallel_calls=num_parallel_calls)
else:
dataset = dataset.map(fn_to_map, tf.data.experimental.AUTOTUNE)
return dataset
shard_fn = shard_function_for_context(input_context)
if input_context is not None:
batch_size = input_context.get_per_replica_batch_size(batch_size)
......@@ -163,15 +186,16 @@ def build(input_reader_config, batch_size=None, transform_input_data_fn=None,
dataset = dataset.shard(input_reader_config.sample_1_of_n_examples, 0)
# TODO(rathodv): make batch size a required argument once the old binaries
# are deleted.
dataset = dataset.map(decoder.decode, tf.data.experimental.AUTOTUNE)
dataset = dataset_map_fn(dataset, decoder.decode, batch_size,
input_reader_config)
if reduce_to_frame_fn:
dataset = reduce_to_frame_fn(dataset)
dataset = reduce_to_frame_fn(dataset, dataset_map_fn, batch_size,
input_reader_config)
if transform_input_data_fn is not None:
dataset = dataset.map(transform_input_data_fn,
tf.data.experimental.AUTOTUNE)
dataset = dataset_map_fn(dataset, transform_input_data_fn,
batch_size, input_reader_config)
if batch_size:
dataset = dataset.apply(
tf_data.batch_and_drop_remainder(batch_size))
dataset = dataset.batch(batch_size, drop_remainder=True)
dataset = dataset.prefetch(input_reader_config.num_prefetch_batches)
return dataset
......
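The new `dataset_map_fn` helper centralizes a compatibility decision: datasets that still expose `map_with_legacy_function` (TF 1.x) use it, with parallelism derived from the reader config, while newer datasets fall back to plain `map` with AUTOTUNE. Since the helper is nested inside `build()`, here is a self-contained restatement of its logic, with a stub config standing in for `input_reader_pb2.InputReader`:

```python
import tensorflow.compat.v1 as tf


class _StubReaderConfig(object):
  # Stand-in for the input_reader_pb2.InputReader fields the helper reads.
  num_parallel_batches = 2
  num_parallel_map_calls = 4


def map_compat(dataset, fn_to_map, batch_size, input_reader_config):
  """Illustrative restatement of dataset_map_fn from the diff above."""
  if hasattr(dataset, 'map_with_legacy_function'):
    if batch_size:
      num_parallel_calls = batch_size * input_reader_config.num_parallel_batches
    else:
      num_parallel_calls = input_reader_config.num_parallel_map_calls
    return dataset.map_with_legacy_function(
        fn_to_map, num_parallel_calls=num_parallel_calls)
  return dataset.map(fn_to_map, tf.data.experimental.AUTOTUNE)


dataset = map_compat(tf.data.Dataset.range(8), lambda x: x * 2,
                     batch_size=None, input_reader_config=_StubReaderConfig())
```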
......@@ -197,13 +197,13 @@ class DatasetBuilderTest(test_case.TestCase):
output_dict[fields.InputDataFields.groundtruth_boxes][0][0])
def get_mock_reduce_to_frame_fn(self):
def mock_reduce_to_frame_fn(dataset):
def mock_reduce_to_frame_fn(dataset, dataset_map_fn, batch_size, config):
def get_frame(tensor_dict):
out_tensor_dict = {}
out_tensor_dict[fields.InputDataFields.source_id] = (
tensor_dict[fields.InputDataFields.source_id][0])
return out_tensor_dict
return dataset.map(get_frame, tf.data.experimental.AUTOTUNE)
return dataset_map_fn(dataset, get_frame, batch_size, config)
return mock_reduce_to_frame_fn
def test_build_tf_record_input_reader_sequence_example_train(self):
......@@ -537,8 +537,15 @@ class ReadDatasetTest(test_case.TestCase):
def graph_fn():
keys = [1, 0, -1]
dataset = tf.data.Dataset.from_tensor_slices([[1, 2, -1, 5]])
table = contrib_lookup.HashTable(
initializer=contrib_lookup.KeyValueTensorInitializer(
try:
# Dynamically try to load the tf v2 lookup, falling back to contrib
lookup = tf.compat.v2.lookup
hash_table_class = tf.compat.v2.lookup.StaticHashTable
except AttributeError:
lookup = contrib_lookup
hash_table_class = contrib_lookup.HashTable
table = hash_table_class(
initializer=lookup.KeyValueTensorInitializer(
keys=keys, values=list(reversed(keys))),
default_value=100)
dataset = dataset.map(table.lookup)
......@@ -559,7 +566,7 @@ class ReadDatasetTest(test_case.TestCase):
data = self.execute(graph_fn, [])
# Note that the execute function extracts single outputs if the return
# value is of size 1.
self.assertAllEqual(
self.assertCountEqual(
data, [
1, 10, 2, 20, 3, 30, 4, 40, 5, 50, 1, 10, 2, 20, 3, 30, 4, 40, 5,
50
......@@ -577,7 +584,7 @@ class ReadDatasetTest(test_case.TestCase):
data = self.execute(graph_fn, [])
# Note that the execute function extracts single outputs if the return
# value is of size 1.
self.assertAllEqual(
self.assertCountEqual(
data, [
1, 10, 2, 20, 3, 30, 4, 40, 5, 50, 1, 10, 2, 20, 3, 30, 4, 40, 5,
50
......@@ -607,12 +614,14 @@ class ReadDatasetTest(test_case.TestCase):
def graph_fn():
return self._get_dataset_next(
[self._shuffle_path_template % '*'], config, batch_size=10)
expected_non_shuffle_output = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1]
expected_non_shuffle_output1 = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1]
expected_non_shuffle_output2 = [1, 1, 1, 1, 1, 0, 0, 0, 0, 0]
# Note that the execute function extracts single outputs if the return
# value is of size 1.
data = self.execute(graph_fn, [])
self.assertAllEqual(data, expected_non_shuffle_output)
self.assertTrue(all(data == expected_non_shuffle_output1) or
all(data == expected_non_shuffle_output2))
def test_read_dataset_single_epoch(self):
config = input_reader_pb2.InputReader()
......
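The assertion changes above (`assertAllEqual` to `assertCountEqual`, and accepting either file ordering in the non-shuffle test) account for parallel interleaving reading files in nondeterministic order: the tests should compare contents, not order. A toy illustration with made-up values:

```python
import unittest


class OrderInsensitiveAssertTest(unittest.TestCase):

  def test_contents_not_order(self):
    data = [2, 1, 3, 1]  # e.g. records arriving in arbitrary interleave order
    # Passes for any permutation of the expected multiset, and still fails
    # on missing or extra elements.
    self.assertCountEqual(data, [1, 1, 2, 3])


if __name__ == '__main__':
  unittest.main()
```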
......@@ -48,7 +48,7 @@ def build(input_reader_config):
if input_reader_config.HasField('label_map_path'):
label_map_proto_file = input_reader_config.label_map_path
input_type = input_reader_config.input_type
if input_type == input_reader_pb2.InputType.TF_EXAMPLE:
if input_type == input_reader_pb2.InputType.Value('TF_EXAMPLE'):
decoder = tf_example_decoder.TfExampleDecoder(
load_instance_masks=input_reader_config.load_instance_masks,
load_multiclass_scores=input_reader_config.load_multiclass_scores,
......@@ -60,7 +60,7 @@ def build(input_reader_config):
num_keypoints=input_reader_config.num_keypoints,
expand_hierarchy_labels=input_reader_config.expand_labels_hierarchy)
return decoder
elif input_type == input_reader_pb2.InputType.TF_SEQUENCE_EXAMPLE:
elif input_type == input_reader_pb2.InputType.Value('TF_SEQUENCE_EXAMPLE'):
decoder = tf_sequence_example_decoder.TfSequenceExampleDecoder(
label_map_proto_file=label_map_proto_file,
load_context_features=input_reader_config.load_context_features)
......
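The decoder_builder change above swaps attribute-style enum access (`InputType.TF_EXAMPLE`) for `InputType.Value('TF_EXAMPLE')`, which resolves the numeric value by name through protobuf's generated enum wrapper. A minimal sketch:

```python
from object_detection.protos import input_reader_pb2

# Resolve proto enum values by name via the EnumTypeWrapper.
tf_example = input_reader_pb2.InputType.Value('TF_EXAMPLE')
seq_example = input_reader_pb2.InputType.Value('TF_SEQUENCE_EXAMPLE')
assert tf_example != seq_example
```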