"doc/vscode:/vscode.git/clone" did not exist on "69fa2f4395db9b2ee2f94cdfb186467cc20c5e61"
Commit a27bad9b authored by André Araujo, committed by aquariusjay

DELF feature aggregation extraction: VLAD, ASMK, ASMK* (#6780)

* Initial feature aggregation code for Detect-to-Retrieve paper.

PiperOrigin-RevId: 246043144

* Add support for ASMK/ASMK*/R-ASMK/R-ASMK*.

PiperOrigin-RevId: 247337028

* Add DatumProto uint32 field, and limit datum_io to uint32 and float32/float64 types.

Also, introduce DatumPairProto, to be used for ASMK variants. Functions to read/write in this new format are added and tested.

PiperOrigin-RevId: 247515205

* Add batching option to feature aggregation extraction.

PiperOrigin-RevId: 247614627

* Script to perform local feature aggregation, with associated configs.

Also small edits to the aggregation extractor, for better handling of input features / avoiding OOM.

PiperOrigin-RevId: 248150750

* Tests to check that aggregation using regions with no local features works.

PiperOrigin-RevId: 248153275

* Include new library/proto for aggregation

* Merged commit includes the following changes:

PiperOrigin-RevId: 248176511

* Merged commit includes the following changes:
248194572  by Andre Araujo:

    Change tf.tensor_scatter_nd_add --> tf.compat.v1.tensor_scatter_add to make it compatible with TF 1.X.

--

PiperOrigin-RevId: 248194572
parent 8d97814e
@@ -18,6 +18,7 @@ from __future__ import division
from __future__ import print_function
# pylint: disable=unused-import
from delf.protos import aggregation_config_pb2
from delf.protos import box_pb2
from delf.protos import datum_pb2
from delf.protos import delf_config_pb2
@@ -26,6 +27,7 @@ from delf.python import box_io
from delf.python import datum_io
from delf.python import delf_v1
from delf.python import detect_to_retrieve
from delf.python import feature_aggregation_extractor
from delf.python import feature_extractor
from delf.python import feature_io
from delf.python.examples import extract_boxes
// Protocol buffer for feature aggregation configuration.
//
// Used for both extraction and comparison of aggregated representations. Note
// that some options are only relevant for the former or the latter.
//
// For more details, please refer to the paper:
// "Detect-to-Retrieve: Efficient Regional Aggregation for Image Search",
// Proc. CVPR'19 (https://arxiv.org/abs/1812.01584).
syntax = "proto2";
package delf.protos;
message AggregationConfig {
  // Number of codewords (ie, visual words) in the codebook.
  optional int32 codebook_size = 1 [default = 65536];

  // Dimensionality of local features (eg, 128 for DELF used in
  // Detect-to-Retrieve paper).
  optional int32 feature_dimensionality = 2 [default = 128];

  // Type of aggregation to use.
  // For example, to use R-ASMK*, `aggregation_type` should be set to ASMK_STAR
  // and `use_regional_aggregation` should be set to true.
  enum AggregationType {
    INVALID = 0;
    VLAD = 1;
    ASMK = 2;
    ASMK_STAR = 3;
  }
  optional AggregationType aggregation_type = 3 [default = ASMK_STAR];

  // L2 normalization option.
  // - For vanilla aggregated kernels (eg, VLAD/ASMK/ASMK*), this should be
  //   set to true.
  // - For regional aggregated kernels (ie, if `use_regional_aggregation` is
  //   true, leading to R-VLAD/R-ASMK/R-ASMK*), this should be set to false.
  // Note that it is used differently depending on the `aggregation_type`:
  // - For VLAD, this option is only used for extraction.
  // - For ASMK/ASMK*, this option is only used for comparisons.
  optional bool use_l2_normalization = 4 [default = true];

  // Additional options used only for extraction.
  // - Path to codebook checkpoint for aggregation.
  optional string codebook_path = 5;
  // - Number of visual words to assign each feature.
  optional int32 num_assignments = 6 [default = 1];
  // - Whether to use regional aggregation.
  optional bool use_regional_aggregation = 7 [default = false];
  // - Batch size to use for local features when computing aggregated
  //   representations. Particularly useful if `codebook_size` and
  //   `feature_dimensionality` are large, to avoid OOM. A value of zero or
  //   lower indicates that no batching is used.
  optional int32 feature_batch_size = 10 [default = 100];

  // Additional options used only for comparison.
  // Only relevant if `aggregation_type` is ASMK or ASMK_STAR.
  // - Power-law exponent for similarity of visual word descriptors.
  optional float alpha = 8 [default = 3.0];
  // - Threshold above which similarity of visual word descriptors are
  //   considered; below this, similarity is set to zero.
  optional float tau = 9 [default = 0.0];
}
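As a quick usage sketch (not part of the commit; the pbtxt path below is hypothetical), a config in this format can be parsed with the protobuf text_format module, mirroring how extract_aggregation.py reads it further down:

from google.protobuf import text_format
from delf import aggregation_config_pb2

config = aggregation_config_pb2.AggregationConfig()
with open('/tmp/aggregation_config.pbtxt') as f:  # Hypothetical path.
  text_format.Merge(f.read(), config)
# Per the comments above, R-ASMK* corresponds to aggregation_type ASMK_STAR
# combined with use_regional_aggregation = true.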
@@ -39,15 +39,28 @@ message DatumShape {
  repeated int64 dim = 1 [packed = true];
}

// FloatList is a container of tensor values, which are saved as a list of
// floating point values.
message FloatList {
  repeated float value = 1 [packed = true];
}

// Uint32List is a container of tensor values, which are saved as a list of
// uint32 values.
message Uint32List {
  repeated uint32 value = 1 [packed = true];
}

message DatumProto {
  optional DatumShape shape = 1;
  oneof kind_oneof {
    FloatList float_list = 2;
    Uint32List uint32_list = 3;
  }
}

// Groups two DatumProto's.
message DatumPairProto {
  optional DatumProto first = 1;
  optional DatumProto second = 2;
}
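A minimal sketch of the new fields (assuming `datum_pb2` is the compiled module for this proto, as imported by datum_io.py below): members of `kind_oneof` are mutually exclusive, so setting one clears the other.

from delf import datum_pb2

datum = datum_pb2.DatumProto()
datum.uint32_list.value.extend([1, 2, 3, 4])
datum.shape.dim.extend([2, 2])
assert datum.WhichOneof('kind_oneof') == 'uint32_list'
# Setting float_list clears uint32_list, since both live in kind_oneof.
datum.float_list.value.append(0.5)
assert datum.WhichOneof('kind_oneof') == 'float_list'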
@@ -17,7 +17,7 @@
DatumProto is a protocol buffer used to serialize tensors of arbitrary shape.
Please refer to datum.proto for details.

Supports reading and writing of DatumProto from/to NumPy arrays and files.
"""
from __future__ import absolute_import
@@ -31,37 +31,95 @@ from delf import datum_pb2
def ArrayToDatum(arr):
  """Converts NumPy array to DatumProto.

  Supports arrays of types:
    - float16 (it is converted into a float32 in DatumProto)
    - float32
    - float64 (it is converted into a float32 in DatumProto)
    - uint8 (it is converted into a uint32 in DatumProto)
    - uint16 (it is converted into a uint32 in DatumProto)
    - uint32
    - uint64 (it is converted into a uint32 in DatumProto)

  Args:
    arr: NumPy array of arbitrary shape.

  Returns:
    datum: DatumProto object.

  Raises:
    ValueError: If array type is unsupported.
  """
  datum = datum_pb2.DatumProto()
  if arr.dtype in ('float16', 'float32', 'float64'):
    datum.float_list.value.extend(arr.astype('float32').flat)
  elif arr.dtype in ('uint8', 'uint16', 'uint32', 'uint64'):
    datum.uint32_list.value.extend(arr.astype('uint32').flat)
  else:
    raise ValueError('Unsupported array type: %s' % arr.dtype)

  datum.shape.dim.extend(arr.shape)
  return datum
def ArraysToDatumPair(arr_1, arr_2):
  """Converts NumPy arrays to DatumPairProto.

  Supports same formats as `ArrayToDatum`, see documentation therein.

  Args:
    arr_1: NumPy array of arbitrary shape.
    arr_2: NumPy array of arbitrary shape.

  Returns:
    datum_pair: DatumPairProto object.
  """
  datum_pair = datum_pb2.DatumPairProto()
  datum_pair.first.CopyFrom(ArrayToDatum(arr_1))
  datum_pair.second.CopyFrom(ArrayToDatum(arr_2))
  return datum_pair
def DatumToArray(datum):
  """Converts data saved in DatumProto to NumPy array.

  Args:
    datum: DatumProto object.

  Returns:
    NumPy array of arbitrary shape.

  Raises:
    ValueError: If input DatumProto has neither float_list nor uint32_list.
  """
  if datum.HasField('float_list'):
    return np.array(datum.float_list.value).astype('float32').reshape(
        datum.shape.dim)
  elif datum.HasField('uint32_list'):
    return np.array(datum.uint32_list.value).astype('uint32').reshape(
        datum.shape.dim)
  else:
    raise ValueError('Input DatumProto does not have float_list or uint32_list')
def DatumPairToArrays(datum_pair):
  """Converts data saved in DatumPairProto to NumPy arrays.

  Args:
    datum_pair: DatumPairProto object.

  Returns:
    Two NumPy arrays of arbitrary shape.
  """
  first_datum = DatumToArray(datum_pair.first)
  second_datum = DatumToArray(datum_pair.second)
  return first_datum, second_datum
def SerializeToString(arr):
  """Converts NumPy array to serialized DatumProto.

  Args:
    arr: NumPy array of arbitrary shape.

  Returns:
    Serialized DatumProto string.
@@ -70,20 +128,48 @@ def SerializeToString(arr):
  return datum.SerializeToString()
def SerializePairToString(arr_1, arr_2):
  """Converts pair of NumPy arrays to serialized DatumPairProto.

  Args:
    arr_1: NumPy array of arbitrary shape.
    arr_2: NumPy array of arbitrary shape.

  Returns:
    Serialized DatumPairProto string.
  """
  datum_pair = ArraysToDatumPair(arr_1, arr_2)
  return datum_pair.SerializeToString()
def ParseFromString(string):
  """Converts serialized DatumProto string to NumPy array.

  Args:
    string: Serialized DatumProto string.

  Returns:
    NumPy array.
  """
  datum = datum_pb2.DatumProto()
  datum.ParseFromString(string)
  return DatumToArray(datum)
def ParsePairFromString(string):
  """Converts serialized DatumPairProto string to NumPy arrays.

  Args:
    string: Serialized DatumPairProto string.

  Returns:
    Two NumPy arrays.
  """
  datum_pair = datum_pb2.DatumPairProto()
  datum_pair.ParseFromString(string)
  return DatumPairToArrays(datum_pair)
def ReadFromFile(file_path):
  """Helper function to load data from a DatumProto format in a file.
@@ -91,19 +177,45 @@ def ReadFromFile(file_path):
    file_path: Path to file containing data.

  Returns:
    data: NumPy array.
  """
  with tf.gfile.GFile(file_path, 'rb') as f:
    return ParseFromString(f.read())
def ReadPairFromFile(file_path):
  """Helper function to load data from a DatumPairProto format in a file.

  Args:
    file_path: Path to file containing data.

  Returns:
    Two NumPy arrays.
  """
  with tf.gfile.GFile(file_path, 'rb') as f:
    return ParsePairFromString(f.read())
def WriteToFile(data, file_path):
  """Helper function to write data to a file in DatumProto format.

  Args:
    data: NumPy array.
    file_path: Path to file that will be written.
  """
  serialized_data = SerializeToString(data)
  with tf.gfile.GFile(file_path, 'w') as f:
    f.write(serialized_data)
def WritePairToFile(arr_1, arr_2, file_path):
  """Helper function to write pair of arrays to a file in DatumPairProto
  format.

  Args:
    arr_1: NumPy array of arbitrary shape.
    arr_2: NumPy array of arbitrary shape.
    file_path: Path to file that will be written.
  """
  serialized_data = SerializePairToString(arr_1, arr_2)
  with tf.gfile.GFile(file_path, 'w') as f:
    f.write(serialized_data)
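For illustration, a round-trip through the new pair helpers might look as follows (a sketch; the file path is hypothetical, and the uint32 array matches how extract_aggregation.py below stores visual word ids):

import numpy as np
from delf import datum_io

descriptors = np.zeros([4, 128], dtype='float32')
visual_words = np.array([10, 25, 25, 831], dtype='uint32')
datum_io.WritePairToFile(descriptors, visual_words, '/tmp/example.asmk')
out_1, out_2 = datum_io.ReadPairFromFile('/tmp/example.asmk')
assert out_1.shape == (4, 128) and out_2.dtype == np.uint32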
@@ -40,24 +40,32 @@ class DatumIoTest(tf.test.TestCase):
    retrieved_data = datum_io.ParseFromString(serialized)
    self.assertTrue(np.array_equal(original_data, retrieved_data))

  # This test covers the following functions: ArrayToDatum, SerializeToString,
  # ParseFromString, DatumToArray.
  def testConversion2dWithType(self):
    self.Conversion2dTestWithType(np.uint16)
    self.Conversion2dTestWithType(np.uint32)
    self.Conversion2dTestWithType(np.uint64)
    self.Conversion2dTestWithType(np.float16)
    self.Conversion2dTestWithType(np.float32)
    self.Conversion2dTestWithType(np.float64)

  # This test covers the following functions: ArrayToDatum, SerializeToString,
  # ParseFromString, DatumToArray.
  def testConversion3dWithType(self):
    self.Conversion3dTestWithType(np.uint16)
    self.Conversion3dTestWithType(np.uint32)
    self.Conversion3dTestWithType(np.uint64)
    self.Conversion3dTestWithType(np.float16)
    self.Conversion3dTestWithType(np.float32)
    self.Conversion3dTestWithType(np.float64)

  def testConversionWithUnsupportedType(self):
    with self.assertRaisesRegex(ValueError, 'Unsupported array type'):
      self.Conversion3dTestWithType(int)

  # This test covers the following functions: ArrayToDatum, SerializeToString,
  # WriteToFile, ReadFromFile, ParseFromString, DatumToArray.
  def testWriteAndReadToFile(self):
    data = np.array([[[-1.0, 125.0, -2.5], [14.5, 3.5, 0.0]],
                     [[20.0, 0.0, 30.0], [25.5, 36.0, 42.0]]])
@@ -67,6 +75,22 @@ class DatumIoTest(tf.test.TestCase):
    data_read = datum_io.ReadFromFile(filename)
    self.assertAllEqual(data_read, data)

  # This test covers the following functions: ArraysToDatumPair,
  # SerializePairToString, WritePairToFile, ReadPairFromFile,
  # ParsePairFromString, DatumPairToArrays.
  def testWriteAndReadPairToFile(self):
    data_1 = np.array([[[-1.0, 125.0, -2.5], [14.5, 3.5, 0.0]],
                       [[20.0, 0.0, 30.0], [25.5, 36.0, 42.0]]])
    data_2 = np.array(
        [[[255, 0, 5], [10, 300, 0]], [[20, 1, 100], [255, 360, 420]]],
        dtype='uint32')
    tmpdir = tf.test.get_temp_dir()
    filename = os.path.join(tmpdir, 'test.datum_pair')
    datum_io.WritePairToFile(data_1, data_2, filename)
    data_read_1, data_read_2 = datum_io.ReadPairFromFile(filename)

    self.assertAllEqual(data_read_1, data_1)
    self.assertAllEqual(data_read_2, data_2)


if __name__ == '__main__':
  tf.test.main()
@@ -38,7 +38,7 @@ import tensorflow as tf
from tensorflow.python.platform import app
from delf import feature_io
from delf.python.detect_to_retrieve import dataset

cmd_args = None
# Copyright 2019 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Extracts aggregation for images from Revisited Oxford/Paris datasets.
The program checks whether the aggregated representation for an image already
exists, and skips computation for those images.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse
import csv
import os
import sys
import time
import numpy as np
import tensorflow as tf
from google.protobuf import text_format
from tensorflow.python.platform import app
from delf import aggregation_config_pb2
from delf import datum_io
from delf import feature_aggregation_extractor
from delf import feature_io
from delf.python.detect_to_retrieve import dataset
cmd_args = None
# Aliases for aggregation types.
_VLAD = aggregation_config_pb2.AggregationConfig.VLAD
_ASMK = aggregation_config_pb2.AggregationConfig.ASMK
_ASMK_STAR = aggregation_config_pb2.AggregationConfig.ASMK_STAR
# Extensions.
_DELF_EXTENSION = '.delf'
_VLAD_EXTENSION_SUFFIX = 'vlad'
_ASMK_EXTENSION_SUFFIX = 'asmk'
_ASMK_STAR_EXTENSION_SUFFIX = 'asmk_star'
# Interval (in number of images) at which extraction progress is reported.
_STATUS_CHECK_ITERATIONS = 50
def _ReadMappingBasenameToBoxNames(input_path, index_image_names):
  """Reads mapping from image name to DELF file names for each box.

  Args:
    input_path: Path to CSV file containing mapping.
    index_image_names: List containing index image names, in order, for the
      dataset under consideration.

  Returns:
    images_to_box_feature_files: Dict. key=string (image name); value=list of
      strings (file names containing DELF features for boxes).
  """
  images_to_box_feature_files = {}
  with tf.gfile.GFile(input_path, 'r') as f:
    reader = csv.DictReader(f)
    for row in reader:
      index_image_name = index_image_names[int(row['index_image_id'])]
      if index_image_name not in images_to_box_feature_files:
        images_to_box_feature_files[index_image_name] = []
      images_to_box_feature_files[index_image_name].append(row['name'])

  return images_to_box_feature_files
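# For illustration (hypothetical contents; only the `name` and
# `index_image_id` columns are read by the function above), the mapping CSV
# could look like:
#
#   name,index_image_id,box_id
#   all_souls_000013_0,0,0
#   all_souls_000013_1,0,1
#
# which maps the name of index image 0 to the DELF feature files
# ['all_souls_000013_0', 'all_souls_000013_1'].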
def main(argv):
  if len(argv) > 1:
    raise RuntimeError('Too many command-line arguments.')

  # Read list of images from dataset file.
  print('Reading list of images from dataset file...')
  query_list, index_list, _ = dataset.ReadDatasetFile(
      cmd_args.dataset_file_path)
  if cmd_args.use_query_images:
    image_list = query_list
  else:
    image_list = index_list
  num_images = len(image_list)
  print('done! Found %d images' % num_images)

  # Parse AggregationConfig proto, and select output extension.
  config = aggregation_config_pb2.AggregationConfig()
  with tf.gfile.GFile(cmd_args.aggregation_config_path, 'r') as f:
    text_format.Merge(f.read(), config)
  output_extension = '.'
  if config.use_regional_aggregation:
    output_extension += 'r'
  if config.aggregation_type == _VLAD:
    output_extension += _VLAD_EXTENSION_SUFFIX
  elif config.aggregation_type == _ASMK:
    output_extension += _ASMK_EXTENSION_SUFFIX
  elif config.aggregation_type == _ASMK_STAR:
    output_extension += _ASMK_STAR_EXTENSION_SUFFIX
  else:
    raise ValueError('Invalid aggregation type: %d' % config.aggregation_type)

  # Read index mapping path, if provided.
  if cmd_args.index_mapping_path:
    images_to_box_feature_files = _ReadMappingBasenameToBoxNames(
        cmd_args.index_mapping_path, image_list)

  # Create output directory if necessary.
  if not os.path.exists(cmd_args.output_aggregation_dir):
    os.makedirs(cmd_args.output_aggregation_dir)

  with tf.Session() as sess:
    extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
        sess, config)

    start = time.clock()
    for i in range(num_images):
      if i == 0:
        print('Starting to extract aggregation from images...')
      elif i % _STATUS_CHECK_ITERATIONS == 0:
        elapsed = (time.clock() - start)
        print('Processing image %d out of %d, last %d '
              'images took %f seconds' %
              (i, num_images, _STATUS_CHECK_ITERATIONS, elapsed))
        start = time.clock()

      image_name = image_list[i]

      # Compose output file name; skip extraction for this image if it already
      # exists.
      output_aggregation_filename = os.path.join(
          cmd_args.output_aggregation_dir, image_name + output_extension)
      if tf.io.gfile.exists(output_aggregation_filename):
        print('Skipping %s' % image_name)
        continue

      # Load DELF features.
      if config.use_regional_aggregation:
        if not cmd_args.index_mapping_path:
          raise ValueError(
              'Requested regional aggregation, but index_mapping_path was not '
              'provided')
        descriptors_list = []
        num_features_per_box = []
        for box_feature_file in images_to_box_feature_files[image_name]:
          delf_filename = os.path.join(cmd_args.features_dir,
                                       box_feature_file + _DELF_EXTENSION)
          _, _, box_descriptors, _, _ = feature_io.ReadFromFile(delf_filename)
          # If `box_descriptors` is empty, reshape it such that it can be
          # concatenated with other descriptors.
          if not box_descriptors.shape[0]:
            box_descriptors = np.reshape(box_descriptors,
                                         [0, config.feature_dimensionality])
          descriptors_list.append(box_descriptors)
          num_features_per_box.append(box_descriptors.shape[0])

        descriptors = np.concatenate(descriptors_list)
      else:
        input_delf_filename = os.path.join(cmd_args.features_dir,
                                           image_name + _DELF_EXTENSION)
        _, _, descriptors, _, _ = feature_io.ReadFromFile(input_delf_filename)
        num_features_per_box = None

      # Extract and save aggregation. If using VLAD, only
      # `aggregated_descriptors` needs to be saved.
      (aggregated_descriptors,
       feature_visual_words) = extractor.Extract(descriptors,
                                                 num_features_per_box)
      if config.aggregation_type == _VLAD:
        datum_io.WriteToFile(aggregated_descriptors,
                             output_aggregation_filename)
      else:
        datum_io.WritePairToFile(aggregated_descriptors,
                                 feature_visual_words.astype('uint32'),
                                 output_aggregation_filename)
if __name__ == '__main__':
  parser = argparse.ArgumentParser()
  parser.register('type', 'bool', lambda v: v.lower() == 'true')
  parser.add_argument(
      '--aggregation_config_path',
      type=str,
      default='/tmp/aggregation_config.pbtxt',
      help="""
      Path to AggregationConfig proto text file with configuration to be used
      for extraction.
      """)
  parser.add_argument(
      '--dataset_file_path',
      type=str,
      default='/tmp/gnd_roxford5k.mat',
      help="""
      Dataset file for Revisited Oxford or Paris dataset, in .mat format.
      """)
  parser.add_argument(
      '--use_query_images',
      type='bool',
      default=False,
      help="""
      If True, processes the query images of the dataset. If False, processes
      the database (ie, index) images.
      """)
  parser.add_argument(
      '--features_dir',
      type=str,
      default='/tmp/features',
      help="""
      Directory where image features are located, all in .delf format.
      """)
  parser.add_argument(
      '--index_mapping_path',
      type=str,
      default='',
      help="""
      Optional CSV file which maps each .delf file name to the index image ID
      and detected box ID. If regional aggregation is performed, this should be
      set. Otherwise, this is ignored.
      Usually this file is obtained as an output from the
      `extract_index_boxes_and_features.py` script.
      """)
  parser.add_argument(
      '--output_aggregation_dir',
      type=str,
      default='/tmp/aggregation',
      help="""
      Directory where aggregation output will be written to. Each image's
      features will be written to a file with the same name, and extension
      replaced by one of
      ['.vlad', '.asmk', '.asmk_star', '.rvlad', '.rasmk', '.rasmk_star'].
      """)
  cmd_args, unparsed = parser.parse_known_args()
  app.run(main=main, argv=[sys.argv[0]] + unparsed)
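# Example invocation (a sketch; all paths are hypothetical):
#
#   python extract_aggregation.py \
#     --aggregation_config_path=/tmp/aggregation_config.pbtxt \
#     --dataset_file_path=/tmp/gnd_roxford5k.mat \
#     --features_dir=/tmp/features \
#     --index_mapping_path=/tmp/index_mapping.csv \
#     --output_aggregation_dir=/tmp/aggregation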
@@ -44,7 +44,7 @@ from tensorflow.python.platform import app
from delf import delf_config_pb2
from delf import box_io
from delf import feature_io
from delf.python.detect_to_retrieve import dataset
from delf import extract_boxes
from delf import extract_features
@@ -39,7 +39,7 @@ from google.protobuf import text_format
from tensorflow.python.platform import app
from delf import delf_config_pb2
from delf import feature_io
from delf.python.detect_to_retrieve import dataset
from delf import extract_features

cmd_args = None
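# Config for R-ASMK* extraction: per the AggregationConfig comments above,
# ASMK_STAR combined with use_regional_aggregation true yields R-ASMK*.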
codebook_size: 65536
feature_dimensionality: 128
aggregation_type: ASMK_STAR
use_l2_normalization: false
codebook_path: "parameters/k65536_codebook_tfckpt/codebook"
num_assignments: 1
use_regional_aggregation: true
feature_batch_size: 100
alpha: 3.0
tau: 0.0
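
# Config for (non-regional) ASMK* extraction, as indicated by ASMK_STAR with
# use_regional_aggregation false.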
codebook_size: 65536
feature_dimensionality: 128
aggregation_type: ASMK_STAR
codebook_path: "parameters/k65536_codebook_tfckpt/codebook"
num_assignments: 1
use_regional_aggregation: false
feature_batch_size: 100
# Copyright 2019 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Local feature aggregation extraction.
For more details, please refer to the paper:
"Detect-to-Retrieve: Efficient Regional Aggregation for Image Search",
Proc. CVPR'19 (https://arxiv.org/abs/1812.01584).
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import tensorflow as tf
from delf import aggregation_config_pb2
_NORM_SQUARED_TOLERANCE = 1e-12
# Aliases for aggregation types.
_VLAD = aggregation_config_pb2.AggregationConfig.VLAD
_ASMK = aggregation_config_pb2.AggregationConfig.ASMK
_ASMK_STAR = aggregation_config_pb2.AggregationConfig.ASMK_STAR
class ExtractAggregatedRepresentation(object):
  """Class for extraction of aggregated local feature representation.

  Args:
    sess: TensorFlow session to use.
    aggregation_config: AggregationConfig object defining type of aggregation
      to use.

  Raises:
    ValueError: If aggregation type is invalid.
  """
  def __init__(self, sess, aggregation_config):
    self._sess = sess
    self._codebook_size = aggregation_config.codebook_size
    self._feature_dimensionality = aggregation_config.feature_dimensionality
    self._aggregation_type = aggregation_config.aggregation_type
    self._feature_batch_size = aggregation_config.feature_batch_size

    # Inputs to extraction function.
    self._features = tf.compat.v1.placeholder(tf.float32, [None, None])
    self._num_features_per_region = tf.compat.v1.placeholder(tf.int32, [None])

    # Load codebook into graph.
    codebook = tf.compat.v1.get_variable(
        "codebook",
        shape=[
            aggregation_config.codebook_size,
            aggregation_config.feature_dimensionality
        ])
    tf.compat.v1.train.init_from_checkpoint(
        aggregation_config.codebook_path, {
            tf.contrib.factorization.KMeansClustering.CLUSTER_CENTERS_VAR_NAME:
                codebook
        })

    # Construct extraction graph based on desired options.
    if self._aggregation_type == _VLAD:
      # Feature visual words are unused in the case of VLAD, so just return
      # dummy constant.
      self._feature_visual_words = tf.constant(-1, dtype=tf.int32)
      if aggregation_config.use_regional_aggregation:
        self._aggregated_descriptors = self._ComputeRvlad(
            self._features,
            self._num_features_per_region,
            codebook,
            use_l2_normalization=aggregation_config.use_l2_normalization,
            num_assignments=aggregation_config.num_assignments)
      else:
        self._aggregated_descriptors = self._ComputeVlad(
            self._features,
            codebook,
            use_l2_normalization=aggregation_config.use_l2_normalization,
            num_assignments=aggregation_config.num_assignments)
    elif (self._aggregation_type == _ASMK or
          self._aggregation_type == _ASMK_STAR):
      if aggregation_config.use_regional_aggregation:
        (self._aggregated_descriptors,
         self._feature_visual_words) = self._ComputeRasmk(
             self._features,
             self._num_features_per_region,
             codebook,
             num_assignments=aggregation_config.num_assignments)
      else:
        (self._aggregated_descriptors,
         self._feature_visual_words) = self._ComputeAsmk(
             self._features,
             codebook,
             num_assignments=aggregation_config.num_assignments)
    else:
      raise ValueError("Invalid aggregation type: %d" % self._aggregation_type)

    # Initialize variables in the TF graph.
    sess.run(tf.compat.v1.global_variables_initializer())
  def Extract(self, features, num_features_per_region=None):
    """Extracts aggregated representation.

    Args:
      features: [N, D] float numpy array with N local feature descriptors.
      num_features_per_region: Required only if computing regional aggregated
        representations, otherwise optional. List of number of features per
        region, such that sum(num_features_per_region) = N. It indicates which
        features correspond to each region.

    Returns:
      aggregated_descriptors: 1-D numpy array.
      feature_visual_words: Used only for ASMK/ASMK* aggregation type. 1-D
        numpy array denoting visual words corresponding to the
        `aggregated_descriptors`.

    Raises:
      ValueError: If inputs are misconfigured.
    """
    if num_features_per_region is None:
      # Use dummy value since it is unused.
      num_features_per_region = []
    elif (len(num_features_per_region) and
          sum(num_features_per_region) != features.shape[0]):
      raise ValueError(
          "Incorrect arguments: sum(num_features_per_region) and "
          "features.shape[0] are different: %d vs %d" %
          (sum(num_features_per_region), features.shape[0]))

    aggregated_descriptors, feature_visual_words = self._sess.run(
        [self._aggregated_descriptors, self._feature_visual_words],
        feed_dict={
            self._features: features,
            self._num_features_per_region: num_features_per_region
        })

    # If using ASMK*/RASMK*, binarize the aggregated descriptors.
    if self._aggregation_type == _ASMK_STAR:
      reshaped_aggregated_descriptors = np.reshape(
          aggregated_descriptors, [-1, self._feature_dimensionality])
      packed_descriptors = np.packbits(
          reshaped_aggregated_descriptors > 0, axis=1)
      aggregated_descriptors = np.reshape(packed_descriptors, [-1])

    return aggregated_descriptors, feature_visual_words
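  # Illustrative aside (not part of the original code): for D-dimensional
  # residuals, the binarization above keeps only sign bits and packs them 8
  # per byte, e.g. in NumPy terms:
  #
  #   residuals = np.reshape(aggregated_descriptors, [-1, D])
  #   packed = np.packbits(residuals > 0, axis=1)  # [num_words, ceil(D / 8)]
  #
  # so ASMK* stores roughly D/8 bytes per selected visual word instead of D
  # floats.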
  def _ComputeVlad(self,
                   features,
                   codebook,
                   use_l2_normalization=True,
                   num_assignments=1):
    """Compute VLAD representation.

    Args:
      features: [N, D] float tensor.
      codebook: [K, D] float tensor.
      use_l2_normalization: If False, does not L2-normalize after aggregation.
      num_assignments: Number of visual words to assign a feature to.

    Returns:
      vlad: [K*D] float tensor.
    """

    def _ComputeVladEmptyFeatures():
      """Computes VLAD if `features` is empty.

      Returns:
        [K*D] all-zeros tensor.
      """
      return tf.zeros([self._codebook_size * self._feature_dimensionality],
                      dtype=tf.float32)

    def _ComputeVladNonEmptyFeatures():
      """Computes VLAD if `features` is not empty.

      Returns:
        [K*D] tensor with VLAD descriptor.
      """
      num_features = tf.shape(features)[0]

      # Find nearest visual words for each feature. Possibly batch the local
      # features to avoid OOM.
      if self._feature_batch_size <= 0:
        actual_batch_size = num_features
      else:
        actual_batch_size = self._feature_batch_size

      def _BatchNearestVisualWords(ind, selected_visual_words):
        """Compute nearest neighbor visual words for a batch of features.

        Args:
          ind: Integer index denoting feature.
          selected_visual_words: Partial set of visual words.

        Returns:
          output_ind: Next index.
          output_selected_visual_words: Updated set of visual words, including
            the visual words for the new batch.
        """
        # Handle case of last batch, where there may be fewer than
        # `actual_batch_size` features.
        batch_size_to_use = tf.cond(
            tf.greater(ind + actual_batch_size, num_features),
            true_fn=lambda: num_features - ind,
            false_fn=lambda: actual_batch_size)

        # Denote B = batch_size_to_use.
        # K*B x D.
        tiled_features = tf.reshape(
            tf.tile(
                tf.slice(features, [ind, 0],
                         [batch_size_to_use, self._feature_dimensionality]),
                [1, self._codebook_size]), [-1, self._feature_dimensionality])
        # K*B x D.
        tiled_codebook = tf.reshape(
            tf.tile(tf.reshape(codebook, [1, -1]), [batch_size_to_use, 1]),
            [-1, self._feature_dimensionality])
        # B x K.
        squared_distances = tf.reshape(
            tf.reduce_sum(
                tf.math.squared_difference(tiled_features, tiled_codebook),
                axis=1), [batch_size_to_use, self._codebook_size])
        # B x K.
        nearest_visual_words = tf.argsort(squared_distances)
        # B x num_assignments.
        batch_selected_visual_words = tf.slice(
            nearest_visual_words, [0, 0], [batch_size_to_use, num_assignments])
        selected_visual_words = tf.concat(
            [selected_visual_words, batch_selected_visual_words], axis=0)
        return ind + batch_size_to_use, selected_visual_words

      ind_batch = tf.constant(0, dtype=tf.int32)
      keep_going = lambda j, selected_visual_words: tf.less(j, num_features)
      selected_visual_words = tf.zeros([0, num_assignments], dtype=tf.int32)
      _, selected_visual_words = tf.while_loop(
          cond=keep_going,
          body=_BatchNearestVisualWords,
          loop_vars=[ind_batch, selected_visual_words],
          shape_invariants=[
              ind_batch.get_shape(),
              tf.TensorShape([None, num_assignments])
          ],
          parallel_iterations=1,
          back_prop=False)

      # Helper function to collect residuals for relevant visual words.
      def _ConstructVladFromAssignments(ind, vlad):
        """Add contributions of a feature to a VLAD descriptor.

        Args:
          ind: Integer index denoting feature.
          vlad: Partial VLAD descriptor.

        Returns:
          output_ind: Next index (ie, ind+1).
          output_vlad: VLAD descriptor updated to take into account
            contribution from ind-th feature.
        """
        return ind + 1, tf.compat.v1.tensor_scatter_add(
            vlad, tf.expand_dims(selected_visual_words[ind], axis=1),
            tf.tile(
                tf.expand_dims(features[ind], axis=0), [num_assignments, 1]) -
            tf.gather(codebook, selected_visual_words[ind]))

      ind_vlad = tf.constant(0, dtype=tf.int32)
      keep_going = lambda j, vlad: tf.less(j, num_features)
      vlad = tf.zeros([self._codebook_size, self._feature_dimensionality],
                      dtype=tf.float32)
      _, vlad = tf.while_loop(
          cond=keep_going,
          body=_ConstructVladFromAssignments,
          loop_vars=[ind_vlad, vlad],
          back_prop=False)

      vlad = tf.reshape(vlad,
                        [self._codebook_size * self._feature_dimensionality])
      if use_l2_normalization:
        vlad = tf.math.l2_normalize(vlad, epsilon=_NORM_SQUARED_TOLERANCE)

      return vlad

    return tf.cond(
        tf.greater(tf.size(features), 0),
        true_fn=_ComputeVladNonEmptyFeatures,
        false_fn=_ComputeVladEmptyFeatures)
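  # In equation form (consistent with the code above): with NN(x_i) denoting
  # the `num_assignments` nearest codewords of feature x_i, the unnormalized
  # VLAD residual for codeword c_k is
  #
  #   v_k = sum_{i : c_k in NN(x_i)} (x_i - c_k),
  #
  # and the output concatenates v_1, ..., v_K into a [K*D] vector, optionally
  # L2-normalized.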
  def _ComputeRvlad(self,
                    features,
                    num_features_per_region,
                    codebook,
                    use_l2_normalization=False,
                    num_assignments=1):
    """Compute R-VLAD representation.

    Args:
      features: [N, D] float tensor.
      num_features_per_region: [R] int tensor. Contains number of features per
        region, such that sum(num_features_per_region) = N. It indicates which
        features correspond to each region.
      codebook: [K, D] float tensor.
      use_l2_normalization: If True, performs L2-normalization after regional
        aggregation; if False (default), performs componentwise division by R
        after regional aggregation.
      num_assignments: Number of visual words to assign a feature to.

    Returns:
      rvlad: [K*D] float tensor.
    """

    def _ComputeRvladEmptyRegions():
      """Computes R-VLAD if `num_features_per_region` is empty.

      Returns:
        [K*D] all-zeros tensor.
      """
      return tf.zeros([self._codebook_size * self._feature_dimensionality],
                      dtype=tf.float32)

    def _ComputeRvladNonEmptyRegions():
      """Computes R-VLAD if `num_features_per_region` is not empty.

      Returns:
        [K*D] tensor with R-VLAD descriptor.
      """

      # Helper function to compose initial R-VLAD from image regions.
      def _ConstructRvladFromVlad(ind, rvlad):
        """Add contributions from different regions into R-VLAD.

        Args:
          ind: Integer index denoting region.
          rvlad: Partial R-VLAD descriptor.

        Returns:
          output_ind: Next index (ie, ind+1).
          output_rvlad: R-VLAD descriptor updated to take into account
            contribution from ind-th region.
        """
        return ind + 1, rvlad + self._ComputeVlad(
            tf.slice(
                features, [tf.reduce_sum(num_features_per_region[:ind]), 0],
                [num_features_per_region[ind], self._feature_dimensionality]),
            codebook,
            num_assignments=num_assignments)

      i = tf.constant(0, dtype=tf.int32)
      num_regions = tf.shape(num_features_per_region)[0]
      keep_going = lambda j, rvlad: tf.less(j, num_regions)
      rvlad = tf.zeros([self._codebook_size * self._feature_dimensionality],
                       dtype=tf.float32)
      _, rvlad = tf.while_loop(
          cond=keep_going,
          body=_ConstructRvladFromVlad,
          loop_vars=[i, rvlad],
          back_prop=False,
          parallel_iterations=1)

      if use_l2_normalization:
        rvlad = tf.math.l2_normalize(rvlad, epsilon=_NORM_SQUARED_TOLERANCE)
      else:
        rvlad /= tf.cast(num_regions, dtype=tf.float32)

      return rvlad

    return tf.cond(
        tf.greater(tf.size(num_features_per_region), 0),
        true_fn=_ComputeRvladNonEmptyRegions,
        false_fn=_ComputeRvladEmptyRegions)
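  # Equivalently (matching the loop above): with default settings, R-VLAD is
  # the average of the per-region unnormalized VLAD descriptors,
  #
  #   rvlad = (1/R) * sum_{r=1}^{R} vlad(region_r),
  #
  # with L2 normalization replacing the division by R when
  # `use_l2_normalization` is true.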
  def _PerCentroidNormalization(self, unnormalized_vector):
    """Perform per-centroid normalization.

    Args:
      unnormalized_vector: [KxD] float tensor.

    Returns:
      per_centroid_normalized_vector: [KxD] float tensor, with normalized
        aggregated residuals. Some residuals may be all-zero.
      visual_words: Int tensor containing indices of visual words which are
        present for the set of features.
    """
    unnormalized_vector = tf.reshape(
        unnormalized_vector,
        [self._codebook_size, self._feature_dimensionality])
    per_centroid_norms = tf.norm(unnormalized_vector, axis=1)

    visual_words = tf.reshape(
        tf.where(
            tf.greater(per_centroid_norms, tf.sqrt(_NORM_SQUARED_TOLERANCE))),
        [-1])

    per_centroid_normalized_vector = tf.math.l2_normalize(
        unnormalized_vector, axis=1, epsilon=_NORM_SQUARED_TOLERANCE)

    return per_centroid_normalized_vector, visual_words
  def _ComputeAsmk(self, features, codebook, num_assignments=1):
    """Compute ASMK representation.

    Args:
      features: [N, D] float tensor.
      codebook: [K, D] float tensor.
      num_assignments: Number of visual words to assign a feature to.

    Returns:
      normalized_residuals: 1-dimensional float tensor with concatenated
        residuals which are non-zero. Note that the dimensionality is
        input-dependent.
      visual_words: 1-dimensional int tensor of sorted visual word ids.
        Dimensionality is shape(normalized_residuals)[0] / D.
    """
    unnormalized_vlad = self._ComputeVlad(
        features,
        codebook,
        use_l2_normalization=False,
        num_assignments=num_assignments)

    per_centroid_normalized_vlad, visual_words = self._PerCentroidNormalization(
        unnormalized_vlad)

    normalized_residuals = tf.reshape(
        tf.gather(per_centroid_normalized_vlad, visual_words),
        [tf.shape(visual_words)[0] * self._feature_dimensionality])

    return normalized_residuals, visual_words
  def _ComputeRasmk(self,
                    features,
                    num_features_per_region,
                    codebook,
                    num_assignments=1):
    """Compute R-ASMK representation.

    Args:
      features: [N, D] float tensor.
      num_features_per_region: [R] int tensor. Contains number of features per
        region, such that sum(num_features_per_region) = N. It indicates which
        features correspond to each region.
      codebook: [K, D] float tensor.
      num_assignments: Number of visual words to assign a feature to.

    Returns:
      normalized_residuals: 1-dimensional float tensor with concatenated
        residuals which are non-zero. Note that the dimensionality is
        input-dependent.
      visual_words: 1-dimensional int tensor of sorted visual word ids.
        Dimensionality is shape(normalized_residuals)[0] / D.
    """
    unnormalized_rvlad = self._ComputeRvlad(
        features,
        num_features_per_region,
        codebook,
        use_l2_normalization=False,
        num_assignments=num_assignments)

    (per_centroid_normalized_rvlad,
     visual_words) = self._PerCentroidNormalization(unnormalized_rvlad)

    normalized_residuals = tf.reshape(
        tf.gather(per_centroid_normalized_rvlad, visual_words),
        [tf.shape(visual_words)[0] * self._feature_dimensionality])

    return normalized_residuals, visual_words
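Usage of this class is session-based, in TF1 style. A minimal sketch (assuming a codebook checkpoint exists at a hypothetical path, with codebook_size and feature_dimensionality matching it), mirroring the tests below:

import numpy as np
import tensorflow as tf

from delf import aggregation_config_pb2
from delf import feature_aggregation_extractor

config = aggregation_config_pb2.AggregationConfig()
config.codebook_size = 5
config.feature_dimensionality = 2
config.aggregation_type = aggregation_config_pb2.AggregationConfig.ASMK
config.codebook_path = '/tmp/test_codebook'  # Hypothetical checkpoint.
config.num_assignments = 1

features = np.array([[1.0, 0.0], [-1.0, 0.0]], dtype='float32')
with tf.Graph().as_default(), tf.Session() as sess:
  extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
      sess, config)
  residuals, visual_words = extractor.Extract(features)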
# Copyright 2019 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for DELF feature aggregation."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import numpy as np
import tensorflow as tf
from delf import aggregation_config_pb2
from delf import feature_aggregation_extractor
class FeatureAggregationTest(tf.test.TestCase):

  def _CreateCodebook(self, checkpoint_path):
    """Creates codebook used in tests.

    Args:
      checkpoint_path: Directory where codebook is saved to.
    """
    with tf.Graph().as_default() as g, self.session(graph=g) as sess:
      codebook = tf.Variable(
          [[0.5, 0.5], [0.0, 0.0], [1.0, 0.0], [-0.5, -0.5], [0.0, 1.0]],
          name='clusters')
      saver = tf.compat.v1.train.Saver([codebook])
      sess.run(tf.compat.v1.global_variables_initializer())
      saver.save(sess, checkpoint_path)

  def setUp(self):
    self._codebook_path = os.path.join(tf.compat.v1.test.get_temp_dir(),
                                       'test_codebook')
    self._CreateCodebook(self._codebook_path)
  def testComputeNormalizedVladWorks(self):
    # Construct inputs.
    # 3 2-D features.
    features = np.array([[1.0, 0.0], [-1.0, 0.0], [1.0, 2.0]], dtype=float)
    config = aggregation_config_pb2.AggregationConfig()
    config.codebook_size = 5
    config.feature_dimensionality = 2
    config.aggregation_type = aggregation_config_pb2.AggregationConfig.VLAD
    config.use_l2_normalization = True
    config.codebook_path = self._codebook_path
    config.num_assignments = 1

    # Run tested function.
    with tf.Graph().as_default() as g, self.session(graph=g) as sess:
      extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
          sess, config)
      vlad, extra_output = extractor.Extract(features)

    # Define expected results.
    exp_vlad = [
        0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.316228, 0.316228, 0.632456, 0.632456
    ]
    exp_extra_output = -1

    # Compare actual and expected results.
    self.assertAllClose(vlad, exp_vlad)
    self.assertAllEqual(extra_output, exp_extra_output)

  def testComputeNormalizedVladWithBatchingWorks(self):
    # Construct inputs.
    # 3 2-D features.
    features = np.array([[1.0, 0.0], [-1.0, 0.0], [1.0, 2.0]], dtype=float)
    config = aggregation_config_pb2.AggregationConfig()
    config.codebook_size = 5
    config.feature_dimensionality = 2
    config.aggregation_type = aggregation_config_pb2.AggregationConfig.VLAD
    config.use_l2_normalization = True
    config.codebook_path = self._codebook_path
    config.num_assignments = 1
    config.feature_batch_size = 2

    # Run tested function.
    with tf.Graph().as_default() as g, self.session(graph=g) as sess:
      extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
          sess, config)
      vlad, extra_output = extractor.Extract(features)

    # Define expected results.
    exp_vlad = [
        0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.316228, 0.316228, 0.632456, 0.632456
    ]
    exp_extra_output = -1

    # Compare actual and expected results.
    self.assertAllClose(vlad, exp_vlad)
    self.assertAllEqual(extra_output, exp_extra_output)

  def testComputeUnnormalizedVladWorks(self):
    # Construct inputs.
    # 3 2-D features.
    features = np.array([[1.0, 0.0], [-1.0, 0.0], [1.0, 2.0]], dtype=float)
    config = aggregation_config_pb2.AggregationConfig()
    config.codebook_size = 5
    config.feature_dimensionality = 2
    config.aggregation_type = aggregation_config_pb2.AggregationConfig.VLAD
    config.use_l2_normalization = False
    config.codebook_path = self._codebook_path
    config.num_assignments = 1

    # Run tested function.
    with tf.Graph().as_default() as g, self.session(graph=g) as sess:
      extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
          sess, config)
      vlad, extra_output = extractor.Extract(features)

    # Define expected results.
    exp_vlad = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.5, 0.5, 1.0, 1.0]
    exp_extra_output = -1

    # Compare actual and expected results.
    self.assertAllEqual(vlad, exp_vlad)
    self.assertAllEqual(extra_output, exp_extra_output)

  def testComputeUnnormalizedVladMultipleAssignmentWorks(self):
    # Construct inputs.
    # 3 2-D features.
    features = np.array([[1.0, 0.0], [-1.0, 0.0], [1.0, 2.0]], dtype=float)
    config = aggregation_config_pb2.AggregationConfig()
    config.codebook_size = 5
    config.feature_dimensionality = 2
    config.aggregation_type = aggregation_config_pb2.AggregationConfig.VLAD
    config.use_l2_normalization = False
    config.codebook_path = self._codebook_path
    config.num_assignments = 3

    # Run tested function.
    with tf.Graph().as_default() as g, self.session(graph=g) as sess:
      extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
          sess, config)
      vlad, extra_output = extractor.Extract(features)

    # Define expected results.
    exp_vlad = [1.0, 1.0, 0.0, 0.0, 0.0, 2.0, -0.5, 0.5, 0.0, 0.0]
    exp_extra_output = -1

    # Compare actual and expected results.
    self.assertAllEqual(vlad, exp_vlad)
    self.assertAllEqual(extra_output, exp_extra_output)
  def testComputeVladEmptyFeaturesWorks(self):
    # Construct inputs.
    # Empty feature array.
    features = np.array([[]])
    config = aggregation_config_pb2.AggregationConfig()
    config.codebook_size = 5
    config.feature_dimensionality = 2
    config.aggregation_type = aggregation_config_pb2.AggregationConfig.VLAD
    config.codebook_path = self._codebook_path

    # Run tested function.
    with tf.Graph().as_default() as g, self.session(graph=g) as sess:
      extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
          sess, config)
      vlad, extra_output = extractor.Extract(features)

    # Define expected results.
    exp_vlad = np.zeros([10], dtype=float)
    exp_extra_output = -1

    # Compare actual and expected results.
    self.assertAllEqual(vlad, exp_vlad)
    self.assertAllEqual(extra_output, exp_extra_output)

  def testComputeUnnormalizedRvladWorks(self):
    # Construct inputs.
    # 4 2-D features: 3 in first region, 1 in second region.
    features = np.array([[1.0, 0.0], [-1.0, 0.0], [1.0, 2.0], [0.0, 2.0]],
                        dtype=float)
    num_features_per_region = np.array([3, 1])
    config = aggregation_config_pb2.AggregationConfig()
    config.codebook_size = 5
    config.feature_dimensionality = 2
    config.aggregation_type = aggregation_config_pb2.AggregationConfig.VLAD
    config.use_l2_normalization = False
    config.codebook_path = self._codebook_path
    config.num_assignments = 1
    config.use_regional_aggregation = True

    # Run tested function.
    with tf.Graph().as_default() as g, self.session(graph=g) as sess:
      extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
          sess, config)
      rvlad, extra_output = extractor.Extract(features,
                                              num_features_per_region)

    # Define expected results.
    exp_rvlad = [
        0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.158114, 0.158114, 0.316228, 0.816228
    ]
    exp_extra_output = -1

    # Compare actual and expected results.
    self.assertAllClose(rvlad, exp_rvlad)
    self.assertAllEqual(extra_output, exp_extra_output)

  def testComputeNormalizedRvladWorks(self):
    # Construct inputs.
    # 4 2-D features: 3 in first region, 1 in second region.
    features = np.array([[1.0, 0.0], [-1.0, 0.0], [1.0, 2.0], [0.0, 2.0]],
                        dtype=float)
    num_features_per_region = np.array([3, 1])
    config = aggregation_config_pb2.AggregationConfig()
    config.codebook_size = 5
    config.feature_dimensionality = 2
    config.aggregation_type = aggregation_config_pb2.AggregationConfig.VLAD
    config.use_l2_normalization = True
    config.codebook_path = self._codebook_path
    config.num_assignments = 1
    config.use_regional_aggregation = True

    # Run tested function.
    with tf.Graph().as_default() as g, self.session(graph=g) as sess:
      extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
          sess, config)
      rvlad, extra_output = extractor.Extract(features,
                                              num_features_per_region)

    # Define expected results.
    exp_rvlad = [
        0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.175011, 0.175011, 0.350021, 0.903453
    ]
    exp_extra_output = -1

    # Compare actual and expected results.
    self.assertAllClose(rvlad, exp_rvlad)
    self.assertAllEqual(extra_output, exp_extra_output)

  def testComputeRvladEmptyRegionsWorks(self):
    # Construct inputs.
    # Empty feature array.
    features = np.array([[]])
    num_features_per_region = np.array([])
    config = aggregation_config_pb2.AggregationConfig()
    config.codebook_size = 5
    config.feature_dimensionality = 2
    config.aggregation_type = aggregation_config_pb2.AggregationConfig.VLAD
    config.codebook_path = self._codebook_path
    config.use_regional_aggregation = True

    # Run tested function.
    with tf.Graph().as_default() as g, self.session(graph=g) as sess:
      extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
          sess, config)
      rvlad, extra_output = extractor.Extract(features,
                                              num_features_per_region)

    # Define expected results.
    exp_rvlad = np.zeros([10], dtype=float)
    exp_extra_output = -1

    # Compare actual and expected results.
    self.assertAllEqual(rvlad, exp_rvlad)
    self.assertAllEqual(extra_output, exp_extra_output)
  def testComputeUnnormalizedRvladSomeEmptyRegionsWorks(self):
    # Construct inputs.
    # 4 2-D features: 0 in first region, 3 in second region, 0 in third region,
    # 1 in fourth region.
    features = np.array([[1.0, 0.0], [-1.0, 0.0], [1.0, 2.0], [0.0, 2.0]],
                        dtype=float)
    num_features_per_region = np.array([0, 3, 0, 1])
    config = aggregation_config_pb2.AggregationConfig()
    config.codebook_size = 5
    config.feature_dimensionality = 2
    config.aggregation_type = aggregation_config_pb2.AggregationConfig.VLAD
    config.use_l2_normalization = False
    config.codebook_path = self._codebook_path
    config.num_assignments = 1
    config.use_regional_aggregation = True

    # Run tested function.
    with tf.Graph().as_default() as g, self.session(graph=g) as sess:
      extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
          sess, config)
      rvlad, extra_output = extractor.Extract(features,
                                              num_features_per_region)

    # Define expected results.
    exp_rvlad = [
        0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.079057, 0.079057, 0.158114, 0.408114
    ]
    exp_extra_output = -1

    # Compare actual and expected results.
    self.assertAllClose(rvlad, exp_rvlad)
    self.assertAllEqual(extra_output, exp_extra_output)

  def testComputeNormalizedRvladSomeEmptyRegionsWorks(self):
    # Construct inputs.
    # 4 2-D features: 0 in first region, 3 in second region, 0 in third region,
    # 1 in fourth region.
    features = np.array([[1.0, 0.0], [-1.0, 0.0], [1.0, 2.0], [0.0, 2.0]],
                        dtype=float)
    num_features_per_region = np.array([0, 3, 0, 1])
    config = aggregation_config_pb2.AggregationConfig()
    config.codebook_size = 5
    config.feature_dimensionality = 2
    config.aggregation_type = aggregation_config_pb2.AggregationConfig.VLAD
    config.use_l2_normalization = True
    config.codebook_path = self._codebook_path
    config.num_assignments = 1
    config.use_regional_aggregation = True

    # Run tested function.
    with tf.Graph().as_default() as g, self.session(graph=g) as sess:
      extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
          sess, config)
      rvlad, extra_output = extractor.Extract(features,
                                              num_features_per_region)

    # Define expected results.
    exp_rvlad = [
        0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.175011, 0.175011, 0.350021, 0.903453
    ]
    exp_extra_output = -1

    # Compare actual and expected results.
    self.assertAllClose(rvlad, exp_rvlad)
    self.assertAllEqual(extra_output, exp_extra_output)

  def testComputeRvladMisconfiguredFeatures(self):
    # Construct inputs.
    # 4 2-D features: 3 in first region, 1 in second region.
    features = np.array([[1.0, 0.0], [-1.0, 0.0], [1.0, 2.0], [0.0, 2.0]],
                        dtype=float)
    # Misconfigured number of features; there are only 4 features, but
    # sum(num_features_per_region) = 5.
    num_features_per_region = np.array([3, 2])
    config = aggregation_config_pb2.AggregationConfig()
    config.codebook_size = 5
    config.feature_dimensionality = 2
    config.aggregation_type = aggregation_config_pb2.AggregationConfig.VLAD
    config.codebook_path = self._codebook_path
    config.use_regional_aggregation = True

    # Run tested function.
    with tf.Graph().as_default() as g, self.session(graph=g) as sess:
      extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
          sess, config)
      with self.assertRaisesRegex(
          ValueError,
          r'Incorrect arguments: sum\(num_features_per_region\) and '
          r'features.shape\[0\] are different'):
        extractor.Extract(features, num_features_per_region)
  def testComputeAsmkWorks(self):
    # Construct inputs.
    # 3 2-D features.
    features = np.array([[1.0, 0.0], [-1.0, 0.0], [1.0, 2.0]], dtype=float)
    config = aggregation_config_pb2.AggregationConfig()
    config.codebook_size = 5
    config.feature_dimensionality = 2
    config.aggregation_type = aggregation_config_pb2.AggregationConfig.ASMK
    config.codebook_path = self._codebook_path
    config.num_assignments = 1

    # Run tested function.
    with tf.Graph().as_default() as g, self.session(graph=g) as sess:
      extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
          sess, config)
      asmk, visual_words = extractor.Extract(features)

    # Define expected results.
    exp_asmk = [-0.707107, 0.707107, 0.707107, 0.707107]
    exp_visual_words = [3, 4]

    # Compare actual and expected results.
    self.assertAllClose(asmk, exp_asmk)
    self.assertAllEqual(visual_words, exp_visual_words)

  def testComputeAsmkStarWorks(self):
    # Construct inputs.
    # 3 2-D features.
    features = np.array([[1.0, 0.0], [-1.0, 0.0], [1.0, 2.0]], dtype=float)
    config = aggregation_config_pb2.AggregationConfig()
    config.codebook_size = 5
    config.feature_dimensionality = 2
    config.aggregation_type = aggregation_config_pb2.AggregationConfig.ASMK_STAR
    config.codebook_path = self._codebook_path
    config.num_assignments = 1

    # Run tested function.
    with tf.Graph().as_default() as g, self.session(graph=g) as sess:
      extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
          sess, config)
      asmk_star, visual_words = extractor.Extract(features)

    # Define expected results.
    exp_asmk_star = [64, 192]
    exp_visual_words = [3, 4]

    # Compare actual and expected results.
    self.assertAllEqual(asmk_star, exp_asmk_star)
    self.assertAllEqual(visual_words, exp_visual_words)

  def testComputeAsmkMultipleAssignmentWorks(self):
    # Construct inputs.
    # 3 2-D features.
    features = np.array([[1.0, 0.0], [-1.0, 0.0], [1.0, 2.0]], dtype=float)
    config = aggregation_config_pb2.AggregationConfig()
    config.codebook_size = 5
    config.feature_dimensionality = 2
    config.aggregation_type = aggregation_config_pb2.AggregationConfig.ASMK
    config.codebook_path = self._codebook_path
    config.num_assignments = 3

    # Run tested function.
    with tf.Graph().as_default() as g, self.session(graph=g) as sess:
      extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
          sess, config)
      asmk, visual_words = extractor.Extract(features)

    # Define expected results.
    exp_asmk = [0.707107, 0.707107, 0.0, 1.0, -0.707107, 0.707107]
    exp_visual_words = [0, 2, 3]

    # Compare actual and expected results.
    self.assertAllClose(asmk, exp_asmk)
    self.assertAllEqual(visual_words, exp_visual_words)

  def testComputeRasmkWorks(self):
    # Construct inputs.
    # 4 2-D features: 3 in first region, 1 in second region.
    features = np.array([[1.0, 0.0], [-1.0, 0.0], [1.0, 2.0], [0.0, 2.0]],
                        dtype=float)
    num_features_per_region = np.array([3, 1])
    config = aggregation_config_pb2.AggregationConfig()
    config.codebook_size = 5
    config.feature_dimensionality = 2
    config.aggregation_type = aggregation_config_pb2.AggregationConfig.ASMK
    config.codebook_path = self._codebook_path
    config.num_assignments = 1
    config.use_regional_aggregation = True

    # Run tested function.
    with tf.Graph().as_default() as g, self.session(graph=g) as sess:
      extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
          sess, config)
      rasmk, visual_words = extractor.Extract(features,
                                              num_features_per_region)

    # Define expected results.
    exp_rasmk = [-0.707107, 0.707107, 0.361261, 0.932465]
    exp_visual_words = [3, 4]

    # Compare actual and expected results.
    self.assertAllClose(rasmk, exp_rasmk)
    self.assertAllEqual(visual_words, exp_visual_words)
  def testComputeRasmkStarWorks(self):
    # Construct inputs.
    # 4 2-D features: 3 in first region, 1 in second region.
    features = np.array([[1.0, 0.0], [-1.0, 0.0], [1.0, 2.0], [0.0, 2.0]],
                        dtype=float)
    num_features_per_region = np.array([3, 1])
    config = aggregation_config_pb2.AggregationConfig()
    config.codebook_size = 5
    config.feature_dimensionality = 2
    config.aggregation_type = aggregation_config_pb2.AggregationConfig.ASMK_STAR
    config.codebook_path = self._codebook_path
    config.num_assignments = 1
    config.use_regional_aggregation = True

    # Run tested function.
    with tf.Graph().as_default() as g, self.session(graph=g) as sess:
      extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
          sess, config)
      rasmk_star, visual_words = extractor.Extract(features,
                                                   num_features_per_region)

    # Define expected results.
    exp_rasmk_star = [64, 192]
    exp_visual_words = [3, 4]

    # Compare actual and expected results.
    self.assertAllEqual(rasmk_star, exp_rasmk_star)
    self.assertAllEqual(visual_words, exp_visual_words)

  def testComputeUnknownAggregation(self):
    # Construct inputs.
    config = aggregation_config_pb2.AggregationConfig()
    config.codebook_size = 5
    config.feature_dimensionality = 2
    config.aggregation_type = 0
    config.codebook_path = self._codebook_path
    config.use_regional_aggregation = True

    # Run tested function.
    with tf.Graph().as_default() as g, self.session(graph=g) as sess:
      with self.assertRaisesRegex(ValueError, 'Invalid aggregation type'):
        feature_aggregation_extractor.ExtractAggregatedRepresentation(
            sess, config)


if __name__ == '__main__':
  tf.test.main()