"doc/vscode:/vscode.git/clone" did not exist on "69fa2f4395db9b2ee2f94cdfb186467cc20c5e61"
Commit a27bad9b authored by André Araujo, committed by aquariusjay

DELF feature aggregation extraction: VLAD, ASMK, ASMK* (#6780)

* Initial feature aggregation code for Detect-to-Retrieve paper.

PiperOrigin-RevId: 246043144

* Add support for ASMK/ASMK*/R-ASMK/R-ASMK*.

PiperOrigin-RevId: 247337028

* Add DatumProto uint32 field, and limit datum_io to uint32 and float32/float64 types.

Also, introduce DatumPairProto, to be used for ASMK variants. Functions to read/write in this new format are added and tested.

PiperOrigin-RevId: 247515205

* Add batching option to feature aggregation extraction.

PiperOrigin-RevId: 247614627

* Script to perform local feature aggregation, with associated configs.

Also small edits to the aggregation extractor, for better handling of input features / avoiding OOM.

PiperOrigin-RevId: 248150750

* Tests to check that aggregation using regions with no local features works.

PiperOrigin-RevId: 248153275

* Include new library/proto for aggregation

* Merged commit includes the following changes:

PiperOrigin-RevId: 248176511

* Merged commit includes the following changes:
248194572  by Andre Araujo:

    Change tf.tensor_scatter_nd_add --> tf.compat.v1.tensor_scatter_add to make it compatible with TF 1.X.

--

PiperOrigin-RevId: 248194572
parent 8d97814e
@@ -18,6 +18,7 @@ from __future__ import division
from __future__ import print_function
# pylint: disable=unused-import
from delf.protos import aggregation_config_pb2
from delf.protos import box_pb2
from delf.protos import datum_pb2
from delf.protos import delf_config_pb2
@@ -26,6 +27,7 @@ from delf.python import box_io
from delf.python import datum_io
from delf.python import delf_v1
from delf.python import detect_to_retrieve
from delf.python import feature_aggregation_extractor
from delf.python import feature_extractor
from delf.python import feature_io
from delf.python.examples import extract_boxes
// Protocol buffer for feature aggregation configuration.
//
// Used for both extraction and comparison of aggregated representations. Note
// that some options are only relevant for the former or the latter.
//
// For more details, please refer to the paper:
// "Detect-to-Retrieve: Efficient Regional Aggregation for Image Search",
// Proc. CVPR'19 (https://arxiv.org/abs/1812.01584).
syntax = "proto2";
package delf.protos;
message AggregationConfig {
  // Number of codewords (ie, visual words) in the codebook.
  optional int32 codebook_size = 1 [default = 65536];

  // Dimensionality of local features (eg, 128 for DELF used in
  // Detect-to-Retrieve paper).
  optional int32 feature_dimensionality = 2 [default = 128];

  // Type of aggregation to use.
  // For example, to use R-ASMK*, `aggregation_type` should be set to ASMK_STAR
  // and `use_regional_aggregation` should be set to true.
  enum AggregationType {
    INVALID = 0;
    VLAD = 1;
    ASMK = 2;
    ASMK_STAR = 3;
  }
  optional AggregationType aggregation_type = 3 [default = ASMK_STAR];

  // L2 normalization option.
  // - For vanilla aggregated kernels (eg, VLAD/ASMK/ASMK*), this should be
  //   set to true.
  // - For regional aggregated kernels (ie, if `use_regional_aggregation` is
  //   true, leading to R-VLAD/R-ASMK/R-ASMK*), this should be set to false.
  // Note that it is used differently depending on the `aggregation_type`:
  // - For VLAD, this option is only used for extraction.
  // - For ASMK/ASMK*, this option is only used for comparisons.
  optional bool use_l2_normalization = 4 [default = true];

  // Additional options used only for extraction.
  // - Path to codebook checkpoint for aggregation.
  optional string codebook_path = 5;
  // - Number of visual words to assign each feature.
  optional int32 num_assignments = 6 [default = 1];
  // - Whether to use regional aggregation.
  optional bool use_regional_aggregation = 7 [default = false];
  // - Batch size to use for local features when computing aggregated
  //   representations. Particularly useful if `codebook_size` and
  //   `feature_dimensionality` are large, to avoid OOM. A value of zero or
  //   lower indicates that no batching is used.
  optional int32 feature_batch_size = 10 [default = 100];

  // Additional options used only for comparison.
  // Only relevant if `aggregation_type` is ASMK or ASMK_STAR.
  // - Power-law exponent for similarity of visual word descriptors.
  optional float alpha = 8 [default = 3.0];
  // - Threshold above which similarity of visual word descriptors are
  //   considered; below this, similarity is set to zero.
  optional float tau = 9 [default = 0.0];
}
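As a quick usage sketch (not part of the commit; the pbtxt path below is hypothetical), a config in this format can be parsed with the protobuf text_format module, mirroring how extract_aggregation.py reads it further down:

from google.protobuf import text_format
from delf import aggregation_config_pb2

config = aggregation_config_pb2.AggregationConfig()
with open('/tmp/aggregation_config.pbtxt') as f:  # Hypothetical path.
  text_format.Merge(f.read(), config)
# Per the comments above, R-ASMK* corresponds to aggregation_type ASMK_STAR
# combined with use_regional_aggregation = true.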
@@ -39,15 +39,28 @@ message DatumShape {
  repeated int64 dim = 1 [packed = true];
}

// FloatList is a container of tensor values, which are saved as a list of
// floating point values.
message FloatList {
  repeated float value = 1 [packed = true];
}

// Uint32List is a container of tensor values, which are saved as a list of
// uint32 values.
message Uint32List {
  repeated uint32 value = 1 [packed = true];
}

message DatumProto {
  optional DatumShape shape = 1;
  oneof kind_oneof {
    FloatList float_list = 2;
    Uint32List uint32_list = 3;
  }
}

// Groups two DatumProto's.
message DatumPairProto {
  optional DatumProto first = 1;
  optional DatumProto second = 2;
}
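A minimal sketch of the new fields (assuming `datum_pb2` is the compiled module for this proto, as imported by datum_io.py below): members of `kind_oneof` are mutually exclusive, so setting one clears the other.

from delf import datum_pb2

datum = datum_pb2.DatumProto()
datum.uint32_list.value.extend([1, 2, 3, 4])
datum.shape.dim.extend([2, 2])
assert datum.WhichOneof('kind_oneof') == 'uint32_list'
# Setting float_list clears uint32_list, since both live in kind_oneof.
datum.float_list.value.append(0.5)
assert datum.WhichOneof('kind_oneof') == 'float_list'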
@@ -17,7 +17,7 @@
DatumProto is a protocol buffer used to serialize tensors of arbitrary shape.
Please refer to datum.proto for details.

Supports reading and writing of DatumProto from/to NumPy arrays and files.
"""
from __future__ import absolute_import
@@ -31,37 +31,95 @@ from delf import datum_pb2
def ArrayToDatum(arr):
  """Converts NumPy array to DatumProto.

  Supports arrays of types:
    - float16 (it is converted into a float32 in DatumProto)
    - float32
    - float64 (it is converted into a float32 in DatumProto)
    - uint8 (it is converted into a uint32 in DatumProto)
    - uint16 (it is converted into a uint32 in DatumProto)
    - uint32
    - uint64 (it is converted into a uint32 in DatumProto)

  Args:
    arr: NumPy array of arbitrary shape.

  Returns:
    datum: DatumProto object.

  Raises:
    ValueError: If array type is unsupported.
  """
  datum = datum_pb2.DatumProto()
  if arr.dtype in ('float16', 'float32', 'float64'):
    datum.float_list.value.extend(arr.astype('float32').flat)
  elif arr.dtype in ('uint8', 'uint16', 'uint32', 'uint64'):
    datum.uint32_list.value.extend(arr.astype('uint32').flat)
  else:
    raise ValueError('Unsupported array type: %s' % arr.dtype)

  datum.shape.dim.extend(arr.shape)
  return datum
def ArraysToDatumPair(arr_1, arr_2):
  """Converts NumPy arrays to DatumPairProto.

  Supports same formats as `ArrayToDatum`, see documentation therein.

  Args:
    arr_1: NumPy array of arbitrary shape.
    arr_2: NumPy array of arbitrary shape.

  Returns:
    datum_pair: DatumPairProto object.
  """
  datum_pair = datum_pb2.DatumPairProto()
  datum_pair.first.CopyFrom(ArrayToDatum(arr_1))
  datum_pair.second.CopyFrom(ArrayToDatum(arr_2))
  return datum_pair
def DatumToArray(datum):
  """Converts data saved in DatumProto to NumPy array.

  Args:
    datum: DatumProto object.

  Returns:
    NumPy array of arbitrary shape.

  Raises:
    ValueError: If input DatumProto has neither float_list nor uint32_list.
  """
  if datum.HasField('float_list'):
    return np.array(datum.float_list.value).astype('float32').reshape(
        datum.shape.dim)
  elif datum.HasField('uint32_list'):
    return np.array(datum.uint32_list.value).astype('uint32').reshape(
        datum.shape.dim)
  else:
    raise ValueError('Input DatumProto does not have float_list or uint32_list')
def DatumPairToArrays(datum_pair):
  """Converts data saved in DatumPairProto to NumPy arrays.

  Args:
    datum_pair: DatumPairProto object.

  Returns:
    Two NumPy arrays of arbitrary shape.
  """
  first_datum = DatumToArray(datum_pair.first)
  second_datum = DatumToArray(datum_pair.second)
  return first_datum, second_datum
def SerializeToString(arr):
  """Converts NumPy array to serialized DatumProto.

  Args:
    arr: NumPy array of arbitrary shape.

  Returns:
    Serialized DatumProto string.
@@ -70,20 +128,48 @@ def SerializeToString(arr):
  return datum.SerializeToString()
def SerializePairToString(arr_1, arr_2):
  """Converts pair of NumPy arrays to serialized DatumPairProto.

  Args:
    arr_1: NumPy array of arbitrary shape.
    arr_2: NumPy array of arbitrary shape.

  Returns:
    Serialized DatumPairProto string.
  """
  datum_pair = ArraysToDatumPair(arr_1, arr_2)
  return datum_pair.SerializeToString()
def ParseFromString(string):
  """Converts serialized DatumProto string to NumPy array.

  Args:
    string: Serialized DatumProto string.

  Returns:
    NumPy array.
  """
  datum = datum_pb2.DatumProto()
  datum.ParseFromString(string)
  return DatumToArray(datum)
def ParsePairFromString(string):
  """Converts serialized DatumPairProto string to NumPy arrays.

  Args:
    string: Serialized DatumPairProto string.

  Returns:
    Two NumPy arrays.
  """
  datum_pair = datum_pb2.DatumPairProto()
  datum_pair.ParseFromString(string)
  return DatumPairToArrays(datum_pair)
def ReadFromFile(file_path):
  """Helper function to load data from a DatumProto format in a file.
@@ -91,19 +177,45 @@ def ReadFromFile(file_path):
    file_path: Path to file containing data.

  Returns:
    data: NumPy array.
  """
  with tf.gfile.GFile(file_path, 'rb') as f:
    return ParseFromString(f.read())
def ReadPairFromFile(file_path):
  """Helper function to load data from a DatumPairProto format in a file.

  Args:
    file_path: Path to file containing data.

  Returns:
    Two NumPy arrays.
  """
  with tf.gfile.GFile(file_path, 'rb') as f:
    return ParsePairFromString(f.read())
def WriteToFile(data, file_path):
  """Helper function to write data to a file in DatumProto format.

  Args:
    data: NumPy array.
    file_path: Path to file that will be written.
  """
  serialized_data = SerializeToString(data)
  with tf.gfile.GFile(file_path, 'w') as f:
    f.write(serialized_data)
def WritePairToFile(arr_1, arr_2, file_path):
  """Helper function to write pair of arrays to a file in DatumPairProto
  format.

  Args:
    arr_1: NumPy array of arbitrary shape.
    arr_2: NumPy array of arbitrary shape.
    file_path: Path to file that will be written.
  """
  serialized_data = SerializePairToString(arr_1, arr_2)
  with tf.gfile.GFile(file_path, 'w') as f:
    f.write(serialized_data)
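For illustration, a round-trip through the new pair helpers might look as follows (a sketch; the file path is hypothetical, and the uint32 array matches how extract_aggregation.py below stores visual word ids):

import numpy as np
from delf import datum_io

descriptors = np.zeros([4, 128], dtype='float32')
visual_words = np.array([10, 25, 25, 831], dtype='uint32')
datum_io.WritePairToFile(descriptors, visual_words, '/tmp/example.asmk')
out_1, out_2 = datum_io.ReadPairFromFile('/tmp/example.asmk')
assert out_1.shape == (4, 128) and out_2.dtype == np.uint32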
@@ -40,24 +40,32 @@ class DatumIoTest(tf.test.TestCase):
    retrieved_data = datum_io.ParseFromString(serialized)
    self.assertTrue(np.array_equal(original_data, retrieved_data))

  # This test covers the following functions: ArrayToDatum, SerializeToString,
  # ParseFromString, DatumToArray.
  def testConversion2dWithType(self):
    self.Conversion2dTestWithType(np.uint16)
    self.Conversion2dTestWithType(np.uint32)
    self.Conversion2dTestWithType(np.uint64)
    self.Conversion2dTestWithType(np.float16)
    self.Conversion2dTestWithType(np.float32)
    self.Conversion2dTestWithType(np.float64)

  # This test covers the following functions: ArrayToDatum, SerializeToString,
  # ParseFromString, DatumToArray.
  def testConversion3dWithType(self):
    self.Conversion3dTestWithType(np.uint16)
    self.Conversion3dTestWithType(np.uint32)
    self.Conversion3dTestWithType(np.uint64)
    self.Conversion3dTestWithType(np.float16)
    self.Conversion3dTestWithType(np.float32)
    self.Conversion3dTestWithType(np.float64)

  def testConversionWithUnsupportedType(self):
    with self.assertRaisesRegex(ValueError, 'Unsupported array type'):
      self.Conversion3dTestWithType(int)

  # This test covers the following functions: ArrayToDatum, SerializeToString,
  # WriteToFile, ReadFromFile, ParseFromString, DatumToArray.
  def testWriteAndReadToFile(self):
    data = np.array([[[-1.0, 125.0, -2.5], [14.5, 3.5, 0.0]],
                     [[20.0, 0.0, 30.0], [25.5, 36.0, 42.0]]])
@@ -67,6 +75,22 @@ class DatumIoTest(tf.test.TestCase):
    data_read = datum_io.ReadFromFile(filename)
    self.assertAllEqual(data_read, data)

  # This test covers the following functions: ArraysToDatumPair,
  # SerializePairToString, WritePairToFile, ReadPairFromFile,
  # ParsePairFromString, DatumPairToArrays.
  def testWriteAndReadPairToFile(self):
    data_1 = np.array([[[-1.0, 125.0, -2.5], [14.5, 3.5, 0.0]],
                       [[20.0, 0.0, 30.0], [25.5, 36.0, 42.0]]])
    data_2 = np.array(
        [[[255, 0, 5], [10, 300, 0]], [[20, 1, 100], [255, 360, 420]]],
        dtype='uint32')
    tmpdir = tf.test.get_temp_dir()
    filename = os.path.join(tmpdir, 'test.datum_pair')
    datum_io.WritePairToFile(data_1, data_2, filename)
    data_read_1, data_read_2 = datum_io.ReadPairFromFile(filename)

    self.assertAllEqual(data_read_1, data_1)
    self.assertAllEqual(data_read_2, data_2)


if __name__ == '__main__':
  tf.test.main()
@@ -38,7 +38,7 @@ import tensorflow as tf
from tensorflow.python.platform import app
from delf import feature_io
from delf.python.detect_to_retrieve import dataset

cmd_args = None
# Copyright 2019 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Extracts aggregation for images from Revisited Oxford/Paris datasets.
The program checks whether the aggregated representation for an image already
exists, and skips computation for those images.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse
import csv
import os
import sys
import time
import numpy as np
import tensorflow as tf
from google.protobuf import text_format
from tensorflow.python.platform import app
from delf import aggregation_config_pb2
from delf import datum_io
from delf import feature_aggregation_extractor
from delf import feature_io
from delf.python.detect_to_retrieve import dataset
cmd_args = None
# Aliases for aggregation types.
_VLAD = aggregation_config_pb2.AggregationConfig.VLAD
_ASMK = aggregation_config_pb2.AggregationConfig.ASMK
_ASMK_STAR = aggregation_config_pb2.AggregationConfig.ASMK_STAR
# Extensions.
_DELF_EXTENSION = '.delf'
_VLAD_EXTENSION_SUFFIX = 'vlad'
_ASMK_EXTENSION_SUFFIX = 'asmk'
_ASMK_STAR_EXTENSION_SUFFIX = 'asmk_star'
# Interval (in number of images) at which extraction progress is reported.
_STATUS_CHECK_ITERATIONS = 50
def _ReadMappingBasenameToBoxNames(input_path, index_image_names):
  """Reads mapping from image name to DELF file names for each box.

  Args:
    input_path: Path to CSV file containing mapping.
    index_image_names: List containing index image names, in order, for the
      dataset under consideration.

  Returns:
    images_to_box_feature_files: Dict. key=string (image name); value=list of
      strings (file names containing DELF features for boxes).
  """
  images_to_box_feature_files = {}
  with tf.gfile.GFile(input_path, 'r') as f:
    reader = csv.DictReader(f)
    for row in reader:
      index_image_name = index_image_names[int(row['index_image_id'])]
      if index_image_name not in images_to_box_feature_files:
        images_to_box_feature_files[index_image_name] = []
      images_to_box_feature_files[index_image_name].append(row['name'])

  return images_to_box_feature_files
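# For illustration (hypothetical contents; only the `name` and
# `index_image_id` columns are read by the function above), the mapping CSV
# could look like:
#
#   name,index_image_id,box_id
#   all_souls_000013_0,0,0
#   all_souls_000013_1,0,1
#
# which maps the name of index image 0 to the DELF feature files
# ['all_souls_000013_0', 'all_souls_000013_1'].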
def main(argv):
  if len(argv) > 1:
    raise RuntimeError('Too many command-line arguments.')

  # Read list of images from dataset file.
  print('Reading list of images from dataset file...')
  query_list, index_list, _ = dataset.ReadDatasetFile(
      cmd_args.dataset_file_path)
  if cmd_args.use_query_images:
    image_list = query_list
  else:
    image_list = index_list
  num_images = len(image_list)
  print('done! Found %d images' % num_images)

  # Parse AggregationConfig proto, and select output extension.
  config = aggregation_config_pb2.AggregationConfig()
  with tf.gfile.GFile(cmd_args.aggregation_config_path, 'r') as f:
    text_format.Merge(f.read(), config)
  output_extension = '.'
  if config.use_regional_aggregation:
    output_extension += 'r'
  if config.aggregation_type == _VLAD:
    output_extension += _VLAD_EXTENSION_SUFFIX
  elif config.aggregation_type == _ASMK:
    output_extension += _ASMK_EXTENSION_SUFFIX
  elif config.aggregation_type == _ASMK_STAR:
    output_extension += _ASMK_STAR_EXTENSION_SUFFIX
  else:
    raise ValueError('Invalid aggregation type: %d' % config.aggregation_type)

  # Read index mapping path, if provided.
  if cmd_args.index_mapping_path:
    images_to_box_feature_files = _ReadMappingBasenameToBoxNames(
        cmd_args.index_mapping_path, image_list)

  # Create output directory if necessary.
  if not os.path.exists(cmd_args.output_aggregation_dir):
    os.makedirs(cmd_args.output_aggregation_dir)

  with tf.Session() as sess:
    extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
        sess, config)

    start = time.clock()
    for i in range(num_images):
      if i == 0:
        print('Starting to extract aggregation from images...')
      elif i % _STATUS_CHECK_ITERATIONS == 0:
        elapsed = (time.clock() - start)
        print('Processing image %d out of %d, last %d '
              'images took %f seconds' %
              (i, num_images, _STATUS_CHECK_ITERATIONS, elapsed))
        start = time.clock()

      image_name = image_list[i]

      # Compose output file name; skip extraction for this image if it already
      # exists.
      output_aggregation_filename = os.path.join(
          cmd_args.output_aggregation_dir, image_name + output_extension)
      if tf.io.gfile.exists(output_aggregation_filename):
        print('Skipping %s' % image_name)
        continue

      # Load DELF features.
      if config.use_regional_aggregation:
        if not cmd_args.index_mapping_path:
          raise ValueError(
              'Requested regional aggregation, but index_mapping_path was not '
              'provided')
        descriptors_list = []
        num_features_per_box = []
        for box_feature_file in images_to_box_feature_files[image_name]:
          delf_filename = os.path.join(cmd_args.features_dir,
                                       box_feature_file + _DELF_EXTENSION)
          _, _, box_descriptors, _, _ = feature_io.ReadFromFile(delf_filename)
          # If `box_descriptors` is empty, reshape it such that it can be
          # concatenated with other descriptors.
          if not box_descriptors.shape[0]:
            box_descriptors = np.reshape(box_descriptors,
                                         [0, config.feature_dimensionality])
          descriptors_list.append(box_descriptors)
          num_features_per_box.append(box_descriptors.shape[0])

        descriptors = np.concatenate(descriptors_list)
      else:
        input_delf_filename = os.path.join(cmd_args.features_dir,
                                           image_name + _DELF_EXTENSION)
        _, _, descriptors, _, _ = feature_io.ReadFromFile(input_delf_filename)
        num_features_per_box = None

      # Extract and save aggregation. If using VLAD, only
      # `aggregated_descriptors` needs to be saved.
      (aggregated_descriptors,
       feature_visual_words) = extractor.Extract(descriptors,
                                                 num_features_per_box)
      if config.aggregation_type == _VLAD:
        datum_io.WriteToFile(aggregated_descriptors,
                             output_aggregation_filename)
      else:
        datum_io.WritePairToFile(aggregated_descriptors,
                                 feature_visual_words.astype('uint32'),
                                 output_aggregation_filename)
if __name__ == '__main__':
  parser = argparse.ArgumentParser()
  parser.register('type', 'bool', lambda v: v.lower() == 'true')
  parser.add_argument(
      '--aggregation_config_path',
      type=str,
      default='/tmp/aggregation_config.pbtxt',
      help="""
      Path to AggregationConfig proto text file with configuration to be used
      for extraction.
      """)
  parser.add_argument(
      '--dataset_file_path',
      type=str,
      default='/tmp/gnd_roxford5k.mat',
      help="""
      Dataset file for Revisited Oxford or Paris dataset, in .mat format.
      """)
  parser.add_argument(
      '--use_query_images',
      type='bool',
      default=False,
      help="""
      If True, processes the query images of the dataset. If False, processes
      the database (ie, index) images.
      """)
  parser.add_argument(
      '--features_dir',
      type=str,
      default='/tmp/features',
      help="""
      Directory where image features are located, all in .delf format.
      """)
  parser.add_argument(
      '--index_mapping_path',
      type=str,
      default='',
      help="""
      Optional CSV file which maps each .delf file name to the index image ID
      and detected box ID. If regional aggregation is performed, this should be
      set. Otherwise, this is ignored.
      Usually this file is obtained as an output from the
      `extract_index_boxes_and_features.py` script.
      """)
  parser.add_argument(
      '--output_aggregation_dir',
      type=str,
      default='/tmp/aggregation',
      help="""
      Directory where aggregation output will be written to. Each image's
      features will be written to a file with the same name, and extension
      replaced by one of
      ['.vlad', '.asmk', '.asmk_star', '.rvlad', '.rasmk', '.rasmk_star'].
      """)
  cmd_args, unparsed = parser.parse_known_args()
  app.run(main=main, argv=[sys.argv[0]] + unparsed)
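# Example invocation (a sketch; all paths are hypothetical):
#
#   python extract_aggregation.py \
#     --aggregation_config_path=/tmp/aggregation_config.pbtxt \
#     --dataset_file_path=/tmp/gnd_roxford5k.mat \
#     --features_dir=/tmp/features \
#     --index_mapping_path=/tmp/index_mapping.csv \
#     --output_aggregation_dir=/tmp/aggregation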
@@ -44,7 +44,7 @@ from tensorflow.python.platform import app
from delf import delf_config_pb2
from delf import box_io
from delf import feature_io
from delf.python.detect_to_retrieve import dataset
from delf import extract_boxes
from delf import extract_features
@@ -39,7 +39,7 @@ from google.protobuf import text_format
from tensorflow.python.platform import app
from delf import delf_config_pb2
from delf import feature_io
from delf.python.detect_to_retrieve import dataset
from delf import extract_features

cmd_args = None
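# Config for R-ASMK* extraction: per the AggregationConfig comments above,
# ASMK_STAR combined with use_regional_aggregation true yields R-ASMK*.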
codebook_size: 65536
feature_dimensionality: 128
aggregation_type: ASMK_STAR
use_l2_normalization: false
codebook_path: "parameters/k65536_codebook_tfckpt/codebook"
num_assignments: 1
use_regional_aggregation: true
feature_batch_size: 100
alpha: 3.0
tau: 0.0
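
# Config for (non-regional) ASMK* extraction, as indicated by ASMK_STAR with
# use_regional_aggregation false.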
codebook_size: 65536
feature_dimensionality: 128
aggregation_type: ASMK_STAR
codebook_path: "parameters/k65536_codebook_tfckpt/codebook"
num_assignments: 1
use_regional_aggregation: false
feature_batch_size: 100
# Copyright 2019 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Local feature aggregation extraction.
For more details, please refer to the paper:
"Detect-to-Retrieve: Efficient Regional Aggregation for Image Search",
Proc. CVPR'19 (https://arxiv.org/abs/1812.01584).
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import tensorflow as tf
from delf import aggregation_config_pb2
_NORM_SQUARED_TOLERANCE = 1e-12
# Aliases for aggregation types.
_VLAD = aggregation_config_pb2.AggregationConfig.VLAD
_ASMK = aggregation_config_pb2.AggregationConfig.ASMK
_ASMK_STAR = aggregation_config_pb2.AggregationConfig.ASMK_STAR
class ExtractAggregatedRepresentation(object):
  """Class for extraction of aggregated local feature representation.

  Args:
    sess: TensorFlow session to use.
    aggregation_config: AggregationConfig object defining type of aggregation
      to use.

  Raises:
    ValueError: If aggregation type is invalid.
  """
  def __init__(self, sess, aggregation_config):
    self._sess = sess
    self._codebook_size = aggregation_config.codebook_size
    self._feature_dimensionality = aggregation_config.feature_dimensionality
    self._aggregation_type = aggregation_config.aggregation_type
    self._feature_batch_size = aggregation_config.feature_batch_size

    # Inputs to extraction function.
    self._features = tf.compat.v1.placeholder(tf.float32, [None, None])
    self._num_features_per_region = tf.compat.v1.placeholder(tf.int32, [None])

    # Load codebook into graph.
    codebook = tf.compat.v1.get_variable(
        "codebook",
        shape=[
            aggregation_config.codebook_size,
            aggregation_config.feature_dimensionality
        ])
    tf.compat.v1.train.init_from_checkpoint(
        aggregation_config.codebook_path, {
            tf.contrib.factorization.KMeansClustering.CLUSTER_CENTERS_VAR_NAME:
                codebook
        })

    # Construct extraction graph based on desired options.
    if self._aggregation_type == _VLAD:
      # Feature visual words are unused in the case of VLAD, so just return
      # dummy constant.
      self._feature_visual_words = tf.constant(-1, dtype=tf.int32)
      if aggregation_config.use_regional_aggregation:
        self._aggregated_descriptors = self._ComputeRvlad(
            self._features,
            self._num_features_per_region,
            codebook,
            use_l2_normalization=aggregation_config.use_l2_normalization,
            num_assignments=aggregation_config.num_assignments)
      else:
        self._aggregated_descriptors = self._ComputeVlad(
            self._features,
            codebook,
            use_l2_normalization=aggregation_config.use_l2_normalization,
            num_assignments=aggregation_config.num_assignments)
    elif (self._aggregation_type == _ASMK or
          self._aggregation_type == _ASMK_STAR):
      if aggregation_config.use_regional_aggregation:
        (self._aggregated_descriptors,
         self._feature_visual_words) = self._ComputeRasmk(
             self._features,
             self._num_features_per_region,
             codebook,
             num_assignments=aggregation_config.num_assignments)
      else:
        (self._aggregated_descriptors,
         self._feature_visual_words) = self._ComputeAsmk(
             self._features,
             codebook,
             num_assignments=aggregation_config.num_assignments)
    else:
      raise ValueError("Invalid aggregation type: %d" % self._aggregation_type)

    # Initialize variables in the TF graph.
    sess.run(tf.compat.v1.global_variables_initializer())
  def Extract(self, features, num_features_per_region=None):
    """Extracts aggregated representation.

    Args:
      features: [N, D] float numpy array with N local feature descriptors.
      num_features_per_region: Required only if computing regional aggregated
        representations, otherwise optional. List of number of features per
        region, such that sum(num_features_per_region) = N. It indicates which
        features correspond to each region.

    Returns:
      aggregated_descriptors: 1-D numpy array.
      feature_visual_words: Used only for ASMK/ASMK* aggregation type. 1-D
        numpy array denoting visual words corresponding to the
        `aggregated_descriptors`.

    Raises:
      ValueError: If inputs are misconfigured.
    """
    if num_features_per_region is None:
      # Use dummy value since it is unused.
      num_features_per_region = []
    elif (len(num_features_per_region) and
          sum(num_features_per_region) != features.shape[0]):
      raise ValueError(
          "Incorrect arguments: sum(num_features_per_region) and "
          "features.shape[0] are different: %d vs %d" %
          (sum(num_features_per_region), features.shape[0]))

    aggregated_descriptors, feature_visual_words = self._sess.run(
        [self._aggregated_descriptors, self._feature_visual_words],
        feed_dict={
            self._features: features,
            self._num_features_per_region: num_features_per_region
        })

    # If using ASMK*/RASMK*, binarize the aggregated descriptors.
    if self._aggregation_type == _ASMK_STAR:
      reshaped_aggregated_descriptors = np.reshape(
          aggregated_descriptors, [-1, self._feature_dimensionality])
      packed_descriptors = np.packbits(
          reshaped_aggregated_descriptors > 0, axis=1)
      aggregated_descriptors = np.reshape(packed_descriptors, [-1])

    return aggregated_descriptors, feature_visual_words
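  # Illustrative aside (not part of the original code): for D-dimensional
  # residuals, the binarization above keeps only sign bits and packs them 8
  # per byte, e.g. in NumPy terms:
  #
  #   residuals = np.reshape(aggregated_descriptors, [-1, D])
  #   packed = np.packbits(residuals > 0, axis=1)  # [num_words, ceil(D / 8)]
  #
  # so ASMK* stores roughly D/8 bytes per selected visual word instead of D
  # floats.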
  def _ComputeVlad(self,
                   features,
                   codebook,
                   use_l2_normalization=True,
                   num_assignments=1):
    """Compute VLAD representation.

    Args:
      features: [N, D] float tensor.
      codebook: [K, D] float tensor.
      use_l2_normalization: If False, does not L2-normalize after aggregation.
      num_assignments: Number of visual words to assign a feature to.

    Returns:
      vlad: [K*D] float tensor.
    """

    def _ComputeVladEmptyFeatures():
      """Computes VLAD if `features` is empty.

      Returns:
        [K*D] all-zeros tensor.
      """
      return tf.zeros([self._codebook_size * self._feature_dimensionality],
                      dtype=tf.float32)

    def _ComputeVladNonEmptyFeatures():
      """Computes VLAD if `features` is not empty.

      Returns:
        [K*D] tensor with VLAD descriptor.
      """
      num_features = tf.shape(features)[0]

      # Find nearest visual words for each feature. Possibly batch the local
      # features to avoid OOM.
      if self._feature_batch_size <= 0:
        actual_batch_size = num_features
      else:
        actual_batch_size = self._feature_batch_size

      def _BatchNearestVisualWords(ind, selected_visual_words):
        """Compute nearest neighbor visual words for a batch of features.

        Args:
          ind: Integer index denoting feature.
          selected_visual_words: Partial set of visual words.

        Returns:
          output_ind: Next index.
          output_selected_visual_words: Updated set of visual words, including
            the visual words for the new batch.
        """
        # Handle case of last batch, where there may be fewer than
        # `actual_batch_size` features.
        batch_size_to_use = tf.cond(
            tf.greater(ind + actual_batch_size, num_features),
            true_fn=lambda: num_features - ind,
            false_fn=lambda: actual_batch_size)

        # Denote B = batch_size_to_use.
        # K*B x D.
        tiled_features = tf.reshape(
            tf.tile(
                tf.slice(features, [ind, 0],
                         [batch_size_to_use, self._feature_dimensionality]),
                [1, self._codebook_size]), [-1, self._feature_dimensionality])
        # K*B x D.
        tiled_codebook = tf.reshape(
            tf.tile(tf.reshape(codebook, [1, -1]), [batch_size_to_use, 1]),
            [-1, self._feature_dimensionality])
        # B x K.
        squared_distances = tf.reshape(
            tf.reduce_sum(
                tf.math.squared_difference(tiled_features, tiled_codebook),
                axis=1), [batch_size_to_use, self._codebook_size])
        # B x K.
        nearest_visual_words = tf.argsort(squared_distances)
        # B x num_assignments.
        batch_selected_visual_words = tf.slice(
            nearest_visual_words, [0, 0], [batch_size_to_use, num_assignments])
        selected_visual_words = tf.concat(
            [selected_visual_words, batch_selected_visual_words], axis=0)
        return ind + batch_size_to_use, selected_visual_words

      ind_batch = tf.constant(0, dtype=tf.int32)
      keep_going = lambda j, selected_visual_words: tf.less(j, num_features)
      selected_visual_words = tf.zeros([0, num_assignments], dtype=tf.int32)
      _, selected_visual_words = tf.while_loop(
          cond=keep_going,
          body=_BatchNearestVisualWords,
          loop_vars=[ind_batch, selected_visual_words],
          shape_invariants=[
              ind_batch.get_shape(),
              tf.TensorShape([None, num_assignments])
          ],
          parallel_iterations=1,
          back_prop=False)

      # Helper function to collect residuals for relevant visual words.
      def _ConstructVladFromAssignments(ind, vlad):
        """Add contributions of a feature to a VLAD descriptor.

        Args:
          ind: Integer index denoting feature.
          vlad: Partial VLAD descriptor.

        Returns:
          output_ind: Next index (ie, ind+1).
          output_vlad: VLAD descriptor updated to take into account
            contribution from ind-th feature.
        """
        return ind + 1, tf.compat.v1.tensor_scatter_add(
            vlad, tf.expand_dims(selected_visual_words[ind], axis=1),
            tf.tile(
                tf.expand_dims(features[ind], axis=0), [num_assignments, 1]) -
            tf.gather(codebook, selected_visual_words[ind]))

      ind_vlad = tf.constant(0, dtype=tf.int32)
      keep_going = lambda j, vlad: tf.less(j, num_features)
      vlad = tf.zeros([self._codebook_size, self._feature_dimensionality],
                      dtype=tf.float32)
      _, vlad = tf.while_loop(
          cond=keep_going,
          body=_ConstructVladFromAssignments,
          loop_vars=[ind_vlad, vlad],
          back_prop=False)

      vlad = tf.reshape(vlad,
                        [self._codebook_size * self._feature_dimensionality])
      if use_l2_normalization:
        vlad = tf.math.l2_normalize(vlad, epsilon=_NORM_SQUARED_TOLERANCE)

      return vlad

    return tf.cond(
        tf.greater(tf.size(features), 0),
        true_fn=_ComputeVladNonEmptyFeatures,
        false_fn=_ComputeVladEmptyFeatures)
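  # In equation form (consistent with the code above): with NN(x_i) denoting
  # the `num_assignments` nearest codewords of feature x_i, the unnormalized
  # VLAD residual for codeword c_k is
  #
  #   v_k = sum_{i : c_k in NN(x_i)} (x_i - c_k),
  #
  # and the output concatenates v_1, ..., v_K into a [K*D] vector, optionally
  # L2-normalized.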
  def _ComputeRvlad(self,
                    features,
                    num_features_per_region,
                    codebook,
                    use_l2_normalization=False,
                    num_assignments=1):
    """Compute R-VLAD representation.

    Args:
      features: [N, D] float tensor.
      num_features_per_region: [R] int tensor. Contains number of features per
        region, such that sum(num_features_per_region) = N. It indicates which
        features correspond to each region.
      codebook: [K, D] float tensor.
      use_l2_normalization: If True, performs L2-normalization after regional
        aggregation; if False (default), performs componentwise division by R
        after regional aggregation.
      num_assignments: Number of visual words to assign a feature to.

    Returns:
      rvlad: [K*D] float tensor.
    """

    def _ComputeRvladEmptyRegions():
      """Computes R-VLAD if `num_features_per_region` is empty.

      Returns:
        [K*D] all-zeros tensor.
      """
      return tf.zeros([self._codebook_size * self._feature_dimensionality],
                      dtype=tf.float32)

    def _ComputeRvladNonEmptyRegions():
      """Computes R-VLAD if `num_features_per_region` is not empty.

      Returns:
        [K*D] tensor with R-VLAD descriptor.
      """

      # Helper function to compose initial R-VLAD from image regions.
      def _ConstructRvladFromVlad(ind, rvlad):
        """Add contributions from different regions into R-VLAD.

        Args:
          ind: Integer index denoting region.
          rvlad: Partial R-VLAD descriptor.

        Returns:
          output_ind: Next index (ie, ind+1).
          output_rvlad: R-VLAD descriptor updated to take into account
            contribution from ind-th region.
        """
        return ind + 1, rvlad + self._ComputeVlad(
            tf.slice(
                features, [tf.reduce_sum(num_features_per_region[:ind]), 0],
                [num_features_per_region[ind], self._feature_dimensionality]),
            codebook,
            num_assignments=num_assignments)

      i = tf.constant(0, dtype=tf.int32)
      num_regions = tf.shape(num_features_per_region)[0]
      keep_going = lambda j, rvlad: tf.less(j, num_regions)
      rvlad = tf.zeros([self._codebook_size * self._feature_dimensionality],
                       dtype=tf.float32)
      _, rvlad = tf.while_loop(
          cond=keep_going,
          body=_ConstructRvladFromVlad,
          loop_vars=[i, rvlad],
          back_prop=False,
          parallel_iterations=1)

      if use_l2_normalization:
        rvlad = tf.math.l2_normalize(rvlad, epsilon=_NORM_SQUARED_TOLERANCE)
      else:
        rvlad /= tf.cast(num_regions, dtype=tf.float32)

      return rvlad

    return tf.cond(
        tf.greater(tf.size(num_features_per_region), 0),
        true_fn=_ComputeRvladNonEmptyRegions,
        false_fn=_ComputeRvladEmptyRegions)
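  # Equivalently (matching the loop above): with default settings, R-VLAD is
  # the average of the per-region unnormalized VLAD descriptors,
  #
  #   rvlad = (1/R) * sum_{r=1}^{R} vlad(region_r),
  #
  # with L2 normalization replacing the division by R when
  # `use_l2_normalization` is true.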
  def _PerCentroidNormalization(self, unnormalized_vector):
    """Perform per-centroid normalization.

    Args:
      unnormalized_vector: [KxD] float tensor.

    Returns:
      per_centroid_normalized_vector: [KxD] float tensor, with normalized
        aggregated residuals. Some residuals may be all-zero.
      visual_words: Int tensor containing indices of visual words which are
        present for the set of features.
    """
    unnormalized_vector = tf.reshape(
        unnormalized_vector,
        [self._codebook_size, self._feature_dimensionality])
    per_centroid_norms = tf.norm(unnormalized_vector, axis=1)

    visual_words = tf.reshape(
        tf.where(
            tf.greater(per_centroid_norms, tf.sqrt(_NORM_SQUARED_TOLERANCE))),
        [-1])

    per_centroid_normalized_vector = tf.math.l2_normalize(
        unnormalized_vector, axis=1, epsilon=_NORM_SQUARED_TOLERANCE)

    return per_centroid_normalized_vector, visual_words
  def _ComputeAsmk(self, features, codebook, num_assignments=1):
    """Compute ASMK representation.

    Args:
      features: [N, D] float tensor.
      codebook: [K, D] float tensor.
      num_assignments: Number of visual words to assign a feature to.

    Returns:
      normalized_residuals: 1-dimensional float tensor with concatenated
        residuals which are non-zero. Note that the dimensionality is
        input-dependent.
      visual_words: 1-dimensional int tensor of sorted visual word ids.
        Dimensionality is shape(normalized_residuals)[0] / D.
    """
    unnormalized_vlad = self._ComputeVlad(
        features,
        codebook,
        use_l2_normalization=False,
        num_assignments=num_assignments)

    per_centroid_normalized_vlad, visual_words = self._PerCentroidNormalization(
        unnormalized_vlad)

    normalized_residuals = tf.reshape(
        tf.gather(per_centroid_normalized_vlad, visual_words),
        [tf.shape(visual_words)[0] * self._feature_dimensionality])

    return normalized_residuals, visual_words
  def _ComputeRasmk(self,
                    features,
                    num_features_per_region,
                    codebook,
                    num_assignments=1):
    """Compute R-ASMK representation.

    Args:
      features: [N, D] float tensor.
      num_features_per_region: [R] int tensor. Contains number of features per
        region, such that sum(num_features_per_region) = N. It indicates which
        features correspond to each region.
      codebook: [K, D] float tensor.
      num_assignments: Number of visual words to assign a feature to.

    Returns:
      normalized_residuals: 1-dimensional float tensor with concatenated
        residuals which are non-zero. Note that the dimensionality is
        input-dependent.
      visual_words: 1-dimensional int tensor of sorted visual word ids.
        Dimensionality is shape(normalized_residuals)[0] / D.
    """
    unnormalized_rvlad = self._ComputeRvlad(
        features,
        num_features_per_region,
        codebook,
        use_l2_normalization=False,
        num_assignments=num_assignments)

    (per_centroid_normalized_rvlad,
     visual_words) = self._PerCentroidNormalization(unnormalized_rvlad)

    normalized_residuals = tf.reshape(
        tf.gather(per_centroid_normalized_rvlad, visual_words),
        [tf.shape(visual_words)[0] * self._feature_dimensionality])

    return normalized_residuals, visual_words
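Usage of this class is session-based, in TF1 style. A minimal sketch (assuming a codebook checkpoint exists at a hypothetical path, with codebook_size and feature_dimensionality matching it), mirroring the tests below:

import numpy as np
import tensorflow as tf

from delf import aggregation_config_pb2
from delf import feature_aggregation_extractor

config = aggregation_config_pb2.AggregationConfig()
config.codebook_size = 5
config.feature_dimensionality = 2
config.aggregation_type = aggregation_config_pb2.AggregationConfig.ASMK
config.codebook_path = '/tmp/test_codebook'  # Hypothetical checkpoint.
config.num_assignments = 1

features = np.array([[1.0, 0.0], [-1.0, 0.0]], dtype='float32')
with tf.Graph().as_default(), tf.Session() as sess:
  extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
      sess, config)
  residuals, visual_words = extractor.Extract(features)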
# Copyright 2019 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for DELF feature aggregation."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import numpy as np
import tensorflow as tf
from delf import aggregation_config_pb2
from delf import feature_aggregation_extractor
class FeatureAggregationTest(tf.test.TestCase):

  def _CreateCodebook(self, checkpoint_path):
    """Creates codebook used in tests.

    Args:
      checkpoint_path: Directory where codebook is saved to.
    """
    with tf.Graph().as_default() as g, self.session(graph=g) as sess:
      codebook = tf.Variable(
          [[0.5, 0.5], [0.0, 0.0], [1.0, 0.0], [-0.5, -0.5], [0.0, 1.0]],
          name='clusters')
      saver = tf.compat.v1.train.Saver([codebook])
      sess.run(tf.compat.v1.global_variables_initializer())
      saver.save(sess, checkpoint_path)

  def setUp(self):
    self._codebook_path = os.path.join(tf.compat.v1.test.get_temp_dir(),
                                       'test_codebook')
    self._CreateCodebook(self._codebook_path)
  def testComputeNormalizedVladWorks(self):
    # Construct inputs.
    # 3 2-D features.
    features = np.array([[1.0, 0.0], [-1.0, 0.0], [1.0, 2.0]], dtype=float)
    config = aggregation_config_pb2.AggregationConfig()
    config.codebook_size = 5
    config.feature_dimensionality = 2
    config.aggregation_type = aggregation_config_pb2.AggregationConfig.VLAD
    config.use_l2_normalization = True
    config.codebook_path = self._codebook_path
    config.num_assignments = 1

    # Run tested function.
    with tf.Graph().as_default() as g, self.session(graph=g) as sess:
      extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
          sess, config)
      vlad, extra_output = extractor.Extract(features)

    # Define expected results.
    exp_vlad = [
        0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.316228, 0.316228, 0.632456, 0.632456
    ]
    exp_extra_output = -1

    # Compare actual and expected results.
    self.assertAllClose(vlad, exp_vlad)
    self.assertAllEqual(extra_output, exp_extra_output)

  def testComputeNormalizedVladWithBatchingWorks(self):
    # Construct inputs.
    # 3 2-D features.
    features = np.array([[1.0, 0.0], [-1.0, 0.0], [1.0, 2.0]], dtype=float)
    config = aggregation_config_pb2.AggregationConfig()
    config.codebook_size = 5
    config.feature_dimensionality = 2
    config.aggregation_type = aggregation_config_pb2.AggregationConfig.VLAD
    config.use_l2_normalization = True
    config.codebook_path = self._codebook_path
    config.num_assignments = 1
    config.feature_batch_size = 2

    # Run tested function.
    with tf.Graph().as_default() as g, self.session(graph=g) as sess:
      extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
          sess, config)
      vlad, extra_output = extractor.Extract(features)

    # Define expected results.
    exp_vlad = [
        0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.316228, 0.316228, 0.632456, 0.632456
    ]
    exp_extra_output = -1

    # Compare actual and expected results.
    self.assertAllClose(vlad, exp_vlad)
    self.assertAllEqual(extra_output, exp_extra_output)

  def testComputeUnnormalizedVladWorks(self):
    # Construct inputs.
    # 3 2-D features.
    features = np.array([[1.0, 0.0], [-1.0, 0.0], [1.0, 2.0]], dtype=float)
    config = aggregation_config_pb2.AggregationConfig()
    config.codebook_size = 5
    config.feature_dimensionality = 2
    config.aggregation_type = aggregation_config_pb2.AggregationConfig.VLAD
    config.use_l2_normalization = False
    config.codebook_path = self._codebook_path
    config.num_assignments = 1

    # Run tested function.
    with tf.Graph().as_default() as g, self.session(graph=g) as sess:
      extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
          sess, config)
      vlad, extra_output = extractor.Extract(features)

    # Define expected results.
    exp_vlad = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.5, 0.5, 1.0, 1.0]
    exp_extra_output = -1

    # Compare actual and expected results.
    self.assertAllEqual(vlad, exp_vlad)
    self.assertAllEqual(extra_output, exp_extra_output)

  def testComputeUnnormalizedVladMultipleAssignmentWorks(self):
    # Construct inputs.
    # 3 2-D features.
    features = np.array([[1.0, 0.0], [-1.0, 0.0], [1.0, 2.0]], dtype=float)
    config = aggregation_config_pb2.AggregationConfig()
    config.codebook_size = 5
    config.feature_dimensionality = 2
    config.aggregation_type = aggregation_config_pb2.AggregationConfig.VLAD
    config.use_l2_normalization = False
    config.codebook_path = self._codebook_path
    config.num_assignments = 3

    # Run tested function.
    with tf.Graph().as_default() as g, self.session(graph=g) as sess:
      extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
          sess, config)
      vlad, extra_output = extractor.Extract(features)

    # Define expected results.
    exp_vlad = [1.0, 1.0, 0.0, 0.0, 0.0, 2.0, -0.5, 0.5, 0.0, 0.0]
    exp_extra_output = -1

    # Compare actual and expected results.
    self.assertAllEqual(vlad, exp_vlad)
    self.assertAllEqual(extra_output, exp_extra_output)
  def testComputeVladEmptyFeaturesWorks(self):
    # Construct inputs.
    # Empty feature array.
    features = np.array([[]])
    config = aggregation_config_pb2.AggregationConfig()
    config.codebook_size = 5
    config.feature_dimensionality = 2
    config.aggregation_type = aggregation_config_pb2.AggregationConfig.VLAD
    config.codebook_path = self._codebook_path

    # Run tested function.
    with tf.Graph().as_default() as g, self.session(graph=g) as sess:
      extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
          sess, config)
      vlad, extra_output = extractor.Extract(features)

    # Define expected results.
    exp_vlad = np.zeros([10], dtype=float)
    exp_extra_output = -1

    # Compare actual and expected results.
    self.assertAllEqual(vlad, exp_vlad)
    self.assertAllEqual(extra_output, exp_extra_output)

  def testComputeUnnormalizedRvladWorks(self):
    # Construct inputs.
    # 4 2-D features: 3 in first region, 1 in second region.
    features = np.array([[1.0, 0.0], [-1.0, 0.0], [1.0, 2.0], [0.0, 2.0]],
                        dtype=float)
    num_features_per_region = np.array([3, 1])
    config = aggregation_config_pb2.AggregationConfig()
    config.codebook_size = 5
    config.feature_dimensionality = 2
    config.aggregation_type = aggregation_config_pb2.AggregationConfig.VLAD
    config.use_l2_normalization = False
    config.codebook_path = self._codebook_path
    config.num_assignments = 1
    config.use_regional_aggregation = True

    # Run tested function.
    with tf.Graph().as_default() as g, self.session(graph=g) as sess:
      extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
          sess, config)
      rvlad, extra_output = extractor.Extract(features,
                                              num_features_per_region)

    # Define expected results.
    exp_rvlad = [
        0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.158114, 0.158114, 0.316228, 0.816228
    ]
    exp_extra_output = -1

    # Compare actual and expected results.
    self.assertAllClose(rvlad, exp_rvlad)
    self.assertAllEqual(extra_output, exp_extra_output)

  def testComputeNormalizedRvladWorks(self):
    # Construct inputs.
    # 4 2-D features: 3 in first region, 1 in second region.
    features = np.array([[1.0, 0.0], [-1.0, 0.0], [1.0, 2.0], [0.0, 2.0]],
                        dtype=float)
    num_features_per_region = np.array([3, 1])
    config = aggregation_config_pb2.AggregationConfig()
    config.codebook_size = 5
    config.feature_dimensionality = 2
    config.aggregation_type = aggregation_config_pb2.AggregationConfig.VLAD
    config.use_l2_normalization = True
    config.codebook_path = self._codebook_path
    config.num_assignments = 1
    config.use_regional_aggregation = True

    # Run tested function.
    with tf.Graph().as_default() as g, self.session(graph=g) as sess:
      extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
          sess, config)
      rvlad, extra_output = extractor.Extract(features,
                                              num_features_per_region)

    # Define expected results.
    exp_rvlad = [
        0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.175011, 0.175011, 0.350021, 0.903453
    ]
    exp_extra_output = -1

    # Compare actual and expected results.
    self.assertAllClose(rvlad, exp_rvlad)
    self.assertAllEqual(extra_output, exp_extra_output)

  def testComputeRvladEmptyRegionsWorks(self):
    # Construct inputs.
    # Empty feature array.
    features = np.array([[]])
    num_features_per_region = np.array([])
    config = aggregation_config_pb2.AggregationConfig()
    config.codebook_size = 5
    config.feature_dimensionality = 2
    config.aggregation_type = aggregation_config_pb2.AggregationConfig.VLAD
    config.codebook_path = self._codebook_path
    config.use_regional_aggregation = True

    # Run tested function.
    with tf.Graph().as_default() as g, self.session(graph=g) as sess:
      extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
          sess, config)
      rvlad, extra_output = extractor.Extract(features,
                                              num_features_per_region)

    # Define expected results.
    exp_rvlad = np.zeros([10], dtype=float)
    exp_extra_output = -1

    # Compare actual and expected results.
    self.assertAllEqual(rvlad, exp_rvlad)
    self.assertAllEqual(extra_output, exp_extra_output)
  def testComputeUnnormalizedRvladSomeEmptyRegionsWorks(self):
    # Construct inputs.
    # 4 2-D features: 0 in first region, 3 in second region, 0 in third region,
    # 1 in fourth region.
    features = np.array([[1.0, 0.0], [-1.0, 0.0], [1.0, 2.0], [0.0, 2.0]],
                        dtype=float)
    num_features_per_region = np.array([0, 3, 0, 1])
    config = aggregation_config_pb2.AggregationConfig()
    config.codebook_size = 5
    config.feature_dimensionality = 2
    config.aggregation_type = aggregation_config_pb2.AggregationConfig.VLAD
    config.use_l2_normalization = False
    config.codebook_path = self._codebook_path
    config.num_assignments = 1
    config.use_regional_aggregation = True

    # Run tested function.
    with tf.Graph().as_default() as g, self.session(graph=g) as sess:
      extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
          sess, config)
      rvlad, extra_output = extractor.Extract(features,
                                              num_features_per_region)

    # Define expected results.
    exp_rvlad = [
        0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.079057, 0.079057, 0.158114, 0.408114
    ]
    exp_extra_output = -1

    # Compare actual and expected results.
    self.assertAllClose(rvlad, exp_rvlad)
    self.assertAllEqual(extra_output, exp_extra_output)

  def testComputeNormalizedRvladSomeEmptyRegionsWorks(self):
    # Construct inputs.
    # 4 2-D features: 0 in first region, 3 in second region, 0 in third region,
    # 1 in fourth region.
    features = np.array([[1.0, 0.0], [-1.0, 0.0], [1.0, 2.0], [0.0, 2.0]],
                        dtype=float)
    num_features_per_region = np.array([0, 3, 0, 1])
    config = aggregation_config_pb2.AggregationConfig()
    config.codebook_size = 5
    config.feature_dimensionality = 2
    config.aggregation_type = aggregation_config_pb2.AggregationConfig.VLAD
    config.use_l2_normalization = True
    config.codebook_path = self._codebook_path
    config.num_assignments = 1
    config.use_regional_aggregation = True

    # Run tested function.
    with tf.Graph().as_default() as g, self.session(graph=g) as sess:
      extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
          sess, config)
      rvlad, extra_output = extractor.Extract(features,
                                              num_features_per_region)

    # Define expected results.
    exp_rvlad = [
        0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.175011, 0.175011, 0.350021, 0.903453
    ]
    exp_extra_output = -1

    # Compare actual and expected results.
    self.assertAllClose(rvlad, exp_rvlad)
    self.assertAllEqual(extra_output, exp_extra_output)

  def testComputeRvladMisconfiguredFeatures(self):
    # Construct inputs.
    # 4 2-D features: 3 in first region, 1 in second region.
    features = np.array([[1.0, 0.0], [-1.0, 0.0], [1.0, 2.0], [0.0, 2.0]],
                        dtype=float)
    # Misconfigured number of features; there are only 4 features, but
    # sum(num_features_per_region) = 5.
    num_features_per_region = np.array([3, 2])
    config = aggregation_config_pb2.AggregationConfig()
    config.codebook_size = 5
    config.feature_dimensionality = 2
    config.aggregation_type = aggregation_config_pb2.AggregationConfig.VLAD
    config.codebook_path = self._codebook_path
    config.use_regional_aggregation = True

    # Run tested function.
    with tf.Graph().as_default() as g, self.session(graph=g) as sess:
      extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
          sess, config)
      with self.assertRaisesRegex(
          ValueError,
          r'Incorrect arguments: sum\(num_features_per_region\) and '
          r'features.shape\[0\] are different'):
        extractor.Extract(features, num_features_per_region)
  def testComputeAsmkWorks(self):
    # Construct inputs.
    # 3 2-D features.
    features = np.array([[1.0, 0.0], [-1.0, 0.0], [1.0, 2.0]], dtype=float)
    config = aggregation_config_pb2.AggregationConfig()
    config.codebook_size = 5
    config.feature_dimensionality = 2
    config.aggregation_type = aggregation_config_pb2.AggregationConfig.ASMK
    config.codebook_path = self._codebook_path
    config.num_assignments = 1

    # Run tested function.
    with tf.Graph().as_default() as g, self.session(graph=g) as sess:
      extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
          sess, config)
      asmk, visual_words = extractor.Extract(features)

    # Define expected results.
    exp_asmk = [-0.707107, 0.707107, 0.707107, 0.707107]
    exp_visual_words = [3, 4]

    # Compare actual and expected results.
    self.assertAllClose(asmk, exp_asmk)
    self.assertAllEqual(visual_words, exp_visual_words)

  def testComputeAsmkStarWorks(self):
    # Construct inputs.
    # 3 2-D features.
    features = np.array([[1.0, 0.0], [-1.0, 0.0], [1.0, 2.0]], dtype=float)
    config = aggregation_config_pb2.AggregationConfig()
    config.codebook_size = 5
    config.feature_dimensionality = 2
    config.aggregation_type = aggregation_config_pb2.AggregationConfig.ASMK_STAR
    config.codebook_path = self._codebook_path
    config.num_assignments = 1

    # Run tested function.
    with tf.Graph().as_default() as g, self.session(graph=g) as sess:
      extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
          sess, config)
      asmk_star, visual_words = extractor.Extract(features)

    # Define expected results.
    exp_asmk_star = [64, 192]
    exp_visual_words = [3, 4]

    # Compare actual and expected results.
    self.assertAllEqual(asmk_star, exp_asmk_star)
    self.assertAllEqual(visual_words, exp_visual_words)

  def testComputeAsmkMultipleAssignmentWorks(self):
    # Construct inputs.
    # 3 2-D features.
    features = np.array([[1.0, 0.0], [-1.0, 0.0], [1.0, 2.0]], dtype=float)
    config = aggregation_config_pb2.AggregationConfig()
    config.codebook_size = 5
    config.feature_dimensionality = 2
    config.aggregation_type = aggregation_config_pb2.AggregationConfig.ASMK
    config.codebook_path = self._codebook_path
    config.num_assignments = 3

    # Run tested function.
    with tf.Graph().as_default() as g, self.session(graph=g) as sess:
      extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
          sess, config)
      asmk, visual_words = extractor.Extract(features)

    # Define expected results.
    exp_asmk = [0.707107, 0.707107, 0.0, 1.0, -0.707107, 0.707107]
    exp_visual_words = [0, 2, 3]

    # Compare actual and expected results.
    self.assertAllClose(asmk, exp_asmk)
    self.assertAllEqual(visual_words, exp_visual_words)

  def testComputeRasmkWorks(self):
    # Construct inputs.
    # 4 2-D features: 3 in first region, 1 in second region.
    features = np.array([[1.0, 0.0], [-1.0, 0.0], [1.0, 2.0], [0.0, 2.0]],
                        dtype=float)
    num_features_per_region = np.array([3, 1])
    config = aggregation_config_pb2.AggregationConfig()
    config.codebook_size = 5
    config.feature_dimensionality = 2
    config.aggregation_type = aggregation_config_pb2.AggregationConfig.ASMK
    config.codebook_path = self._codebook_path
    config.num_assignments = 1
    config.use_regional_aggregation = True

    # Run tested function.
    with tf.Graph().as_default() as g, self.session(graph=g) as sess:
      extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
          sess, config)
      rasmk, visual_words = extractor.Extract(features,
                                              num_features_per_region)

    # Define expected results.
    exp_rasmk = [-0.707107, 0.707107, 0.361261, 0.932465]
    exp_visual_words = [3, 4]

    # Compare actual and expected results.
    self.assertAllClose(rasmk, exp_rasmk)
    self.assertAllEqual(visual_words, exp_visual_words)
  def testComputeRasmkStarWorks(self):
    # Construct inputs.
    # 4 2-D features: 3 in first region, 1 in second region.
    features = np.array([[1.0, 0.0], [-1.0, 0.0], [1.0, 2.0], [0.0, 2.0]],
                        dtype=float)
    num_features_per_region = np.array([3, 1])
    config = aggregation_config_pb2.AggregationConfig()
    config.codebook_size = 5
    config.feature_dimensionality = 2
    config.aggregation_type = aggregation_config_pb2.AggregationConfig.ASMK_STAR
    config.codebook_path = self._codebook_path
    config.num_assignments = 1
    config.use_regional_aggregation = True

    # Run tested function.
    with tf.Graph().as_default() as g, self.session(graph=g) as sess:
      extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
          sess, config)
      rasmk_star, visual_words = extractor.Extract(features,
                                                   num_features_per_region)

    # Define expected results.
    exp_rasmk_star = [64, 192]
    exp_visual_words = [3, 4]

    # Compare actual and expected results.
    self.assertAllEqual(rasmk_star, exp_rasmk_star)
    self.assertAllEqual(visual_words, exp_visual_words)

  def testComputeUnknownAggregation(self):
    # Construct inputs.
    config = aggregation_config_pb2.AggregationConfig()
    config.codebook_size = 5
    config.feature_dimensionality = 2
    config.aggregation_type = 0
    config.codebook_path = self._codebook_path
    config.use_regional_aggregation = True

    # Run tested function.
    with tf.Graph().as_default() as g, self.session(graph=g) as sess:
      with self.assertRaisesRegex(ValueError, 'Invalid aggregation type'):
        feature_aggregation_extractor.ExtractAggregatedRepresentation(
            sess, config)


if __name__ == '__main__':
  tf.test.main()