Commit 7cd29f8c authored by André Araujo, committed by aquariusjay

Some refactoring + Google Landmarks dataset scripts (#7014)

* Merged commit includes the following changes:
253126424  by Andre Araujo:

    Scripts to compute metrics for Google Landmarks dataset.

    Also, a small fix to the metric in the retrieval case: avoid duplicate predicted images.

--
253118971  by Andre Araujo:

    Metrics for Google Landmarks dataset.

--
253106953  by Andre Araujo:

    Library to read files from Google Landmarks challenges.

--
250700636  by Andre Araujo:

    Handle case of aggregation extraction with empty set of input features.

--
250516819  by Andre Araujo:

    Add minimum size for DELF extractor.

--
250435822  by Andre Araujo:

    Add max_image_size/min_image_size for open-source DELF proto / module.

--
250414606  by Andre Araujo:

    Refactor extract_aggregation to allow reuse with different datasets.

--
250356863  by Andre Araujo:

    Remove unnecessary cmd_args variable from boxes_and_features_extraction.

--
249783379  by Andre Araujo:

    Create directory for writing mapping file if it does not exist.

--
249581591  by Andre Araujo:

    Refactor scripts to extract boxes and features from images in Revisited datasets.
    Also, change tf.logging.info --> print for easier logging in open source code.

--
249511821  by Andre Araujo:

    Small change to function for file/directory handling.

--
249289499  by Andre Araujo:

    Internal change.

--

PiperOrigin-RevId: 253126424

* Updating DELF init to adjust to latest changes

* Editing init files for python packages

* Edit D2R dataset reader to work with py3.

PiperOrigin-RevId: 253135576

* DELF package: fix import ordering
parent d8a09064
@@ -26,11 +26,12 @@ from delf.protos import feature_pb2
 from delf.python import box_io
 from delf.python import datum_io
 from delf.python import delf_v1
+from delf.python import detect_to_retrieve
 from delf.python import feature_aggregation_extractor
 from delf.python import feature_aggregation_similarity
 from delf.python import feature_extractor
 from delf.python import feature_io
-from delf.python.examples import extract_boxes
-from delf.python.examples import extract_features
-from delf.python import detect_to_retrieve
+from delf.python.examples import detector
+from delf.python.examples import extractor
+from delf.python import google_landmarks_dataset
 # pylint: enable=unused-import
@@ -61,4 +61,19 @@ message DelfConfig {
   // Configuration used for DELF local features.
   optional DelfLocalFeatureConfig delf_local_config = 3;
 
+  // The maximum/minimum image size (in terms of height or width) to be used
+  // when extracting DELF features. If the height *OR* width is larger than
+  // max_image_size, it is resized to max_image_size, and the other dimension
+  // is resized so as to preserve the aspect ratio. Similar logic applies to
+  // min_image_size: if both height *AND* width are smaller than
+  // min_image_size, the larger side is set to min_image_size. If set to -1
+  // (default), no resizing is performed for the corresponding criterion.
+  // When using local features, note that feature locations and scales will
+  // be consistent with the original input image size.
+  // When both options are specified (a valid use case), there is no conflict
+  // as long as max_image_size >= min_image_size: enlarging and shrinking are
+  // never both triggered.
+  // Bilinear interpolation is used for resizing.
+  optional int32 max_image_size = 4 [default = -1];
+  optional int32 min_image_size = 5 [default = -1];
 }
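For reference, a minimal sketch of how the two new fields behave (the concrete sizes here are made-up illustrations; the resizing itself is implemented in extractor.ResizeImage below):

from delf import delf_config_pb2

# Hypothetical configuration: cap the larger side at 1024 pixels, and upscale
# images whose height AND width are both under 64 pixels.
config = delf_config_pb2.DelfConfig(max_image_size=1024, min_image_size=64)
# A 2000x500 image exceeds max_image_size in height, so it is resized to
# 1024x256 (aspect ratio preserved, scale_factor = 0.512).
# A 40x30 image is below min_image_size on both sides, so its larger side is
# set to 64, giving 64x48 (scale_factor = 1.6).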
@@ -18,5 +18,7 @@ from __future__ import division
 from __future__ import print_function
 
 # pylint: disable=unused-import
+from delf.python.detect_to_retrieve import aggregation_extraction
+from delf.python.detect_to_retrieve import boxes_and_features_extraction
 from delf.python.detect_to_retrieve import dataset
 # pylint: enable=unused-import
# Copyright 2019 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Library to extract/save feature aggregation."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import csv
import os
import time
import numpy as np
import tensorflow as tf
from google.protobuf import text_format
from delf import aggregation_config_pb2
from delf import datum_io
from delf import feature_aggregation_extractor
from delf import feature_io
# Aliases for aggregation types.
_VLAD = aggregation_config_pb2.AggregationConfig.VLAD
_ASMK = aggregation_config_pb2.AggregationConfig.ASMK
_ASMK_STAR = aggregation_config_pb2.AggregationConfig.ASMK_STAR
# Extensions.
_DELF_EXTENSION = '.delf'
_VLAD_EXTENSION_SUFFIX = 'vlad'
_ASMK_EXTENSION_SUFFIX = 'asmk'
_ASMK_STAR_EXTENSION_SUFFIX = 'asmk_star'
# Pace to report extraction log.
_STATUS_CHECK_ITERATIONS = 50
def _ReadMappingBasenameToBoxNames(input_path, index_image_names):
"""Reads mapping from image name to DELF file names for each box.
Args:
input_path: Path to CSV file containing mapping.
index_image_names: List containing index image names, in order, for the
dataset under consideration.
Returns:
images_to_box_feature_files: Dict. key=string (image name); value=list of
strings (file names containing DELF features for boxes).
"""
images_to_box_feature_files = {}
with tf.gfile.GFile(input_path, 'r') as f:
reader = csv.DictReader(f)
for row in reader:
index_image_name = index_image_names[int(row['index_image_id'])]
if index_image_name not in images_to_box_feature_files:
images_to_box_feature_files[index_image_name] = []
images_to_box_feature_files[index_image_name].append(row['name'])
return images_to_box_feature_files
def ExtractAggregatedRepresentationsToFiles(image_names, features_dir,
aggregation_config_path,
mapping_path,
output_aggregation_dir):
"""Extracts aggregated feature representations, saving them to files.
It checks if the aggregated representation for an image already exists,
and skips computation for those.
Args:
image_names: List of image names. These are used to compose input file names
for the feature files, and the output file names for aggregated
representations.
features_dir: Directory where DELF features are located.
aggregation_config_path: Path to AggregationConfig proto text file with
configuration to be used for extraction.
mapping_path: Optional CSV file which maps each .delf file name to the index
image ID and detected box ID. If regional aggregation is performed, this
should be set. Otherwise, this is ignored.
output_aggregation_dir: Directory where aggregation output will be written
to.
Raises:
ValueError: If AggregationConfig is malformed, or `mapping_path` is
missing.
"""
num_images = len(image_names)
# Parse AggregationConfig proto, and select output extension.
config = aggregation_config_pb2.AggregationConfig()
with tf.gfile.GFile(aggregation_config_path, 'r') as f:
text_format.Merge(f.read(), config)
output_extension = '.'
if config.use_regional_aggregation:
output_extension += 'r'
if config.aggregation_type == _VLAD:
output_extension += _VLAD_EXTENSION_SUFFIX
elif config.aggregation_type == _ASMK:
output_extension += _ASMK_EXTENSION_SUFFIX
elif config.aggregation_type == _ASMK_STAR:
output_extension += _ASMK_STAR_EXTENSION_SUFFIX
else:
raise ValueError('Invalid aggregation type: %d' % config.aggregation_type)
# Read index mapping path, if provided.
if mapping_path:
images_to_box_feature_files = _ReadMappingBasenameToBoxNames(
mapping_path, image_names)
# Create output directory if necessary.
if not tf.gfile.Exists(output_aggregation_dir):
tf.gfile.MakeDirs(output_aggregation_dir)
with tf.Session() as sess:
extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
sess, config)
start = time.clock()
for i in range(num_images):
if i == 0:
print('Starting to extract aggregation from images...')
elif i % _STATUS_CHECK_ITERATIONS == 0:
elapsed = (time.clock() - start)
print('Processing image %d out of %d, last %d '
'images took %f seconds' %
(i, num_images, _STATUS_CHECK_ITERATIONS, elapsed))
start = time.clock()
image_name = image_names[i]
# Compose output file name, skip extraction for this image if it already
# exists.
output_aggregation_filename = os.path.join(output_aggregation_dir,
image_name + output_extension)
if tf.io.gfile.exists(output_aggregation_filename):
print('Skipping %s' % image_name)
continue
# Load DELF features.
if config.use_regional_aggregation:
if not mapping_path:
raise ValueError(
'Requested regional aggregation, but mapping_path was not '
'provided')
descriptors_list = []
num_features_per_box = []
for box_feature_file in images_to_box_feature_files[image_name]:
delf_filename = os.path.join(features_dir,
box_feature_file + _DELF_EXTENSION)
_, _, box_descriptors, _, _ = feature_io.ReadFromFile(delf_filename)
# If `box_descriptors` is empty, reshape it such that it can be
# concatenated with other descriptors.
if not box_descriptors.shape[0]:
box_descriptors = np.reshape(box_descriptors,
[0, config.feature_dimensionality])
descriptors_list.append(box_descriptors)
num_features_per_box.append(box_descriptors.shape[0])
descriptors = np.concatenate(descriptors_list)
else:
input_delf_filename = os.path.join(features_dir,
image_name + _DELF_EXTENSION)
_, _, descriptors, _, _ = feature_io.ReadFromFile(input_delf_filename)
# If `descriptors` is empty, reshape it to avoid extraction failure.
if not descriptors.shape[0]:
descriptors = np.reshape(descriptors,
[0, config.feature_dimensionality])
num_features_per_box = None
# Extract and save aggregation. If using VLAD, only
# `aggregated_descriptors` needs to be saved.
(aggregated_descriptors,
feature_visual_words) = extractor.Extract(descriptors,
num_features_per_box)
if config.aggregation_type == _VLAD:
datum_io.WriteToFile(aggregated_descriptors,
output_aggregation_filename)
else:
datum_io.WritePairToFile(aggregated_descriptors,
feature_visual_words.astype('uint32'),
output_aggregation_filename)
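A minimal usage sketch of this new library entry point (the paths and dataset file below are hypothetical placeholders; the scripts further down obtain them from command-line flags):

from delf.python.detect_to_retrieve import aggregation_extraction
from delf.python.detect_to_retrieve import dataset

# Hypothetical dataset file and directories.
_, index_list, _ = dataset.ReadDatasetFile('/tmp/gnd_roxford5k.mat')
aggregation_extraction.ExtractAggregatedRepresentationsToFiles(
    image_names=index_list,
    features_dir='/tmp/features',
    aggregation_config_path='/tmp/aggregation_config.pbtxt',
    mapping_path='/tmp/index_mapping.csv',  # Only used for regional aggregation.
    output_aggregation_dir='/tmp/aggregation')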
# Copyright 2019 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Library to extract/save boxes and DELF features."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import csv
import math
import os
import time
import numpy as np
from PIL import Image
from PIL import ImageFile
import tensorflow as tf
from google.protobuf import text_format
from delf import delf_config_pb2
from delf import box_io
from delf import feature_io
from delf import detector
from delf import extractor
# Extension of feature files.
_BOX_EXTENSION = '.boxes'
_DELF_EXTENSION = '.delf'
# Pace to report extraction log.
_STATUS_CHECK_ITERATIONS = 100
# To avoid crashing for truncated (corrupted) images.
ImageFile.LOAD_TRUNCATED_IMAGES = True
def _PilLoader(path):
"""Helper function to read image with PIL.
Args:
path: Path to image to be loaded.
Returns:
PIL image in RGB format.
"""
with tf.gfile.GFile(path, 'rb') as f:
img = Image.open(f)
return img.convert('RGB')
def _WriteMappingBasenameToIds(index_names_ids_and_boxes, output_path):
"""Helper function to write CSV mapping from DELF file name to IDs.
Args:
index_names_ids_and_boxes: List containing 3-element lists with name, image
ID and box ID.
output_path: Output CSV path.
"""
with tf.gfile.GFile(output_path, 'w') as f:
csv_writer = csv.DictWriter(
f, fieldnames=['name', 'index_image_id', 'box_id'])
csv_writer.writeheader()
for name_imid_boxid in index_names_ids_and_boxes:
csv_writer.writerow({
'name': name_imid_boxid[0],
'index_image_id': name_imid_boxid[1],
'box_id': name_imid_boxid[2],
})
def ExtractBoxesAndFeaturesToFiles(image_names, image_paths, delf_config_path,
detector_model_dir, detector_thresh,
output_features_dir, output_boxes_dir,
output_mapping):
"""Extracts boxes and features, saving them to files.
Boxes are saved to <image_name>.boxes files. DELF features are extracted for
the entire image and saved into <image_name>.delf files. In addition, DELF
features are extracted for each high-confidence bounding box in the image, and
saved into files named <image_name>_0.delf, <image_name>_1.delf, etc.
It checks if descriptors/boxes already exist, and skips computation for those.
Args:
image_names: List of image names. These are used to compose output file
names for boxes and features.
image_paths: List of image paths. image_paths[i] is the path for the image
named by image_names[i]. `image_names` and `image_paths` must have the
same number of elements.
delf_config_path: Path to DelfConfig proto text file.
detector_model_dir: Directory where detector SavedModel is located.
detector_thresh: Threshold used to decide if an image's detected box
undergoes feature extraction.
output_features_dir: Directory where DELF features will be written to.
output_boxes_dir: Directory where detected boxes will be written to.
output_mapping: CSV file which maps each .delf file name to the image ID and
detected box ID.
Raises:
ValueError: If len(image_names) and len(image_paths) are different.
"""
num_images = len(image_names)
if len(image_paths) != num_images:
raise ValueError(
'image_names and image_paths have different number of items')
# Parse DelfConfig proto.
config = delf_config_pb2.DelfConfig()
with tf.gfile.GFile(delf_config_path, 'r') as f:
text_format.Merge(f.read(), config)
# Create output directories if necessary.
if not tf.gfile.Exists(output_features_dir):
tf.gfile.MakeDirs(output_features_dir)
if not tf.gfile.Exists(output_boxes_dir):
tf.gfile.MakeDirs(output_boxes_dir)
if not tf.gfile.Exists(os.path.dirname(output_mapping)):
tf.gfile.MakeDirs(os.path.dirname(output_mapping))
names_ids_and_boxes = []
with tf.Graph().as_default():
with tf.Session() as sess:
# Initialize variables, construct detector and DELF extractor.
init_op = tf.global_variables_initializer()
sess.run(init_op)
detector_fn = detector.MakeDetector(
sess, detector_model_dir, import_scope='detector')
delf_extractor_fn = extractor.MakeExtractor(
sess, config, import_scope='extractor_delf')
start = time.clock()
for i in range(num_images):
if i == 0:
print('Starting to extract features/boxes...')
elif i % _STATUS_CHECK_ITERATIONS == 0:
elapsed = (time.clock() - start)
print('Processing image %d out of %d, last %d '
'images took %f seconds' %
(i, num_images, _STATUS_CHECK_ITERATIONS, elapsed))
start = time.clock()
image_name = image_names[i]
output_feature_filename_whole_image = os.path.join(
output_features_dir, image_name + _DELF_EXTENSION)
output_box_filename = os.path.join(output_boxes_dir,
image_name + _BOX_EXTENSION)
pil_im = _PilLoader(image_paths[i])
width, height = pil_im.size
# Extract and save boxes.
if tf.gfile.Exists(output_box_filename):
print('Skipping box computation for %s' % image_name)
(boxes_out, scores_out,
class_indices_out) = box_io.ReadFromFile(output_box_filename)
else:
(boxes_out, scores_out,
class_indices_out) = detector_fn(np.expand_dims(pil_im, 0))
# Using only one image per batch.
boxes_out = boxes_out[0]
scores_out = scores_out[0]
class_indices_out = class_indices_out[0]
box_io.WriteToFile(output_box_filename, boxes_out, scores_out,
class_indices_out)
# Select boxes with scores greater than threshold. Those will be the
# ones with extracted DELF features (besides the whole image, whose DELF
# features are extracted in all cases).
num_delf_files = 1
selected_boxes = []
for box_ind, box in enumerate(boxes_out):
if scores_out[box_ind] >= detector_thresh:
selected_boxes.append(box)
num_delf_files += len(selected_boxes)
# Extract and save DELF features.
for delf_file_ind in range(num_delf_files):
if delf_file_ind == 0:
box_name = image_name
output_feature_filename = output_feature_filename_whole_image
else:
box_name = image_name + '_' + str(delf_file_ind - 1)
output_feature_filename = os.path.join(output_features_dir,
box_name + _DELF_EXTENSION)
names_ids_and_boxes.append([box_name, i, delf_file_ind - 1])
if tf.gfile.Exists(output_feature_filename):
print('Skipping DELF computation for %s' % box_name)
continue
if delf_file_ind >= 1:
bbox_for_cropping = selected_boxes[delf_file_ind - 1]
bbox_for_cropping_pil_convention = [
int(math.floor(bbox_for_cropping[1] * width)),
int(math.floor(bbox_for_cropping[0] * height)),
int(math.ceil(bbox_for_cropping[3] * width)),
int(math.ceil(bbox_for_cropping[2] * height))
]
pil_cropped_im = pil_im.crop(bbox_for_cropping_pil_convention)
im = np.array(pil_cropped_im)
else:
im = np.array(pil_im)
(locations_out, descriptors_out, feature_scales_out,
attention_out) = delf_extractor_fn(im)
feature_io.WriteToFile(output_feature_filename, locations_out,
feature_scales_out, descriptors_out,
attention_out)
# Save mapping from output DELF name to image id and box id.
_WriteMappingBasenameToIds(names_ids_and_boxes, output_mapping)
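A minimal usage sketch of this entry point (all names and paths are hypothetical placeholders; in the scripts below they come from the dataset file and command-line flags):

import os
from delf.python.detect_to_retrieve import boxes_and_features_extraction

image_names = ['all_souls_000000', 'all_souls_000001']  # Hypothetical names.
image_paths = [
    os.path.join('/tmp/images', name + '.jpg') for name in image_names
]
boxes_and_features_extraction.ExtractBoxesAndFeaturesToFiles(
    image_names=image_names,
    image_paths=image_paths,
    delf_config_path='/tmp/delf_config.pbtxt',
    detector_model_dir='/tmp/detector_model',
    detector_thresh=0.1,
    output_features_dir='/tmp/features',
    output_boxes_dir='/tmp/boxes',
    output_mapping='/tmp/index_mapping.csv')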
@@ -70,14 +70,14 @@ def main(argv):
     raise RuntimeError('Too many command-line arguments.')
 
   # Process output directory.
-  if os.path.exists(cmd_args.output_cluster_dir):
+  if tf.gfile.Exists(cmd_args.output_cluster_dir):
     raise RuntimeError(
         'output_cluster_dir = %s already exists. This may indicate that a '
         'previous run already wrote checkpoints in this directory, which would '
         'lead to incorrect training. Please re-run this script by specifying an'
         ' inexisting directory.' % cmd_args.output_cluster_dir)
   else:
-    os.makedirs(cmd_args.output_cluster_dir)
+    tf.gfile.MakeDirs(cmd_args.output_cluster_dir)
 
   # Read list of index images from dataset file.
   print('Reading list of index images from dataset file...')
...
@@ -23,63 +23,14 @@ from __future__ import division
 from __future__ import print_function
 
 import argparse
-import csv
-import os
 import sys
-import time
-
-import numpy as np
-import tensorflow as tf
-from google.protobuf import text_format
 from tensorflow.python.platform import app
-from delf import aggregation_config_pb2
-from delf import datum_io
-from delf import feature_aggregation_extractor
-from delf import feature_io
+from delf.python.detect_to_retrieve import aggregation_extraction
 from delf.python.detect_to_retrieve import dataset
 
 cmd_args = None
-
-# Aliases for aggregation types.
-_VLAD = aggregation_config_pb2.AggregationConfig.VLAD
-_ASMK = aggregation_config_pb2.AggregationConfig.ASMK
-_ASMK_STAR = aggregation_config_pb2.AggregationConfig.ASMK_STAR
-
-# Extensions.
-_DELF_EXTENSION = '.delf'
-_VLAD_EXTENSION_SUFFIX = 'vlad'
-_ASMK_EXTENSION_SUFFIX = 'asmk'
-_ASMK_STAR_EXTENSION_SUFFIX = 'asmk_star'
-
-# Pace to report extraction log.
-_STATUS_CHECK_ITERATIONS = 50
-
-
-def _ReadMappingBasenameToBoxNames(input_path, index_image_names):
-  """Reads mapping from image name to DELF file names for each box.
-
-  Args:
-    input_path: Path to CSV file containing mapping.
-    index_image_names: List containing index image names, in order, for the
-      dataset under consideration.
-
-  Returns:
-    images_to_box_feature_files: Dict. key=string (image name); value=list of
-      strings (file names containing DELF features for boxes).
-  """
-  images_to_box_feature_files = {}
-  with tf.gfile.GFile(input_path, 'r') as f:
-    reader = csv.DictReader(f)
-    for row in reader:
-      index_image_name = index_image_names[int(row['index_image_id'])]
-      if index_image_name not in images_to_box_feature_files:
-        images_to_box_feature_files[index_image_name] = []
-      images_to_box_feature_files[index_image_name].append(row['name'])
-  return images_to_box_feature_files
-
 
 def main(argv):
   if len(argv) > 1:
@@ -96,95 +47,12 @@ def main(argv):
   num_images = len(image_list)
   print('done! Found %d images' % num_images)
 
-  # Parse AggregationConfig proto, and select output extension.
-  config = aggregation_config_pb2.AggregationConfig()
-  with tf.gfile.GFile(cmd_args.aggregation_config_path, 'r') as f:
-    text_format.Merge(f.read(), config)
-  output_extension = '.'
-  if config.use_regional_aggregation:
-    output_extension += 'r'
-  if config.aggregation_type == _VLAD:
-    output_extension += _VLAD_EXTENSION_SUFFIX
-  elif config.aggregation_type == _ASMK:
-    output_extension += _ASMK_EXTENSION_SUFFIX
-  elif config.aggregation_type == _ASMK_STAR:
-    output_extension += _ASMK_STAR_EXTENSION_SUFFIX
-  else:
-    raise ValueError('Invalid aggregation type: %d' % config.aggregation_type)
-
-  # Read index mapping path, if provided.
-  if cmd_args.index_mapping_path:
-    images_to_box_feature_files = _ReadMappingBasenameToBoxNames(
-        cmd_args.index_mapping_path, image_list)
-
-  # Create output directory if necessary.
-  if not os.path.exists(cmd_args.output_aggregation_dir):
-    os.makedirs(cmd_args.output_aggregation_dir)
-
-  with tf.Session() as sess:
-    extractor = feature_aggregation_extractor.ExtractAggregatedRepresentation(
-        sess, config)
-
-    start = time.clock()
-    for i in range(num_images):
-      if i == 0:
-        print('Starting to extract aggregation from images...')
-      elif i % _STATUS_CHECK_ITERATIONS == 0:
-        elapsed = (time.clock() - start)
-        print('Processing image %d out of %d, last %d '
-              'images took %f seconds' %
-              (i, num_images, _STATUS_CHECK_ITERATIONS, elapsed))
-        start = time.clock()
-
-      image_name = image_list[i]
-
-      # Compose output file name, skip extraction for this image if it already
-      # exists.
-      output_aggregation_filename = os.path.join(
-          cmd_args.output_aggregation_dir, image_name + output_extension)
-      if tf.io.gfile.exists(output_aggregation_filename):
-        print('Skipping %s' % image_name)
-        continue
-
-      # Load DELF features.
-      if config.use_regional_aggregation:
-        if not cmd_args.index_mapping_path:
-          raise ValueError(
-              'Requested regional aggregation, but index_mapping_path was not '
-              'provided')
-        descriptors_list = []
-        num_features_per_box = []
-        for box_feature_file in images_to_box_feature_files[image_name]:
-          delf_filename = os.path.join(cmd_args.features_dir,
-                                       box_feature_file + _DELF_EXTENSION)
-          _, _, box_descriptors, _, _ = feature_io.ReadFromFile(delf_filename)
-          # If `box_descriptors` is empty, reshape it such that it can be
-          # concatenated with other descriptors.
-          if not box_descriptors.shape[0]:
-            box_descriptors = np.reshape(box_descriptors,
-                                         [0, config.feature_dimensionality])
-          descriptors_list.append(box_descriptors)
-          num_features_per_box.append(box_descriptors.shape[0])
-
-        descriptors = np.concatenate(descriptors_list)
-      else:
-        input_delf_filename = os.path.join(cmd_args.features_dir,
-                                           image_name + _DELF_EXTENSION)
-        _, _, descriptors, _, _ = feature_io.ReadFromFile(input_delf_filename)
-        num_features_per_box = None
-
-      # Extract and save aggregation. If using VLAD, only
-      # `aggregated_descriptors` needs to be saved.
-      (aggregated_descriptors,
-       feature_visual_words) = extractor.Extract(descriptors,
-                                                 num_features_per_box)
-      if config.aggregation_type == _VLAD:
-        datum_io.WriteToFile(aggregated_descriptors,
-                             output_aggregation_filename)
-      else:
-        datum_io.WritePairToFile(aggregated_descriptors,
-                                 feature_visual_words.astype('uint32'),
-                                 output_aggregation_filename)
+  aggregation_extraction.ExtractAggregatedRepresentationsToFiles(
+      image_names=image_list,
+      features_dir=cmd_args.features_dir,
+      aggregation_config_path=cmd_args.aggregation_config_path,
+      mapping_path=cmd_args.index_mapping_path,
+      output_aggregation_dir=cmd_args.output_aggregation_dir)
 
 
 if __name__ == '__main__':
@@ -218,7 +86,7 @@ if __name__ == '__main__':
       type=str,
       default='/tmp/features',
      help="""
-      Directory where query image features are located, all in .delf format.
+      Directory where image features are located, all in .delf format.
      """)
   parser.add_argument(
       '--index_mapping_path',
...
@@ -28,195 +28,44 @@ from __future__ import division
 from __future__ import print_function
 
 import argparse
-import csv
-import math
 import os
 import sys
-import time
-
-import numpy as np
-from PIL import Image
-from PIL import ImageFile
-import tensorflow as tf
-from google.protobuf import text_format
 from tensorflow.python.platform import app
-from delf import delf_config_pb2
-from delf import box_io
-from delf import feature_io
+from delf.python.detect_to_retrieve import boxes_and_features_extraction
 from delf.python.detect_to_retrieve import dataset
-from delf import extract_boxes
-from delf import extract_features
 
 cmd_args = None
 
-# Extension of feature files.
-_BOX_EXTENSION = '.boxes'
-_DELF_EXTENSION = '.delf'
 _IMAGE_EXTENSION = '.jpg'
 
-# Pace to report extraction log.
-_STATUS_CHECK_ITERATIONS = 100
-
-# To avoid crashing for truncated (corrupted) images.
-ImageFile.LOAD_TRUNCATED_IMAGES = True
-
-
-def _PilLoader(path):
-  """Helper function to read image with PIL.
-
-  Args:
-    path: Path to image to be loaded.
-
-  Returns:
-    PIL image in RGB format.
-  """
-  with tf.gfile.GFile(path, 'rb') as f:
-    img = Image.open(f)
-    return img.convert('RGB')
-
-
-def _WriteMappingBasenameToIds(index_names_ids_and_boxes, output_path):
-  """Helper function to write CSV mapping from DELF file name to IDs.
-
-  Args:
-    index_names_ids_and_boxes: List containing 3-element lists with name, image
-      ID and box ID.
-    output_path: Output CSV path.
-  """
-  with tf.gfile.GFile(output_path, 'w') as f:
-    csv_writer = csv.DictWriter(
-        f, fieldnames=['name', 'index_image_id', 'box_id'])
-    csv_writer.writeheader()
-    for name_imid_boxid in index_names_ids_and_boxes:
-      csv_writer.writerow({
-          'name': name_imid_boxid[0],
-          'index_image_id': name_imid_boxid[1],
-          'box_id': name_imid_boxid[2],
-      })
-
 
 def main(argv):
   if len(argv) > 1:
     raise RuntimeError('Too many command-line arguments.')
 
-  tf.logging.set_verbosity(tf.logging.INFO)
-
   # Read list of index images from dataset file.
-  tf.logging.info('Reading list of index images from dataset file...')
+  print('Reading list of index images from dataset file...')
   _, index_list, _ = dataset.ReadDatasetFile(cmd_args.dataset_file_path)
   num_images = len(index_list)
-  tf.logging.info('done! Found %d images', num_images)
+  print('done! Found %d images' % num_images)
 
-  # Parse DelfConfig proto.
-  config = delf_config_pb2.DelfConfig()
-  with tf.gfile.GFile(cmd_args.delf_config_path, 'r') as f:
-    text_format.Merge(f.read(), config)
-
-  # Create output directories if necessary.
-  if not os.path.exists(cmd_args.output_features_dir):
-    os.makedirs(cmd_args.output_features_dir)
-  if not os.path.exists(cmd_args.output_boxes_dir):
-    os.makedirs(cmd_args.output_boxes_dir)
-
-  index_names_ids_and_boxes = []
-  with tf.Graph().as_default():
-    with tf.Session() as sess:
-      # Initialize variables, construct detector and DELF extractor.
-      init_op = tf.global_variables_initializer()
-      sess.run(init_op)
-      detector_fn = extract_boxes.MakeDetector(
-          sess, cmd_args.detector_model_dir, import_scope='detector')
-      delf_extractor_fn = extract_features.MakeExtractor(
-          sess, config, import_scope='extractor_delf')
-
-      start = time.clock()
-      for i in range(num_images):
-        if i == 0:
-          print('Starting to extract features/boxes from index images...')
-        elif i % _STATUS_CHECK_ITERATIONS == 0:
-          elapsed = (time.clock() - start)
-          print('Processing index image %d out of %d, last %d '
-                'images took %f seconds' %
-                (i, num_images, _STATUS_CHECK_ITERATIONS, elapsed))
-          start = time.clock()
-
-        index_image_name = index_list[i]
-        input_image_filename = os.path.join(cmd_args.images_dir,
-                                            index_image_name + _IMAGE_EXTENSION)
-        output_feature_filename_whole_image = os.path.join(
-            cmd_args.output_features_dir, index_image_name + _DELF_EXTENSION)
-        output_box_filename = os.path.join(cmd_args.output_boxes_dir,
-                                           index_image_name + _BOX_EXTENSION)
-
-        pil_im = _PilLoader(input_image_filename)
-        width, height = pil_im.size
-
-        # Extract and save boxes.
-        if tf.gfile.Exists(output_box_filename):
-          tf.logging.info('Skipping box computation for %s', index_image_name)
-          (boxes_out, scores_out,
-           class_indices_out) = box_io.ReadFromFile(output_box_filename)
-        else:
-          (boxes_out, scores_out,
-           class_indices_out) = detector_fn(np.expand_dims(pil_im, 0))
-          # Using only one image per batch.
-          boxes_out = boxes_out[0]
-          scores_out = scores_out[0]
-          class_indices_out = class_indices_out[0]
-          box_io.WriteToFile(output_box_filename, boxes_out, scores_out,
-                             class_indices_out)
-
-        # Select boxes with scores greater than threshold. Those will be the
-        # ones with extracted DELF features (besides the whole image, whose DELF
-        # features are extracted in all cases).
-        num_delf_files = 1
-        selected_boxes = []
-        for box_ind, box in enumerate(boxes_out):
-          if scores_out[box_ind] >= cmd_args.detector_thresh:
-            selected_boxes.append(box)
-        num_delf_files += len(selected_boxes)
-
-        # Extract and save DELF features.
-        for delf_file_ind in range(num_delf_files):
-          if delf_file_ind == 0:
-            index_box_name = index_image_name
-            output_feature_filename = output_feature_filename_whole_image
-          else:
-            index_box_name = index_image_name + '_' + str(delf_file_ind - 1)
-            output_feature_filename = os.path.join(
-                cmd_args.output_features_dir, index_box_name + _DELF_EXTENSION)
-
-          index_names_ids_and_boxes.append(
-              [index_box_name, i, delf_file_ind - 1])
-
-          if tf.gfile.Exists(output_feature_filename):
-            tf.logging.info('Skipping DELF computation for %s', index_box_name)
-            continue
-
-          if delf_file_ind >= 1:
-            bbox_for_cropping = selected_boxes[delf_file_ind - 1]
-            bbox_for_cropping_pil_convention = [
-                int(math.floor(bbox_for_cropping[1] * width)),
-                int(math.floor(bbox_for_cropping[0] * height)),
-                int(math.ceil(bbox_for_cropping[3] * width)),
-                int(math.ceil(bbox_for_cropping[2] * height))
-            ]
-            pil_cropped_im = pil_im.crop(bbox_for_cropping_pil_convention)
-            im = np.array(pil_cropped_im)
-          else:
-            im = np.array(pil_im)
-
-          (locations_out, descriptors_out, feature_scales_out,
-           attention_out) = delf_extractor_fn(im)
-
-          feature_io.WriteToFile(output_feature_filename, locations_out,
-                                 feature_scales_out, descriptors_out,
-                                 attention_out)
-
-  # Save mapping from output DELF name to index image id and box id.
-  _WriteMappingBasenameToIds(index_names_ids_and_boxes,
-                             cmd_args.output_index_mapping)
+  # Compose list of image paths.
+  image_paths = [
+      os.path.join(cmd_args.images_dir, index_image_name + _IMAGE_EXTENSION)
+      for index_image_name in index_list
+  ]
+
+  # Extract boxes/features and save them to files.
+  boxes_and_features_extraction.ExtractBoxesAndFeaturesToFiles(
+      image_names=index_list,
+      image_paths=image_paths,
+      delf_config_path=cmd_args.delf_config_path,
+      detector_model_dir=cmd_args.detector_model_dir,
+      detector_thresh=cmd_args.detector_thresh,
+      output_features_dir=cmd_args.output_features_dir,
+      output_boxes_dir=cmd_args.output_boxes_dir,
+      output_mapping=cmd_args.output_index_mapping)
 
 
 if __name__ == '__main__':
...
@@ -40,7 +40,7 @@ from tensorflow.python.platform import app
 from delf import delf_config_pb2
 from delf import feature_io
 from delf.python.detect_to_retrieve import dataset
-from delf import extract_features
+from delf import extractor
 
 cmd_args = None
@@ -85,15 +85,15 @@ def main(argv):
     text_format.Merge(f.read(), config)
 
   # Create output directory if necessary.
-  if not os.path.exists(cmd_args.output_features_dir):
-    os.makedirs(cmd_args.output_features_dir)
+  if not tf.gfile.Exists(cmd_args.output_features_dir):
+    tf.gfile.MakeDirs(cmd_args.output_features_dir)
 
   with tf.Graph().as_default():
     with tf.Session() as sess:
       # Initialize variables, construct DELF extractor.
       init_op = tf.global_variables_initializer()
       sess.run(init_op)
-      extractor_fn = extract_features.MakeExtractor(sess, config)
+      extractor_fn = extractor.MakeExtractor(sess, config)
 
       start = time.clock()
       for i in range(num_images):
...
@@ -368,8 +368,8 @@ def main(argv):
     print('done! Retrieval for query %d took %f seconds' % (i, elapsed))
 
   # Create output directory if necessary.
-  if not os.path.exists(cmd_args.output_dir):
-    os.makedirs(cmd_args.output_dir)
+  if not tf.gfile.Exists(cmd_args.output_dir):
+    tf.gfile.MakeDirs(cmd_args.output_dir)
 
   # Compute metrics.
   medium_metrics = dataset.ComputeMetrics(ranks_before_gv, medium_ground_truth,
...
# Copyright 2019 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Module to construct object detector function."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
def MakeDetector(sess, model_dir, import_scope=None):
"""Creates a function to detect objects in an image.
Args:
sess: TensorFlow session to use.
model_dir: Directory where SavedModel is located.
import_scope: Optional scope to use for model.
Returns:
Function that receives an image and returns detection results.
"""
tf.saved_model.loader.load(
sess, [tf.saved_model.tag_constants.SERVING],
model_dir,
import_scope=import_scope)
import_scope_prefix = import_scope + '/' if import_scope is not None else ''
input_images = sess.graph.get_tensor_by_name('%sinput_images:0' %
import_scope_prefix)
boxes = sess.graph.get_tensor_by_name('%sdetection_boxes:0' %
import_scope_prefix)
scores = sess.graph.get_tensor_by_name('%sdetection_scores:0' %
import_scope_prefix)
class_indices = sess.graph.get_tensor_by_name('%sdetection_classes:0' %
import_scope_prefix)
def DetectorFn(images):
"""Receives an image and returns detected boxes.
Args:
      images: Uint8 array with shape (batch, height, width, 3) containing a
        batch of RGB images.
Returns:
Tuple (boxes, scores, class_indices).
"""
return sess.run([boxes, scores, class_indices],
feed_dict={input_images: images})
return DetectorFn
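A minimal usage sketch of the relocated helper (the SavedModel directory is a hypothetical placeholder; the zero-filled image simply stands in for a real RGB batch):

import numpy as np
import tensorflow as tf
from delf import detector

with tf.Session() as sess:
  detector_fn = detector.MakeDetector(sess, '/tmp/detector_model')
  # Batch with a single 480x640 RGB image.
  images = np.zeros((1, 480, 640, 3), dtype='uint8')
  boxes, scores, class_indices = detector_fn(images)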
@@ -34,6 +34,7 @@ import tensorflow as tf
 from tensorflow.python.platform import app
 from delf import box_io
+from delf import detector
 
 cmd_args = None
@@ -63,47 +64,6 @@ def _ReadImageList(list_path):
   return image_paths
 
 
-def MakeDetector(sess, model_dir, import_scope=None):
-  """Creates a function to detect objects in an image.
-
-  Args:
-    sess: TensorFlow session to use.
-    model_dir: Directory where SavedModel is located.
-    import_scope: Optional scope to use for model.
-
-  Returns:
-    Function that receives an image and returns detection results.
-  """
-  tf.saved_model.loader.load(
-      sess, [tf.saved_model.tag_constants.SERVING],
-      model_dir,
-      import_scope=import_scope)
-  import_scope_prefix = import_scope + '/' if import_scope is not None else ''
-  input_images = sess.graph.get_tensor_by_name('%sinput_images:0' %
-                                               import_scope_prefix)
-  boxes = sess.graph.get_tensor_by_name('%sdetection_boxes:0' %
-                                        import_scope_prefix)
-  scores = sess.graph.get_tensor_by_name('%sdetection_scores:0' %
-                                         import_scope_prefix)
-  class_indices = sess.graph.get_tensor_by_name('%sdetection_classes:0' %
-                                                import_scope_prefix)
-
-  def DetectorFn(images):
-    """Receives an image and returns detected boxes.
-
-    Args:
-      images: Uint8 array with shape (batch, height, width 3) containing a batch
-        of RGB images.
-
-    Returns:
-      Tuple (boxes, scores, class_indices).
-    """
-    return sess.run([boxes, scores, class_indices],
-                    feed_dict={input_images: images})
-
-  return DetectorFn
-
-
 def _FilterBoxesByScore(boxes, scores, class_indices, score_threshold):
   """Filter boxes based on detection scores.
@@ -179,10 +139,10 @@ def main(argv):
   tf.logging.info('done! Found %d images', num_images)
 
   # Create output directories if necessary.
-  if not os.path.exists(cmd_args.output_dir):
-    os.makedirs(cmd_args.output_dir)
-  if cmd_args.output_viz_dir and not os.path.exists(cmd_args.output_viz_dir):
-    os.makedirs(cmd_args.output_viz_dir)
+  if not tf.gfile.Exists(cmd_args.output_dir):
+    tf.gfile.MakeDirs(cmd_args.output_dir)
+  if cmd_args.output_viz_dir and not tf.gfile.Exists(cmd_args.output_viz_dir):
+    tf.gfile.MakeDirs(cmd_args.output_viz_dir)
 
   # Tell TensorFlow that the model will be built into the default Graph.
   with tf.Graph().as_default():
@@ -197,7 +157,7 @@ def main(argv):
       init_op = tf.global_variables_initializer()
       sess.run(init_op)
 
-      detector_fn = MakeDetector(sess, cmd_args.detector_path)
+      detector_fn = detector.MakeDetector(sess, cmd_args.detector_path)
 
       # Start input enqueue threads.
       coord = tf.train.Coordinator()
...
@@ -32,8 +32,8 @@ import tensorflow as tf
 from google.protobuf import text_format
 from tensorflow.python.platform import app
 from delf import delf_config_pb2
-from delf import feature_extractor
 from delf import feature_io
+from delf import extractor
 
 cmd_args = None
@@ -59,64 +59,6 @@ def _ReadImageList(list_path):
   return image_paths
 
 
-def MakeExtractor(sess, config, import_scope=None):
-  """Creates a function to extract features from an image.
-
-  Args:
-    sess: TensorFlow session to use.
-    config: DelfConfig proto containing the model configuration.
-    import_scope: Optional scope to use for model.
-
-  Returns:
-    Function that receives an image and returns features.
-  """
-  tf.saved_model.loader.load(
-      sess, [tf.saved_model.tag_constants.SERVING],
-      config.model_path,
-      import_scope=import_scope)
-  import_scope_prefix = import_scope + '/' if import_scope is not None else ''
-  input_image = sess.graph.get_tensor_by_name('%sinput_image:0' %
-                                              import_scope_prefix)
-  input_score_threshold = sess.graph.get_tensor_by_name('%sinput_abs_thres:0' %
-                                                        import_scope_prefix)
-  input_image_scales = sess.graph.get_tensor_by_name('%sinput_scales:0' %
-                                                     import_scope_prefix)
-  input_max_feature_num = sess.graph.get_tensor_by_name(
-      '%sinput_max_feature_num:0' % import_scope_prefix)
-  boxes = sess.graph.get_tensor_by_name('%sboxes:0' % import_scope_prefix)
-  raw_descriptors = sess.graph.get_tensor_by_name('%sfeatures:0' %
-                                                  import_scope_prefix)
-  feature_scales = sess.graph.get_tensor_by_name('%sscales:0' %
-                                                 import_scope_prefix)
-  attention_with_extra_dim = sess.graph.get_tensor_by_name('%sscores:0' %
-                                                           import_scope_prefix)
-  attention = tf.reshape(attention_with_extra_dim,
-                         [tf.shape(attention_with_extra_dim)[0]])
-
-  locations, descriptors = feature_extractor.DelfFeaturePostProcessing(
-      boxes, raw_descriptors, config)
-
-  def ExtractorFn(image):
-    """Receives an image and returns DELF features.
-
-    Args:
-      image: Uint8 array with shape (height, width 3) containing the RGB image.
-
-    Returns:
-      Tuple (locations, descriptors, feature_scales, attention)
-    """
-    return sess.run(
-        [locations, descriptors, feature_scales, attention],
-        feed_dict={
-            input_image: image,
-            input_score_threshold: config.delf_local_config.score_threshold,
-            input_image_scales: list(config.image_scales),
-            input_max_feature_num: config.delf_local_config.max_feature_num
-        })
-
-  return ExtractorFn
-
-
 def main(unused_argv):
   tf.logging.set_verbosity(tf.logging.INFO)
@@ -132,8 +74,8 @@ def main(unused_argv):
     text_format.Merge(f.read(), config)
 
   # Create output directory if necessary.
-  if not os.path.exists(cmd_args.output_dir):
-    os.makedirs(cmd_args.output_dir)
+  if not tf.gfile.Exists(cmd_args.output_dir):
+    tf.gfile.MakeDirs(cmd_args.output_dir)
 
   # Tell TensorFlow that the model will be built into the default Graph.
   with tf.Graph().as_default():
@@ -147,7 +89,7 @@ def main(unused_argv):
       init_op = tf.global_variables_initializer()
      sess.run(init_op)
 
-      extractor_fn = MakeExtractor(sess, config)
+      extractor_fn = extractor.MakeExtractor(sess, config)
 
       # Start input enqueue threads.
       coord = tf.train.Coordinator()
...
# Copyright 2019 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Module to construct DELF feature extractor."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
from PIL import Image
import tensorflow as tf
from delf import feature_extractor
# Minimum dimensions below which DELF features are not extracted (empty
# features are returned). This applies after any resizing is performed.
_MIN_HEIGHT = 10
_MIN_WIDTH = 10
def ResizeImage(image, config):
"""Resizes image according to config.
Args:
image: Uint8 array with shape (height, width, 3).
config: DelfConfig proto containing the model configuration.
Returns:
resized_image: Uint8 array with resized image.
    scale_factor: Float with factor used for resizing (if upscaling, larger
      than 1; if downscaling, smaller than 1).
Raises:
ValueError: If `image` has incorrect number of dimensions/channels.
"""
  if image.ndim != 3:
    raise ValueError('image has incorrect number of dimensions: %d' %
                     image.ndim)
height, width, channels = image.shape
if channels != 3:
raise ValueError('image has incorrect number of channels: %d' % channels)
if config.max_image_size != -1 and (width > config.max_image_size or
height > config.max_image_size):
scale_factor = config.max_image_size / max(width, height)
elif config.min_image_size != -1 and (width < config.min_image_size and
height < config.min_image_size):
scale_factor = config.min_image_size / max(width, height)
else:
# No resizing needed, early return.
return image, 1.0
new_shape = (int(width * scale_factor), int(height * scale_factor))
pil_image = Image.fromarray(image)
resized_image = np.array(pil_image.resize(new_shape, resample=Image.BILINEAR))
return resized_image, scale_factor
def MakeExtractor(sess, config, import_scope=None):
"""Creates a function to extract features from an image.
Args:
sess: TensorFlow session to use.
config: DelfConfig proto containing the model configuration.
import_scope: Optional scope to use for model.
Returns:
Function that receives an image and returns features.
"""
tf.saved_model.loader.load(
sess, [tf.saved_model.tag_constants.SERVING],
config.model_path,
import_scope=import_scope)
import_scope_prefix = import_scope + '/' if import_scope is not None else ''
input_image = sess.graph.get_tensor_by_name('%sinput_image:0' %
import_scope_prefix)
input_score_threshold = sess.graph.get_tensor_by_name('%sinput_abs_thres:0' %
import_scope_prefix)
input_image_scales = sess.graph.get_tensor_by_name('%sinput_scales:0' %
import_scope_prefix)
input_max_feature_num = sess.graph.get_tensor_by_name(
'%sinput_max_feature_num:0' % import_scope_prefix)
boxes = sess.graph.get_tensor_by_name('%sboxes:0' % import_scope_prefix)
raw_descriptors = sess.graph.get_tensor_by_name('%sfeatures:0' %
import_scope_prefix)
feature_scales = sess.graph.get_tensor_by_name('%sscales:0' %
import_scope_prefix)
attention_with_extra_dim = sess.graph.get_tensor_by_name('%sscores:0' %
import_scope_prefix)
attention = tf.reshape(attention_with_extra_dim,
[tf.shape(attention_with_extra_dim)[0]])
locations, descriptors = feature_extractor.DelfFeaturePostProcessing(
boxes, raw_descriptors, config)
def ExtractorFn(image):
"""Receives an image and returns DELF features.
If image is too small, returns empty set of features.
Args:
image: Uint8 array with shape (height, width, 3) containing the RGB image.
Returns:
Tuple (locations, descriptors, feature_scales, attention)
"""
resized_image, scale_factor = ResizeImage(image, config)
# If the image is too small, returns empty features.
    if (resized_image.shape[0] < _MIN_HEIGHT or
        resized_image.shape[1] < _MIN_WIDTH):
return np.array([]), np.array([]), np.array([]), np.array([])
(locations_out, descriptors_out, feature_scales_out,
attention_out) = sess.run(
[locations, descriptors, feature_scales, attention],
feed_dict={
input_image: resized_image,
input_score_threshold: config.delf_local_config.score_threshold,
input_image_scales: list(config.image_scales),
input_max_feature_num: config.delf_local_config.max_feature_num
})
rescaled_locations_out = locations_out / scale_factor
return (rescaled_locations_out, descriptors_out, feature_scales_out,
attention_out)
return ExtractorFn
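A minimal usage sketch mirroring how the scripts above consume this module (the config path is a hypothetical placeholder; model_path inside the config must point to a DELF SavedModel):

import numpy as np
import tensorflow as tf
from google.protobuf import text_format
from delf import delf_config_pb2
from delf import extractor

config = delf_config_pb2.DelfConfig()
with tf.gfile.GFile('/tmp/delf_config.pbtxt', 'r') as f:
  text_format.Merge(f.read(), config)

with tf.Graph().as_default():
  with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    extractor_fn = extractor.MakeExtractor(sess, config)
    image = np.zeros((480, 640, 3), dtype='uint8')  # Stand-in RGB image.
    locations, descriptors, feature_scales, attention = extractor_fn(image)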
# Copyright 2019 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for DELF feature extractor."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from absl.testing import parameterized
import numpy as np
import tensorflow as tf
from delf import delf_config_pb2
from delf import extractor
class ExtractorTest(tf.test.TestCase, parameterized.TestCase):
@parameterized.named_parameters(
('Max-1Min-1', -1, -1, [4, 2, 3], 1.0),
('Max2Min-1', 2, -1, [2, 1, 3], 0.5),
('Max8Min-1', 8, -1, [4, 2, 3], 1.0),
('Max-1Min1', -1, 1, [4, 2, 3], 1.0),
('Max-1Min8', -1, 8, [8, 4, 3], 2.0),
('Max16Min8', 16, 8, [8, 4, 3], 2.0),
('Max2Min2', 2, 2, [2, 1, 3], 0.5),
)
def testResizeImageWorks(self, max_image_size, min_image_size, expected_shape,
expected_scale_factor):
# Construct image of size 4x2x3.
image = np.array([[[0, 0, 0], [1, 1, 1]], [[2, 2, 2], [3, 3, 3]],
[[4, 4, 4], [5, 5, 5]], [[6, 6, 6], [7, 7, 7]]],
dtype='uint8')
# Set up config.
config = delf_config_pb2.DelfConfig(
max_image_size=max_image_size, min_image_size=min_image_size)
resized_image, scale_factor = extractor.ResizeImage(image, config)
self.assertAllEqual(resized_image.shape, expected_shape)
self.assertAllClose(scale_factor, expected_scale_factor)
if __name__ == '__main__':
tf.test.main()
@@ -30,10 +30,10 @@ import sys
 import matplotlib.image as mpimg
 import matplotlib.pyplot as plt
 import numpy as np
-from scipy.spatial import cKDTree
-from skimage.feature import plot_matches
-from skimage.measure import ransac
-from skimage.transform import AffineTransform
+from scipy import spatial
+from skimage import feature
+from skimage import measure
+from skimage import transform
 import tensorflow as tf
 from tensorflow.python.platform import app
@@ -58,7 +58,7 @@ def main(unused_argv):
   tf.logging.info("Loaded image 2's %d features" % num_features_2)
 
   # Find nearest-neighbor matches using a KD tree.
-  d1_tree = cKDTree(descriptors_1)
+  d1_tree = spatial.cKDTree(descriptors_1)
   _, indices = d1_tree.query(
       descriptors_2, distance_upper_bound=_DISTANCE_THRESHOLD)
@@ -75,12 +75,11 @@ def main(unused_argv):
   ])
 
   # Perform geometric verification using RANSAC.
-  _, inliers = ransac(
-      (locations_1_to_use, locations_2_to_use),
-      AffineTransform,
-      min_samples=3,
-      residual_threshold=20,
-      max_trials=1000)
+  _, inliers = measure.ransac((locations_1_to_use, locations_2_to_use),
+                              transform.AffineTransform,
+                              min_samples=3,
+                              residual_threshold=20,
+                              max_trials=1000)
 
   tf.logging.info('Found %d inliers' % sum(inliers))
@@ -89,7 +88,7 @@ def main(unused_argv):
   img_1 = mpimg.imread(cmd_args.image_1_path)
   img_2 = mpimg.imread(cmd_args.image_2_path)
   inlier_idxs = np.nonzero(inliers)[0]
-  plot_matches(
+  feature.plot_matches(
       ax,
       img_1,
       img_2,
...
# Copyright 2019 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Computes metrics for Google Landmarks Recognition dataset predictions.
Metrics are written to stdout.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse
import sys
from tensorflow.python.platform import app
from delf.python.google_landmarks_dataset import dataset_file_io
from delf.python.google_landmarks_dataset import metrics
cmd_args = None
def main(argv):
if len(argv) > 1:
raise RuntimeError('Too many command-line arguments.')
# Read solution.
print('Reading solution...')
public_solution, private_solution, ignored_ids = dataset_file_io.ReadSolution(
cmd_args.solution_path, dataset_file_io.RECOGNITION_TASK_ID)
print('done!')
# Read predictions.
print('Reading predictions...')
public_predictions, private_predictions = dataset_file_io.ReadPredictions(
cmd_args.predictions_path, set(public_solution.keys()),
set(private_solution.keys()), set(ignored_ids),
dataset_file_io.RECOGNITION_TASK_ID)
print('done!')
# Global Average Precision.
print('**********************************************')
print('(Public) Global Average Precision: %f' %
metrics.GlobalAveragePrecision(public_predictions, public_solution))
print('(Private) Global Average Precision: %f' %
metrics.GlobalAveragePrecision(private_predictions, private_solution))
# Global Average Precision ignoring non-landmark queries.
print('**********************************************')
print(
'(Public) Global Average Precision ignoring non-landmark queries: %f' %
metrics.GlobalAveragePrecision(
public_predictions, public_solution, ignore_non_gt_test_images=True))
print(
'(Private) Global Average Precision ignoring non-landmark queries: %f' %
metrics.GlobalAveragePrecision(
private_predictions, private_solution,
ignore_non_gt_test_images=True))
# Top-1 accuracy.
print('**********************************************')
print('(Public) Top-1 accuracy: %.2f' %
(100.0 * metrics.Top1Accuracy(public_predictions, public_solution)))
print('(Private) Top-1 accuracy: %.2f' %
(100.0 * metrics.Top1Accuracy(private_predictions, private_solution)))
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.register('type', 'bool', lambda v: v.lower() == 'true')
parser.add_argument(
'--predictions_path',
type=str,
default='/tmp/predictions.csv',
help="""
Path to CSV predictions file, formatted with columns 'id,landmarks' (the
file should include a header).
""")
parser.add_argument(
'--solution_path',
type=str,
default='/tmp/solution.csv',
help="""
Path to CSV solution file, formatted with columns 'id,landmarks,Usage'
(the file should include a header).
""")
cmd_args, unparsed = parser.parse_known_args()
app.run(main=main, argv=[sys.argv[0]] + unparsed)
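For context, the Global Average Precision printed above is the challenge's micro-AP. A reference sketch of its definition follows; this is an illustrative re-derivation with simplified prediction/solution shapes, not the implementation in metrics.py:

def GlobalAveragePrecisionSketch(predictions, solution):
  """Illustrative GAP. predictions maps test id -> (landmark, score);
  solution maps test id -> list of ground-truth landmarks ([] if none)."""
  # M = number of test images that actually depict a landmark.
  num_gt = sum(1 for landmarks in solution.values() if landmarks)
  # Rank all predictions, across all test images, by descending confidence.
  ranked = sorted(predictions.items(), key=lambda kv: -kv[1][1])
  num_correct = 0
  total = 0.0
  for rank, (test_id, (landmark, _)) in enumerate(ranked, start=1):
    if landmark in solution.get(test_id, []):
      num_correct += 1
      total += num_correct / float(rank)  # Precision at this rank.
  return total / num_gt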
# Copyright 2019 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Computes metrics for Google Landmarks Retrieval dataset predictions.
Metrics are written to stdout.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse
import sys
from tensorflow.python.platform import app
from delf.python.google_landmarks_dataset import dataset_file_io
from delf.python.google_landmarks_dataset import metrics
cmd_args = None
def main(argv):
if len(argv) > 1:
raise RuntimeError('Too many command-line arguments.')
# Read solution.
print('Reading solution...')
public_solution, private_solution, ignored_ids = dataset_file_io.ReadSolution(
cmd_args.solution_path, dataset_file_io.RETRIEVAL_TASK_ID)
print('done!')
# Read predictions.
print('Reading predictions...')
public_predictions, private_predictions = dataset_file_io.ReadPredictions(
cmd_args.predictions_path, set(public_solution.keys()),
set(private_solution.keys()), set(ignored_ids),
dataset_file_io.RETRIEVAL_TASK_ID)
print('done!')
# Mean average precision.
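  # (Mean, over queries, of the average precision of each query's ranked list
  # of retrieved index images; the exact definition is in the metrics module.)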
print('**********************************************')
print('(Public) Mean Average Precision: %f' %
metrics.MeanAveragePrecision(public_predictions, public_solution))
print('(Private) Mean Average Precision: %f' %
metrics.MeanAveragePrecision(private_predictions, private_solution))
# Mean precision@k.
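  # (MeanPrecisions is assumed to return mean precision@k for k = 1..100,
  # which is why indices 0, 4, 9, 49 and 99 are printed below.)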
print('**********************************************')
public_precisions = 100.0 * metrics.MeanPrecisions(public_predictions,
public_solution)
private_precisions = 100.0 * metrics.MeanPrecisions(private_predictions,
private_solution)
print('(Public) Mean precisions: P@1: %.2f, P@5: %.2f, P@10: %.2f, '
'P@50: %.2f, P@100: %.2f' %
(public_precisions[0], public_precisions[4], public_precisions[9],
public_precisions[49], public_precisions[99]))
print('(Private) Mean precisions: P@1: %.2f, P@5: %.2f, P@10: %.2f, '
'P@50: %.2f, P@100: %.2f' %
(private_precisions[0], private_precisions[4], private_precisions[9],
private_precisions[49], private_precisions[99]))
# Mean/median position of first correct.
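  # (Mean and median, over queries, of the rank of the first correct index
  # image in the predicted list.)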
print('**********************************************')
public_mean_position, public_median_position = metrics.MeanMedianPosition(
public_predictions, public_solution)
private_mean_position, private_median_position = metrics.MeanMedianPosition(
private_predictions, private_solution)
print('(Public) Mean position: %.2f, median position: %.2f' %
(public_mean_position, public_median_position))
print('(Private) Mean position: %.2f, median position: %.2f' %
(private_mean_position, private_median_position))


if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.register('type', 'bool', lambda v: v.lower() == 'true')
parser.add_argument(
'--predictions_path',
type=str,
default='/tmp/predictions.csv',
help="""
Path to CSV predictions file, formatted with columns 'id,images' (the
file should include a header).
""")
parser.add_argument(
'--solution_path',
type=str,
default='/tmp/solution.csv',
help="""
Path to CSV solution file, formatted with columns 'id,images,Usage'
(the file should include a header).
""")
cmd_args, unparsed = parser.parse_known_args()
app.run(main=main, argv=[sys.argv[0]] + unparsed)
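# A minimal invocation sketch (hypothetical paths):
#
#   python compute_retrieval_metrics.py \
#     --solution_path=/path/to/retrieval_solution.csv \
#     --predictions_path=/path/to/predictions.csv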

dataset_file_io.py

# Copyright 2019 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""IO module for files from Landmark recognition/retrieval challenges."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import csv
import tensorflow as tf
RECOGNITION_TASK_ID = 'recognition'
RETRIEVAL_TASK_ID = 'retrieval'
def ReadSolution(file_path, task):
"""Reads solution from file, for a given task.
Args:
file_path: Path to CSV file with solution. File contains a header.
task: Type of challenge task. Supported values: 'recognition', 'retrieval'.
Returns:
public_solution: Dict mapping test image ID to list of ground-truth IDs, for
the Public subset of test images. If `task` == 'recognition', the IDs are
integers corresponding to landmark IDs. If `task` == 'retrieval', the IDs
are strings corresponding to index image IDs.
private_solution: Same as `public_solution`, but for the private subset of
test images.
ignored_ids: List of test images that are ignored in scoring.
Raises:
ValueError: If Usage field is not Public, Private or Ignored; or if `task`
is not supported.
"""
public_solution = {}
private_solution = {}
ignored_ids = []
with tf.gfile.GFile(file_path, 'r') as csv_file:
reader = csv.reader(csv_file)
next(reader, None) # Skip header.
for row in reader:
test_id = row[0]
if row[2] == 'Ignored':
ignored_ids.append(test_id)
else:
ground_truth_ids = []
if task == RECOGNITION_TASK_ID:
if row[1]:
for landmark_id in row[1].split(' '):
ground_truth_ids.append(int(landmark_id))
elif task == RETRIEVAL_TASK_ID:
for image_id in row[1].split(' '):
ground_truth_ids.append(image_id)
else:
raise ValueError('Unrecognized task: %s' % task)
if row[2] == 'Public':
public_solution[test_id] = ground_truth_ids
elif row[2] == 'Private':
private_solution[test_id] = ground_truth_ids
else:
raise ValueError('Test image %s has unrecognized Usage tag %s' %
(row[0], row[2]))
  return public_solution, private_solution, ignored_ids


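# Example for ReadSolution with the recognition task (mirroring the unit tests
# further below): a solution CSV containing
#   id,landmarks,Usage
#   0123456789abcdef,0 12,Public
#   0323456789abcdef,100,Ignored
# yields public_solution == {'0123456789abcdef': [0, 12]}, an empty
# private_solution, and ignored_ids == ['0323456789abcdef'].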
def ReadPredictions(file_path, public_ids, private_ids, ignored_ids, task):
"""Reads predictions from file, for a given task.
Args:
file_path: Path to CSV file with predictions. File contains a header.
public_ids: Set (or list) of test image IDs in Public subset of test images.
private_ids: Same as `public_ids`, but for the private subset of test
images.
ignored_ids: Set (or list) of test image IDs that are ignored in scoring and
are associated to no ground-truth.
task: Type of challenge task. Supported values: 'recognition', 'retrieval'.
Returns:
public_predictions: Dict mapping test image ID to prediction, for the Public
subset of test images. If `task` == 'recognition', the prediction is a
dict with keys 'class' (integer) and 'score' (float). If `task` ==
'retrieval', the prediction is a list of strings corresponding to index
image IDs.
private_predictions: Same as `public_predictions`, but for the private
subset of test images.
Raises:
ValueError:
- If test image ID is unrecognized/repeated;
- If `task` is not supported;
- If prediction is malformed.
"""
public_predictions = {}
private_predictions = {}
with tf.gfile.GFile(file_path, 'r') as csv_file:
reader = csv.reader(csv_file)
next(reader, None) # Skip header.
for row in reader:
# Skip row if empty.
if not row:
continue
test_id = row[0]
      # Make sure this query has not been seen yet.
      if test_id in public_predictions:
        raise ValueError('Test image %s is repeated.' % test_id)
      if test_id in private_predictions:
        raise ValueError('Test image %s is repeated.' % test_id)
# If ignored, skip it.
if test_id in ignored_ids:
continue
# Only parse result if there is a prediction.
if row[1]:
prediction_split = row[1].split(' ')
        # Remove trailing empty element (if any), caused by a trailing space.
if not prediction_split[-1]:
prediction_split = prediction_split[:-1]
if task == RECOGNITION_TASK_ID:
if len(prediction_split) != 2:
raise ValueError('Prediction is malformed: there should only be 2 '
'elements in second column, but found %d for test '
'image %s' % (len(prediction_split), test_id))
landmark_id = int(prediction_split[0])
score = float(prediction_split[1])
prediction_entry = {'class': landmark_id, 'score': score}
elif task == RETRIEVAL_TASK_ID:
prediction_entry = prediction_split
else:
raise ValueError('Unrecognized task: %s' % task)
if test_id in public_ids:
public_predictions[test_id] = prediction_entry
elif test_id in private_ids:
private_predictions[test_id] = prediction_entry
else:
raise ValueError('test_id %s is unrecognized' % test_id)
return public_predictions, private_predictions
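# Example for ReadPredictions with the recognition task: a predictions CSV row
#   0123456789abcdef,12 0.1
# for a Public test image yields
#   public_predictions == {'0123456789abcdef': {'class': 12, 'score': 0.1}}.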

dataset_file_io_test.py

# Copyright 2019 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for dataset file IO module."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import tensorflow as tf
from delf.python.google_landmarks_dataset import dataset_file_io
class DatasetFileIoTest(tf.test.TestCase):
def testReadRecognitionSolutionWorks(self):
# Define inputs.
file_path = os.path.join(tf.test.get_temp_dir(), 'recognition_solution.csv')
with tf.gfile.GFile(file_path, 'w') as f:
f.write('id,landmarks,Usage\n')
f.write('0123456789abcdef,0 12,Public\n')
f.write('0223456789abcdef,,Public\n')
f.write('0323456789abcdef,100,Ignored\n')
f.write('0423456789abcdef,1,Private\n')
f.write('0523456789abcdef,,Ignored\n')
# Run tested function.
(public_solution, private_solution,
ignored_ids) = dataset_file_io.ReadSolution(
file_path, dataset_file_io.RECOGNITION_TASK_ID)
# Define expected results.
expected_public_solution = {
'0123456789abcdef': [0, 12],
'0223456789abcdef': []
}
expected_private_solution = {
'0423456789abcdef': [1],
}
expected_ignored_ids = ['0323456789abcdef', '0523456789abcdef']
# Compare actual and expected results.
self.assertEqual(public_solution, expected_public_solution)
self.assertEqual(private_solution, expected_private_solution)
self.assertEqual(ignored_ids, expected_ignored_ids)

  def testReadRetrievalSolutionWorks(self):
# Define inputs.
file_path = os.path.join(tf.test.get_temp_dir(), 'retrieval_solution.csv')
with tf.gfile.GFile(file_path, 'w') as f:
f.write('id,images,Usage\n')
f.write('0123456789abcdef,None,Ignored\n')
f.write('0223456789abcdef,fedcba9876543210 fedcba9876543200,Public\n')
f.write('0323456789abcdef,fedcba9876543200,Private\n')
f.write('0423456789abcdef,fedcba9876543220,Private\n')
f.write('0523456789abcdef,None,Ignored\n')
# Run tested function.
(public_solution, private_solution,
ignored_ids) = dataset_file_io.ReadSolution(
file_path, dataset_file_io.RETRIEVAL_TASK_ID)
# Define expected results.
expected_public_solution = {
'0223456789abcdef': ['fedcba9876543210', 'fedcba9876543200'],
}
expected_private_solution = {
'0323456789abcdef': ['fedcba9876543200'],
'0423456789abcdef': ['fedcba9876543220'],
}
expected_ignored_ids = ['0123456789abcdef', '0523456789abcdef']
# Compare actual and expected results.
self.assertEqual(public_solution, expected_public_solution)
self.assertEqual(private_solution, expected_private_solution)
self.assertEqual(ignored_ids, expected_ignored_ids)

  def testReadRecognitionPredictionsWorks(self):
# Define inputs.
file_path = os.path.join(tf.test.get_temp_dir(),
'recognition_predictions.csv')
with tf.gfile.GFile(file_path, 'w') as f:
f.write('id,landmarks\n')
f.write('0123456789abcdef,12 0.1 \n')
f.write('0423456789abcdef,0 19.0\n')
f.write('0223456789abcdef,\n')
f.write('\n')
f.write('0523456789abcdef,14 0.01\n')
public_ids = ['0123456789abcdef', '0223456789abcdef']
private_ids = ['0423456789abcdef']
ignored_ids = ['0323456789abcdef', '0523456789abcdef']
# Run tested function.
public_predictions, private_predictions = dataset_file_io.ReadPredictions(
file_path, public_ids, private_ids, ignored_ids,
dataset_file_io.RECOGNITION_TASK_ID)
# Define expected results.
expected_public_predictions = {
'0123456789abcdef': {
'class': 12,
'score': 0.1
}
}
expected_private_predictions = {
'0423456789abcdef': {
'class': 0,
'score': 19.0
}
}
# Compare actual and expected results.
self.assertEqual(public_predictions, expected_public_predictions)
self.assertEqual(private_predictions, expected_private_predictions)

  def testReadRetrievalPredictionsWorks(self):
# Define inputs.
file_path = os.path.join(tf.test.get_temp_dir(),
'retrieval_predictions.csv')
with tf.gfile.GFile(file_path, 'w') as f:
f.write('id,images\n')
f.write('0123456789abcdef,fedcba9876543250 \n')
f.write('0423456789abcdef,fedcba9876543260\n')
f.write('0223456789abcdef,fedcba9876543210 fedcba9876543200 '
'fedcba9876543220\n')
f.write('\n')
f.write('0523456789abcdef,\n')
public_ids = ['0223456789abcdef']
private_ids = ['0323456789abcdef', '0423456789abcdef']
ignored_ids = ['0123456789abcdef', '0523456789abcdef']
# Run tested function.
public_predictions, private_predictions = dataset_file_io.ReadPredictions(
file_path, public_ids, private_ids, ignored_ids,
dataset_file_io.RETRIEVAL_TASK_ID)
# Define expected results.
expected_public_predictions = {
'0223456789abcdef': [
'fedcba9876543210', 'fedcba9876543200', 'fedcba9876543220'
]
}
expected_private_predictions = {'0423456789abcdef': ['fedcba9876543260']}
# Compare actual and expected results.
self.assertEqual(public_predictions, expected_public_predictions)
self.assertEqual(private_predictions, expected_private_predictions)


if __name__ == '__main__':
tf.test.main()